From 6ede6ed81df01f12cc98058d04edc291bd842863 Mon Sep 17 00:00:00 2001 From: "Wayne E. Seguin" Date: Wed, 6 Jan 2010 22:43:27 -0500 Subject: [PATCH] Initial strip down of ruby 1.9.1 for miniruby. --- .gitignore | 404 ++ COPYING | 56 + COPYING.ja | 51 + ChangeLog | 20 + GPL | 340 + KNOWNBUGS.rb | 4 + LEGAL | 404 ++ LGPL | 504 ++ Makefile.in | 289 + NEWS | 420 ++ README | 114 + README.EXT | 1337 ++++ README.EXT.ja | 1458 ++++ README.ja | 160 + ToDo | 124 + array.c | 3893 ++++++++++ bignum.c | 2723 +++++++ blockinlining.c | 464 ++ class.c | 1006 +++ common.mk | 760 ++ compar.c | 213 + compile.c | 5441 ++++++++++++++ complex.c | 1584 +++++ config.guess | 1532 ++++ config.sub | 1658 +++++ configure.in | 2193 ++++++ cont.c | 1032 +++ debug.c | 159 + debug.h | 36 + defs/keywords | 51 + defs/known_errors.def | 122 + defs/lex.c.src | 51 + defs/opt_insn_unif.def | 29 + defs/opt_operand.def | 59 + dir.c | 1976 ++++++ dln.c | 1693 +++++ dln.h | 41 + dmydln.c | 2 + dmyencoding.c | 2 + dmyext.c | 4 + enc/Makefile.in | 63 + enc/ascii.c | 93 + enc/big5.c | 216 + enc/cp949.c | 219 + enc/depend | 142 + enc/emacs_mule.c | 340 + enc/encdb.c | 25 + enc/euc_jp.c | 388 + enc/euc_kr.c | 192 + enc/euc_tw.c | 234 + enc/gb18030.c | 596 ++ enc/gb2312.c | 13 + enc/gbk.c | 222 + enc/iso_2022_jp.h | 6 + enc/iso_8859_1.c | 285 + enc/iso_8859_10.c | 244 + enc/iso_8859_11.c | 111 + enc/iso_8859_13.c | 243 + enc/iso_8859_14.c | 246 + enc/iso_8859_15.c | 240 + enc/iso_8859_16.c | 242 + enc/iso_8859_2.c | 250 + enc/iso_8859_3.c | 240 + enc/iso_8859_4.c | 242 + enc/iso_8859_5.c | 230 + enc/iso_8859_6.c | 107 + enc/iso_8859_7.c | 237 + enc/iso_8859_8.c | 107 + enc/iso_8859_9.c | 243 + enc/koi8_r.c | 219 + enc/koi8_u.c | 221 + enc/make_encmake.rb | 50 + enc/prelude.rb | 6 + enc/shift_jis.c | 399 ++ enc/trans/newline.trans | 135 + enc/unicode.c | 11359 ++++++++++++++++++++++++++++++ enc/us_ascii.c | 31 + enc/utf_16be.c | 258 + enc/utf_16le.c | 249 + enc/utf_32be.c | 195 + enc/utf_32le.c | 194 + 
enc/utf_7.h | 5 + enc/utf_8.c | 455 ++ enc/windows_1251.c | 208 + encoding.c | 1441 ++++ enum.c | 1870 +++++ enumerator.c | 813 +++ error.c | 1218 ++++ eval.c | 1077 +++ eval_error.c | 274 + eval_intern.h | 215 + eval_jump.c | 151 + ext/Setup | 32 + ext/Setup.atheos | 33 + ext/Setup.emx | 33 + ext/Setup.nt | 33 + ext/continuation/continuation.c | 8 + ext/continuation/extconf.rb | 3 + ext/coverage/coverage.c | 101 + ext/coverage/extconf.rb | 2 + ext/curses/curses.c | 2064 ++++++ ext/curses/depend | 1 + ext/curses/extconf.rb | 31 + ext/curses/hello.rb | 30 + ext/curses/mouse.rb | 53 + ext/curses/rain.rb | 76 + ext/curses/view.rb | 91 + ext/curses/view2.rb | 115 + ext/dbm/dbm.c | 746 ++ ext/dbm/depend | 1 + ext/dbm/extconf.rb | 53 + ext/etc/depend | 1 + ext/etc/etc.c | 551 ++ ext/etc/etc.txt | 72 + ext/etc/etc.txt.ja | 72 + ext/etc/extconf.rb | 28 + ext/extmk.rb | 590 ++ ext/fcntl/depend | 1 + ext/fcntl/extconf.rb | 2 + ext/fcntl/fcntl.c | 187 + ext/fiber/extconf.rb | 3 + ext/fiber/fiber.c | 8 + ext/gdbm/README | 1 + ext/gdbm/depend | 1 + ext/gdbm/extconf.rb | 7 + ext/gdbm/gdbm.c | 1255 ++++ ext/iconv/charset_alias.rb | 103 + ext/iconv/depend | 2 + ext/iconv/extconf.rb | 53 + ext/iconv/iconv.c | 1203 ++++ ext/iconv/mkwrapper.rb | 53 + ext/io/wait/extconf.rb | 18 + ext/io/wait/lib/nonblock.rb | 23 + ext/io/wait/wait.c | 141 + ext/mathn/complex/complex.c | 7 + ext/mathn/complex/extconf.rb | 3 + ext/mathn/rational/extconf.rb | 3 + ext/mathn/rational/rational.c | 7 + ext/pty/README | 65 + ext/pty/README.expect | 22 + ext/pty/README.expect.ja | 21 + ext/pty/README.ja | 89 + ext/pty/depend | 1 + ext/pty/expect_sample.rb | 48 + ext/pty/extconf.rb | 16 + ext/pty/lib/expect.rb | 36 + ext/pty/pty.c | 521 ++ ext/pty/script.rb | 37 + ext/pty/shl.rb | 92 + ext/purelib.rb | 10 + ext/racc/cparse/README | 10 + ext/racc/cparse/cparse.c | 824 +++ ext/racc/cparse/depend | 1 + ext/racc/cparse/extconf.rb | 5 + ext/readline/README | 10 + ext/readline/README.ja | 422 ++ ext/readline/depend 
| 1 + ext/readline/extconf.rb | 73 + ext/readline/readline.c | 1320 ++++ ext/sdbm/_sdbm.c | 923 +++ ext/sdbm/depend | 2 + ext/sdbm/extconf.rb | 3 + ext/sdbm/init.c | 713 ++ ext/sdbm/sdbm.h | 84 + ext/stringio/README | 18 + ext/stringio/depend | 3 + ext/stringio/extconf.rb | 2 + ext/stringio/stringio.c | 1398 ++++ ext/strscan/depend | 2 + ext/strscan/extconf.rb | 2 + ext/strscan/strscan.c | 1329 ++++ ext/syslog/depend | 2 + ext/syslog/extconf.rb | 10 + ext/syslog/syslog.c | 393 ++ ext/syslog/syslog.txt | 124 + ext/syslog/test.rb | 164 + file.c | 4820 +++++++++++++ gc.c | 2925 ++++++++ gc.h | 75 + gem_prelude.rb | 364 + golf_prelude.rb | 114 + goruby.c | 24 + hash.c | 2709 +++++++ ia64.s | 42 + id.c | 50 + id.h | 163 + include/ruby.h | 34 + include/ruby/backward/rubyio.h | 6 + include/ruby/backward/rubysig.h | 48 + include/ruby/backward/st.h | 6 + include/ruby/backward/util.h | 6 + include/ruby/defines.h | 281 + include/ruby/encoding.h | 297 + include/ruby/intern.h | 800 +++ include/ruby/io.h | 175 + include/ruby/missing.h | 172 + include/ruby/oniguruma.h | 792 +++ include/ruby/re.h | 66 + include/ruby/regex.h | 42 + include/ruby/ruby.h | 1221 ++++ include/ruby/st.h | 117 + include/ruby/util.h | 82 + include/ruby/vm.h | 22 + include/ruby/win32.h | 570 ++ inits.c | 63 + insns.def | 2047 ++++++ install-sh | 0 instruby.rb | 416 ++ io.c | 8802 +++++++++++++++++++++++ iseq.c | 1372 ++++ iseq.h | 103 + lex.c.blt | 217 + lib/English.rb | 155 + lib/README | 93 + lib/abbrev.rb | 103 + lib/base64.rb | 91 + lib/benchmark.rb | 573 ++ lib/cgi.rb | 274 + lib/cmath.rb | 233 + lib/complex.rb | 24 + lib/csv.rb | 2320 ++++++ lib/date.rb | 1834 +++++ lib/date/format.rb | 1313 ++++ lib/debug.rb | 907 +++ lib/delegate.rb | 311 + lib/drb.rb | 2 + lib/e2mmap.rb | 172 + lib/erb.rb | 902 +++ lib/fileutils.rb | 1592 +++++ lib/find.rb | 81 + lib/forwardable.rb | 270 + lib/getoptlong.rb | 610 ++ lib/gserver.rb | 253 + lib/ipaddr.rb | 813 +++ lib/irb.rb | 354 + lib/logger.rb | 732 ++ 
lib/mathn.rb | 206 + lib/matrix.rb | 1381 ++++ lib/minitest/autorun.rb | 9 + lib/minitest/mock.rb | 37 + lib/minitest/spec.rb | 89 + lib/minitest/unit.rb | 497 ++ lib/mkmf.rb | 1958 +++++ lib/monitor.rb | 265 + lib/mutex_m.rb | 91 + lib/net/ftp.rb | 981 +++ lib/net/http.rb | 2399 +++++++ lib/net/https.rb | 136 + lib/net/imap.rb | 3500 +++++++++ lib/net/pop.rb | 1000 +++ lib/net/protocol.rb | 382 + lib/net/smtp.rb | 1014 +++ lib/net/telnet.rb | 759 ++ lib/observer.rb | 193 + lib/open-uri.rb | 832 +++ lib/open3.rb | 98 + lib/optparse.rb | 1810 +++++ lib/optparse/date.rb | 17 + lib/optparse/shellwords.rb | 6 + lib/optparse/time.rb | 10 + lib/optparse/uri.rb | 6 + lib/optparse/version.rb | 70 + lib/ostruct.rb | 145 + lib/pathname.rb | 1099 +++ lib/pp.rb | 532 ++ lib/prettyprint.rb | 896 +++ lib/prime.rb | 471 ++ lib/profile.rb | 10 + lib/profiler.rb | 59 + lib/pstore.rb | 543 ++ lib/racc/parser.rb | 441 ++ lib/rake.rb | 2465 +++++++ lib/rake/classic_namespace.rb | 8 + lib/rake/clean.rb | 33 + lib/rake/gempackagetask.rb | 97 + lib/rake/loaders/makefile.rb | 35 + lib/rake/packagetask.rb | 185 + lib/rake/rake_test_loader.rb | 5 + lib/rake/rdoctask.rb | 147 + lib/rake/runtest.rb | 23 + lib/rake/tasklib.rb | 23 + lib/rake/testtask.rb | 161 + lib/rake/win32.rb | 34 + lib/rational.rb | 19 + lib/rbconfig/datadir.rb | 24 + lib/rdoc.rb | 395 ++ lib/resolv-replace.rb | 63 + lib/resolv.rb | 2262 ++++++ lib/rinda/rinda.rb | 283 + lib/rinda/ring.rb | 271 + lib/rinda/tuplespace.rb | 642 ++ lib/rss.rb | 19 + lib/rubygems.rb | 889 +++ lib/scanf.rb | 703 ++ lib/securerandom.rb | 182 + lib/set.rb | 1274 ++++ lib/shell.rb | 300 + lib/shell/builtin-command.rb | 160 + lib/shell/command-processor.rb | 593 ++ lib/shell/error.rb | 25 + lib/shell/filter.rb | 109 + lib/shell/process-controller.rb | 319 + lib/shell/system-command.rb | 159 + lib/shell/version.rb | 15 + lib/shellwords.rb | 156 + lib/singleton.rb | 313 + lib/sync.rb | 307 + lib/tempfile.rb | 218 + lib/test/unit.rb | 66 + 
lib/test/unit/assertions.rb | 122 + lib/test/unit/testcase.rb | 12 + lib/thread.rb | 367 + lib/thwait.rb | 168 + lib/time.rb | 869 +++ lib/timeout.rb | 108 + lib/tmpdir.rb | 138 + lib/tracer.rb | 166 + lib/tsort.rb | 290 + lib/ubygems.rb | 10 + lib/un.rb | 304 + lib/uri.rb | 29 + lib/uri/common.rb | 727 ++ lib/uri/ftp.rb | 198 + lib/uri/generic.rb | 1128 +++ lib/uri/http.rb | 100 + lib/uri/https.rb | 20 + lib/uri/ldap.rb | 190 + lib/uri/ldaps.rb | 12 + lib/uri/mailto.rb | 266 + lib/weakref.rb | 80 + lib/webrick.rb | 29 + lib/yaml.rb | 440 ++ lib/yaml/baseemitter.rb | 242 + lib/yaml/basenode.rb | 216 + lib/yaml/constants.rb | 45 + lib/yaml/dbm.rb | 111 + lib/yaml/encoding.rb | 33 + lib/yaml/error.rb | 34 + lib/yaml/loader.rb | 14 + lib/yaml/rubytypes.rb | 446 ++ lib/yaml/store.rb | 43 + lib/yaml/stream.rb | 40 + lib/yaml/stringio.rb | 83 + lib/yaml/syck.rb | 19 + lib/yaml/tag.rb | 91 + lib/yaml/types.rb | 192 + lib/yaml/yamlnode.rb | 54 + lib/yaml/ypath.rb | 52 + load.c | 725 ++ main.c | 37 + marshal.c | 1830 +++++ math.c | 746 ++ missing/acosh.c | 93 + missing/alloca.c | 194 + missing/cbrt.c | 10 + missing/crypt.c | 992 +++ missing/dup2.c | 60 + missing/erf.c | 88 + missing/file.h | 22 + missing/fileblocks.c | 1 + missing/finite.c | 7 + missing/flock.c | 129 + missing/hypot.c | 16 + missing/isinf.c | 69 + missing/isnan.c | 15 + missing/langinfo.c | 146 + missing/lgamma_r.c | 68 + missing/memcmp.c | 17 + missing/memmove.c | 21 + missing/os2.c | 138 + missing/strchr.c | 28 + missing/strerror.c | 16 + missing/strlcat.c | 73 + missing/strlcpy.c | 69 + missing/strstr.c | 25 + missing/strtol.c | 26 + missing/tgamma.c | 92 + missing/vsnprintf.c | 1217 ++++ mkconfig.rb | 195 + node.h | 516 ++ numeric.c | 3243 +++++++++ object.c | 2662 +++++++ pack.c | 2180 ++++++ parse.y | 10491 +++++++++++++++++++++++++++ prelude.rb | 35 + proc.c | 1951 +++++ process.c | 5243 ++++++++++++++ random.c | 560 ++ range.c | 964 +++ rational.c | 1668 +++++ re.c | 3434 +++++++++ regcomp.c | 6311 
+++++++++++++++++ regenc.c | 931 +++ regenc.h | 207 + regerror.c | 387 + regexec.c | 3822 ++++++++++ regint.h | 842 +++ regparse.c | 5599 +++++++++++++++ regparse.h | 351 + regsyntax.c | 315 + ruby.c | 1858 +++++ rubytest.rb | 28 + runruby.rb | 79 + safe.c | 129 + signal.c | 1155 +++ sprintf.c | 1203 ++++ st.c | 975 +++ strftime.c | 1108 +++ string.c | 7275 +++++++++++++++++++ struct.c | 924 +++ template/encdb.h.tmpl | 79 + template/fake.rb.in | 15 + template/id.h.tmpl | 127 + template/insns.inc.tmpl | 20 + template/insns_info.inc.tmpl | 83 + template/known_errors.inc.tmpl | 14 + template/minsns.inc.tmpl | 14 + template/opt_sc.inc.tmpl | 32 + template/optinsn.inc.tmpl | 30 + template/optunifs.inc.tmpl | 35 + template/transdb.h.tmpl | 59 + template/vm.inc.tmpl | 29 + template/vmtc.inc.tmpl | 18 + template/yarvarch.en | 7 + template/yarvarch.ja | 454 ++ template/yasmdata.rb.tmpl | 20 + thread.c | 4015 +++++++++++ thread_pthread.c | 852 +++ thread_pthread.h | 24 + thread_win32.c | 573 ++ thread_win32.h | 33 + time.c | 2469 +++++++ tool/asm_parse.rb | 51 + tool/build-transcode | 16 + tool/compile_prelude.rb | 96 + tool/eval.rb | 161 + tool/file2lastrev.rb | 90 + tool/generic_erb.rb | 34 + tool/ifchange | 25 + tool/insns2vm.rb | 15 + tool/instruction.rb | 1385 ++++ tool/make-snapshot | 218 + tool/mdoc2man.rb | 465 ++ tool/node_name.rb | 4 + tool/parse.rb | 13 + tool/rmdirs | 11 + tool/transcode-tblgen.rb | 849 +++ tool/vtlh.rb | 15 + tool/ytab.sed | 30 + transcode.c | 4133 +++++++++++ transcode_data.h | 106 + util.c | 3830 ++++++++++ variable.c | 2027 ++++++ version.c | 52 + version.h | 55 + vm.c | 2029 ++++++ vm_core.h | 647 ++ vm_dump.c | 613 ++ vm_eval.c | 1481 ++++ vm_exec.c | 156 + vm_exec.h | 184 + vm_insnhelper.c | 1556 ++++ vm_insnhelper.h | 196 + vm_method.c | 1146 +++ vm_opts.h | 51 + 465 files changed, 265985 insertions(+) create mode 100644 .gitignore create mode 100644 COPYING create mode 100644 COPYING.ja create mode 100644 ChangeLog create mode 100644 GPL 
create mode 100644 KNOWNBUGS.rb create mode 100644 LEGAL create mode 100644 LGPL create mode 100644 Makefile.in create mode 100644 NEWS create mode 100644 README create mode 100644 README.EXT create mode 100644 README.EXT.ja create mode 100644 README.ja create mode 100644 ToDo create mode 100644 array.c create mode 100644 bignum.c create mode 100644 blockinlining.c create mode 100644 class.c create mode 100644 common.mk create mode 100644 compar.c create mode 100644 compile.c create mode 100644 complex.c create mode 100644 config.guess create mode 100644 config.sub create mode 100644 configure.in create mode 100644 cont.c create mode 100644 debug.c create mode 100644 debug.h create mode 100644 defs/keywords create mode 100644 defs/known_errors.def create mode 100644 defs/lex.c.src create mode 100644 defs/opt_insn_unif.def create mode 100644 defs/opt_operand.def create mode 100644 dir.c create mode 100644 dln.c create mode 100644 dln.h create mode 100644 dmydln.c create mode 100644 dmyencoding.c create mode 100644 dmyext.c create mode 100644 enc/Makefile.in create mode 100644 enc/ascii.c create mode 100644 enc/big5.c create mode 100644 enc/cp949.c create mode 100644 enc/depend create mode 100644 enc/emacs_mule.c create mode 100644 enc/encdb.c create mode 100644 enc/euc_jp.c create mode 100644 enc/euc_kr.c create mode 100644 enc/euc_tw.c create mode 100644 enc/gb18030.c create mode 100644 enc/gb2312.c create mode 100644 enc/gbk.c create mode 100644 enc/iso_2022_jp.h create mode 100644 enc/iso_8859_1.c create mode 100644 enc/iso_8859_10.c create mode 100644 enc/iso_8859_11.c create mode 100644 enc/iso_8859_13.c create mode 100644 enc/iso_8859_14.c create mode 100644 enc/iso_8859_15.c create mode 100644 enc/iso_8859_16.c create mode 100644 enc/iso_8859_2.c create mode 100644 enc/iso_8859_3.c create mode 100644 enc/iso_8859_4.c create mode 100644 enc/iso_8859_5.c create mode 100644 enc/iso_8859_6.c create mode 100644 enc/iso_8859_7.c create mode 100644 enc/iso_8859_8.c 
create mode 100644 enc/iso_8859_9.c create mode 100644 enc/koi8_r.c create mode 100644 enc/koi8_u.c create mode 100644 enc/make_encmake.rb create mode 100644 enc/prelude.rb create mode 100644 enc/shift_jis.c create mode 100644 enc/trans/newline.trans create mode 100644 enc/unicode.c create mode 100644 enc/us_ascii.c create mode 100644 enc/utf_16be.c create mode 100644 enc/utf_16le.c create mode 100644 enc/utf_32be.c create mode 100644 enc/utf_32le.c create mode 100644 enc/utf_7.h create mode 100644 enc/utf_8.c create mode 100644 enc/windows_1251.c create mode 100644 encoding.c create mode 100644 enum.c create mode 100644 enumerator.c create mode 100644 error.c create mode 100644 eval.c create mode 100644 eval_error.c create mode 100644 eval_intern.h create mode 100644 eval_jump.c create mode 100644 ext/Setup create mode 100644 ext/Setup.atheos create mode 100644 ext/Setup.emx create mode 100644 ext/Setup.nt create mode 100644 ext/continuation/continuation.c create mode 100644 ext/continuation/extconf.rb create mode 100644 ext/coverage/coverage.c create mode 100644 ext/coverage/extconf.rb create mode 100644 ext/curses/curses.c create mode 100644 ext/curses/depend create mode 100644 ext/curses/extconf.rb create mode 100644 ext/curses/hello.rb create mode 100644 ext/curses/mouse.rb create mode 100644 ext/curses/rain.rb create mode 100644 ext/curses/view.rb create mode 100644 ext/curses/view2.rb create mode 100644 ext/dbm/dbm.c create mode 100644 ext/dbm/depend create mode 100644 ext/dbm/extconf.rb create mode 100644 ext/etc/depend create mode 100644 ext/etc/etc.c create mode 100644 ext/etc/etc.txt create mode 100644 ext/etc/etc.txt.ja create mode 100644 ext/etc/extconf.rb create mode 100644 ext/extmk.rb create mode 100644 ext/fcntl/depend create mode 100644 ext/fcntl/extconf.rb create mode 100644 ext/fcntl/fcntl.c create mode 100644 ext/fiber/extconf.rb create mode 100644 ext/fiber/fiber.c create mode 100644 ext/gdbm/README create mode 100644 ext/gdbm/depend create 
mode 100644 ext/gdbm/extconf.rb create mode 100644 ext/gdbm/gdbm.c create mode 100644 ext/iconv/charset_alias.rb create mode 100644 ext/iconv/depend create mode 100644 ext/iconv/extconf.rb create mode 100644 ext/iconv/iconv.c create mode 100644 ext/iconv/mkwrapper.rb create mode 100644 ext/io/wait/extconf.rb create mode 100644 ext/io/wait/lib/nonblock.rb create mode 100644 ext/io/wait/wait.c create mode 100644 ext/mathn/complex/complex.c create mode 100644 ext/mathn/complex/extconf.rb create mode 100644 ext/mathn/rational/extconf.rb create mode 100644 ext/mathn/rational/rational.c create mode 100644 ext/pty/README create mode 100644 ext/pty/README.expect create mode 100644 ext/pty/README.expect.ja create mode 100644 ext/pty/README.ja create mode 100644 ext/pty/depend create mode 100644 ext/pty/expect_sample.rb create mode 100644 ext/pty/extconf.rb create mode 100644 ext/pty/lib/expect.rb create mode 100644 ext/pty/pty.c create mode 100644 ext/pty/script.rb create mode 100644 ext/pty/shl.rb create mode 100644 ext/purelib.rb create mode 100644 ext/racc/cparse/README create mode 100644 ext/racc/cparse/cparse.c create mode 100644 ext/racc/cparse/depend create mode 100644 ext/racc/cparse/extconf.rb create mode 100644 ext/readline/README create mode 100644 ext/readline/README.ja create mode 100644 ext/readline/depend create mode 100644 ext/readline/extconf.rb create mode 100644 ext/readline/readline.c create mode 100644 ext/sdbm/_sdbm.c create mode 100644 ext/sdbm/depend create mode 100644 ext/sdbm/extconf.rb create mode 100644 ext/sdbm/init.c create mode 100644 ext/sdbm/sdbm.h create mode 100644 ext/stringio/README create mode 100644 ext/stringio/depend create mode 100644 ext/stringio/extconf.rb create mode 100644 ext/stringio/stringio.c create mode 100644 ext/strscan/depend create mode 100644 ext/strscan/extconf.rb create mode 100644 ext/strscan/strscan.c create mode 100644 ext/syslog/depend create mode 100644 ext/syslog/extconf.rb create mode 100644 
ext/syslog/syslog.c create mode 100644 ext/syslog/syslog.txt create mode 100644 ext/syslog/test.rb create mode 100644 file.c create mode 100644 gc.c create mode 100644 gc.h create mode 100644 gem_prelude.rb create mode 100644 golf_prelude.rb create mode 100644 goruby.c create mode 100644 hash.c create mode 100644 ia64.s create mode 100644 id.c create mode 100644 id.h create mode 100644 include/ruby.h create mode 100644 include/ruby/backward/rubyio.h create mode 100644 include/ruby/backward/rubysig.h create mode 100644 include/ruby/backward/st.h create mode 100644 include/ruby/backward/util.h create mode 100644 include/ruby/defines.h create mode 100644 include/ruby/encoding.h create mode 100644 include/ruby/intern.h create mode 100644 include/ruby/io.h create mode 100644 include/ruby/missing.h create mode 100644 include/ruby/oniguruma.h create mode 100644 include/ruby/re.h create mode 100644 include/ruby/regex.h create mode 100644 include/ruby/ruby.h create mode 100644 include/ruby/st.h create mode 100644 include/ruby/util.h create mode 100644 include/ruby/vm.h create mode 100644 include/ruby/win32.h create mode 100644 inits.c create mode 100644 insns.def create mode 100644 install-sh create mode 100755 instruby.rb create mode 100644 io.c create mode 100644 iseq.c create mode 100644 iseq.h create mode 100644 lex.c.blt create mode 100644 lib/English.rb create mode 100644 lib/README create mode 100644 lib/abbrev.rb create mode 100644 lib/base64.rb create mode 100644 lib/benchmark.rb create mode 100644 lib/cgi.rb create mode 100644 lib/cmath.rb create mode 100644 lib/complex.rb create mode 100644 lib/csv.rb create mode 100644 lib/date.rb create mode 100644 lib/date/format.rb create mode 100644 lib/debug.rb create mode 100644 lib/delegate.rb create mode 100644 lib/drb.rb create mode 100644 lib/e2mmap.rb create mode 100644 lib/erb.rb create mode 100644 lib/fileutils.rb create mode 100644 lib/find.rb create mode 100644 lib/forwardable.rb create mode 100644 
lib/getoptlong.rb create mode 100644 lib/gserver.rb create mode 100644 lib/ipaddr.rb create mode 100644 lib/irb.rb create mode 100644 lib/logger.rb create mode 100644 lib/mathn.rb create mode 100644 lib/matrix.rb create mode 100644 lib/minitest/autorun.rb create mode 100644 lib/minitest/mock.rb create mode 100644 lib/minitest/spec.rb create mode 100644 lib/minitest/unit.rb create mode 100644 lib/mkmf.rb create mode 100644 lib/monitor.rb create mode 100644 lib/mutex_m.rb create mode 100644 lib/net/ftp.rb create mode 100644 lib/net/http.rb create mode 100644 lib/net/https.rb create mode 100644 lib/net/imap.rb create mode 100644 lib/net/pop.rb create mode 100644 lib/net/protocol.rb create mode 100644 lib/net/smtp.rb create mode 100644 lib/net/telnet.rb create mode 100644 lib/observer.rb create mode 100644 lib/open-uri.rb create mode 100644 lib/open3.rb create mode 100644 lib/optparse.rb create mode 100644 lib/optparse/date.rb create mode 100644 lib/optparse/shellwords.rb create mode 100644 lib/optparse/time.rb create mode 100644 lib/optparse/uri.rb create mode 100644 lib/optparse/version.rb create mode 100644 lib/ostruct.rb create mode 100644 lib/pathname.rb create mode 100644 lib/pp.rb create mode 100644 lib/prettyprint.rb create mode 100644 lib/prime.rb create mode 100644 lib/profile.rb create mode 100644 lib/profiler.rb create mode 100644 lib/pstore.rb create mode 100644 lib/racc/parser.rb create mode 100644 lib/rake.rb create mode 100644 lib/rake/classic_namespace.rb create mode 100644 lib/rake/clean.rb create mode 100644 lib/rake/gempackagetask.rb create mode 100644 lib/rake/loaders/makefile.rb create mode 100644 lib/rake/packagetask.rb create mode 100644 lib/rake/rake_test_loader.rb create mode 100644 lib/rake/rdoctask.rb create mode 100644 lib/rake/runtest.rb create mode 100644 lib/rake/tasklib.rb create mode 100644 lib/rake/testtask.rb create mode 100644 lib/rake/win32.rb create mode 100644 lib/rational.rb create mode 100644 lib/rbconfig/datadir.rb create mode 
100644 lib/rdoc.rb create mode 100644 lib/resolv-replace.rb create mode 100644 lib/resolv.rb create mode 100644 lib/rinda/rinda.rb create mode 100644 lib/rinda/ring.rb create mode 100644 lib/rinda/tuplespace.rb create mode 100644 lib/rss.rb create mode 100644 lib/rubygems.rb create mode 100644 lib/scanf.rb create mode 100644 lib/securerandom.rb create mode 100644 lib/set.rb create mode 100644 lib/shell.rb create mode 100644 lib/shell/builtin-command.rb create mode 100644 lib/shell/command-processor.rb create mode 100644 lib/shell/error.rb create mode 100644 lib/shell/filter.rb create mode 100644 lib/shell/process-controller.rb create mode 100644 lib/shell/system-command.rb create mode 100644 lib/shell/version.rb create mode 100644 lib/shellwords.rb create mode 100644 lib/singleton.rb create mode 100644 lib/sync.rb create mode 100644 lib/tempfile.rb create mode 100644 lib/test/unit.rb create mode 100644 lib/test/unit/assertions.rb create mode 100644 lib/test/unit/testcase.rb create mode 100644 lib/thread.rb create mode 100644 lib/thwait.rb create mode 100644 lib/time.rb create mode 100644 lib/timeout.rb create mode 100644 lib/tmpdir.rb create mode 100644 lib/tracer.rb create mode 100644 lib/tsort.rb create mode 100644 lib/ubygems.rb create mode 100644 lib/un.rb create mode 100644 lib/uri.rb create mode 100644 lib/uri/common.rb create mode 100644 lib/uri/ftp.rb create mode 100644 lib/uri/generic.rb create mode 100644 lib/uri/http.rb create mode 100644 lib/uri/https.rb create mode 100644 lib/uri/ldap.rb create mode 100644 lib/uri/ldaps.rb create mode 100644 lib/uri/mailto.rb create mode 100644 lib/weakref.rb create mode 100644 lib/webrick.rb create mode 100644 lib/yaml.rb create mode 100644 lib/yaml/baseemitter.rb create mode 100644 lib/yaml/basenode.rb create mode 100644 lib/yaml/constants.rb create mode 100644 lib/yaml/dbm.rb create mode 100644 lib/yaml/encoding.rb create mode 100644 lib/yaml/error.rb create mode 100644 lib/yaml/loader.rb create mode 100644 
lib/yaml/rubytypes.rb create mode 100644 lib/yaml/store.rb create mode 100644 lib/yaml/stream.rb create mode 100644 lib/yaml/stringio.rb create mode 100644 lib/yaml/syck.rb create mode 100644 lib/yaml/tag.rb create mode 100644 lib/yaml/types.rb create mode 100644 lib/yaml/yamlnode.rb create mode 100644 lib/yaml/ypath.rb create mode 100644 load.c create mode 100644 main.c create mode 100644 marshal.c create mode 100644 math.c create mode 100644 missing/acosh.c create mode 100644 missing/alloca.c create mode 100644 missing/cbrt.c create mode 100644 missing/crypt.c create mode 100644 missing/dup2.c create mode 100644 missing/erf.c create mode 100644 missing/file.h create mode 100644 missing/fileblocks.c create mode 100644 missing/finite.c create mode 100644 missing/flock.c create mode 100644 missing/hypot.c create mode 100644 missing/isinf.c create mode 100644 missing/isnan.c create mode 100644 missing/langinfo.c create mode 100644 missing/lgamma_r.c create mode 100644 missing/memcmp.c create mode 100644 missing/memmove.c create mode 100644 missing/os2.c create mode 100644 missing/strchr.c create mode 100644 missing/strerror.c create mode 100644 missing/strlcat.c create mode 100644 missing/strlcpy.c create mode 100644 missing/strstr.c create mode 100644 missing/strtol.c create mode 100644 missing/tgamma.c create mode 100644 missing/vsnprintf.c create mode 100755 mkconfig.rb create mode 100644 node.h create mode 100644 numeric.c create mode 100644 object.c create mode 100644 pack.c create mode 100644 parse.y create mode 100644 prelude.rb create mode 100644 proc.c create mode 100644 process.c create mode 100644 random.c create mode 100644 range.c create mode 100644 rational.c create mode 100644 re.c create mode 100644 regcomp.c create mode 100644 regenc.c create mode 100644 regenc.h create mode 100644 regerror.c create mode 100644 regexec.c create mode 100644 regint.h create mode 100644 regparse.c create mode 100644 regparse.h create mode 100644 regsyntax.c create mode 
100644 ruby.c create mode 100755 rubytest.rb create mode 100755 runruby.rb create mode 100644 safe.c create mode 100644 signal.c create mode 100644 sprintf.c create mode 100644 st.c create mode 100644 strftime.c create mode 100644 string.c create mode 100644 struct.c create mode 100644 template/encdb.h.tmpl create mode 100644 template/fake.rb.in create mode 100644 template/id.h.tmpl create mode 100644 template/insns.inc.tmpl create mode 100644 template/insns_info.inc.tmpl create mode 100644 template/known_errors.inc.tmpl create mode 100644 template/minsns.inc.tmpl create mode 100644 template/opt_sc.inc.tmpl create mode 100644 template/optinsn.inc.tmpl create mode 100644 template/optunifs.inc.tmpl create mode 100644 template/transdb.h.tmpl create mode 100644 template/vm.inc.tmpl create mode 100644 template/vmtc.inc.tmpl create mode 100644 template/yarvarch.en create mode 100644 template/yarvarch.ja create mode 100644 template/yasmdata.rb.tmpl create mode 100644 thread.c create mode 100644 thread_pthread.c create mode 100644 thread_pthread.h create mode 100644 thread_win32.c create mode 100644 thread_win32.h create mode 100644 time.c create mode 100644 tool/asm_parse.rb create mode 100755 tool/build-transcode create mode 100644 tool/compile_prelude.rb create mode 100644 tool/eval.rb create mode 100644 tool/file2lastrev.rb create mode 100644 tool/generic_erb.rb create mode 100755 tool/ifchange create mode 100755 tool/insns2vm.rb create mode 100644 tool/instruction.rb create mode 100755 tool/make-snapshot create mode 100755 tool/mdoc2man.rb create mode 100755 tool/node_name.rb create mode 100644 tool/parse.rb create mode 100755 tool/rmdirs create mode 100644 tool/transcode-tblgen.rb create mode 100644 tool/vtlh.rb create mode 100755 tool/ytab.sed create mode 100644 transcode.c create mode 100644 transcode_data.h create mode 100644 util.c create mode 100644 variable.c create mode 100644 version.c create mode 100644 version.h create mode 100644 vm.c create mode 100644 
vm_core.h create mode 100644 vm_dump.c create mode 100644 vm_eval.c create mode 100644 vm_exec.c create mode 100644 vm_exec.h create mode 100644 vm_insnhelper.c create mode 100644 vm_insnhelper.h create mode 100644 vm_method.c create mode 100644 vm_opts.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bb7ad5d --- /dev/null +++ b/.gitignore @@ -0,0 +1,404 @@ +# / +*.bak +*.dylib +*.inc +*.orig +*.rej +*.sav +*.swp +*~ +.*-* +.*.list +.*.time +.DS_Store +.ccmalloc +.ext +.pc +.ppack +.svn +/.git +/*_prelude.c +/COPYING.LIB +/ChangeLog-1.8.0 +/ChangeLog.pre-alpha +/ChangeLog.pre1_1 +/Doxyfile +/GNUmakefile +/Makefile +/README.atheos +/README.fat-patch +/README.v6 +/TAGS +/archive +/autom4te*.cache +/automake +/beos +/breakpoints.gdb +/config.cache +/config.h +/config.h.in +/config.log +/config.status +/config.status.lineno +/configure +/enc.mk +/enc/trans/*.c +/encdb.h +/ext/win32ole/.document +/largefile.h +/lex.c +/libruby*.* +/miniprelude.c +/miniruby +/newdate.rb +/newline.c +/newver.rb +/parse.c +/parse.h +/patches +/patches-master +/pitest.rb +/ppack +/prelude.c +/preview +/rbconfig.rb +/rename2.h +/repack +/revision.h +/riscos +/rubicon +/ruby +/ruby-man.rd.gz +/test.rb +/tmp +/transdb.h +/uncommon.mk +/web +/y.output +/y.tab.c +/yasmdata.rb + +# /benchmark/ +/benchmark/bmx_*.rb + +# /enc/trans/ +/enc/trans/*.c + +# /ext/ +/ext/extinit.c +/ext/*.log + +# /ext/bigdecimal/ +/ext/bigdecimal/*.def +/ext/bigdecimal/Makefile +/ext/bigdecimal/bigdecimal.a +/ext/bigdecimal/extconf.h +/ext/bigdecimal/mkmf.log +/ext/bigdecimal/conftest.dSYM + +# /ext/continuation/ +/ext/continuation/extconf.h +/ext/continuation/Makefile + +# /ext/coverage/ +/ext/coverage/extconf.h +/ext/coverage/Makefile + +# /ext/curses/ +/ext/curses/*.def +/ext/curses/Makefile +/ext/curses/curses.a +/ext/curses/extconf.h +/ext/curses/mkmf.log +/ext/curses/conftest.dSYM + +# /ext/dbm/ +/ext/dbm/Makefile +/ext/dbm/mkmf.log +/ext/dbm/*.def +/ext/dbm/extconf.h 
+/ext/dbm/conftest.dSYM + +# /ext/digest/ +/ext/digest/*.def +/ext/digest/Makefile +/ext/digest/digest.a +/ext/digest/extconf.h +/ext/digest/mkmf.log + +# /ext/digest/bubblebabble/ +/ext/digest/bubblebabble/Makefile +/ext/digest/bubblebabble/mkmf.log +/ext/digest/bubblebabble/*.def +/ext/digest/bubblebabble/extconf.h + +# /ext/digest/md5/ +/ext/digest/md5/*.def +/ext/digest/md5/Makefile +/ext/digest/md5/extconf.h +/ext/digest/md5/md5.a +/ext/digest/md5/mkmf.log +/ext/digest/md5/conftest.dSYM + +# /ext/digest/rmd160/ +/ext/digest/rmd160/*.def +/ext/digest/rmd160/Makefile +/ext/digest/rmd160/extconf.h +/ext/digest/rmd160/mkmf.log +/ext/digest/rmd160/rmd160.a +/ext/digest/rmd160/conftest.dSYM + +# /ext/digest/sha1/ +/ext/digest/sha1/*.def +/ext/digest/sha1/Makefile +/ext/digest/sha1/extconf.h +/ext/digest/sha1/mkmf.log +/ext/digest/sha1/sha1.a +/ext/digest/sha1/conftest.dSYM + +# /ext/digest/sha2/ +/ext/digest/sha2/*.def +/ext/digest/sha2/Makefile +/ext/digest/sha2/extconf.h +/ext/digest/sha2/mkmf.log +/ext/digest/sha2/sha2.a + +# /ext/dl/ +/ext/dl/*.def +/ext/dl/*.func +/ext/dl/*.o +/ext/dl/*~ +/ext/dl/Makefile +/ext/dl/dl.a +/ext/dl/extconf.h +/ext/dl/mkmf.log +/ext/dl/conftest.dSYM + +# /ext/dl/callback/ +/ext/dl/callback/*.def +/ext/dl/callback/*.func +/ext/dl/callback/*.o +/ext/dl/callback/*~ +/ext/dl/callback/Makefile +/ext/dl/callback/callback-*.c +/ext/dl/callback/callback.a +/ext/dl/callback/callback.c +/ext/dl/callback/extconf.h +/ext/dl/callback/mkmf.log +/ext/dl/callback/conftest.dSYM + +# /ext/dl/win32/ +/ext/dl/win32/Makefile + +# /ext/etc/ +/ext/etc/*.def +/ext/etc/Makefile +/ext/etc/etc.a +/ext/etc/extconf.h +/ext/etc/mkmf.log +/ext/etc/conftest.dSYM + +# /ext/fcntl/ +/ext/fcntl/*.def +/ext/fcntl/Makefile +/ext/fcntl/extconf.h +/ext/fcntl/fcntl.a +/ext/fcntl/mkmf.log + +# /ext/fiber/ +/ext/fiber/extconf.h +/ext/fiber/Makefile + +# /ext/gdbm/ +/ext/gdbm/Makefile +/ext/gdbm/mkmf.log +/ext/gdbm/*.def +/ext/gdbm/extconf.h +/ext/gdbm/conftest.dSYM + +# 
/ext/iconv/ +/ext/iconv/*.def +/ext/iconv/Makefile +/ext/iconv/config.charset +/ext/iconv/extconf.h +/ext/iconv/iconv.a +/ext/iconv/iconv.rb +/ext/iconv/mkmf.log +/ext/iconv/conftest.dSYM + +# /ext/io/wait/ +/ext/io/wait/Makefile +/ext/io/wait/extconf.h +/ext/io/wait/mkmf.log +/ext/io/wait/wait.a + +# /ext/json/ +/ext/json/Makefile +/ext/json/extconf.h + +# /ext/json/ext/generator/ +/ext/json/ext/generator/*.def +/ext/json/ext/generator/Makefile +/ext/json/ext/generator/extconf.h +/ext/json/ext/generator/mkmf.log +/ext/json/ext/generator/generator.a + +# /ext/json/ext/parser/ +/ext/json/ext/parser/*.def +/ext/json/ext/parser/Makefile +/ext/json/ext/parser/extconf.h +/ext/json/ext/parser/mkmf.log +/ext/json/ext/parser/parser.a + +# /ext/mathn/complex/ +/ext/mathn/complex/Makefile +/ext/mathn/complex/extconf.h + +# /ext/mathn/rational/ +/ext/mathn/rational/Makefile +/ext/mathn/rational/extconf.h + +# /ext/nkf/ +/ext/nkf/*.def +/ext/nkf/Makefile +/ext/nkf/extconf.h +/ext/nkf/mkmf.log +/ext/nkf/nkf.a + +# /ext/objspace/ +/ext/objspace/Makefile +/ext/objspace/extconf.h + +# /ext/openssl/ +/ext/openssl/GNUmakefile +/ext/openssl/Makefile +/ext/openssl/dep +/ext/openssl/extconf.h +/ext/openssl/mkmf.log +/ext/openssl/openssl.a +/ext/openssl/conftest.dSYM + +# /ext/pty/ +/ext/pty/*.def +/ext/pty/Makefile +/ext/pty/extconf.h +/ext/pty/mkmf.log +/ext/pty/pty.a +/ext/pty/conftest.dSYM + +# /ext/racc/cparse/ +/ext/racc/cparse/*.def +/ext/racc/cparse/Makefile +/ext/racc/cparse/cparse.a +/ext/racc/cparse/extconf.h +/ext/racc/cparse/mkmf.log +/ext/racc/cparse/conftest.dSYM + +# /ext/readline/ +/ext/readline/*.def +/ext/readline/Makefile +/ext/readline/extconf.h +/ext/readline/mkmf.log +/ext/readline/readline.a +/ext/readline/conftest.dSYM + +# /ext/ripper/ +/ext/ripper/Makefile +/ext/ripper/mkmf.log +/ext/ripper/eventids1.c +/ext/ripper/eventids2table.c +/ext/ripper/ripper.* +/ext/ripper/ids1 +/ext/ripper/ids2 +/ext/ripper/extconf.h +/ext/ripper/y.output + +# /ext/sdbm/ 
+/ext/sdbm/*.def +/ext/sdbm/Makefile +/ext/sdbm/extconf.h +/ext/sdbm/mkmf.log +/ext/sdbm/sdbm.a + +# /ext/socket/ +/ext/socket/*.def +/ext/socket/Makefile +/ext/socket/constants.h +/ext/socket/constdefs.h +/ext/socket/constdefs.c +/ext/socket/extconf.h +/ext/socket/mkmf.log +/ext/socket/socket.a +/ext/socket/conftest.dSYM + +# /ext/stringio/ +/ext/stringio/*.def +/ext/stringio/Makefile +/ext/stringio/extconf.h +/ext/stringio/mkmf.log +/ext/stringio/stringio.a + +# /ext/strscan/ +/ext/strscan/*.def +/ext/strscan/Makefile +/ext/strscan/extconf.h +/ext/strscan/mkmf.log +/ext/strscan/strscan.a + +# /ext/syck/ +/ext/syck/*.def +/ext/syck/Makefile +/ext/syck/extconf.h +/ext/syck/mkmf.log +/ext/syck/syck.a + +# /ext/syslog/ +/ext/syslog/*.def +/ext/syslog/Makefile +/ext/syslog/extconf.h +/ext/syslog/mkmf.log +/ext/syslog/syslog.a +/ext/syslog/conftest.dSYM + +# /ext/tk/ +/ext/tk/Makefile +/ext/tk/*.log +/ext/tk/*.def +/ext/tk/conftest.dSYM +/ext/tk/extconf.h +/ext/tk/config_list + +# /ext/tk/tkutil/ +/ext/tk/tkutil/Makefile +/ext/tk/tkutil/*.log +/ext/tk/tkutil/*.def +/ext/tk/tkutil/conftest.dSYM +/ext/tk/tkutil/extconf.h + +# /ext/win32ole/ +/ext/win32ole/Makefile +/ext/win32ole/mkmf.log +/ext/win32ole/*.log +/ext/win32ole/.document + +# /ext/zlib/ +/ext/zlib/*.def +/ext/zlib/Makefile +/ext/zlib/extconf.h +/ext/zlib/mkmf.log +/ext/zlib/zlib.a +/ext/zlib/conftest.dSYM + +# /lib/rexml/ + +# /spec/ +/spec/mspec +/spec/rubyspec + +# /win32/ +/win32/*.ico +.cvsignore +.document + +*.log +Makefile diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..9043404 --- /dev/null +++ b/COPYING @@ -0,0 +1,56 @@ +Ruby is copyrighted free software by Yukihiro Matsumoto . +You can redistribute it and/or modify it under either the terms of the GPL +version 2 (see the file GPL), or the conditions below: + + 1. 
You may make and give away verbatim copies of the source form of the + software without restriction, provided that you duplicate all of the + original copyright notices and associated disclaimers. + + 2. You may modify your copy of the software in any way, provided that + you do at least ONE of the following: + + a) place your modifications in the Public Domain or otherwise + make them Freely Available, such as by posting said + modifications to Usenet or an equivalent medium, or by allowing + the author to include your modifications in the software. + + b) use the modified software only within your corporation or + organization. + + c) give non-standard binaries non-standard names, with + instructions on where to get the original software distribution. + + d) make other distribution arrangements with the author. + + 3. You may distribute the software in object code or binary form, + provided that you do at least ONE of the following: + + a) distribute the binaries and library files of the software, + together with instructions (in the manual page or equivalent) + on where to get the original distribution. + + b) accompany the distribution with the machine-readable source of + the software. + + c) give non-standard binaries non-standard names, with + instructions on where to get the original software distribution. + + d) make other distribution arrangements with the author. + + 4. You may modify and include the part of the software into any other + software (possibly commercial). But some files in the distribution + are not written by the author, so that they are not under these terms. + + For the list of those files and their copying conditions, see the + file LEGAL. + + 5. The scripts and library files supplied as input to or produced as + output from the software do not automatically fall under the + copyright of the software, but belong to whomever generated them, + and may be sold commercially, and may be aggregated with this + software. + + 6. 
THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE. diff --git a/COPYING.ja b/COPYING.ja new file mode 100644 index 0000000..aa2a163 --- /dev/null +++ b/COPYING.ja @@ -0,0 +1,51 @@ +本プログラムはフリーソフトウェアです.GPL (the GNU General +Public License)バージョン2または以下に示す条件で本プログラム +を再配布できます.GPLについてはGPLファイルを参照して下さい. + + 1. 複製は制限なく自由です. + + 2. 以下の条件のいずれかを満たす時に本プログラムのソースを + 自由に変更できます. + + (a) ネットニューズにポストしたり,作者に変更を送付する + などの方法で,変更を公開する. + + (b) 変更した本プログラムを自分の所属する組織内部だけで + 使う. + + (c) 変更点を明示したうえ,ソフトウェアの名前を変更する. + そのソフトウェアを配布する時には変更前の本プログラ + ムも同時に配布する.または変更前の本プログラムのソー + スの入手法を明示する. + + (d) その他の変更条件を作者と合意する. + + 3. 以下の条件のいずれかを満たす時に本プログラムをコンパイ + ルしたオブジェクトコードや実行形式でも配布できます. + + (a) バイナリを受け取った人がソースを入手できるように, + ソースの入手法を明示する. + + (b) 機械可読なソースコードを添付する. + + (c) 変更を行ったバイナリは名前を変更したうえ,オリジナ + ルのソースコードの入手法を明示する. + + (d) その他の配布条件を作者と合意する. + + 4. 他のプログラムへの引用はいかなる目的であれ自由です.た + だし,本プログラムに含まれる他の作者によるコードは,そ + れぞれの作者の意向による制限が加えられる場合があります. + + それらファイルの一覧とそれぞれの配布条件などに付いては + LEGALファイルを参照してください. + + 5. 本プログラムへの入力となるスクリプトおよび,本プログラ + ムからの出力の権利は本プログラムの作者ではなく,それぞ + れの入出力を生成した人に属します.また,本プログラムに + 組み込まれるための拡張ライブラリについても同様です. + + 6. 本プログラムは無保証です.作者は本プログラムをサポート + する意志はありますが,プログラム自身のバグあるいは本プ + ログラムの実行などから発生するいかなる損害に対しても責 + 任を持ちません. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..bd93a52 --- /dev/null +++ b/ChangeLog @@ -0,0 +1,20 @@ +Sat Dec 5 18:52:56 2009 Yuki Sonoda (Yugui) + + * string.c (rb_str_justify): CVE-2009-4124. + Fixes a bug reported by + Emmanouel Kellinis , KPMG London; + Patch by nobu. 
+ +For ChangeLog entries see the mainline ruby + +Local variables: +add-log-time-format: (lambda () + (let* ((time (current-time)) + (system-time-locale "C") + (diff (+ (cadr time) 32400)) + (lo (% diff 65536)) + (hi (+ (car time) (/ diff 65536)))) + (format-time-string "%a %b %e %H:%M:%S %Y" (list hi lo) t))) +indent-tabs-mode: t +tab-width: 8 +end: diff --git a/GPL b/GPL new file mode 100644 index 0000000..5b6e7c6 --- /dev/null +++ b/GPL @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. 
+These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. 
The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. 
+ + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. 
+ +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
diff --git a/KNOWNBUGS.rb b/KNOWNBUGS.rb new file mode 100644 index 0000000..39dc6a9 --- /dev/null +++ b/KNOWNBUGS.rb @@ -0,0 +1,4 @@ +# +# This test file includes tests which point out known bugs. +# So all tests will cause failure. +# diff --git a/LEGAL b/LEGAL new file mode 100644 index 0000000..7a7e825 --- /dev/null +++ b/LEGAL @@ -0,0 +1,404 @@ +LEGAL NOTICE INFORMATION +------------------------ + +All the files in this distribution are covered under either the Ruby's +license (see the file COPYING) or public-domain except some files +mentioned below. + +include/ruby/oniguruma.h: +regcomp.c: +regenc.[ch]: +regerror.c: +regexec.c: +regint.h: +regparse.[ch]: +enc/ascii.c +enc/big5.c +enc/cp949.c +enc/emacs_mule.c +enc/encdb.c +enc/euc_jp.c +enc/euc_kr.c +enc/euc_tw.c +enc/gb18030.c +enc/gb2312.c +enc/gbk.c +enc/iso_8859_1.c +enc/iso_8859_10.c +enc/iso_8859_11.c +enc/iso_8859_13.c +enc/iso_8859_14.c +enc/iso_8859_15.c +enc/iso_8859_16.c +enc/iso_8859_2.c +enc/iso_8859_3.c +enc/iso_8859_4.c +enc/iso_8859_5.c +enc/iso_8859_6.c +enc/iso_8859_7.c +enc/iso_8859_8.c +enc/iso_8859_9.c +enc/koi8_r.c +enc/koi8_u.c +enc/shift_jis.c +enc/unicode.c +enc/us_ascii.c +enc/utf_16be.c +enc/utf_16le.c +enc/utf_32be.c +enc/utf_32le.c +enc/utf_8.c +enc/windows_1251.c + +Oniguruma ---- (C) K.Kosako + +http://www.geocities.jp/kosako3/oniguruma/ +http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/ +http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ + + When this software is partly used or it is distributed with Ruby, + this of Ruby follows the license of Ruby. + +configure: + + This file is free software. + + Copyright (C) 1992, 93, 94, 95, 96 Free Software Foundation, Inc. + + This configure script is free software; the Free Software Foundation + gives unlimited permission to copy, distribute and modify it. + +config.guess: +config.sub: +parse.c: + + As long as you distribute these files with the file configure, they + are covered under the Ruby's license. 
+ + Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999 + Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + As a special exception to the GNU General Public License, if you + distribute this file as part of a program that contains a + configuration script generated by Autoconf, you may include it under + the same distribution terms that you use for the rest of that program. + +util.c (partly): + + Copyright (c) 1991, 2000, 2001 by Lucent Technologies. + + Permission to use, copy, modify, and distribute this software for any + purpose without fee is hereby granted, provided that this entire notice + is included in all copies of any software which is or includes a copy + or modification of this software and in all copies of the supporting + documentation for such software. + + THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + WARRANTY. IN PARTICULAR, NEITHER THE AUTHOR NOR LUCENT MAKES ANY + REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + +util.c (partly): +win32/win32.[ch]: + + You can apply the Artistic License to these files. 
(or GPL, + alternatively) + + Copyright (c) 1993, Intergraph Corporation + + You may distribute under the terms of either the GNU General Public + License or the Artistic License, as specified in the perl README file. + +random.c + + This file is under the new-style BSD license. + + A C-program for MT19937, with initialization improved 2002/2/10. + Coded by Takuji Nishimura and Makoto Matsumoto. + This is a faster version by taking Shawn Cokus's optimization, + Matthe Bellew's simplification, Isaku Wada's real version. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. + http://www.math.keio.ac.jp/matumoto/emt.html + email: matumoto@math.keio.ac.jp + +st.[ch]: +missing/alloca.c: +missing/dup2.c: +missing/finite.c: +missing/hypot.c: +missing/isinf.c: +missing/isnan.c: +missing/memcmp.c: +missing/memmove.c: +missing/strchr.c: +missing/strstr.c: +missing/strtol.c: +ext/digest/sha1/sha1.[ch]: + + These files are all under public domain. + +missing/erf.c: +missing/tgamma.c: +missing/lgamma_r.c: +missing/crypt.c: +missing/vsnprintf.c: + + This file is under the old-style BSD license. Note that the + paragraph 3 below is now null and void. + + Copyright (c) 1990, 1993 + The Regents of the University of California. All rights reserved. + + This code is derived from software contributed to Berkeley by + Chris Torek. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. 
Neither the name of the University nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + + IMPORTANT NOTE: + -------------- + From ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change + paragraph 3 above is now null and void. + +missing/strlcat.c +missing/strlcpy.c + + These files are under the new-style BSD license. + + Copyright (c) 1998 Todd C. Miller + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +missing/langinfo.c + + This file is from http://www.cl.cam.ac.uk/~mgk25/ucs/langinfo.c. + Ruby uses a modified version. The file contains the following + author/copyright notice: + + Markus.Kuhn@cl.cam.ac.uk -- 2002-03-11 + Permission to use, copy, modify, and distribute this software + for any purpose and without fee is hereby granted. The author + disclaims all warranties with regard to this software. + +ext/digest/md5/md5.[ch]: + + These files are under the following license. Ruby uses modified + versions of them. + + Copyright (C) 1999, 2000 Aladdin Enterprises. All rights reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. 
Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + L. Peter Deutsch + ghost@aladdin.com + +ext/digest/rmd160/rmd160.[ch]: + + These files have the following copyright information, and by the + author we are allowed to use it under the new-style BSD license. + + AUTHOR: Antoon Bosselaers, ESAT-COSIC + (Arranged for libc by Todd C. Miller) + DATE: 1 March 1996 + + Copyright (c) Katholieke Universiteit Leuven + 1996, All Rights Reserved + +ext/digest/sha2/sha2.[ch]: + + These files are under the new-style BSD license. + + Copyright 2000 Aaron D. Gifford. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the copyright holder nor the names of contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) AND CONTRIBUTOR(S) ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR(S) OR CONTRIBUTOR(S) BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + +ext/nkf/nkf-utf8/config.h: +ext/nkf/nkf-utf8/nkf.c: +ext/nkf/nkf-utf8/utf8tbl.c: + + These files are under the following license. So to speak, it is + copyrighted semi-public-domain software. + + Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA) + Everyone is permitted to do anything on this program + including copying, modifying, improving, + as long as you don't try to pretend that you wrote it. + i.e., the above copyright notice has to appear in all copies. + Binary distribution requires original version messages. + You don't have to ask before copying, redistribution or publishing. + THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE. + +ext/socket/addrinfo.h: +ext/socket/getaddrinfo.c: +ext/socket/getnameinfo.c: + + These files are under the new-style BSD license. + + Copyright (C) 1995, 1996, 1997, 1998, and 1999 WIDE Project. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. 
Neither the name of the project nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + +ext/win32ole/win32ole.c: + + You can apply the Artistic License to this file. (or GPL, + alternatively) + + (c) 1995 Microsoft Corporation. All rights reserved. + Developed by ActiveWare Internet Corp., http://www.ActiveWare.com + + Other modifications Copyright (c) 1997, 1998 by Gurusamy Sarathy + and Jan Dubois + + You may distribute under the terms of either the GNU General Public + License or the Artistic License, as specified in the README file + of the Perl distribution. diff --git a/LGPL b/LGPL new file mode 100644 index 0000000..b1e3f5a --- /dev/null +++ b/LGPL @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. 
It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. 
And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. 
It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. 
This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. 
You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) 
+ +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + , 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 0000000..c0a9fe5 --- /dev/null +++ b/Makefile.in @@ -0,0 +1,289 @@ +SHELL = /bin/sh +NULLCMD = : +RUNCMD = $(SHELL) +exec = exec + +#### Start of system configuration section. 
#### + +srcdir = @srcdir@ +top_srcdir = $(srcdir) +hdrdir = $(srcdir)/include + +CC = @CC@ +YACC = bison +PURIFY = +AUTOCONF = autoconf +@SET_MAKE@ +MKFILES = @MAKEFILES@ +BASERUBY = @BASERUBY@ + +prefix = @prefix@ +exec_prefix = @exec_prefix@ +bindir = @bindir@ +sbindir = @sbindir@ +libdir = @libdir@ +libexecdir = @libexecdir@ +datarootdir = @datarootdir@ +datadir = @datadir@ +arch = @arch@ +sitearch = @sitearch@ +sitedir = @sitedir@ +ruby_version = @ruby_version@ + +TESTUI = console +TESTS = +RDOCTARGET = @RDOCTARGET@ + +EXTOUT = @EXTOUT@ +RIDATADIR = $(DESTDIR)$(datadir)/ri/$(MAJOR).$(MINOR)/system +arch_hdrdir = $(EXTOUT)/include/$(arch) +VPATH = $(arch_hdrdir)/ruby:$(hdrdir)/ruby:$(srcdir):$(srcdir)/enc:$(srcdir)/missing + +empty = +OUTFLAG = @OUTFLAG@$(empty) +COUTFLAG = @COUTFLAG@$(empty) +CFLAGS = @CFLAGS@ @ARCH_FLAG@ +cflags = @cflags@ +optflags = @optflags@ +debugflags = @debugflags@ +warnflags = @warnflags@ +XCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir) -I$(srcdir) @XCFLAGS@ +CPPFLAGS = @CPPFLAGS@ +LDFLAGS = @STATIC@ $(CFLAGS) @LDFLAGS@ +EXTLDFLAGS = +XLDFLAGS = @XLDFLAGS@ $(EXTLDFLAGS) +EXTLIBS = +LIBS = @LIBS@ $(EXTLIBS) +MISSING = @LIBOBJS@ @ALLOCA@ +LDSHARED = @LIBRUBY_LDSHARED@ +DLDFLAGS = @LIBRUBY_DLDFLAGS@ $(EXTLDFLAGS) @ARCH_FLAG@ +SOLIBS = @SOLIBS@ +MAINLIBS = @MAINLIBS@ +ARCHMINIOBJS = @MINIOBJS@ +BUILTIN_ENCOBJS = @BUILTIN_ENCOBJS@ +BUILTIN_TRANSSRCS = @BUILTIN_TRANSSRCS@ +BUILTIN_TRANSOBJS = @BUILTIN_TRANSOBJS@ + +RUBY_INSTALL_NAME=@RUBY_INSTALL_NAME@ +RUBY_SO_NAME=@RUBY_SO_NAME@ +EXEEXT = @EXEEXT@ +PROGRAM=$(RUBY_INSTALL_NAME)$(EXEEXT) +RUBY = $(RUBY_INSTALL_NAME) +MINIRUBY = @MINIRUBY@ $(MINIRUBYOPT) +RUNRUBY = @RUNRUBY@ $(RUNRUBYOPT) -- + +#### End of system configuration section. 
#### + +MAJOR= @MAJOR@ +MINOR= @MINOR@ +TEENY= @TEENY@ + +LIBRUBY_A = @LIBRUBY_A@ +LIBRUBY_SO = @LIBRUBY_SO@ +LIBRUBY_ALIASES= @LIBRUBY_ALIASES@ +LIBRUBY = @LIBRUBY@ +LIBRUBYARG = @LIBRUBYARG@ +LIBRUBYARG_STATIC = @LIBRUBYARG_STATIC@ +LIBRUBYARG_SHARED = @LIBRUBYARG_SHARED@ + +THREAD_MODEL = @THREAD_MODEL@ + +PREP = @PREP@ +ARCHFILE = @ARCHFILE@ +SETUP = +EXTSTATIC = @EXTSTATIC@ +SET_LC_MESSAGES = env LC_MESSAGES=C + +MAKEDIRS = @MAKEDIRS@ +CP = cp +MV = mv +RM = rm -f +RMDIRS = @RMDIRS@ +RMALL = @RMALL@ +NM = @NM@ +AR = @AR@ +ARFLAGS = rcu +RANLIB = @RANLIB@ +AS = @AS@ +ASFLAGS = @ASFLAGS@ +IFCHANGE = $(srcdir)/tool/ifchange +SET_LC_MESSAGES = env LC_MESSAGES=C +OBJDUMP = @OBJDUMP@ +OBJCOPY = @OBJCOPY@ +VCS = @VCS@ +VCSUP = @VCSUP@ + +OBJEXT = @OBJEXT@ +ASMEXT = S +DLEXT = @DLEXT@ +MANTYPE = @MANTYPE@ + +INSTALLED_LIST= .installed.list + +MKMAIN_CMD = mkmain.sh + +SRC_FILE = $< +#### End of variables + +all: + +.DEFAULT: all + +# Prevent GNU make v3 from overflowing arg limit on SysV. +.NOEXPORT: + +miniruby$(EXEEXT): + @-if test -f $@; then mv -f $@ $@.old; $(RM) $@.old; fi + $(PURIFY) $(CC) $(LDFLAGS) $(XLDFLAGS) $(MAINLIBS) $(MAINOBJ) $(MINIOBJS) $(COMMONOBJS) $(DMYEXT) $(LIBS) $(OUTFLAG)$@ + +$(PROGRAM): + @$(RM) $@ + $(PURIFY) $(CC) $(LDFLAGS) $(XLDFLAGS) $(MAINLIBS) $(MAINOBJ) $(EXTOBJS) $(LIBRUBYARG) $(LIBS) $(OUTFLAG)$@ + +# We must `rm' the library each time this rule is invoked because "updating" a +# MAB library on Apple/NeXT (see --enable-fat-binary in configure) is not +# supported. +$(LIBRUBY_A): + @$(RM) $@ + $(AR) $(ARFLAGS) $@ $(OBJS) $(DMYEXT) + @-$(RANLIB) $@ 2> /dev/null || true + +$(LIBRUBY_SO): + @-$(PRE_LIBRUBY_UPDATE) + $(LDSHARED) $(DLDFLAGS) $(OBJS) $(DLDOBJS) $(SOLIBS) $(OUTFLAG)$@ + -$(OBJCOPY) -w -L '@EXPORT_PREFIX@Init_*' $@ + @-$(MINIRUBY) -e 'ARGV.each{|link| File.delete link if File.exist? 
link; \ + File.symlink "$(LIBRUBY_SO)", link}' \ + $(LIBRUBY_ALIASES) || true + +fake: $(arch)-fake.rb +$(arch)-fake.rb: config.status + @./config.status --file=$@:$(srcdir)/template/fake.rb.in + +Makefile: $(srcdir)/Makefile.in $(srcdir)/enc/Makefile.in + +$(MKFILES): config.status + MAKE=$(MAKE) $(SHELL) ./config.status + @{ \ + echo "all:; -@rm -f conftest.mk"; \ + echo "conftest.mk: .force; @echo AUTO_REMAKE"; \ + echo ".force:"; \ + } > conftest.mk || exit 1; \ + $(MAKE) -f conftest.mk | grep '^AUTO_REMAKE$$' >/dev/null 2>&1 || \ + { echo "Makefile updated, restart."; exit 1; } + +uncommon.mk: $(srcdir)/common.mk + sed 's/{\$$([^(){}]*)[^{}]*}//g' $< > $@ + +config.status: $(srcdir)/configure $(srcdir)/enc/Makefile.in + MINIRUBY="$(MINIRUBY)" $(SHELL) ./config.status --recheck + +$(srcdir)/configure: $(srcdir)/configure.in + cd $(srcdir) && $(AUTOCONF) + +# Things which should be considered: +# * with gperf v.s. without gperf +# * ./configure v.s. ../ruby/configure +# * GNU make v.s. HP-UX make # HP-UX make invokes the action if lex.c and keywords has same mtime. +# * svn checkout generate a file with mtime as current time +# * XFS has a mtime with fractional part +lex.c: defs/keywords + @\ + if cmp -s $(srcdir)/defs/lex.c.src $?; then \ + set -x; \ + cp $(srcdir)/lex.c.blt $@; \ + else \ + set -x; \ + gperf -C -p -j1 -i 1 -g -o -t -N rb_reserved_word -k1,3,$$ $? > $@.tmp && \ + mv $@.tmp $@ && \ + cp $? 
$(srcdir)/defs/lex.c.src && \ + cp $@ $(srcdir)/lex.c.blt; \ + fi + +.c.@OBJEXT@: + $(CC) $(CFLAGS) $(XCFLAGS) $(CPPFLAGS) $(COUTFLAG)$@ -c $< + +.s.@OBJEXT@: + $(AS) $(ASFLAGS) -o $@ $< + +.c.S: + $(CC) $(CFLAGS) $(XCFLAGS) $(CPPFLAGS) $(COUTFLAG)$@ -S $< + +clean-local:: + @$(RM) ext/extinit.c ext/extinit.$(OBJEXT) ext/ripper/y.output + +distclean-local:: + @$(RM) ext/config.cache $(RBCONFIG) + @-$(RM) run.gdb + @-$(RM) $(INSTALLED_LIST) $(arch_hdrdir)/ruby/config.h + @-$(RMDIRS) $(arch_hdrdir)/ruby + +distclean-rdoc: + @$(RMALL) $(RDOCOUT:/=\) + +clean-ext distclean-ext realclean-ext:: + @set dummy ${EXTS}; shift; \ + if test "$$#" = 0; then \ + set dummy `find ext -name Makefile | sed 's:^ext/::;s:/Makefile$$::' | sort`; \ + shift; \ + fi; \ + for dir; do \ + [ -f "ext/$$dir/Makefile" ] || continue; \ + echo $(@:-ext=)ing "$$dir"; \ + (cd "ext/$$dir" && exec $(MAKE) $(MFLAGS) $(@:-ext=)) && \ + case "$@" in \ + *distclean-ext*|*realclean-ext*) \ + $(RMDIRS) "ext/$$dir";; \ + esac; \ + done + +distclean-ext realclean-ext:: + @-rmdir ext 2> /dev/null || true + +ext/extinit.$(OBJEXT): ext/extinit.c $(SETUP) + $(CC) $(CFLAGS) $(XCFLAGS) $(CPPFLAGS) $(COUTFLAG)$@ -c ext/extinit.c + +up:: + @LC_TIME=C cd "$(srcdir)" && $(VCSUP) + +update-rubyspec: + @if [ -d $(srcdir)/spec/mspec ]; then \ + cd $(srcdir)/spec/mspec; \ + echo updating mspec ...; \ + git pull; \ + cd ../..; \ + else \ + echo retrieving mspec ...; \ + git clone $(MSPEC_GIT_URL) $(srcdir)/spec/mspec; \ + fi + @if [ -d $(srcdir)/spec/rubyspec ]; then \ + cd $(srcdir)/spec/rubyspec; \ + echo updating rubyspec ...; \ + git pull; \ + else \ + echo retrieving rubyspec ...; \ + git clone $(RUBYSPEC_GIT_URL) $(srcdir)/spec/rubyspec; \ + fi + +test-rubyspec: + @if [ ! -d $(srcdir)/spec/rubyspec ]; then echo No rubyspec here. 
make update-rubyspec first.; exit 1; fi + $(RUNRUBY) $(srcdir)/spec/mspec/bin/mspec -B $(srcdir)/spec/default.mspec $(MSPECOPT) + +INSNS = opt_sc.inc optinsn.inc optunifs.inc insns.inc insns_info.inc \ + vmtc.inc vm.inc + +$(INSNS): + @$(RM) $(PROGRAM) + $(BASERUBY) -Ks $(srcdir)/tool/insns2vm.rb $(INSNS2VMOPT) $@ + +node_name.inc: + $(BASERUBY) -n $(srcdir)/tool/node_name.rb $? > $@ + +known_errors.inc: + $(BASERUBY) $(srcdir)/tool/generic_erb.rb -c -o $@ $(srcdir)/template/known_errors.inc.tmpl $(srcdir)/defs/known_errors.def + +miniprelude.c: + $(BASERUBY) -I$(srcdir) $(srcdir)/tool/compile_prelude.rb $(srcdir)/prelude.rb $@ + +newline.c: + $(BASERUBY) "$(srcdir)/tool/transcode-tblgen.rb" -vo newline.c $(srcdir)/enc/trans/newline.trans diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..0575233 --- /dev/null +++ b/NEWS @@ -0,0 +1,420 @@ += NEWS + +This document is a list of user visible feature changes made between +releases except for bug fixes. + +Note that each entry is kept so brief that no reason behind or +reference information is supplied with. For a full list of changes +with all sufficient information, see the ChangeLog file. + +== Changes since the 1.8.7 release +See doc/NEWS-1.8.7 for changes between 1.8.6 and 1.8.7. + +=== Compatibility issues + +* language core + + * New syntax and semantics + o Block arguments are always local + o New semantics for block arguments + o defined? and local variables + o Parser expects that your source code has only valid byte + sequence in some character encoding. Use magic comments + to tell the parser which encoding you use. + o New semantics for constant definition in instance_eval + or in module_eval. + + * Deprecated syntax + o colon (:) instead of "then" in if/unless or case expression. + o retry in a loop or an iterator. + +* builtin classes and objects + + * Kernel and Object + o Kernel#methods and #singleton_methods used to return an + array of strings but now they return an array of symbols. 
+ * Class and Module + o Module#attr works as Module#attr_reader by default. + Optional boolean argument is obsolete. + o Module#instance_methods, #private_instance_methods and + #public_instance_methods used to return an array of + strings but now they return an array of symbols. + o Extra subclassing check when binding UnboundMethods + + * Exceptions + o Exceptions are equal to each other if they belong to + the same class and have the same message and backtrace. + o SystemStackError used to be a subclass of StandardError + but now it is a direct subclass of Exception. + o SecurityError: ditto + o Removed Exception#to_str [Ruby2] + + * Enumerable and Enumerator + o Enumerable::Enumerator, compatibility alias of Enumerator, + is removed. + o Enumerable#{map,collect} called without a block returns + an enumerator. + o Even more builtin and bundled libraries have been made to + return an enumerator when called without a block. + * Array + o Array#nitems was removed (use count {|i| !i.nil?}) + o Array#choice was removed (use sample) + o Array#[m,n] = nil places nil in the array. + * Hash + o Hash#to_s is equivalent to Hash#inspect + o Semantics for Hash#each and Hash#each_pair + o Hash#select returns a hash + o Hash#key is the new name for #index which has been + deprecated. + o Hash preserves order. It enumerates its elements in the + order in which the keys are inserted. + o Most of the changes in Hash apply to hash like interfaces + such as ENV and *DBM. + * IO operations + o Many methods used to act byte-wise but now some of those act + character-wise. You can use alternate byte-wise methods. + o IO#getc + o Non-blocking IO + o Kernel#open takes "t" for newline conversion + o Kernel#open takes encoding specified + o IO automatically converts byte sequence from one character + encoding into another if specified. + o StringIO#readpartial + o IO.try_convert + o IO.binread + o IO.copy_stream + o IO#binmode? + o IO#close_on_exec= and IO#close_on_exec?
+ o Limit input in IO#gets, IO#readline, IO#readlines, + IO#each_line, IO#lines, IO.foreach, IO.readlines, + StringIO#gets, StringIO#readline, StringIO#each, + StringIO#readlines + o IO#ungetc, StringIO#ungetc + o IO#ungetbyte, StringIO#ungetbyte + o IO#internal_encoding, IO#external_encoding, + IO#set_encoding + o IO.pipe takes encoding option + o Directive %u behaves like %d for negative values in + printf-style formatting. + * File and Dir operations + o #to_path is called as necessary in File.path, File.chmod, + File.lchmod, File.chown, File.lchown, File.utime, + File.unlink, etc.. + o File.world_readable? + o File.world_writable? + o Dir.[], Dir.glob + o Dir.exist? + o Dir.exists? + * File::Stat + o File::Stat#world_readable? + o File::Stat#world_writable? + * String + o No longer an Enumerable: use each_line/lines for line + oriented operation + o Encoding-awareness + o Character-wise semantics in many methods instead of + byte-wise. + o String#[]: Indexing a String with an integer returns a + single character String instead of an integer. + o String#[]=: No longer takes an integer as right + side value. Note that "str[i] = ?c" because of + the following change. + o ?c is evaluated to a single character string + instead of an integer. + * Regexp + o Encoding-awareness + o Regexp matches only with strings which is encoded in a + compatible character encoding to the regexp's. + o Regexp#kcode is removed. use Regexp#encoding. + * Symbols: restriction on literal symbols + * Numeric + o Numeric#div always rounds as Integer#div has done. + o Numeric#fdiv: ditto. + * Integer + o Integer(nil) raises TypeError + * Fixnum + o Fixnum#id2name removed + o Fixnum#to_sym removed + * Struct + o Struct#inspect + * Time + o New format in Time#to_s + o Timezone information preserved on Marshal.dump/load + * $SAFE and bound methods + o New trusted/untrusted model in addition to + tainted/untainted model. 
+ + * Deprecation + o $= (global flag for case-sensitiveness on string matching) + o Kernel#to_a + o Kernel#getc, #gsub, #sub + o Kernel#callcc and Continuation now become 'continuation' + bundled library. + o Object#type + o Removed Array and Hash #indices, #indexes + o Hash#index + o ENV.index + o Process::Status#to_int + o Numeric#rdiv + o Precision is removed. Don't cry, it will be redesigned + and come back in future version. + o Symbol#to_int and Symbol#to_i + o $KCODE is no longer effective. Use Encoding related + features of each class. + o VERSION and friends + +* bundled libraries + + * Pathname + o No longer has #to_str nor #=~. + * time and date + o Time.parse and Date.parse interprets slashed numerical dates + as "dd/mm/yyyy". + * Readline + o If Readline uses libedit, Readline::HISTORY[0] returns the + first of the history. + * Continuation + o as above + + * Deprecation + o Complex#image: use Complex#imag + o All SSL-related class methods in Net::SMTP + o Prime#cache, Prime#primes, Prime#primes_so_far + o mailread library: use tmail gem. + o cgi-lib library: use cgi. + o date2 library: use date. + o eregex library + o finalize library: use ObjectSpace.define_finalizer if you + really need a finalizer. really? + o ftools library: use fileutils. + o generator library: use Enumerator. + o importenv library and Env library + o jcode library: use multilingualization support of String + o parsedate library + o ping library + o readbytes library + o getopts library and parsearg library: use optparse or + getoptlong. + o soap, wsdl and xsd libraries: use soap4r gem. + o Win32API library: use dl. + o dl library: Reimplemented and API changed. use the new + version of dl or ffi gem. + o rubyunit library and runit library: use minitest or + test/unit. Or use anything you love through RubyGems. + o test/unit is reimplemented on top of minitest. This is + not fully compatible with the original. 
+ +=== Language core changes + + * New syntax and semantics + o Magic comments to declare in which encoding your source + code is written + o New literal hash syntax and new syntax for hash style + arguments + o New syntax for lambdas + o .() and calling Procs without #call/#[] + o Block in block arguments + o Block local variables + o Mandatory arguments after optional arguments allowed + o Multiple splats allowed + o #[] can take splatted arguments, hash style arguments + and a block. + o New directives in printf-style formatted strings (%). + o Newlines allowed before ternary colon operator (:) and + method call dot operator (.) + o Negative operators such as !, != and !~ are now + overloadable + o Encoding.default_external and default_internal + o __ENCODING__: New pseudo variable to hold the current + script's encoding + +=== Library updates + +* builtin classes and objects + * Kernel and Object + o BasicObject + o Object#=~ returns nil instead of false by default. + o Kernel#define_singleton_method + o Kernel#load can load a library from the highest versions + of gems by default. + * Class and Module + o Module#const_defined?, #const_get and #method_defined? + take an optional parameter. + o #class_variable_{set,get} are public. + o Class of singleton classes + + * Errno::EXXX + o All of those are always defined. Errno::EXXX will be + defined as an alias to Errno::NOERROR if your platform + does not have one. + + * Binding#eval + * Blocks and Procs + o Arity of blocks without arguments + o proc is now a synonym of Proc.new + o Proc#yield + o Passing blocks to #[] + o Proc#lambda? + o Proc#curry + * Fiber: coroutines/micro-threads + * Thread + o Thread.critical and Thread.critical= removed + o Thread#exit!, Thread#kill! and Thread#terminate! removed. + + * Enumerable and Enumerator + o Enumerator#enum_cons and Enumerator#enum_slice are + removed. Use #each_cons and #each_slice without a block. 
+ o Enumerable#each_with_index can take optional arguments + and passes them to #each. + o Enumerable#each_with_object + o Enumerator#with_object + o Enumerator.new { ... } + * Array + o Array#delete returns a deleted element rather than a given + object + o Array#to_s is equivalent to Array#inspect + o Array.try_convert + o Array#pack('m0') complies with RFC 4648. + * Hash + o preserving item insertion order + o Hash#default_proc= + o Hash#_compare_by_identity and Hash#compare_by_identity? + o Hash.try_convert + o Hash#assoc + o Hash#rassoc + o Hash#flatten + * Range + o Range#cover? + o Range#include? iterates over elements and compares the + given value with each element unless the range is numeric. + Use Range#cover? for the old behavior, i.e. comparison + with boundary values. + o Range#min, Range#max + + * File and Dir operations + o New methods + * Process + o Process.spawn + o Process.daemon + * String + o String#clear + o String#ord + o String#getbyte, String#setbyte + o String#chars and String#each_char act as character-wise. + o String#codepoints, String#each_codepoint + o String#unpack with a block + o String#hash + o String.try_convert + o String#encoding + o String#force_encoding, String#encode and String#encode! + o String#ascii_only? + o String#valid_encoding? + o String#match + * Symbol + o Zero-length symbols allowed + o Symbol#intern + o Symbol#encoding + o Symbol methods similar to those in String + * Regexp + o Regexp#=== matches symbols + o Regexp.try_convert + o Regexp#match + o Regexp#fixed_encoding? + o Regexp#encoding + o Regexp#named_captures + o Regexp#names + * MatchData + o MatchData#names + o MatchData#regexp + * Encoding + * Encoding::Converter + o supports conversion between many encodings + * Numeric + o Numeric#upto, #downto, #times, #step + o Numeric#real?, Complex#real? + o Numeric#magnitude + * Rational / Complex + o They are in the core library now + * Math + o Math#log takes an optional argument. 
+ o Math#log2 + o Math#cbrt, Math#lgamma, Math#gamma + * Time + o Time.times removed. Use Process.times. + o Time#sunday? + o Time#monday? + o Time#tuesday? + o Time#wednesday? + o Time#thursday? + o Time#friday? + o Time#saturday? + o Time#tv_nsec and Time#nsec + * Misc. new methods + o RUBY_ENGINE to distinguish between Ruby processor implementation + o public_method + o public_send + o GC.count + o ObjectSpace.count_objects + o Method#hash, Proc#hash + o Method#source_location, UnboundMethod#source_location and + Proc#source_location + o __callee__ + o Elements in $LOAD_PATH and $LOADED_FEATURES are expanded + +* bundled libraries + * RubyGems + o Package management system for Ruby. + o Integrated with Ruby's library loader. + * Rake + o Ruby make. A simple ruby build program with capabilities + similar to make. + * minitest + o Our new testing library which is faster, cleaner and easier + to read than the old test/unit. + o You can introduce the old test/unit as testunit gem through + RubyGems if you want. + * CMath + o Complex number version of Math + * Prime + o Extracted from Mathn and improved. You can easily enumerate + prime numbers. + o Prime.new is obsolete. Use its class methods. + * ripper + o Ruby script parser + * Readline + o Readline.vi_editing_mode? + o Readline.emacs_editing_mode? + o Readline::HISTORY.clear + * Tk + o TkXXX widget classes are removed and redefined as aliases of + Tk::XXX classes. + * RDoc + o Updated to version 2.2.2. See: + http://rubyforge.org/frs/shownotes.php?group_id=627&release_id=26434 + +* commandline options + * -E, --encoding + * -U + * --enable-gems, --disable-gems + * --enable-rubyopt, --disable-rubyopt + * long options are allowed in RUBYOPT environment variable. + +=== Implementation changes + +* Memory Diet + * Object Compaction - Object, Array, String, Hash, Struct, Class, + Module + * st_table compaction (inlining small tables) +* YARV + * Ruby codes are compiled into opcodes before executed. 
+ * Native thread + +* Platform supports + * Support levels + (0) Supported + (1) Best effort + (2) Perhaps + (3) Not supported + * Dropped + o No longer supports djgpp, bcc32, human68k, MacOS 9 or earlier, + VMS nor Windows CE. diff --git a/README b/README new file mode 100644 index 0000000..7e8370e --- /dev/null +++ b/README @@ -0,0 +1,114 @@ +* What's Ruby + +Ruby is the interpreted scripting language for quick and +easy object-oriented programming. It has many features to +process text files and to do system management tasks (as in +Perl). It is simple, straight-forward, and extensible. + + +* Features of Ruby + + + Simple Syntax + + *Normal* Object-Oriented features(ex. class, method calls) + + *Advanced* Object-Oriented features(ex. Mix-in, Singleton-method) + + Operator Overloading + + Exception Handling + + Iterators and Closures + + Garbage Collection + + Dynamic Loading of Object files(on some architecture) + + Highly Portable (works on many Unix-like/POSIX compatible platforms + as well as Windows, Mac OS X, BeOS etc.) + cf. http://redmine.ruby-lang.org/wiki/ruby-19/SupportedPlatforms + + +* How to get Ruby + +The Ruby distribution files can be found in the following FTP site: + + ftp://ftp.ruby-lang.org/pub/ruby/ + +The trunk of the Ruby source tree can be checked out with the +following command: + + $ svn co http://svn.ruby-lang.org/repos/ruby/trunk/ ruby + +There are some other branches under development. Try the following +command and see the list of branches: + + $ svn ls http://svn.ruby-lang.org/repos/ruby/branches/ + + +* Ruby home-page + +The URL of the Ruby home-page is: + + http://www.ruby-lang.org/ + + +* Mailing list + +There is a mailing list to talk about Ruby. +To subscribe this list, please send the following phrase + + subscribe YourFirstName YourFamilyName +e.g. + subscribe Joseph Smith + +in the mail body (not subject) to the address . + + +* How to compile and install + +This is what you need to do to compile and install Ruby: + + 1. 
If ./configure does not exist or is older than configure.in, + run autoconf to (re)generate configure. + + 2. Run ./configure, which will generate config.h and Makefile. + + Some C compiler flags may be added by default depending on your + environment. Specify optflags=.. and warnflags=.. as necessary + to override them. + + 3. Edit defines.h if you need. Usually this step will not be needed. + + 4. Remove comment mark(#) before the module names from ext/Setup (or + add module names if not present), if you want to link modules + statically. + + If you don't want to compile non static extension modules + (probably on architectures which does not allow dynamic loading), + remove comment mark from the line "#option nodynamic" in + ext/Setup. + + 5. Run make. + + 6. Optionally, run 'make test' to check whether the compiled Ruby + interpreter works well. If you see the message "test succeeded", + your ruby works as it should (hopefully). + + 7. Run 'make install' + + You may have to be a super user to install ruby. + +If you fail to compile ruby, please send the detailed error report with +the error log and machine/OS type, to help others. + + +* Copying + +See the file COPYING. + + +* The Author + +Feel free to send comments and bug reports to the author. Here is the +author's latest mail address: + + matz@netlab.jp + +------------------------------------------------------- +created at: Thu Aug 3 11:57:36 JST 1995 +Local variables: +mode: indented-text +end: diff --git a/README.EXT b/README.EXT new file mode 100644 index 0000000..1cf9ad0 --- /dev/null +++ b/README.EXT @@ -0,0 +1,1337 @@ +.\" README.EXT - -*- Text -*- created at: Mon Aug 7 16:45:54 JST 1995 + +This document explains how to make extension libraries for Ruby. + +1. Basic knowledge + +In C, variables have types and data do not have types. In contrast, +Ruby variables do not have a static type, and data themselves have +types, so data will need to be converted between the languages. 
+ +Data in Ruby are represented by the C type `VALUE'. Each VALUE data +has its data-type. + +To retrieve C data from a VALUE, you need to: + + (1) Identify the VALUE's data type + (2) Convert the VALUE into C data + +Converting to the wrong data type may cause serious problems. + + +1.1 Data-types + +The Ruby interpreter has the following data types: + + T_NIL nil + T_OBJECT ordinary object + T_CLASS class + T_MODULE module + T_FLOAT floating point number + T_STRING string + T_REGEXP regular expression + T_ARRAY array + T_HASH associative array + T_STRUCT (Ruby) structure + T_BIGNUM multi precision integer + T_FIXNUM Fixnum(31bit or 63bit integer) + T_COMPLEX complex number + T_RATIONAL rational number + T_FILE IO + T_TRUE true + T_FALSE false + T_DATA data + T_SYMBOL symbol + +In addition, there are several other types used internally: + + T_ICLASS + T_MATCH + T_UNDEF + T_NODE + T_ZOMBIE + +Most of the types are represented by C structures. + +1.2 Check Data Type of the VALUE + +The macro TYPE() defined in ruby.h shows the data type of the VALUE. +TYPE() returns the constant number T_XXXX described above. To handle +data types, your code will look something like this: + + switch (TYPE(obj)) { + case T_FIXNUM: + /* process Fixnum */ + break; + case T_STRING: + /* process String */ + break; + case T_ARRAY: + /* process Array */ + break; + default: + /* raise exception */ + rb_raise(rb_eTypeError, "not valid value"); + break; + } + +There is the data-type check function + + void Check_Type(VALUE value, int type) + +which raises an exception if the VALUE does not have the type +specified. + +There are also faster check macros for fixnums and nil. + + FIXNUM_P(obj) + NIL_P(obj) + +1.3 Convert VALUE into C data + +The data for type T_NIL, T_FALSE, T_TRUE are nil, false, true +respectively. They are singletons for the data type.
+ +The T_FIXNUM data is a 31bit length fixed integer (63bit length on +some machines), which can be converted to a C integer by using the +FIX2INT() macro or FIX2LONG(). Though you have to check that the +data is really FIXNUM before using them, they are faster. FIX2LONG() +never raises exceptions, but FIX2INT() raises RangeError if the +result is bigger or smaller than the size of int. +There are also NUM2INT() and NUM2LONG() which convert any Ruby +numbers into C integers. These macros include a type check, +so an exception will be raised if the conversion failed. NUM2DBL() +can be used to retrieve the double float value in the same way. + +In version 1.7 or later it is recommended that you use the new macros +StringValue() and StringValuePtr() to get a char* from a VALUE. +StringValue(var) replaces var's value with the result of "var.to_str()". +StringValuePtr(var) does the same replacement and returns the char* +representation of var. These macros will skip the replacement if var +is a String. Notice that the macros take only the lvalue as their +argument, to change the value of var in place. + +You can also use the macro named StringValueCStr(). This is just +like StringValuePtr(), but always adds a nul character at the end of +the result. If the result contains a nul character, this macro raises +the ArgumentError exception. +StringValuePtr() doesn't guarantee a nul character at the end of the +result, and the result may contain nul. + +In version 1.6 or earlier, STR2CSTR() was used to do the same thing +but now it is deprecated in version 1.7, because STR2CSTR() has a risk +of a dangling pointer problem in the to_str() implicit conversion. + +Other data types have corresponding C structures, e.g. struct RArray +for T_ARRAY etc. The VALUE of the type which has the corresponding +structure can be cast to retrieve the pointer to the struct. The +casting macro will be of the form RXXXX for each data type; for +instance, RARRAY(obj). See "ruby.h".
+ +There are some accessing macros for structure members, for example +`RSTRING_LEN(s)' to get the size of the Ruby String object. The +allocated region can be accessed by `RSTRING_PTR(str)'. For arrays, use +`RARRAY_LEN(ary)' and `RARRAY_PTR(ary)' respectively. + +Notice: Do not change the value of the structure directly, unless you +are responsible for the result. This ends up being the cause of +interesting bugs. + +1.4 Convert C data into VALUE + +To convert C data to Ruby values: + + * FIXNUM + + left shift 1 bit, and turn on LSB. + + * Other pointer values + + cast to VALUE. + +You can determine whether a VALUE is pointer or not by checking its LSB. + +Notice Ruby does not allow arbitrary pointer values to be a VALUE. They +should be pointers to the structures which Ruby knows about. The known +structures are defined in <ruby.h>. + +To convert C numbers to Ruby values, use these macros. + + INT2FIX() for integers within 31bits. + INT2NUM() for arbitrary sized integer. + +INT2NUM() converts an integer into a Bignum if it is out of the FIXNUM +range, but is a bit slower. + +1.5 Manipulating Ruby data + +As I already mentioned, it is not recommended to modify an object's +internal structure. To manipulate objects, use the functions supplied +by the Ruby interpreter. Some (not all) of the useful functions are +listed below: + + String functions + + rb_str_new(const char *ptr, long len) + + Creates a new Ruby string. + + rb_str_new2(const char *ptr) + rb_str_new_cstr(const char *ptr) + + Creates a new Ruby string from a C string. This is equivalent to + rb_str_new(ptr, strlen(ptr)). + + rb_tainted_str_new(const char *ptr, long len) + + Creates a new tainted Ruby string. Strings from external data + sources should be tainted. + + rb_tainted_str_new2(const char *ptr) + rb_tainted_str_new_cstr(const char *ptr) + + Creates a new tainted Ruby string from a C string. + + rb_sprintf(const char *format, ...)
+ rb_vsprintf(const char *format, va_list ap) + + Creates a new Ruby string with printf(3) format. + + rb_str_cat(VALUE str, const char *ptr, long len) + + Appends len bytes of data from ptr to the Ruby string. + + rb_str_cat2(VALUE str, const char* ptr) + + Appends C string ptr to Ruby string str. This function is + equivalent to rb_str_cat(str, ptr, strlen(ptr)). + + rb_str_catf(VALUE str, const char* format, ...) + rb_str_vcatf(VALUE str, const char* format, va_list ap) + + Appends C string format and successive arguments to Ruby string + str according to a printf-like format. These functions are + equivalent to rb_str_cat2(str, rb_sprintf(format, ...)) and + rb_str_cat2(str, rb_vsprintf(format, ap)), respectively. + + rb_enc_str_new(const char *ptr, long len, rb_encoding *enc) + + Creates a new Ruby string with the specified encoding. + + rb_usascii_str_new(const char *ptr, long len) + rb_usascii_str_new_cstr(const char *ptr) + + Creates a new Ruby string with encoding US-ASCII. + + Array functions + + rb_ary_new() + + Creates an array with no elements. + + rb_ary_new2(long len) + + Creates an array with no elements, allocating internal buffer + for len elements. + + rb_ary_new3(long n, ...) + + Creates an n-element array from the arguments. + + rb_ary_new4(long n, VALUE *elts) + + Creates an n-element array from a C array. + + rb_ary_to_ary(VALUE obj) + + Converts the object into an array. + Equivalent to Object#to_ary. + + There are many functions to operate on an array. + They may dump core if other types are given. + + rb_ary_aref(int argc, VALUE *argv, VALUE ary) + + Equivalent to Array#[]. + + rb_ary_entry(VALUE ary, long offset) + + ary[offset] + + rb_ary_subseq(VALUE ary, long beg, long len) + + ary[beg, len] + + rb_ary_push(VALUE ary, VALUE val) + rb_ary_pop(VALUE ary) + rb_ary_shift(VALUE ary) + rb_ary_unshift(VALUE ary, VALUE val) + + +2. Extending Ruby with C + +2.1 Adding new features to Ruby + +You can add new features (classes, methods, etc.)
to the Ruby +interpreter. Ruby provides APIs for defining the following things: + + * Classes, Modules + * Methods, Singleton Methods + * Constants + +2.1.1 Class/module definition + +To define a class or module, use the functions below: + + VALUE rb_define_class(const char *name, VALUE super) + VALUE rb_define_module(const char *name) + +These functions return the newly created class or module. You may +want to save this reference into a variable to use later. + +To define nested classes or modules, use the functions below: + + VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super) + VALUE rb_define_module_under(VALUE outer, const char *name) + +2.1.2 Method/singleton method definition + +To define methods or singleton methods, use these functions: + + void rb_define_method(VALUE klass, const char *name, + VALUE (*func)(), int argc) + + void rb_define_singleton_method(VALUE object, const char *name, + VALUE (*func)(), int argc) + +The `argc' represents the number of the arguments to the C function, +which must be less than 17. But I doubt you'll need that many. + +If `argc' is negative, it specifies the calling sequence, not number of +the arguments. + +If argc is -1, the function will be called as: + + VALUE func(int argc, VALUE *argv, VALUE obj) + +where argc is the actual number of arguments, argv is the C array of +the arguments, and obj is the receiver. + +If argc is -2, the arguments are passed in a Ruby array. The function +will be called like: + + VALUE func(VALUE obj, VALUE args) + +where obj is the receiver, and args is the Ruby array containing +actual arguments. + +There are some more functions to define methods. One takes an ID +as the name of method to be defined. See 2.2.2 for IDs. 
+ + void rb_define_method_id(VALUE klass, ID name, + VALUE (*func)(ANYARGS), int argc) + +There are two functions to define private/protected methods: + + void rb_define_private_method(VALUE klass, const char *name, + VALUE (*func)(), int argc) + void rb_define_protected_method(VALUE klass, const char *name, + VALUE (*func)(), int argc) + +Finally, rb_define_module_function defines module functions, +which are private AND singleton methods of the module. +For example, sqrt is a module function defined in the Math module. +It can be called in the following way: + + Math.sqrt(4) + +or + + include Math + sqrt(4) + +To define module functions, use: + + void rb_define_module_function(VALUE module, const char *name, + VALUE (*func)(), int argc) + +Oh, in addition, function-like methods, which are private methods defined +in the Kernel module, can be defined using: + + void rb_define_global_function(const char *name, VALUE (*func)(), int argc) + +To define an alias for the method, + + void rb_define_alias(VALUE module, const char* new, const char* old); + +To define a reader/writer for an attribute, + + void rb_define_attr(VALUE klass, const char *name, int read, int write) + +To define and undefine the `allocate' class method, + + void rb_define_alloc_func(VALUE klass, VALUE (*func)(VALUE klass)); + void rb_undef_alloc_func(VALUE klass); + +func has to take the klass as the argument and return a newly +allocated instance. This instance should be as empty as possible, +without any expensive (including external) resources. + +2.1.3 Constant definition + +We have 2 functions to define constants: + + void rb_define_const(VALUE klass, const char *name, VALUE val) + void rb_define_global_const(const char *name, VALUE val) + +The former is to define a constant under specified class/module. The +latter is to define a global constant. + +2.2 Use Ruby features from C + +There are several ways to invoke Ruby's features from C code.
+ +2.2.1 Evaluate Ruby Programs in a String + +The easiest way to use Ruby's functionality from a C program is to +evaluate the string as a Ruby program. This function will do the job: + + VALUE rb_eval_string(const char *str) + +Evaluation is done under the current context, thus current local variables +of the innermost method (which is defined by Ruby) can be accessed. + +Note that the evaluation can raise an exception. There is a safer +function: + + VALUE rb_eval_string_protect(const char *str, int *state) + +It returns nil when an error occurs. And *state is zero if str was +successfully evaluated, or nonzero otherwise. + + +2.2.2 ID or Symbol + +You can invoke methods directly, without parsing the string. First I +need to explain about ID. ID is the integer number to represent +Ruby's identifiers such as variable names. The Ruby data type +corresponding to ID is Symbol. It can be accessed from Ruby in the +form: + + :Identifier +or + :"any kind of string" + +You can get the ID value from a string within C code by using + + rb_intern(const char *name) + +You can retrieve ID from Ruby object (Symbol or String) given as an +argument by using + + rb_to_id(VALUE symbol) + +You can convert C ID to Ruby Symbol by using + + VALUE ID2SYM(ID id) + +and to convert Ruby Symbol object to ID, use + + ID SYM2ID(VALUE symbol) + +2.2.3 Invoke Ruby method from C + +To invoke methods directly, you can use the function below + + VALUE rb_funcall(VALUE recv, ID mid, int argc, ...) + +This function invokes a method on the recv, with the method name +specified by the symbol mid. + +2.2.4 Accessing the variables and constants + +You can access class variables and instance variables using access +functions. Also, global variables can be shared between both +environments. There's no way to access Ruby's local variables.
+ +The functions to access/modify instance variables are below: + + VALUE rb_ivar_get(VALUE obj, ID id) + VALUE rb_ivar_set(VALUE obj, ID id, VALUE val) + +id must be the symbol, which can be retrieved by rb_intern(). + +To access the constants of the class/module: + + VALUE rb_const_get(VALUE obj, ID id) + +See 2.1.3 for defining a new constant. + +3. Information sharing between Ruby and C + +3.1 Ruby constants that can be accessed from C + +The following Ruby constants can be referred to from C. + + Qtrue + Qfalse + +Boolean values. Qfalse is false in C also (i.e. 0). + + Qnil + +Ruby nil in C scope. + +3.2 Global variables shared between C and Ruby + +Information can be shared between the two environments using shared global +variables. To define them, you can use functions listed below: + + void rb_define_variable(const char *name, VALUE *var) + +This function defines the variable which is shared by both environments. +The value of the global variable pointed to by `var' can be accessed +through Ruby's global variable named `name'. + +You can define read-only (from Ruby, of course) variables using the +function below. + + void rb_define_readonly_variable(const char *name, VALUE *var) + +You can define hooked variables. The accessor functions (getter and +setter) are called on access to the hooked variables. + + void rb_define_hooked_variable(const char *name, VALUE *var, + VALUE (*getter)(), void (*setter)()) + +If you need to supply either setter or getter, just supply 0 for the +hook you don't need. If both hooks are 0, rb_define_hooked_variable() +works just like rb_define_variable(). + +The prototypes of the getter and setter functions are as follows: + + VALUE (*getter)(ID id, VALUE *var); + void (*setter)(VALUE val, ID id, VALUE *var); + + +Also you can define a Ruby global variable without a corresponding C +variable. The value of the variable will be set/get only by hooks.
+ + void rb_define_virtual_variable(const char *name, + VALUE (*getter)(), void (*setter)()) + +The prototypes of the getter and setter functions are as follows: + + VALUE (*getter)(ID id); + void (*setter)(VALUE val, ID id); + + +3.3 Encapsulate C data into a Ruby object + +To wrap and objectify a C pointer as a Ruby object (so called +DATA), use Data_Wrap_Struct(). + + Data_Wrap_Struct(klass, mark, free, ptr) + +Data_Wrap_Struct() returns a created DATA object. The klass argument +is the class for the DATA object. The mark argument is the function +to mark Ruby objects pointed by this data. The free argument is the +function to free the pointer allocation. If this is -1, the pointer +will be just freed. The functions mark and free will be called from +garbage collector. + +These mark / free functions are invoked during GC execution. No +object allocations are allowed during it, so do not allocate ruby +objects inside them. + +You can allocate and wrap the structure in one step. + + Data_Make_Struct(klass, type, mark, free, sval) + +This macro returns an allocated Data object, wrapping the pointer to +the structure, which is also allocated. This macro works like: + + (sval = ALLOC(type), Data_Wrap_Struct(klass, mark, free, sval)) + +Arguments klass, mark, and free work like their counterparts in +Data_Wrap_Struct(). A pointer to the allocated structure will be +assigned to sval, which should be a pointer of the type specified. + +To retrieve the C pointer from the Data object, use the macro +Data_Get_Struct(). + + Data_Get_Struct(obj, type, sval) + +A pointer to the structure will be assigned to the variable sval. + +See the example below for details. + +4. Example - Creating dbm extension + +OK, here's the example of making an extension library. This is the +extension to access DBMs. The full source is included in the ext/ +directory in the Ruby's source tree. 
+ +(1) make the directory + + % mkdir ext/dbm + +Make a directory for the extension library under the ext directory. + +(2) design the library + +You need to design the library features, before making it. + +(3) write C code. + +You need to write C code for your extension library. If your library +has only one source file, choosing ``LIBRARY.c'' as a file name is +preferred. On the other hand, in case your library has multiple source +files, avoid choosing ``LIBRARY.c'' for a file name. It may conflict +with an intermediate file ``LIBRARY.o'' on some platforms. + +Ruby will execute the initializing function named ``Init_LIBRARY'' in +the library. For example, ``Init_dbm()'' will be executed when loading +the library. + +Here's the example of an initializing function. + +-- +void +Init_dbm(void) +{ + /* define DBM class */ + cDBM = rb_define_class("DBM", rb_cObject); + /* DBM includes Enumerable module */ + rb_include_module(cDBM, rb_mEnumerable); + + /* DBM has class method open(): arguments are received as C array */ + rb_define_singleton_method(cDBM, "open", fdbm_s_open, -1); + + /* DBM instance method close(): no args */ + rb_define_method(cDBM, "close", fdbm_close, 0); + /* DBM instance method []: 1 argument */ + rb_define_method(cDBM, "[]", fdbm_fetch, 1); + : + + /* ID for an instance variable to store DBM data */ + id_dbm = rb_intern("dbm"); +} +-- + +The dbm extension wraps the dbm struct in the C environment using +Data_Make_Struct. + +-- +struct dbmdata { + int di_size; + DBM *di_dbm; +}; + + +obj = Data_Make_Struct(klass, struct dbmdata, 0, free_dbm, dbmp); +-- + +This code wraps the dbmdata structure into a Ruby object. We avoid +wrapping DBM* directly, because we want to cache size information.
+ +To retrieve the dbmdata structure from a Ruby object, we define the +following macro: + +-- +#define GetDBM(obj, dbmp) {\ + Data_Get_Struct(obj, struct dbmdata, dbmp);\ + if (dbmp->di_dbm == 0) closed_dbm();\ +} +-- + +This sort of complicated macro does the retrieving and close checking for +the DBM. + +There are three ways to receive method arguments. First, +methods with a fixed number of arguments receive arguments like this: + +-- +static VALUE +fdbm_delete(VALUE obj, VALUE keystr) +{ + : +} +-- + +The first argument of the C function is the self, the rest are the +arguments to the method. + +Second, methods with an arbitrary number of arguments receive +arguments like this: + +-- +static VALUE +fdbm_s_open(int argc, VALUE *argv, VALUE klass) +{ + : + if (rb_scan_args(argc, argv, "11", &file, &vmode) == 1) { + mode = 0666; /* default value */ + } + : +} +-- + +The first argument is the number of method arguments, the second +argument is the C array of the method arguments, and the third +argument is the receiver of the method. + +You can use the function rb_scan_args() to check and retrieve the +arguments. For example, "11" means that the method requires at least one +argument, and at most receives two arguments. + +Third, methods with an arbitrary number of arguments can also receive arguments +as a Ruby array, like this: + +-- +static VALUE +fdbm_indexes(VALUE obj, VALUE args) +{ + : +} +-- + +The first argument is the receiver, the second one is the Ruby array +which contains the arguments to the method. + +** Notice + +GC should know about global variables which refer to Ruby's objects, but +are not exported to the Ruby world. You need to protect them by + + void rb_global_variable(VALUE *var) + +(4) prepare extconf.rb + +If the file named extconf.rb exists, it will be executed to generate +Makefile. + +extconf.rb is the file for checking compilation conditions etc. You +need to put + + require 'mkmf' + +at the top of the file.
You can use the functions below to check +various conditions. + + have_library(lib, func): check whether library containing function exists. + have_func(func, header): check whether function exists + have_header(header): check whether header file exists + create_makefile(target): generate Makefile + +The values of the variables below will affect the Makefile. + + $CFLAGS: included in CFLAGS make variable (such as -O) + $CPPFLAGS: included in CPPFLAGS make variable (such as -I, -D) + $LDFLAGS: included in LDFLAGS make variable (such as -L) + $objs: list of object file names + +Normally, the object files list is automatically generated by searching +source files, but you must define them explicitly if any sources will +be generated while building. + +If a compilation condition is not fulfilled, you should not call +``create_makefile''. The Makefile will not be generated and compilation will +not be done. + +(5) prepare depend (optional) + +If the file named depend exists, Makefile will include that file to +check dependencies. You can make this file by invoking + + % gcc -MM *.c > depend + +It's harmless. Prepare it. + +(6) generate Makefile + +Try generating the Makefile by: + + ruby extconf.rb + +If the library should be installed under vendor_ruby directory +instead of site_ruby directory, use --vendor option as follows. + + ruby extconf.rb --vendor + +You don't need this step if you put the extension library under the ext +directory of the ruby source tree. In that case, compilation of the +interpreter will do this step for you. + +(7) make + +Type + + make + +to compile your extension. You don't need this step either if you have +put the extension library under the ext directory of the ruby source tree. + +(8) debug + +You may need to debug the extension. Extensions can be linked +statically by adding the directory name in the ext/Setup file so that +you can inspect the extension with the debugger.
+ +(9) done, now you have the extension library + +You can do anything you want with your library. The author of Ruby +will not claim any restrictions on your code depending on the Ruby API. +Feel free to use, modify, distribute or sell your program. + +Appendix A. Ruby source files overview + +ruby language core + + class.c : classes and modules + error.c : exception classes and exception mechanism + gc.c : memory management + load.c : library loading + object.c : objects + variable.c : variables and constants + +ruby syntax parser + parse.y + -> parse.c : automatically generated + keywords : reserved keywords + -> lex.c : automatically generated + +ruby evaluator (a.k.a. YARV) + blockinlining.c + compile.c + eval.c + eval_error.c + eval_jump.c + eval_safe.c + insns.def : definition of VM instructions + iseq.c : implementation of VM::ISeq + thread.c : thread management and context switching + thread_win32.c : thread implementation + thread_pthread.c : ditto + vm.c + vm_dump.c + vm_eval.c + vm_exec.c + vm_insnhelper.c + vm_method.c + + opt_insn_unif.def : instruction unification + opt_operand.def : definitions for optimization + + -> insn*.inc : automatically generated + -> opt*.inc : automatically generated + -> vm.inc : automatically generated + +regular expression engine (oniguruma) + regex.c + regcomp.c + regenc.c + regerror.c + regexec.c + regparse.c + regsyntax.c + +utility functions + + debug.c : debug symbols for C debugger + dln.c : dynamic loading + st.c : general purpose hash table + strftime.c : formatting times + util.c : misc utilities + +ruby interpreter implementation + + dmyext.c + dmydln.c + dmyencoding.c + id.c + inits.c + main.c + ruby.c + version.c + + gem_prelude.rb + prelude.rb + + +class library + + array.c : Array + bignum.c : Bignum + compar.c : Comparable + complex.c : Complex + cont.c : Fiber, Continuation + dir.c : Dir + enum.c : Enumerable + enumerator.c : Enumerator + file.c : File + hash.c : Hash + io.c : IO + marshal.c : Marshal +
math.c : Math + numeric.c : Numeric, Integer, Fixnum, Float + pack.c : Array#pack, String#unpack + proc.c : Binding, Proc + process.c : Process + random.c : random number + range.c : Range + rational.c : Rational + re.c : Regexp, MatchData + signal.c : Signal + sprintf.c : + string.c : String + struct.c : Struct + time.c : Time + + defs/known_errors.def : Errno::* exception classes + -> known_errors.inc : automatically generated + +multilingualization + encoding.c : Encoding + transcode.c : Encoding::Converter + enc/*.c : encoding classes + enc/trans/* : codepoint mapping tables + +goruby interpreter implementation + + goruby.c + golf_prelude.rb : goruby specific libraries. + -> golf_prelude.c : automatically generated + + +Appendix B. Ruby extension API reference + +** Types + + VALUE + +The type for the Ruby object. Actual structures are defined in ruby.h, +such as struct RString, etc. To refer to the values in structures, use +casting macros like RSTRING(obj). + +** Variables and constants + + Qnil + +const: nil object + + Qtrue + +const: true object (default true value) + + Qfalse + +const: false object + +** C pointer wrapping + + Data_Wrap_Struct(VALUE klass, void (*mark)(), void (*free)(), void *sval) + +Wrap a C pointer into a Ruby object. If object has references to other +Ruby objects, they should be marked by using the mark function during +the GC process. Otherwise, mark should be 0. When this object is no +longer referred to from anywhere, the pointer will be discarded by the free +function. + + Data_Make_Struct(klass, type, mark, free, sval) + +This macro allocates memory using malloc(), assigns it to the variable +sval, and returns the DATA encapsulating the pointer to memory region. + + Data_Get_Struct(data, type, sval) + +This macro retrieves the pointer value from DATA, and assigns it to +the variable sval.
+ +** Checking data types + +TYPE(value) +FIXNUM_P(value) +NIL_P(value) +void Check_Type(VALUE value, int type) +void Check_SafeStr(VALUE value) + +** Data type conversion + +FIX2INT(value) +FIX2LONG(value) +INT2FIX(i) +NUM2INT(value) +NUM2LONG(value) +INT2NUM(i) +NUM2DBL(value) +rb_float_new(f) +StringValue(value) +StringValuePtr(value) +StringValueCStr(value) +rb_str_new2(s) + +** Defining class/module + + VALUE rb_define_class(const char *name, VALUE super) + +Defines a new Ruby class as a subclass of super. + + VALUE rb_define_class_under(VALUE module, const char *name, VALUE super) + +Creates a new Ruby class as a subclass of super, under the module's +namespace. + + VALUE rb_define_module(const char *name) + +Defines a new Ruby module. + + VALUE rb_define_module_under(VALUE module, const char *name) + +Defines a new Ruby module under the module's namespace. + + void rb_include_module(VALUE klass, VALUE module) + +Includes module into class. If class already includes it, it is just +ignored. + + void rb_extend_object(VALUE object, VALUE module) + +Extend the object with the module's attributes. + +** Defining Global Variables + + void rb_define_variable(const char *name, VALUE *var) + +Defines a global variable which is shared between C and Ruby. If name +contains a character which is not allowed to be part of the symbol, +it can't be seen from Ruby programs. + + void rb_define_readonly_variable(const char *name, VALUE *var) + +Defines a read-only global variable. Works just like +rb_define_variable(), except the defined variable is read-only. + + void rb_define_virtual_variable(const char *name, + VALUE (*getter)(), void (*setter)()) + +Defines a virtual variable, whose behavior is defined by a pair of C +functions. The getter function is called when the variable is +referenced. The setter function is called when the variable is set to a +value.
The prototypes for getter/setter functions are: + + VALUE getter(ID id) + void setter(VALUE val, ID id) + +The getter function must return the value for the access. + + void rb_define_hooked_variable(const char *name, VALUE *var, + VALUE (*getter)(), void (*setter)()) + +Defines a hooked variable. It's a virtual variable with a C variable. +The getter is called as + + VALUE getter(ID id, VALUE *var) + +returning a new value. The setter is called as + + void setter(VALUE val, ID id, VALUE *var) + +GC requires C global variables which hold Ruby values to be marked. + + void rb_global_variable(VALUE *var) + +Tells GC to protect these variables. + +** Constant Definition + + void rb_define_const(VALUE klass, const char *name, VALUE val) + +Defines a new constant under the class/module. + + void rb_define_global_const(const char *name, VALUE val) + +Defines a global constant. This is just the same as + + rb_define_const(rb_cObject, name, val) + +** Method Definition + + rb_define_method(VALUE klass, const char *name, VALUE (*func)(), int argc) + +Defines a method for the class. func is the function pointer. argc +is the number of arguments. If argc is -1, the function will receive +3 arguments: argc, argv, and self. If argc is -2, the function will +receive 2 arguments, self and args, where args is a Ruby array of +the method arguments. + + rb_define_private_method(VALUE klass, const char *name, VALUE (*func)(), int argc) + +Defines a private method for the class. Arguments are the same as +rb_define_method(). + + rb_define_singleton_method(VALUE klass, const char *name, VALUE (*func)(), int argc) + +Defines a singleton method. Arguments are the same as rb_define_method(). + + rb_scan_args(int argc, VALUE *argv, const char *fmt, ...) + +Retrieve argument from argc, argv. The fmt is the format string for +the arguments, such as "12" for 1 non-optional argument, 2 optional +arguments.
If `*' appears at the end of fmt, it means the rest of +the arguments are assigned to the corresponding variable, packed in +an array. + +** Invoking Ruby method + + VALUE rb_funcall(VALUE recv, ID mid, int narg, ...) + +Invokes a method. To retrieve mid from a method name, use rb_intern(). + + VALUE rb_funcall2(VALUE recv, ID mid, int argc, VALUE *argv) + +Invokes a method, passing arguments by an array of values. + + VALUE rb_eval_string(const char *str) + +Compiles and executes the string as a Ruby program. + + ID rb_intern(const char *name) + +Returns the ID corresponding to the name. + + char *rb_id2name(ID id) + +Returns the name corresponding to the ID. + + char *rb_class2name(VALUE klass) + +Returns the name of the class. + + int rb_respond_to(VALUE object, ID id) + +Returns true if the object responds to the message specified by id. + +** Instance Variables + + VALUE rb_iv_get(VALUE obj, const char *name) + +Retrieve the value of the instance variable. If the name is not +prefixed by `@', that variable shall be inaccessible from Ruby. + + VALUE rb_iv_set(VALUE obj, const char *name, VALUE val) + +Sets the value of the instance variable. + +** Control Structure + + VALUE rb_block_call(VALUE recv, ID mid, int argc, VALUE * argv, + VALUE (*func) (ANYARGS), VALUE data2) + +Calls a method on the recv, with the method name specified by the +symbol mid, supplying func as the block. func will receive the +value from yield as the first argument, data2 as the second, and +argc/argv as the third/fourth arguments. + + [OBSOLETE] VALUE rb_iterate(VALUE (*func1)(), void *arg1, VALUE (*func2)(), void *arg2) + +Calls the function func1, supplying func2 as the block. func1 will be +called with the argument arg1. func2 receives the value from yield as +the first argument, arg2 as the second argument. + +When rb_iterate is used in 1.9, func1 has to call some Ruby-level method. +This function is obsolete since 1.9; use rb_block_call instead.
+ + VALUE rb_yield(VALUE val) + +Evaluates the block with value val. + + VALUE rb_rescue(VALUE (*func1)(), void *arg1, VALUE (*func2)(), void *arg2) + +Calls the function func1, with arg1 as the argument. If an exception +occurs during func1, it calls func2 with arg2 as the argument. The +return value of rb_rescue() is the return value from func1 if no +exception occurs, from func2 otherwise. + + VALUE rb_ensure(VALUE (*func1)(), void *arg1, void (*func2)(), void *arg2) + +Calls the function func1 with arg1 as the argument, then calls func2 +with arg2 if execution terminated. The return value from +rb_ensure() is that of func1. + +** Exceptions and Errors + + void rb_warn(const char *fmt, ...) + +Prints a warning message according to a printf-like format. + + void rb_warning(const char *fmt, ...) + +Prints a warning message according to a printf-like format, if +$VERBOSE is true. + +void rb_raise(rb_eRuntimeError, const char *fmt, ...) + +Raises RuntimeError. The fmt is a format string just like printf(). + + void rb_raise(VALUE exception, const char *fmt, ...) + +Raises a class exception. The fmt is a format string just like printf(). + + void rb_fatal(const char *fmt, ...) + +Raises a fatal error, terminates the interpreter. No exception handling +will be done for fatal errors, but ensure blocks will be executed. + + void rb_bug(const char *fmt, ...) + +Terminates the interpreter immediately. This function should be +called under the situation caused by the bug in the interpreter. No +exception handling nor ensure execution will be done. + +** Initialize and Start the Interpreter + +The embedding API functions are below (not needed for extension libraries): + + void ruby_init() + +Initializes the interpreter. + + void ruby_options(int argc, char **argv) + +Process command line arguments for the interpreter. + + void ruby_run() + +Starts execution of the interpreter. + + void ruby_script(char *name) + +Specifies the name of the script ($0). 
+ +** Hooks for the Interpreter Events + + void rb_add_event_hook(rb_event_hook_func_t func, rb_event_t events) + +Adds a hook function for the specified interpreter events. +events should be Or'ed value of: + + RUBY_EVENT_LINE + RUBY_EVENT_CLASS + RUBY_EVENT_END + RUBY_EVENT_CALL + RUBY_EVENT_RETURN + RUBY_EVENT_C_CALL + RUBY_EVENT_C_RETURN + RUBY_EVENT_RAISE + RUBY_EVENT_ALL + +The definition of rb_event_hook_func_t is below: + + typedef void (*rb_event_hook_func_t)(rb_event_t event, NODE *node, + VALUE self, ID id, VALUE klass) + + int rb_remove_event_hook(rb_event_hook_func_t func) + +Removes the specified hook function. + +Appendix C. Functions Available in extconf.rb + +These functions are available in extconf.rb: + + have_macro(macro, headers) + +Checks whether macro is defined with header. Returns true if the macro +is defined. + + have_library(lib, func) + +Checks whether the library exists, containing the specified function. +Returns true if the library exists. + + find_library(lib, func, path...) + +Checks whether a library which contains the specified function exists in +path. Returns true if the library exists. + + have_func(func, header) + +Checks whether func exists with header. Returns true if the function +exists. To check functions in an additional library, you need to +check that library first using have_library(). + + have_var(var, header) + +Checks whether var exists with header. Returns true if the variable +exists. To check variables in an additional library, you need to +check that library first using have_library(). + + have_header(header) + +Checks whether header exists. Returns true if the header file exists. + + find_header(header, path...) + +Checks whether header exists in path. Returns true if the header file +exists. + + have_struct_member(type, member, header) + +Checks whether type has member with header. Returns true if the type +is defined and has the member. 
+ + have_type(type, header, opt) + +Checks whether type is defined with header. Returns true if the type +is defined. + + check_sizeof(type, header) + +Checks the size of type in char with header. Returns the size if the +type is defined, otherwise nil. + + create_makefile(target) + +Generates the Makefile for the extension library. If you don't invoke +this method, the compilation will not be done. + + find_executable(bin, path) + +Finds command in path, which is a File::PATH_SEPARATOR-separated list of +directories. If path is nil or omitted, environment variable PATH +will be used. Returns the path name of the command if it is found, +otherwise nil. + + with_config(withval[, default=nil]) + +Parses the command line options and returns the value specified by +--with-<withval>. + + enable_config(config, *defaults) + disable_config(config, *defaults) + +Parses the command line options for boolean. Returns true if +--enable-<config> is given, or false if --disable-<config> is given. +Otherwise, yields defaults to the given block and returns the result +if it is called with a block, or returns defaults. + + dir_config(target[, default_dir]) + dir_config(target[, default_include, default_lib]) + +Parses the command line options and adds the directories specified by +--with-<target>-dir, --with-<target>-include, and/or --with-<target>-lib +to $CFLAGS and/or $LDFLAGS. --with-<target>-dir=/path is equivalent to +--with-<target>-include=/path/include --with-<target>-lib=/path/lib. +Returns an array of the added directories ([include_dir, lib_dir]). + + pkg_config(pkg) + +Obtains the information for pkg by pkg-config command. The actual +command name can be overridden by --with-pkg-config command line +option.
+ +/* + * Local variables: + * fill-column: 70 + * end: + */ diff --git a/README.EXT.ja b/README.EXT.ja new file mode 100644 index 0000000..810167d --- /dev/null +++ b/README.EXT.ja @@ -0,0 +1,1458 @@ +.\" README.EXT.ja - -*- Text -*- created at: Mon Aug 7 16:45:54 JST 1995 + +Ruby + +1 + +C +int +Ruby +CRuby + + +RubyVALUECVALUE + +()Ruby + + +VALUEC + + (1) VALUE + (2) VALUEC + +(1) +core dump + +1.1 + +Ruby + + T_NIL nil + T_OBJECT + T_CLASS + T_MODULE + T_FLOAT + T_STRING + T_REGEXP + T_ARRAY + T_HASH + T_STRUCT (Ruby) + T_BIGNUM + T_FIXNUM Fixnum(31bit63bit) + T_COMPLEX + T_RATIONAL + T_FILE + T_TRUE + T_FALSE + T_DATA + T_SYMBOL + + + + T_ICLASS + T_MATCH + T_UNDEF + T_NODE + T_ZOMBIE + +C + +1.2 VALUE + +ruby.hTYPE()VALUE +TYPE()T_XXXX +VALUE +TYPE() + + switch (TYPE(obj)) { + case T_FIXNUM: + /* FIXNUM */ + break; + case T_STRING: + /* */ + break; + case T_ARRAY: + /* */ + break; + default: + /* */ + rb_raise(rb_eTypeError, "not valid value"); + break; + } + + + + + void Check_Type(VALUE value, int type) + +valuetype +VALUE + + +FIXNUMNIL + + FIXNUM_P(obj) + NIL_P(obj) + +1.3 VALUEC + +T_NILT_FALSET_TRUE +nilfalsetrue + + +T_FIXNUM31bit63bit +long32bit +31bitlong64bit63bit +. 
FIXNUM C +FIX2INT()FIX2LONG() +FIXNUM + +FIX2LONG()FIX2INT() +int +FIXNUMRuby +NUM2INT()NUM2LONG() + +() +doubleNUM2DBL() + +char* version 1.6 STR2CSTR() + to_str() + GC version 1.7 +obsolete StringValue() StringValuePtr() +StringValue(var) var String + var var.to_str() +StringValuePtr(var) var +String var char* +var +var lvalue +StringValuePtr() StringValueCStr() +StringValueCStr(var) var String + var char* + nul nul + ArgumentError +StringValuePtr() nul + nul + +C +VALUE() + + +struct RXxxxxruby.h +struct RString + + +ruby.hRXXXXX()( +)(: RSTRING()) + + +strRSTRING_LEN(str)str +char*RSTRING_PTR(str) +RARRAY_LEN(ary)RARRAY_PTR(ary) + + +Ruby + + + + +1.4 CVALUE + +VALUE + + * FIXNUM + + 1bitLSB + + * + + VALUE + +LSBVALUEFIXNUM +(LSB +) + +FIXNUMRubyVALUE +VALUE +VALUE +Ruby(ruby.hstruct RXxxx +) + +FIXNUMC +VALUE + + + INT2FIX() 31bit63bit + + INT2NUM() VALUE + +INT2NUM()FIXNUMBignum +() + +1.5 Ruby + +Ruby +Ruby +Ruby + +/ +() + + + + rb_str_new(const char *ptr, long len) + + Ruby + + rb_str_new2(const char *ptr) + rb_str_new_cstr(const char *ptr) + + CRuby + rb_str_new(ptr, strlen(ptr)) + + rb_tainted_str_new(const char *ptr, long len) + rb_tainted_str_new_cstr(const char *ptr) + + Ruby + + + + rb_tainted_str_new2(const char *ptr) + + CRuby + + rb_sprintf(const char *format, ...) + rb_vsprintf(const char *format, va_list ap) + + Cformatprintf(3) + Ruby + + rb_str_cat(VALUE str, const char *ptr, long len) + + Rubystrlenptr + + rb_str_cat2(VALUE str, const char* ptr) + + RubystrCptr + rb_str_cat(str, ptr, strlen(ptr)) + + rb_str_catf(VALUE str, const char* format, ...) + rb_str_vcatf(VALUE str, const char* format, va_list ap) + + Cformatprintf(3) + Rubystr + rb_str_cat2(str, rb_sprintf(format, ...)) + rb_str_cat2(str, rb_vsprintf(format, ap)) + + rb_enc_str_new(const char *ptr, long len, rb_encoding *enc) + + Ruby. + + rb_usascii_str_new(const char *ptr, long len) + rb_usascii_str_new_cstr(const char *ptr) + + US-ASCIIRuby. 
+ + + + + rb_ary_new() + + 0 + + rb_ary_new2(long len) + + 0len + + + rb_ary_new3(long n, ...) + + n + + rb_ary_new4(long n, VALUE *elts) + + n + + rb_ary_to_ary(VALUE obj) + + . + Object#to_ary. + + . + ary. + . + + rb_ary_aref(argc, VALUE *argv, VALUE ary) + + Array#[]. + + rb_ary_entry(VALUE ary, long offset) + + ary[offset] + + rb_ary_subseq(VALUE ary, long beg, long len) + + ary[beg, len] + + rb_ary_push(VALUE ary, VALUE val) + rb_ary_pop(VALUE ary) + rb_ary_shift(VALUE ary) + rb_ary_unshift(VALUE ary, VALUE val) + +2Ruby + +RubyCRubyC + +Ruby + + +2.1 Ruby + +RubyRuby +Ruby + + + * + * + * + + + +2.1.1 / + + + + VALUE rb_define_class(const char *name, VALUE super) + VALUE rb_define_module(const char *name) + + + + + + + + + VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super) + VALUE rb_define_module_under(VALUE outer, const char *name) + +2.1.2 / + + + + void rb_define_method(VALUE klass, const char *name, + VALUE (*func)(), int argc) + + void rb_define_singleton_method(VALUE object, const char *name, + VALUE (*func)(), int argc) + + + +RubySmalltalk + + + + argcC( +)argc0 +16( +)self +1 + +argc +argc-1argc-2 +Ruby + +. +ID. ID2.2.2. + + void rb_define_method_id(VALUE klass, ID name, + VALUE (*func)(ANYARGS), int argc) + +private/protected. 
+ + void rb_define_private_method(VALUE klass, const char *name, + VALUE (*func)(), int argc) + void rb_define_protected_method(VALUE klass, const char *name, + VALUE (*func)(), int argc) + +private + + + rb_define_module + +privateMath +sqrt() + + Math.sqrt(4) + + + + include Math + sqrt(4) + + + + + void rb_define_module_function(VALUE module, const char *name, + VALUE (*func)(), int argc) + +(Kernelprivate method) + + + void rb_define_global_function(const char *name, VALUE (*func)(), int argc) + + + + + void rb_define_alias(VALUE module, const char* new, const char* old); + + + + void rb_define_attr(VALUE klass, const char *name, int read, int write) + +allocate + + + void rb_define_alloc_func(VALUE klass, VALUE (*func)(VALUE klass)); + void rb_undef_alloc_func(VALUE klass); + +func + + + + +2.1.3 + + + + + void rb_define_const(VALUE klass, const char *name, VALUE val) + void rb_define_global_const(const char *name, VALUE val) + +/ + + +2.2 RubyC + +1.5 Ruby +Ruby + + +# +# + +Ruby + +2.2.1 Rubyeval + +CRuby +Ruby + + VALUE rb_eval_string(const char *str) + + + + +. +. + + VALUE rb_eval_string_protect(const char *str, int *state) + +nil +*state + + +2.2.2 ID + +CRuby +Ruby +ID + +IDRuby + + : + + :"" + +C + + rb_intern(const char *name) + +Ruby( +)ID + + rb_to_id(VALUE symbol) + +2.2.3 CRuby + +CRuby + + + VALUE rb_funcall(VALUE recv, ID mid, int argc, ...) 
+ +recvmid + + + VALUE rb_funcall2(VALUE recv, ID mid, int argc, VALUE *argv) + VALUE rb_apply(VALUE recv, ID mid, VALUE args) + +applyRuby + +2.2.4 // + +C +C + + + + + + VALUE rb_ivar_get(VALUE obj, ID id) + VALUE rb_ivar_set(VALUE obj, ID id, VALUE val) + +idrb_intern() + + + + VALUE rb_const_get(VALUE obj, ID id) + +2.1.3 + + +3RubyC + +CRuby + +3.1 CRuby + +RubyC + + Qtrue + Qfalse + + QfalseC(0) + + Qnil + + Cnil + +3.2 CRuby + +CRuby + +rb_define_variable() + + void rb_define_variable(const char *name, VALUE *var) + +RubyC +`$' +Ruby + +Rubyread only + + + void rb_define_readonly_variable(const char *name, VALUE *var) + +hookhook +hook +hook + + void rb_define_hooked_variable(const char *name, VALUE *var, + VALUE (*getter)(), void (*setter)()) + +Chook +getter +setterhookgetter +setter0 +# gettersetter0rb_define_variable() + +gettersetter + + VALUE (*getter)(ID id, VALUE *var); + void (*setter)(VALUE val, ID id, VALUE *var); + + +CRuby +. +. + + void rb_define_virtual_variable(const char *name, + VALUE (*getter)(), void (*setter)()) + +Ruby +gettersetter + +gettersetter + + (*getter)(ID id); + (*setter)(VALUE val, ID id); + +3.3 CRuby + +C()Ruby +Data +RubyC()Ruby + + +DataRuby + + + Data_Wrap_Struct(klass, mark, free, ptr) + +Data + +klassDataptr +CmarkRuby + +0 + +# + +free +-1 + + +markfreeGC. +, GCRuby +. , markfreeRuby +. 
+ +CData + + + Data_Make_Struct(klass, type, mark, free, sval) + +Data + +klass, mark, freeData_Wrap_Structtype +Csval + (type*) + +Data + + + Data_Get_Struct(obj, type, sval) + +Csval + +Data + + +4 - dbm + + +Rubyextdbm + + +(1) + + % mkdir ext/dbm + +Ruby 1.1 +Ruby +Rubyext + + + +(2) + + + + + + +(3) C + +CC +.cC +.c + +.o + + +RubyInit_ +dbmInit_dbm + +dbm.c + +-- +void +Init_dbm(void) +{ + /* DBM */ + cDBM = rb_define_class("DBM", rb_cObject); + /* DBMEnumerate */ + rb_include_module(cDBM, rb_mEnumerable); + + /* DBMopen(): C */ + rb_define_singleton_method(cDBM, "open", fdbm_s_open, -1); + + /* DBMclose(): */ + rb_define_method(cDBM, "close", fdbm_close, 0); + /* DBM[]: 1 */ + rb_define_method(cDBM, "[]", fdbm_fetch, 1); + : + + /* DBMID */ + id_dbm = rb_intern("dbm"); +} +-- + +DBMdbm +CdbmRuby + + +dbm.cData_Make_Struct + +-- +struct dbmdata { + int di_size; + DBM *di_dbm; +}; + + +obj = Data_Make_Struct(klass, struct dbmdata, 0, free_dbm, dbmp); +-- + +dbmstructData +DBM*close() + + +Datadbmstruct + + +-- +#define GetDBM(obj, dbmp) {\ + Data_Get_Struct(obj, struct dbmdata, dbmp);\ + if (dbmp->di_dbm == 0) closed_dbm();\ +} +-- + +dbmdata +close + + +DBM3 + +deletedelete +fdbm_delete() + +-- +static VALUE +fdbm_delete(VALUE obj, VALUE keystr) +{ + : +} +-- + +1self2 + + +CRuby +dbmC +DBMopen() +fdbm_s_open() + +-- +static VALUE +fdbm_s_open(int argc, VALUE *argv, VALUE klass) +{ + : + if (rb_scan_args(argc, argv, "11", &file, &vmode) == 1) { + mode = 0666; /* default value */ + } + : +} +-- + +12 +self3 + + +open() +rb_scan_args()3 +4 +12 +3 +"*"23 +dbm.c"11"1 +2 +nil(CQnil) + +Rubyindexes + + +-- +static VALUE +fdbm_indexes(VALUE obj, VALUE args) +{ + : +} +-- + +1self2Ruby + +** + +RubyRuby +CRuby +GC + + void rb_global_variable(VALUE *var) + +(4) extconf.rb + +Makefileextconf.rb +extconf.rb + + + require 'mkmf' + +extconf.rbextconf.rbRuby + + + have_library(lib, func): + have_func(func, header): + have_header(header): + create_makefile(target): Makefile 
+ + + + $CFLAGS: (-O) + $CPPFLAGS: (-I-D) + $LDFLAGS: (-L) + $objs: + + +make + + + +create_makefileMakefile + + +(5) depend + +depend +Makefile + + % gcc -MM *.c > depend + + + +(6) Makefile + +Makefile + + ruby extconf.rb + +extconf.rb require 'mkmf' + + + ruby -r mkmf extconf.rb + + + +site_ruby +vendor_ruby + --vendor + + ruby extconf.rb --vendor + +extRubymake +Makefile + +(7) make + +make + make install + +extRuby +makeMakefilemake +Ruby +extconf.rbMakefile +Rubymake + +make installRuby +Ruby +Ruby + lib + .rb + + +(8) + +ext/Setup + + + +(9) + + +Ruby + + +Appendix A. Ruby + +Ruby + + + + +Ruby + + class.c : + error.c : + gc.c : + load.c : + object.c : + variable.c : + +Ruby + parse.y : + -> parse.c : + keywords : + -> lex.c : + +Ruby(YARV) + blockinlining.c + compile.c + eval.c + eval_error.c + eval_jump.c + eval_safe.c + insns.def : + iseq.c : VM::ISeq + thread.c : + thread_win32.c : + thread_pthread.c : + vm.c + vm_dump.c + vm_eval.c + vm_exec.c + vm_insnhelper.c + vm_method.c + + opt_insns_unif.def : + opt_operand.def : + + -> insn*.inc : + -> opt*.inc : + -> vm.inc : + + () + regex.c + regcomp.c + regenc.c + regerror.c + regexec.c + regparse.c + regsyntax.c + + + + debug.c : C + dln.c : + st.c : + strftime.c : + util.c : + +Ruby + + dmyext.c + dmydln.c + dmyencoding.c + id.c + inits.c + main.c + ruby.c + version.c + + gem_prelude.rb + prelude.rb + + + + array.c : Array + bignum.c : Bignum + compar.c : Comparable + complex.c : Complex + cont.c : Fiber, Continuation + dir.c : Dir + enum.c : Enumerable + enumerator.c : Enumerator + file.c : File + hash.c : Hash + io.c : IO + marshal.c : Marshal + math.c : Math + numeric.c : Numeric, Integer, Fixnum, Float + pack.c : Array#pack, String#unpack + proc.c : Binding, Proc + process.c : Process + random.c : + range.c : Range + rational.c : Rational + re.c : Regexp, MatchData + signal.c : Signal + sprintf.c : + string.c : String + struct.c : Struct + time.c : Time + + defs/knwon_errors.def : Errno::* + -> 
known_errors.inc : + + + encoding.c : Encoding + transcode.c : Encoding::Converter + enc/*.c : + enc/trans/* : + +goruby + + goruby.c + golf_prelude.rb : goruby + -> golf_prelude.c : + + +Appendix B. + +CRubyAPI + +** + +VALUE + + Ruby + Cruby.hR + VALUER + + +** + +Qnil + + : nil + +Qtrue + + : true() + +Qfalse + + : false + +** C + +Data_Wrap_Struct(VALUE klass, void (*mark)(), void (*free)(), void *sval) + + CRuby + Rubyfree + Ruby + mark + + +Data_Make_Struct(klass, type, mark, free, sval) + + typemallocsval + + +Data_Get_Struct(data, type, sval) + + datatypesval + +** + +TYPE(value) +FIXNUM_P(value) +NIL_P(value) +void Check_Type(VALUE value, int type) +void Check_SafeStr(VALUE value) + +** + +FIX2INT(value) +FIX2LONG(value) +INT2FIX(i) +NUM2INT(value) +NUM2LONG(value) +INT2NUM(i) +NUM2DBL(value) +rb_float_new(f) +StringValue(value) +StringValuePtr(value) +StringValueCStr(value) +rb_str_new2(s) + +** / + +VALUE rb_define_class(const char *name, VALUE super) + + superRuby + +VALUE rb_define_class_under(VALUE module, const char *name, VALUE super) + + superRubymodule + + +VALUE rb_define_module(const char *name) + + Ruby + +VALUE rb_define_module_under(VALUE module, const char *name) + + Rubymodule + +void rb_include_module(VALUE klass, VALUE module) + + classmodule + () + +void rb_extend_object(VALUE object, VALUE module) + + () + +** + +void rb_define_variable(const char *name, VALUE *var) + + RubyC`$' + nameRuby + (` ')Ruby + + +void rb_define_readonly_variable(const char *name, VALUE *var) + + RubyCread only + read onlyrb_define_variable() + +void rb_define_virtual_variable(const char *name, + VALUE (*getter)(), void (*setter)()) + + Ruby + gettersetter + + +void rb_define_hooked_variable(const char *name, VALUE *var, + VALUE (*getter)(), void (*setter)()) + + hook + getter + settergettersetter0hook + + +void rb_global_variable(VALUE *var) + + GCRuby, Ruby + + +** + +void rb_define_const(VALUE klass, const char *name, VALUE val) + + + +void 
rb_define_global_const(const char *name, VALUE val) + + + + rb_define_const(rb_cObject, name, val) + + + +** + +rb_define_method(VALUE klass, const char *name, VALUE (*func)(), int argc) + + argcselfargc-1, + (self)1, 2 + (3self)argc-2, + 1self, 2args(argsRuby) + + +rb_define_private_method(VALUE klass, const char *name, VALUE (*func)(), int argc) + + privaterb_define_method() + +rb_define_singleton_method(VALUE klass, const char *name, VALUE (*func)(), int argc) + + rb_define_method() + +rb_scan_args(int argc, VALUE *argv, const char *fmt, ...) + + argc, argvfmt, + , , " + *" 2 "*" + 03 + , + Qnil + + +** Ruby + +VALUE rb_funcall(VALUE recv, ID mid, int narg, ...) + + midrb_intern() + + +VALUE rb_funcall2(VALUE recv, ID mid, int argc, VALUE *argv) + + argc, argv + +VALUE rb_eval_string(const char *str) + + Ruby + +ID rb_intern(const char *name) + + ID + +char *rb_id2name(ID id) + + ID() + +char *rb_class2name(VALUE klass) + + () + , + +int rb_respond_to(VALUE obj, ID id) + + objid + +** + +VALUE rb_iv_get(VALUE obj, const char *name) + + obj`@' + Ruby + ( + ) + +VALUE rb_iv_set(VALUE obj, const char *name, VALUE val) + + objval + +** + +VALUE rb_block_call(VALUE obj, ID mid, int argc, VALUE * argv, + VALUE (*func) (ANYARGS), VALUE data2) + + func, obj, argcargv + mid. funcyield + , data2, , argcargv. + +[OBSOLETE] VALUE rb_iterate(VALUE (*func1)(), VALUE arg1, VALUE (*func2)(), VALUE arg2) + + func2, func1 + func1 arg1, func21 + , 2arg2 + + 1.9rb_iterate, func1Ruby + . + 1.9obsolete. rb_block_call. + +VALUE rb_yield(VALUE val) + + val + +VALUE rb_rescue(VALUE (*func1)(), VALUE arg1, VALUE (*func2)(), VALUE arg2) + + func1arg1func1 + func2arg2 + func1, func2 + + +VALUE rb_ensure(VALUE (*func1)(), VALUE arg1, void (*func2)(), VALUE arg2) + + func1arg1, ( + ) func2arg2func1 + () + +** + +void rb_warning(const char *fmt, ...) + + rb_verbose + printf() + +void rb_raise(rb_eRuntimeError, const char *fmt, ...) 
+ + RuntimeErrorprintf() + +void rb_raise(VALUE exception, const char *fmt, ...) + + exceptionfmt + printf() + +void rb_fatal(const char *fmt, ...) + + , + (ensure + ) + +void rb_bug(const char *fmt, ...) + + + + + +** Ruby + +Ruby + + +void ruby_init() + + Ruby + +void ruby_options(int argc, char **argv) + + Ruby + +void ruby_run() + + Ruby + +void ruby_script(char *name) + + Ruby($0) + +** + + void rb_add_event_hook(rb_event_hook_func_t func, rb_event_t events) + + +eventsor: + + RUBY_EVENT_LINE + RUBY_EVENT_CLASS + RUBY_EVENT_END + RUBY_EVENT_CALL + RUBY_EVENT_RETURN + RUBY_EVENT_C_CALL + RUBY_EVENT_C_RETURN + RUBY_EVENT_RAISE + RUBY_EVENT_ALL + +rb_event_hook_func_t: + + typedef void (*rb_event_hook_func_t)(rb_event_t event, NODE *node, + VALUE self, ID id, VALUE klass) + + int rb_remove_event_hook(rb_event_hook_func_t func) + + + + +Appendix C. extconf.rb + +extconf.rb + + +have_macro(macro, headers) + + headermacro + true + + +have_library(lib, func) + + funclib + true + +find_library(lib, func, path...) + + funclib -Lpath + true + +have_func(func, header) + + headerfunc + func + have_library + true + +have_var(var, header) + + headervar + var + have_library + true + +have_header(header) + + + true + +find_header(header, path...) 
+ + header -Ipath + true + +have_struct_member(type, member, header) + + headertypemember + typemember + true + +have_type(type, header, opt) + + headertype + typetrue + +check_sizeof(type, header) + + headertypechar + type + nil + +create_makefile(target) + + Makefile + target + + +find_executable(command, path) + + commandFile::PATH_SEPARATOR + pathpathnil + PATH + nil + +with_config(withval[, default=nil]) + + --with- + + +enable_config(config, *defaults) +disable_config(config, *defaults) + + --enable- + --disable- + --enable-true + --disable-false + + *defaultsyield + *defaults + +dir_config(target[, default_dir]) +dir_config(target[, default_include, default_lib]) + + --with--dir, --with--include, + --with--lib + $CFLAGS $LDFLAGS --with--dir=/path + --with--include=/path/include --with--lib=/path/lib + include lib + ([include_dir, lib_dir]) + +pkg_config(pkg) + + pkg-configpkg + pkg-config--with-pkg-config + + +/* + * Local variables: + * fill-column: 60 + * end: + */ diff --git a/README.ja b/README.ja new file mode 100644 index 0000000..aeb1707 --- /dev/null +++ b/README.ja @@ -0,0 +1,160 @@ +* Ruby + +Ruby +Ruby + + + +RubyPerl + + + + +* Ruby + + + + + () + + (Mixin, ) + + + + + + + + + + () + + Unix-like/POSIX + Windows Mac OS XBeOS + cf. http://redmine.ruby-lang.org/wiki/ruby-19/SupportedPlatformsJa + +* + +** FTP + + + + ftp://ftp.ruby-lang.org/pub/ruby/ + +** Subversion + + + + $ svn co http://svn.ruby-lang.org/repos/ruby/trunk/ ruby + + + + $ svn ls http://svn.ruby-lang.org/repos/ruby/branches/ + + +* + +RubyURL + + http://www.ruby-lang.org/ + + + + +* + +Ruby + + ruby-list-ctl@ruby-lang.org + + + + subscribe YourFirstName YourFamilyName + + + +Rubyruby + + + + ruby-dev-ctl@ruby-lang.org + +ruby-list + +Rubyruby-ext +ruby-math +ruby-talk + + + +* + + + + 1. configure + configure.inautoconf + configure + + 2. configureMakefile + + C + configure optflags=.. warnflags=.. + + + 3. ()defines.h + + + + 4. 
()ext/Setup + + + ext/Setup + + + Setup1option nodynamic + + + + + 5. make + + 6. make test + + test succeeded + + + 7. make install + + root + + +OS + + + +* + +UNIXconfigure +() + + +GCRubyGC +setjmp() jmp_buf +jmp_buf32bit + +gc.c + +defined(THINK_C) + + +# RubyThink C + +CPU + + + + +* + +COPYING.ja + + +* + + matz@netlab.jp +------------------------------------------------------- +created at: Thu Aug 3 11:57:36 JST 1995 +Local variables: +mode: indented-text +end: diff --git a/ToDo b/ToDo new file mode 100644 index 0000000..7e5ef52 --- /dev/null +++ b/ToDo @@ -0,0 +1,124 @@ +Language Spec. + +- Class#allocate - basicNew +- class Foo::Bar value hash in the form of {symbol: value, ...} ?? +* operator !! for rescue. ??? +* objectify characters +* ../... outside condition invokes operator method too. +* ... inside condition turns off just before right condition.??? +* package or access control for global variables?? +* named arguments like foo(nation:="german") or foo(nation: "german"). +* method to retrieve argument information (needs new C API) +* multiple return values, yield values. maybe incompatible ??? +* cascading method invocation ??? +* def Class#method .. end ?? +* def Foo::Bar::baz() .. end ?? +* I18N (or M17N) script/string/regexp +* discourage use of symbol variables (e.g. $/, etc.) in manual +* discourage use of Perlish features by giving warnings. +* non confusing in-block local variable (is it possible?) + + remove scope by block + + variables appears within block may have independent values. +* Regexp: make /o thread safe. +* decide whether begin with rescue or ensure make do..while loop. +* unify == and eql? again +* to_i returns nil if str contains no digit. +* jar like combined library package. -> RubyGems? +* method combination, e.g. before, after, around, etc. +* .. or something like defadvice in Emacs. +* property - for methods, or for objects in general. +* "in" modifier, to annotate, or to encourage assertion. 
+* selector namespace - something like generic-flet in CLOS, to help RubyBehavior +* private instance variable (as in Python?) @_foo in class Foo => @_Foo_foo +* warn/error "bare word" method, like "foo", you should type "foo()" + +Hacking Interpreter + +- generational GC +* non-blocking open (e.g. for named pipe) for thread +* avoid blocking with gethostbyname/gethostbyaddr (use fork ???) +* objectify interpreters ??? +* remove rb_eval() recursions +* syntax tree -> bytecode ??? +* scrambled script, or script filter +* setuid ruby +* performance tune for in-block (dynamic) local variables. +* give warnings to assign magic variables. +* export rb_io_{addstr,printf,puts,print} +* autoload should work with threads [ruby-talk:4589] +* remove stdio dependency from IOs. +* warn for inconsistent local variable usage (lv m and method m at the same time). +* MicroRuby +* Built-in Interactive Ruby. +* Parser API +* trap every method invocation, which can be enabled by e.g. trap_call :method. +* unify Errno exceptions of same errno, or new exception comparison scheme. +* 2.times{|i| if i==0 then a = 15 else puts eval("a") end} should print nil. +* Thread#max_stack_size attribute (possible??) + +Standard Libraries + +- Module#define_method which takes a name and a body (block, proc or method). +- Enumerable#inject +- Array#fetch +- IO::for_fd +- Process::waitall [ruby-talk:4557] +- Process::Status +- File::lchown, File::lchmod; xxx - still need work for non existing platforms +- move Time::times to Process. +- Enumerable#sort_by for Schwartzian transformation +- fork_and_kill_other_threads. +- signal list (Signal::trap, Signal::list). +- move NameError under StandardError. +- Integer#to_s(base) +- Hash::new{default} +- hash etc. should handle self-referencing array/hash +- Array#select(n1,n2...) works like Array#indexes(n1,n2...) +- use Mersenne Twister RNG for random. +- deprecate Array#indexes, and Array#indices. +- remove dependency on MAXPATHLEN. +- String#scanf(?)
+* Object#fmt(?) +* Time::strptime +* Integer[num], Float[num]; Fixnum[num]? +* method to retrieve non-number trailer for to_i/to_f. +* Stream or Port, abstract superclass of IO ? +* String#{pred,prev}, String#downto +* optional stepsize argument for succ() +* Ruby module -- Ruby::Version, Ruby::Interpreter +* introduce Boolean class; super of TrueClass, FalseClass +* synchronized method - synchronized{...}, synchronized :foo, :bar +* Array#&, Array#| to allow duplication. ??? +* way to specify immortal (fork endurance) thread; +* or raise ForkException to every thread but fork caller. +* new user-defined marshal scheme. _dump(dumper), _load(restorer) +* library to load per-user profile seeking .ruby_profile or ruby.ini file. +* warning framework (warn, warning for Ruby level) +* marshal should not depend on sprintf (works badly with locale). +* ternary arg pow: a.pow(b,c) == a**b%c +* new caller(), e.g. call_stack; needs better name. +* pointer share mechanism similar to one in String for Array. +* require "1.6" etc. by /usr/lib/ruby/1.6/1.6.rb ;-) +* save both "feature names" and "normalized path" in $" +* implement Mutex_m (or MutexMixin) using Mutex. + +Extension Libraries + +* ptk.rb pTk wrapper that is compatible with tk.rb +* Berkeley DB extension +* BitVector +* thread-safe fcgi + +Ruby Libraries + +- urllib.rb, nntplib.rb, etc. +* format like perl's + +Tools + +* freeze or undump to bundle everything +* bundle using zlib diff --git a/array.c b/array.c new file mode 100644 index 0000000..e2361ce --- /dev/null +++ b/array.c @@ -0,0 +1,3893 @@ +/********************************************************************** + + array.c - + + $Author: yugui $ + created at: Fri Aug 6 09:46:12 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/util.h" +#include "ruby/st.h" + +#ifndef ARRAY_DEBUG +# define NDEBUG +#endif +#include + +VALUE rb_cArray; + +static ID id_cmp; + +#define ARY_DEFAULT_SIZE 16 +#define ARY_MAX_SIZE (LONG_MAX / sizeof(VALUE)) + +void +rb_mem_clear(register VALUE *mem, register long size) +{ + while (size--) { + *mem++ = Qnil; + } +} + +static inline void +memfill(register VALUE *mem, register long size, register VALUE val) +{ + while (size--) { + *mem++ = val; + } +} + +# define ARY_SHARED_P(ary) \ + (assert(!FL_TEST(ary, ELTS_SHARED) || !FL_TEST(ary, RARRAY_EMBED_FLAG)), \ + FL_TEST(ary,ELTS_SHARED)) +# define ARY_EMBED_P(ary) \ + (assert(!FL_TEST(ary, ELTS_SHARED) || !FL_TEST(ary, RARRAY_EMBED_FLAG)), \ + FL_TEST(ary, RARRAY_EMBED_FLAG)) + +#define ARY_HEAP_PTR(a) (assert(!ARY_EMBED_P(a)), RARRAY(a)->as.heap.ptr) +#define ARY_HEAP_LEN(a) (assert(!ARY_EMBED_P(a)), RARRAY(a)->as.heap.len) +#define ARY_EMBED_PTR(a) (assert(ARY_EMBED_P(a)), RARRAY(a)->as.ary) +#define ARY_EMBED_LEN(a) \ + (assert(ARY_EMBED_P(a)), \ + (long)((RBASIC(a)->flags >> RARRAY_EMBED_LEN_SHIFT) & \ + (RARRAY_EMBED_LEN_MASK >> RARRAY_EMBED_LEN_SHIFT))) + +#define ARY_OWNS_HEAP_P(a) (!FL_TEST(a, ELTS_SHARED|RARRAY_EMBED_FLAG)) +#define FL_SET_EMBED(a) do { \ + assert(!ARY_SHARED_P(a)); \ + assert(!OBJ_FROZEN(a)); \ + FL_SET(a, RARRAY_EMBED_FLAG); \ +} while (0) +#define FL_UNSET_EMBED(ary) FL_UNSET(ary, RARRAY_EMBED_FLAG|RARRAY_EMBED_LEN_MASK) +#define FL_SET_SHARED(ary) do { \ + assert(!ARY_EMBED_P(ary)); \ + FL_SET(ary, ELTS_SHARED); \ +} while (0) +#define FL_UNSET_SHARED(ary) FL_UNSET(ary, ELTS_SHARED) + +#define ARY_SET_PTR(ary, p) do { \ + assert(!ARY_EMBED_P(ary)); \ + assert(!OBJ_FROZEN(ary)); \ + RARRAY(ary)->as.heap.ptr = (p); \ +} while (0) +#define ARY_SET_EMBED_LEN(ary, n) do { \ + long tmp_n = n; \ + 
assert(ARY_EMBED_P(ary)); \ + assert(!OBJ_FROZEN(ary)); \ + RBASIC(ary)->flags &= ~RARRAY_EMBED_LEN_MASK; \ + RBASIC(ary)->flags |= (tmp_n) << RARRAY_EMBED_LEN_SHIFT; \ +} while (0) +#define ARY_SET_HEAP_LEN(ary, n) do { \ + assert(!ARY_EMBED_P(ary)); \ + RARRAY(ary)->as.heap.len = n; \ +} while (0) +#define ARY_SET_LEN(ary, n) do { \ + if (ARY_EMBED_P(ary)) { \ + ARY_SET_EMBED_LEN(ary, n); \ + } \ + else { \ + ARY_SET_HEAP_LEN(ary, n); \ + } \ + assert(RARRAY_LEN(ary) == n); \ +} while (0) +#define ARY_INCREASE_PTR(ary, n) do { \ + assert(!ARY_EMBED_P(ary)); \ + assert(!OBJ_FROZEN(ary)); \ + RARRAY(ary)->as.heap.ptr += n; \ +} while (0) +#define ARY_INCREASE_LEN(ary, n) do { \ + assert(!OBJ_FROZEN(ary)); \ + if (ARY_EMBED_P(ary)) { \ + ARY_SET_EMBED_LEN(ary, RARRAY_LEN(ary)+n); \ + } \ + else { \ + RARRAY(ary)->as.heap.len += n; \ + } \ +} while (0) + +#define ARY_CAPA(ary) (ARY_EMBED_P(ary) ? RARRAY_EMBED_LEN_MAX : \ + ARY_SHARED_ROOT_P(ary) ? RARRAY_LEN(ary) : RARRAY(ary)->as.heap.aux.capa) +#define ARY_SET_CAPA(ary, n) do { \ + assert(!ARY_EMBED_P(ary)); \ + assert(!ARY_SHARED_P(ary)); \ + assert(!OBJ_FROZEN(ary)); \ + RARRAY(ary)->as.heap.aux.capa = (n); \ +} while (0) + +#define ARY_SHARED(ary) (assert(ARY_SHARED_P(ary)), RARRAY(ary)->as.heap.aux.shared) +#define ARY_SET_SHARED(ary, value) do { \ + assert(!ARY_EMBED_P(ary)); \ + assert(ARY_SHARED_P(ary)); \ + assert(ARY_SHARED_ROOT_P(value)); \ + RARRAY(ary)->as.heap.aux.shared = (value); \ +} while (0) +#define RARRAY_SHARED_ROOT_FLAG FL_USER5 +#define ARY_SHARED_ROOT_P(ary) (FL_TEST(ary, RARRAY_SHARED_ROOT_FLAG)) +#define ARY_SHARED_NUM(ary) \ + (assert(ARY_SHARED_ROOT_P(ary)), RARRAY(ary)->as.heap.aux.capa) +#define ARY_SET_SHARED_NUM(ary, value) do { \ + assert(ARY_SHARED_ROOT_P(ary)); \ + RARRAY(ary)->as.heap.aux.capa = (value); \ +} while (0) +#define FL_SET_SHARED_ROOT(ary) do { \ + assert(!ARY_EMBED_P(ary)); \ + FL_SET(ary, RARRAY_SHARED_ROOT_FLAG); \ +} while (0) + +static void +ary_resize_capa(VALUE 
ary, long capacity) +{ + assert(RARRAY_LEN(ary) <= capacity); + assert(!OBJ_FROZEN(ary)); + assert(!ARY_SHARED_P(ary)); + if (capacity > RARRAY_EMBED_LEN_MAX) { + if (ARY_EMBED_P(ary)) { + long len = ARY_EMBED_LEN(ary); + VALUE *ptr = ALLOC_N(VALUE, (capacity)); + MEMCPY(ptr, ARY_EMBED_PTR(ary), VALUE, len); + FL_UNSET_EMBED(ary); + ARY_SET_PTR(ary, ptr); + ARY_SET_HEAP_LEN(ary, len); + } + else { + REALLOC_N(RARRAY(ary)->as.heap.ptr, VALUE, (capacity)); + } + ARY_SET_CAPA(ary, (capacity)); + } + else { + if (!ARY_EMBED_P(ary)) { + long len = RARRAY_LEN(ary); + VALUE *ptr = RARRAY_PTR(ary); + MEMCPY(RARRAY(ary)->as.ary, ptr, VALUE, len); + FL_SET_EMBED(ary); + ARY_SET_LEN(ary, len); + xfree(ptr); + } + } +} + +static void +rb_ary_decrement_share(VALUE shared) +{ + if (shared) { + int num = ARY_SHARED_NUM(shared) - 1; + if (num == 0) { + rb_ary_free(shared); + rb_gc_force_recycle(shared); + } + else if (num > 0) { + ARY_SET_SHARED_NUM(shared, num); + } + } +} + +static void +rb_ary_unshare(VALUE ary) +{ + VALUE shared = RARRAY(ary)->as.heap.aux.shared; + rb_ary_decrement_share(shared); + FL_UNSET_SHARED(ary); +} + +static inline void +rb_ary_unshare_safe(VALUE ary) { + if (ARY_SHARED_P(ary) && !ARY_EMBED_P(ary)) { + rb_ary_unshare(ary); + } +} + +static VALUE +rb_ary_increment_share(VALUE shared) { + int num = ARY_SHARED_NUM(shared); + if (num >= 0) { + ARY_SET_SHARED_NUM(shared, num + 1); + } + return shared; +} + +static void +rb_ary_set_shared(VALUE ary, VALUE shared) +{ + rb_ary_increment_share(shared); + FL_SET_SHARED(ary); + ARY_SET_SHARED(ary, shared); +} + +static inline void +rb_ary_modify_check(VALUE ary) +{ + if (OBJ_FROZEN(ary)) rb_error_frozen("array"); + if (!OBJ_UNTRUSTED(ary) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't modify array"); +} + +static void +rb_ary_modify(VALUE ary) +{ + rb_ary_modify_check(ary); + if (ARY_SHARED_P(ary)) { + long len = RARRAY_LEN(ary); + if (len <= RARRAY_EMBED_LEN_MAX) { + VALUE *ptr = 
ARY_HEAP_PTR(ary); + VALUE shared = ARY_SHARED(ary); + FL_UNSET_SHARED(ary); + FL_SET_EMBED(ary); + MEMCPY(ARY_EMBED_PTR(ary), ptr, VALUE, len); + rb_ary_decrement_share(shared); + ARY_SET_EMBED_LEN(ary, len); + } + else { + VALUE *ptr = ALLOC_N(VALUE, len); + MEMCPY(ptr, RARRAY_PTR(ary), VALUE, len); + rb_ary_unshare(ary); + ARY_SET_CAPA(ary, len); + ARY_SET_PTR(ary, ptr); + } + } +} + +VALUE +rb_ary_freeze(VALUE ary) +{ + return rb_obj_freeze(ary); +} + +/* + * call-seq: + * array.frozen? -> true or false + * + * Return true if this array is frozen (or temporarily frozen + * while being sorted). + */ + +static VALUE +rb_ary_frozen_p(VALUE ary) +{ + if (OBJ_FROZEN(ary)) return Qtrue; + return Qfalse; +} + +static VALUE +ary_alloc(VALUE klass) +{ + NEWOBJ(ary, struct RArray); + OBJSETUP(ary, klass, T_ARRAY); + FL_SET_EMBED((VALUE)ary); + ARY_SET_EMBED_LEN((VALUE)ary, 0); + + return (VALUE)ary; +} + +static VALUE +ary_new(VALUE klass, long len) +{ + VALUE ary; + + if (len < 0) { + rb_raise(rb_eArgError, "negative array size (or size too big)"); + } + if (len > ARY_MAX_SIZE) { + rb_raise(rb_eArgError, "array size too big"); + } + ary = ary_alloc(klass); + if (len > RARRAY_EMBED_LEN_MAX) { + FL_UNSET_EMBED(ary); + ARY_SET_PTR(ary, ALLOC_N(VALUE, len)); + ARY_SET_CAPA(ary, len); + ARY_SET_HEAP_LEN(ary, 0); + } + + return ary; +} + +VALUE +rb_ary_new2(long len) +{ + return ary_new(rb_cArray, len); +} + + +VALUE +rb_ary_new(void) +{ + return rb_ary_new2(RARRAY_EMBED_LEN_MAX); +} + +#include + +VALUE +rb_ary_new3(long n, ...) 
+{ + va_list ar; + VALUE ary; + long i; + + ary = rb_ary_new2(n); + + va_start(ar, n); + for (i=0; i 0 && elts) { + MEMCPY(RARRAY_PTR(ary), elts, VALUE, n); + ARY_SET_LEN(ary, n); + } + + return ary; +} + +VALUE +rb_ary_tmp_new(long len) +{ + return ary_new(0, len); +} + +void +rb_ary_free(VALUE ary) +{ + if (ARY_OWNS_HEAP_P(ary)) { + xfree(RARRAY_PTR(ary)); + } +} + +static VALUE +ary_make_shared(VALUE ary) +{ + assert(!ARY_EMBED_P(ary)); + if (ARY_SHARED_P(ary)) { + return ARY_SHARED(ary); + } + else { + NEWOBJ(shared, struct RArray); + OBJSETUP(shared, 0, T_ARRAY); + FL_UNSET_EMBED(shared); + + ARY_SET_LEN((VALUE)shared, RARRAY_LEN(ary)); + ARY_SET_PTR((VALUE)shared, RARRAY_PTR(ary)); + FL_SET_SHARED_ROOT(shared); + ARY_SET_SHARED_NUM((VALUE)shared, 1); + FL_SET_SHARED(ary); + ARY_SET_SHARED(ary, (VALUE)shared); + OBJ_FREEZE(shared); + return (VALUE)shared; + } +} + + +static VALUE +ary_make_substitution(VALUE ary) +{ + if (RARRAY_LEN(ary) <= RARRAY_EMBED_LEN_MAX) { + VALUE subst = rb_ary_new2(RARRAY_LEN(ary)); + MEMCPY(ARY_EMBED_PTR(subst), RARRAY_PTR(ary), VALUE, RARRAY_LEN(ary)); + ARY_SET_EMBED_LEN(subst, RARRAY_LEN(ary)); + return subst; + } + else { + return rb_ary_increment_share(ary_make_shared(ary)); + } +} + +VALUE +rb_assoc_new(VALUE car, VALUE cdr) +{ + return rb_ary_new3(2, car, cdr); +} + +static VALUE +to_ary(VALUE ary) +{ + return rb_convert_type(ary, T_ARRAY, "Array", "to_ary"); +} + +VALUE +rb_check_array_type(VALUE ary) +{ + return rb_check_convert_type(ary, T_ARRAY, "Array", "to_ary"); +} + +/* + * call-seq: + * Array.try_convert(obj) -> array or nil + * + * Try to convert obj into an array, using to_ary method. + * Returns converted array or nil if obj cannot be converted + * for any reason. This method is to check if an argument is an + * array. 
+ * + * Array.try_convert([1]) # => [1] + * Array.try_convert("1") # => nil + * + * if tmp = Array.try_convert(arg) + * # the argument is an array + * elsif tmp = String.try_convert(arg) + * # the argument is a string + * end + * + */ + +static VALUE +rb_ary_s_try_convert(VALUE dummy, VALUE ary) +{ + return rb_check_array_type(ary); +} + +/* + * call-seq: + * Array.new(size=0, obj=nil) + * Array.new(array) + * Array.new(size) {|index| block } + * + * Returns a new array. In the first form, the new array is + * empty. In the second it is created with _size_ copies of _obj_ + * (that is, _size_ references to the same + * _obj_). The third form creates a copy of the array + * passed as a parameter (the array is generated by calling + * to_ary on the parameter). In the last form, an array + * of the given size is created. Each element in this array is + * calculated by passing the element's index to the given block and + * storing the return value. + * + * Array.new + * Array.new(2) + * Array.new(5, "A") + * + * # only one copy of the object is created + * a = Array.new(2, Hash.new) + * a[0]['cat'] = 'feline' + * a + * a[1]['cat'] = 'Felix' + * a + * + * # here multiple copies are created + * a = Array.new(2) { Hash.new } + * a[0]['cat'] = 'feline' + * a + * + * squares = Array.new(5) {|i| i*i} + * squares + * + * copy = Array.new(squares) + */ + +static VALUE +rb_ary_initialize(int argc, VALUE *argv, VALUE ary) +{ + long len; + VALUE size, val; + + rb_ary_modify(ary); + if (argc == 0) { + if (ARY_OWNS_HEAP_P(ary) && RARRAY_PTR(ary)) { + xfree(RARRAY_PTR(ary)); + } + rb_ary_unshare_safe(ary); + FL_SET_EMBED(ary); + ARY_SET_EMBED_LEN(ary, 0); + if (rb_block_given_p()) { + rb_warning("given block not used"); + } + return ary; + } + rb_scan_args(argc, argv, "02", &size, &val); + if (argc == 1 && !FIXNUM_P(size)) { + val = rb_check_array_type(size); + if (!NIL_P(val)) { + rb_ary_replace(ary, val); + return ary; + } + } + + len = NUM2LONG(size); + if (len < 0) { + 
rb_raise(rb_eArgError, "negative array size"); + } + if (len > ARY_MAX_SIZE) { + rb_raise(rb_eArgError, "array size too big"); + } + rb_ary_modify(ary); + ary_resize_capa(ary, len); + if (rb_block_given_p()) { + long i; + + if (argc == 2) { + rb_warn("block supersedes default value argument"); + } + for (i=0; i 0 && argv) { + MEMCPY(RARRAY_PTR(ary), argv, VALUE, argc); + ARY_SET_LEN(ary, argc); + } + + return ary; +} + +/* Stores val at position idx, enlarging the array when idx lies beyond its capacity or length. A negative idx counts from the end and raises IndexError when it points before the first element; an idx of ARY_MAX_SIZE or more raises IndexError as well. */ void +rb_ary_store(VALUE ary, long idx, VALUE val) +{ + if (idx < 0) { + idx += RARRAY_LEN(ary); + if (idx < 0) { + /* subtract the length again so the message shows the index the caller passed */ rb_raise(rb_eIndexError, "index %ld out of array", + idx - RARRAY_LEN(ary)); + } + } + else if (idx >= ARY_MAX_SIZE) { + rb_raise(rb_eIndexError, "index %ld too big", idx); + } + + rb_ary_modify(ary); + if (idx >= ARY_CAPA(ary)) { + /* new capacity is idx plus half the old capacity (at least ARY_DEFAULT_SIZE), with the extra reduced near ARY_MAX_SIZE */ long new_capa = ARY_CAPA(ary) / 2; + + if (new_capa < ARY_DEFAULT_SIZE) { + new_capa = ARY_DEFAULT_SIZE; + } + if (new_capa >= ARY_MAX_SIZE - idx) { + new_capa = (ARY_MAX_SIZE - idx) / 2; + } + new_capa += idx; + ary_resize_capa(ary, new_capa); + } + if (idx > RARRAY_LEN(ary)) { + /* storing past the end: rb_mem_clear() the slots from the old length up to idx */ rb_mem_clear(RARRAY_PTR(ary) + RARRAY_LEN(ary), + idx-RARRAY_LEN(ary) + 1); + } + + if (idx >= RARRAY_LEN(ary)) { + ARY_SET_LEN(ary, idx + 1); + } + RARRAY_PTR(ary)[idx] = val; +} + +/* Returns an array of class klass holding the len elements of ary that start at offset. The elements are copied when len fits within RARRAY_EMBED_LEN_MAX; otherwise the result shares ary's buffer through ary_make_shared() and has its pointer advanced by offset. The asserts require 0 <= offset, 0 <= len and offset+len <= RARRAY_LEN(ary). */ static VALUE +ary_make_partial(VALUE ary, VALUE klass, long offset, long len) +{ + assert(offset >= 0); + assert(len >= 0); + assert(offset+len <= RARRAY_LEN(ary)); + + if (len <= RARRAY_EMBED_LEN_MAX) { + VALUE result = ary_alloc(klass); + MEMCPY(ARY_EMBED_PTR(result), RARRAY_PTR(ary) + offset, VALUE, len); + ARY_SET_EMBED_LEN(result, len); + return result; + } + else { + VALUE shared, result = ary_alloc(klass); + FL_UNSET_EMBED(result); + + shared = ary_make_shared(ary); + ARY_SET_PTR(result, RARRAY_PTR(ary)); + ARY_SET_LEN(result, RARRAY_LEN(ary)); + rb_ary_set_shared(result, shared); + + ARY_INCREASE_PTR(result, offset); + ARY_SET_LEN(result, len); + return result; + } +} + +/* Returns a same-class array covering all of ary, built with ary_make_partial(). */ static VALUE +ary_make_shared_copy(VALUE ary) +{ + return 
ary_make_partial(ary, rb_obj_class(ary), 0, RARRAY_LEN(ary)); +} + +/* Tells ary_take_first_or_last() which end of the array to take from. */ enum ary_take_pos_flags +{ + ARY_TAKE_FIRST = 0, + ARY_TAKE_LAST = 1 +}; + +/* Common body of first(n), last(n), pop(n) and shift(n): reads the single required count argument, clamps it to the array length, raises ArgumentError when it is negative, and returns a plain Array (rb_cArray) with that many elements taken from the front or, when last is non-zero, from the back. The receiver is not modified here. */ static VALUE +ary_take_first_or_last(int argc, VALUE *argv, VALUE ary, enum ary_take_pos_flags last) +{ + VALUE nv; + long n; + long offset = 0; + + rb_scan_args(argc, argv, "1", &nv); + n = NUM2LONG(nv); + if (n > RARRAY_LEN(ary)) { + n = RARRAY_LEN(ary); + } + else if (n < 0) { + rb_raise(rb_eArgError, "negative array size"); + } + if (last) { + offset = RARRAY_LEN(ary) - n; + } + return ary_make_partial(ary, rb_cArray, offset, n); +} + +/* + * call-seq: + * array << obj -> array + * + * Append---Pushes the given object on to the end of this array. This + * expression returns the array itself, so several appends + * may be chained together. + * + * [ 1, 2 ] << "c" << "d" << [ 3, 4 ] + * #=> [ 1, 2, "c", "d", [ 3, 4 ] ] + * + */ + +VALUE +rb_ary_push(VALUE ary, VALUE item) +{ + /* storing at index == length appends */ rb_ary_store(ary, RARRAY_LEN(ary), item); + return ary; +} + +/* + * call-seq: + * array.push(obj, ... ) -> array + * + * Append---Pushes the given object(s) on to the end of this array. This + * expression returns the array itself, so several appends + * may be chained together. 
+ * + * a = [ "a", "b", "c" ] + * a.push("d", "e", "f") + * #=> ["a", "b", "c", "d", "e", "f"] + */ + +static VALUE +rb_ary_push_m(int argc, VALUE *argv, VALUE ary) +{ + rb_ary_modify_check(ary); + while (argc--) { + rb_ary_push(ary, *argv++); + } + return ary; +} + +/* Removes and returns the last element, or returns Qnil when the array is empty. Before removing, an owned heap buffer whose capacity exceeds both three times the length and ARY_DEFAULT_SIZE is resized to twice the length. */ VALUE +rb_ary_pop(VALUE ary) +{ + long n; + rb_ary_modify_check(ary); + if (RARRAY_LEN(ary) == 0) return Qnil; + if (ARY_OWNS_HEAP_P(ary) && + RARRAY_LEN(ary) * 3 < ARY_CAPA(ary) && + ARY_CAPA(ary) > ARY_DEFAULT_SIZE) + { + ary_resize_capa(ary, RARRAY_LEN(ary) * 2); + } + n = RARRAY_LEN(ary)-1; + ARY_SET_LEN(ary, n); + return RARRAY_PTR(ary)[n]; +} + +/* + * call-seq: + * array.pop -> obj or nil + * array.pop(n) -> array + * + * Removes the last element from self and returns it, or + * nil if the array is empty. + * + * If a number _n_ is given, returns an array of the last n elements + * (or less) just like array.slice!(-n, n) does. + * + * a = [ "a", "b", "c", "d" ] + * a.pop #=> "d" + * a.pop(2) #=> ["b", "c"] + * a #=> ["a"] + */ + +static VALUE +rb_ary_pop_m(int argc, VALUE *argv, VALUE ary) +{ + VALUE result; + + if (argc == 0) { + return rb_ary_pop(ary); + } + + rb_ary_modify_check(ary); + result = ary_take_first_or_last(argc, argv, ary, ARY_TAKE_LAST); + /* shorten the receiver by however many elements were actually taken */ ARY_INCREASE_LEN(ary, -RARRAY_LEN(result)); + return result; +} + +/* Removes and returns the first element, or returns Qnil when the array is empty. An unshared array shorter than ARY_DEFAULT_SIZE has its remaining elements moved down in place; any other unshared array is first turned into a shared one, after which the element pointer is advanced by one instead of moving memory. */ VALUE +rb_ary_shift(VALUE ary) +{ + VALUE top; + + rb_ary_modify_check(ary); + if (RARRAY_LEN(ary) == 0) return Qnil; + top = RARRAY_PTR(ary)[0]; + if (!ARY_SHARED_P(ary)) { + if (RARRAY_LEN(ary) < ARY_DEFAULT_SIZE) { + MEMMOVE(RARRAY_PTR(ary), RARRAY_PTR(ary)+1, VALUE, RARRAY_LEN(ary)-1); + ARY_INCREASE_LEN(ary, -1); + return top; + } + assert(!ARY_EMBED_P(ary)); /* ARY_EMBED_LEN_MAX < ARY_DEFAULT_SIZE */ + + RARRAY_PTR(ary)[0] = Qnil; + ary_make_shared(ary); + } + ARY_INCREASE_PTR(ary, 1); /* shift ptr */ + ARY_INCREASE_LEN(ary, -1); + + return top; +} + +/* + * call-seq: + * array.shift -> obj or nil + * array.shift(n) -> array + * + * Returns the first element of self and 
removes it (shifting all + * other elements down by one). Returns nil if the array + * is empty. + * + * If a number _n_ is given, returns an array of the first n elements + * (or less) just like array.slice!(0, n) does. + * + * args = [ "-m", "-q", "filename" ] + * args.shift #=> "-m" + * args #=> ["-q", "filename"] + * + * args = [ "-m", "-q", "filename" ] + * args.shift(2) #=> ["-m", "-q"] + * args #=> ["filename"] + */ + +static VALUE +rb_ary_shift_m(int argc, VALUE *argv, VALUE ary) +{ + VALUE result; + long n; + + if (argc == 0) { + return rb_ary_shift(ary); + } + + rb_ary_modify_check(ary); + result = ary_take_first_or_last(argc, argv, ary, ARY_TAKE_FIRST); + n = RARRAY_LEN(result); + /* a shared array just advances its pointer; otherwise the remaining elements are moved down */ if (ARY_SHARED_P(ary)) { + ARY_INCREASE_PTR(ary, n); + } + else { + MEMMOVE(RARRAY_PTR(ary), RARRAY_PTR(ary)+n, VALUE, RARRAY_LEN(ary)-n); + } + ARY_INCREASE_LEN(ary, -n); + + return result; +} + +/* + * call-seq: + * array.unshift(obj, ...) -> array + * + * Prepends objects to the front of array, moving the + * other elements up to make room. 
+ * + * a = [ "b", "c", "d" ] + * a.unshift("a") #=> ["a", "b", "c", "d"] + * a.unshift(1, 2) #=> [ 1, 2, "a", "b", "c", "d"] + */ + +static VALUE +rb_ary_unshift_m(int argc, VALUE *argv, VALUE ary) +{ + long len; + + if (argc == 0) return ary; + rb_ary_modify(ary); + /* len is captured here, before the length changes, and reused for the move below */ if (ARY_CAPA(ary) <= (len = RARRAY_LEN(ary)) + argc) { + ary_resize_capa(ary, len + argc + ARY_DEFAULT_SIZE); + } + + /* sliding items */ + MEMMOVE(RARRAY_PTR(ary) + argc, RARRAY_PTR(ary), VALUE, len); + MEMCPY(RARRAY_PTR(ary), argv, VALUE, argc); + ARY_INCREASE_LEN(ary, argc); + + return ary; +} + +/* Prepends the single item by calling rb_ary_unshift_m() with one argument. */ VALUE +rb_ary_unshift(VALUE ary, VALUE item) +{ + return rb_ary_unshift_m(1,&item,ary); +} + +/* faster version - use this if you don't need to treat negative offset */ +static inline VALUE +rb_ary_elt(VALUE ary, long offset) +{ + if (RARRAY_LEN(ary) == 0) return Qnil; + if (offset < 0 || RARRAY_LEN(ary) <= offset) { + return Qnil; + } + return RARRAY_PTR(ary)[offset]; +} + +/* Returns the element at offset, where a negative offset counts from the end; Qnil when the resulting position is outside the array. */ VALUE +rb_ary_entry(VALUE ary, long offset) +{ + if (offset < 0) { + offset += RARRAY_LEN(ary); + } + return rb_ary_elt(ary, offset); +} + +/* Returns a same-class array with up to len elements starting at beg. Returns Qnil when beg is past the end or when beg or len is negative; len is cut down to what is available, and a zero-length result is a new empty array. */ VALUE +rb_ary_subseq(VALUE ary, long beg, long len) +{ + VALUE klass; + + if (beg > RARRAY_LEN(ary)) return Qnil; + if (beg < 0 || len < 0) return Qnil; + + if (RARRAY_LEN(ary) < len || RARRAY_LEN(ary) < beg + len) { + len = RARRAY_LEN(ary) - beg; + } + klass = rb_obj_class(ary); + if (len == 0) return ary_new(klass, 0); + + return ary_make_partial(ary, klass, beg, len); +} + +/* + * call-seq: + * array[index] -> obj or nil + * array[start, length] -> an_array or nil + * array[range] -> an_array or nil + * array.slice(index) -> obj or nil + * array.slice(start, length) -> an_array or nil + * array.slice(range) -> an_array or nil + * + * Element Reference---Returns the element at _index_, + * or returns a subarray starting at _start_ and + * continuing for _length_ elements, or returns a subarray + * specified by _range_. 
+ * Negative indices count backward from the end of the + * array (-1 is the last element). Returns nil if the index + * (or starting index) are out of range. + * + * a = [ "a", "b", "c", "d", "e" ] + * a[2] + a[0] + a[1] #=> "cab" + * a[6] #=> nil + * a[1, 2] #=> [ "b", "c" ] + * a[1..3] #=> [ "b", "c", "d" ] + * a[4..7] #=> [ "e" ] + * a[6..10] #=> nil + * a[-3, 3] #=> [ "c", "d", "e" ] + * # special cases + * a[5] #=> nil + * a[5, 1] #=> [] + * a[5..10] #=> [] + * + */ + +VALUE +rb_ary_aref(int argc, VALUE *argv, VALUE ary) +{ + VALUE arg; + long beg, len; + + if (argc == 2) { + /* array[start, length] */ beg = NUM2LONG(argv[0]); + len = NUM2LONG(argv[1]); + if (beg < 0) { + beg += RARRAY_LEN(ary); + } + return rb_ary_subseq(ary, beg, len); + } + if (argc != 1) { + /* both output pointers are 0, so nothing is stored; presumably called only so that rb_scan_args rejects the argument count */ rb_scan_args(argc, argv, "11", 0, 0); + } + arg = argv[0]; + /* special case - speeding up */ + if (FIXNUM_P(arg)) { + return rb_ary_entry(ary, FIX2LONG(arg)); + } + /* check if idx is Range */ + switch (rb_range_beg_len(arg, &beg, &len, RARRAY_LEN(ary), 0)) { + case Qfalse: + break; + case Qnil: + return Qnil; + default: + return rb_ary_subseq(ary, beg, len); + } + /* neither Fixnum nor Range: convert to a long and index with it */ return rb_ary_entry(ary, NUM2LONG(arg)); +} + +/* + * call-seq: + * array.at(index) -> obj or nil + * + * Returns the element at _index_. A + * negative index counts from the end of _self_. Returns +nil+ + * if the index is out of range. See also Array#[]. + * + * a = [ "a", "b", "c", "d", "e" ] + * a.at(0) #=> "a" + * a.at(-1) #=> "e" + */ + +static VALUE +rb_ary_at(VALUE ary, VALUE pos) +{ + return rb_ary_entry(ary, NUM2LONG(pos)); +} + +/* + * call-seq: + * array.first -> obj or nil + * array.first(n) -> an_array + * + * Returns the first element, or the first +n+ elements, of the array. + * If the array is empty, the first form returns nil, and the + * second form returns an empty array. 
+ * + * a = [ "q", "r", "s", "t" ] + * a.first #=> "q" + * a.first(2) #=> ["q", "r"] + */ + +static VALUE +rb_ary_first(int argc, VALUE *argv, VALUE ary) +{ + if (argc == 0) { + if (RARRAY_LEN(ary) == 0) return Qnil; + return RARRAY_PTR(ary)[0]; + } + else { + /* first(n): argument checking and clamping happen in the helper */ return ary_take_first_or_last(argc, argv, ary, ARY_TAKE_FIRST); + } +} + +/* + * call-seq: + * array.last -> obj or nil + * array.last(n) -> an_array + * + * Returns the last element, or the last +n+ elements, of self. + * If the array is empty, the first form returns nil, and + * the second form returns an empty array. + * + * a = [ "w", "x", "y", "z" ] + * a.last #=> "z" + * a.last(2) #=> ["y", "z"] + */ + +VALUE +rb_ary_last(int argc, VALUE *argv, VALUE ary) +{ + if (argc == 0) { + if (RARRAY_LEN(ary) == 0) return Qnil; + return RARRAY_PTR(ary)[RARRAY_LEN(ary)-1]; + } + else { + /* last(n): argument checking and clamping happen in the helper */ return ary_take_first_or_last(argc, argv, ary, ARY_TAKE_LAST); + } +} + +/* + * call-seq: + * array.fetch(index) -> obj + * array.fetch(index, default ) -> obj + * array.fetch(index) {|index| block } -> obj + * + * Tries to return the element at position index. If the index + * lies outside the array, the first form throws an + * IndexError exception, the second form returns + * default, and the third form returns the value of invoking + * the block, passing in the index. Negative values of index + * count from the end of the array. 
+ * + * a = [ 11, 22, 33, 44 ] + * a.fetch(1) #=> 22 + * a.fetch(-1) #=> 44 + * a.fetch(4, 'cat') #=> "cat" + * a.fetch(4) { |i| i*i } #=> 16 + */ + +static VALUE +rb_ary_fetch(int argc, VALUE *argv, VALUE ary) +{ + VALUE pos, ifnone; + long block_given; + long idx; + + rb_scan_args(argc, argv, "11", &pos, &ifnone); + block_given = rb_block_given_p(); + if (block_given && argc == 2) { + rb_warn("block supersedes default value argument"); + } + idx = NUM2LONG(pos); + + if (idx < 0) { + idx += RARRAY_LEN(ary); + } + if (idx < 0 || RARRAY_LEN(ary) <= idx) { + /* out of range: the block wins, then the default value, else IndexError */ if (block_given) return rb_yield(pos); + if (argc == 1) { + /* idx is still negative only if it was adjusted above; undo that so the message shows the index the caller passed, as rb_ary_store does */ rb_raise(rb_eIndexError, "index %ld out of array", + idx - (idx < 0 ? RARRAY_LEN(ary) : 0)); + } + return ifnone; + } + return RARRAY_PTR(ary)[idx]; +} + +/* + * call-seq: + * array.index(obj) -> int or nil + * array.index {|item| block} -> int or nil + * + * Returns the index of the first object in self that is + * == to obj. If a block is given instead of an + * argument, returns the index of the first object for which the + * block is true. Returns nil if no match is found. + * + * a = [ "a", "b", "c" ] + * a.index("b") #=> 1 + * a.index("z") #=> nil + * a.index{|x|x=="b"} #=> 1 + * + * This is an alias of #find_index. + */ + +static VALUE +rb_ary_index(int argc, VALUE *argv, VALUE ary) +{ + VALUE val; + long i; + + if (argc == 0) { + RETURN_ENUMERATOR(ary, 0, 0); + for (i=0; i int or nil + * + * Returns the index of the last object in array + * == to obj. If a block is given instead of an + * argument, returns first object for which block is + * true. Returns nil if no match is found. 
+ * + * a = [ "a", "b", "b", "b", "c" ] + * a.rindex("b") #=> 3 + * a.rindex("z") #=> nil + * a.rindex{|x|x=="b"} #=> 3 + */ + +static VALUE +rb_ary_rindex(int argc, VALUE *argv, VALUE ary) +{ + VALUE val; + long i = RARRAY_LEN(ary); + + if (argc == 0) { + RETURN_ENUMERATOR(ary, 0, 0); + while (i--) { + if (RTEST(rb_yield(RARRAY_PTR(ary)[i]))) + return LONG2NUM(i); + /* the length is re-read after the yield; pull i back if the array is now shorter */ if (i > RARRAY_LEN(ary)) { + i = RARRAY_LEN(ary); + } + } + return Qnil; + } + rb_scan_args(argc, argv, "01", &val); + while (i--) { + if (rb_equal(RARRAY_PTR(ary)[i], val)) + return LONG2NUM(i); + /* same re-check after rb_equal() */ if (i > RARRAY_LEN(ary)) { + i = RARRAY_LEN(ary); + } + } + return Qnil; +} + +/* Returns obj itself when it is a T_ARRAY, its to_ary conversion when it responds to to_ary, and otherwise a new one-element array built from obj. */ VALUE +rb_ary_to_ary(VALUE obj) +{ + if (TYPE(obj) == T_ARRAY) { + return obj; + } + if (rb_respond_to(obj, rb_intern("to_ary"))) { + return to_ary(obj); + } + return rb_ary_new3(1, obj); +} + +/* Replaces the len elements of ary starting at beg with the elements of rpl (converted with rb_ary_to_ary), or simply removes them when rpl is Qundef. A negative beg counts from the end. Raises IndexError for a negative len, for a beg that points before the first element, or when beg exceeds ARY_MAX_SIZE - rlen while extending past the end. */ static void +rb_ary_splice(VALUE ary, long beg, long len, VALUE rpl) +{ + long rlen; + + if (len < 0) rb_raise(rb_eIndexError, "negative length (%ld)", len); + if (beg < 0) { + beg += RARRAY_LEN(ary); + if (beg < 0) { + /* restore the caller's index for the message */ beg -= RARRAY_LEN(ary); + rb_raise(rb_eIndexError, "index %ld out of array", beg); + } + } + if (RARRAY_LEN(ary) < len || RARRAY_LEN(ary) < beg + len) { + len = RARRAY_LEN(ary) - beg; + } + + if (rpl == Qundef) { + rlen = 0; + } + else { + rpl = rb_ary_to_ary(rpl); + rlen = RARRAY_LEN(rpl); + } + rb_ary_modify(ary); + if (beg >= RARRAY_LEN(ary)) { + /* writing at or past the end: rb_mem_clear() the gap, then copy rpl in at beg; len is reused as the new total length */ if (beg > ARY_MAX_SIZE - rlen) { + rb_raise(rb_eIndexError, "index %ld too big", beg); + } + len = beg + rlen; + if (len >= ARY_CAPA(ary)) { + ary_resize_capa(ary, len); + } + rb_mem_clear(RARRAY_PTR(ary) + RARRAY_LEN(ary), beg - RARRAY_LEN(ary)); + if (rlen > 0) { + MEMCPY(RARRAY_PTR(ary) + beg, RARRAY_PTR(rpl), VALUE, rlen); + } + ARY_SET_LEN(ary, len); + } + else { + long alen; + + /* replacing inside the array: alen is the length after the splice */ alen = RARRAY_LEN(ary) + rlen - len; + if (alen >= ARY_CAPA(ary)) { + ary_resize_capa(ary, alen); + } + + if (len != rlen) { + MEMMOVE(RARRAY_PTR(ary) + beg + rlen, RARRAY_PTR(ary) + beg + len, + 
VALUE, RARRAY_LEN(ary) - (beg + len)); + ARY_SET_LEN(ary, alen); + } + if (rlen > 0) { + MEMMOVE(RARRAY_PTR(ary) + beg, RARRAY_PTR(rpl), VALUE, rlen); + } + } +} + +/* + * call-seq: + * array[index] = obj -> obj + * array[start, length] = obj or an_array or nil -> obj or an_array or nil + * array[range] = obj or an_array or nil -> obj or an_array or nil + * + * Element Assignment---Sets the element at _index_, + * or replaces a subarray starting at _start_ and + * continuing for _length_ elements, or replaces a subarray + * specified by _range_. If indices are greater than + * the current capacity of the array, the array grows + * automatically. Negative indices count backward + * from the end of the array. Inserts elements if _length_ is + * zero. An +IndexError+ is raised if a negative index points + * past the beginning of the array. See also + * Array#push, and Array#unshift. + * + * a = Array.new + * a[4] = "4"; #=> [nil, nil, nil, nil, "4"] + * a[0, 3] = [ 'a', 'b', 'c' ] #=> ["a", "b", "c", nil, "4"] + * a[1..2] = [ 1, 2 ] #=> ["a", 1, 2, nil, "4"] + * a[0, 2] = "?" #=> ["?", 2, nil, "4"] + * a[0..2] = "A" #=> ["A", "4"] + * a[-1] = "Z" #=> ["A", "Z"] + * a[1..-1] = nil #=> ["A", nil] + * a[1..-1] = [] #=> ["A"] + */ + +static VALUE +rb_ary_aset(int argc, VALUE *argv, VALUE ary) +{ + long offset, beg, len; + + if (argc == 3) { + /* array[start, length] = value */ beg = NUM2LONG(argv[0]); + len = NUM2LONG(argv[1]); + rb_ary_splice(ary, beg, len, argv[2]); + return argv[2]; + } + if (argc != 2) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc); + } + if (FIXNUM_P(argv[0])) { + /* Fixnum index: skip the Range test and store directly */ offset = FIX2LONG(argv[0]); + goto fixnum; + } + if (rb_range_beg_len(argv[0], &beg, &len, RARRAY_LEN(ary), 1)) { + /* check if idx is Range */ + rb_ary_splice(ary, beg, len, argv[1]); + return argv[1]; + } + + offset = NUM2LONG(argv[0]); +fixnum: + rb_ary_store(ary, offset, argv[1]); + return argv[1]; +} + +/* + * call-seq: + * array.insert(index, obj...) 
-> array + * + * Inserts the given values before the element with the given index + * (which may be negative). + * + * a = %w{ a b c d } + * a.insert(2, 99) #=> ["a", "b", 99, "c", "d"] + * a.insert(-2, 1, 2, 3) #=> ["a", "b", 99, "c", 1, 2, 3, "d"] + */ + +static VALUE +rb_ary_insert(int argc, VALUE *argv, VALUE ary) +{ + long pos; + + /* an index with no values to insert leaves the array as it is */ if (argc == 1) return ary; + if (argc < 1) { + rb_raise(rb_eArgError, "wrong number of arguments (at least 1)"); + } + pos = NUM2LONG(argv[0]); + /* -1 means insert at the very end */ if (pos == -1) { + pos = RARRAY_LEN(ary); + } + /* any other negative index is moved one to the right, so the values land after the element it names */ if (pos < 0) { + pos++; + } + rb_ary_splice(ary, pos, 0, rb_ary_new4(argc - 1, argv + 1)); + return ary; +} + +/* + * call-seq: + * array.each {|item| block } -> array + * + * Calls block once for each element in self, passing that + * element as a parameter. + * + * a = [ "a", "b", "c" ] + * a.each {|x| print x, " -- " } + * + * produces: + * + * a -- b -- c -- + */ + +VALUE +rb_ary_each(VALUE ary) +{ + long i; + + RETURN_ENUMERATOR(ary, 0, 0); + for (i=0; i array + * + * Same as Array#each, but passes the index of the element + * instead of the element itself. + * + * a = [ "a", "b", "c" ] + * a.each_index {|x| print x, " -- " } + * + * produces: + * + * 0 -- 1 -- 2 -- + */ + +static VALUE +rb_ary_each_index(VALUE ary) +{ + long i; + RETURN_ENUMERATOR(ary, 0, 0); + + for (i=0; iArray#each, but traverses self in reverse + * order. + * + * a = [ "a", "b", "c" ] + * a.reverse_each {|x| print x, " " } + * + * produces: + * + * c b a + */ + +static VALUE +rb_ary_reverse_each(VALUE ary) +{ + long len; + + RETURN_ENUMERATOR(ary, 0, 0); + len = RARRAY_LEN(ary); + while (len--) { + rb_yield(RARRAY_PTR(ary)[len]); + /* the length is re-read after the yield; continue from the new end if the array is now shorter */ if (RARRAY_LEN(ary) < len) { + len = RARRAY_LEN(ary); + } + } + return ary; +} + +/* + * call-seq: + * array.length -> int + * + * Returns the number of elements in self. May be zero. 
+ * + * [ 1, 2, 3, 4, 5 ].length #=> 5 + */ + +static VALUE +rb_ary_length(VALUE ary) +{ + long len = RARRAY_LEN(ary); + return LONG2NUM(len); +} + +/* + * call-seq: + * array.empty? -> true or false + * + * Returns true if self array contains no elements. + * + * [].empty? #=> true + */ + +static VALUE +rb_ary_empty_p(VALUE ary) +{ + if (RARRAY_LEN(ary) == 0) + return Qtrue; + return Qfalse; +} + +/* Returns a copy of ary: a new array of the same length is allocated, DUPSETUP(dup, ary) is applied, and the elements are copied over. The embed flag observed on the new array before DUPSETUP is set again afterwards. */ VALUE +rb_ary_dup(VALUE ary) +{ + VALUE dup = rb_ary_new2(RARRAY_LEN(ary)); + int is_embed = ARY_EMBED_P(dup); + DUPSETUP(dup, ary); + if (is_embed) FL_SET_EMBED(dup); + MEMCPY(RARRAY_PTR(dup), RARRAY_PTR(ary), VALUE, RARRAY_LEN(ary)); + ARY_SET_LEN(dup, RARRAY_LEN(ary)); + + return dup; +} + +extern VALUE rb_output_fs; + +/* Returns the string "[...]" when recur is set; otherwise joins arg[0] using arg[1] as the separator, where argp is cast to a pointer to those two VALUEs. */ static VALUE +recursive_join(VALUE ary, VALUE argp, int recur) +{ + VALUE *arg = (VALUE *)argp; + if (recur) { + return rb_usascii_str_new2("[...]"); + } + return rb_ary_join(arg[0], arg[1]); +} + +VALUE +rb_ary_join(VALUE ary, VALUE sep) +{ + long len = 1, i; + int taint = Qfalse; + int untrust = Qfalse; + VALUE result, tmp; + + if (RARRAY_LEN(ary) == 0) return rb_str_new(0, 0); + if (OBJ_TAINTED(ary) || OBJ_TAINTED(sep)) taint = Qtrue; + if (OBJ_UNTRUSTED(ary) || OBJ_UNTRUSTED(sep)) untrust = Qtrue; + + for (i=0; i 0 && !NIL_P(sep)) + rb_str_buf_append(result, sep); + rb_str_buf_append(result, tmp); + if (OBJ_TAINTED(tmp)) taint = Qtrue; + if (OBJ_UNTRUSTED(tmp)) untrust = Qtrue; + } + + if (taint) OBJ_TAINT(result); + if (untrust) OBJ_UNTRUST(result); + return result; +} + +/* + * call-seq: + * array.join(sep=$,) -> str + * + * Returns a string created by converting each element of the array to + * a string, separated by sep. 
+ * + * [ "a", "b", "c" ].join #=> "abc" + * [ "a", "b", "c" ].join("-") #=> "a-b-c" + */ + +static VALUE +rb_ary_join_m(int argc, VALUE *argv, VALUE ary) +{ + VALUE sep; + + rb_scan_args(argc, argv, "01", &sep); + /* a missing or nil separator falls back to rb_output_fs */ if (NIL_P(sep)) sep = rb_output_fs; + + return rb_ary_join(ary, sep); +} + +static VALUE +inspect_ary(VALUE ary, VALUE dummy, int recur) +{ + int tainted = OBJ_TAINTED(ary); + int untrust = OBJ_UNTRUSTED(ary); + long i; + VALUE s, str; + + if (recur) return rb_tainted_str_new2("[...]"); + str = rb_str_buf_new2("["); + for (i=0; i 0) rb_str_buf_cat2(str, ", "); + rb_str_buf_append(str, s); + } + rb_str_buf_cat2(str, "]"); + if (tainted) OBJ_TAINT(str); + if (untrust) OBJ_UNTRUST(str); + return str; +} + +/* + * call-seq: + * array.to_s -> string + * array.inspect -> string + * + * Create a printable version of array. + */ + +static VALUE +rb_ary_inspect(VALUE ary) +{ + /* an empty array is answered directly; anything else goes through rb_exec_recursive() with inspect_ary */ if (RARRAY_LEN(ary) == 0) return rb_usascii_str_new2("[]"); + return rb_exec_recursive(inspect_ary, ary, 0); +} + +/* Same result as rb_ary_inspect(). */ VALUE +rb_ary_to_s(VALUE ary) +{ + return rb_ary_inspect(ary); +} + +/* + * call-seq: + * array.to_a -> array + * + * Returns _self_. If called on a subclass of Array, converts + * the receiver to an Array object. + */ + +static VALUE +rb_ary_to_a(VALUE ary) +{ + if (rb_obj_class(ary) != rb_cArray) { + VALUE dup = rb_ary_new2(RARRAY_LEN(ary)); + rb_ary_replace(dup, ary); + return dup; + } + return ary; +} + +/* + * call-seq: + * array.to_ary -> array + * + * Returns _self_. + */ + +static VALUE +rb_ary_to_ary_m(VALUE ary) +{ + return ary; +} + +/* Reverses ary in place by swapping elements from both ends toward the middle, and returns ary. Arrays with fewer than two elements are left as they are (rb_ary_modify is still called). */ VALUE +rb_ary_reverse(VALUE ary) +{ + VALUE *p1, *p2; + VALUE tmp; + + rb_ary_modify(ary); + if (RARRAY_LEN(ary) > 1) { + p1 = RARRAY_PTR(ary); + p2 = p1 + RARRAY_LEN(ary) - 1; /* points last item */ + + while (p1 < p2) { + tmp = *p1; + *p1++ = *p2; + *p2-- = tmp; + } + } + return ary; +} + +/* + * call-seq: + * array.reverse! -> array + * + * Reverses _self_ in place. + * + * a = [ "a", "b", "c" ] + * a.reverse! 
#=> ["c", "b", "a"] + * a #=> ["c", "b", "a"] + */ + +static VALUE +rb_ary_reverse_bang(VALUE ary) +{ + return rb_ary_reverse(ary); +} + +/* + * call-seq: + * array.reverse -> an_array + * + * Returns a new array containing self's elements in reverse order. + * + * [ "a", "b", "c" ].reverse #=> ["c", "b", "a"] + * [ 1 ].reverse #=> [1] + */ + +static VALUE +rb_ary_reverse_m(VALUE ary) +{ + /* reverse a copy so the receiver is left alone */ return rb_ary_reverse(rb_ary_dup(ary)); +} + +/* State passed to the sort_1/sort_2 comparators: the array being sorted plus two bit sets used by SORT_OPTIMIZABLE (opt_inited: which types have been checked, opt_methods: which of them passed the check). */ struct ary_sort_data { + VALUE ary; + int opt_methods; + int opt_inited; +}; + +enum { + sort_opt_Fixnum, + sort_opt_String, + sort_optimizable_count +}; + +#define STRING_P(s) (TYPE(s) == T_STRING && CLASS_OF(s) == rb_cString) + +#define SORT_OPTIMIZABLE_BIT(type) (1U << TOKEN_PASTE(sort_opt_,type)) +#define SORT_OPTIMIZABLE(data, type) \ + ((data->opt_inited & SORT_OPTIMIZABLE_BIT(type)) ? \ + (data->opt_methods & SORT_OPTIMIZABLE_BIT(type)) : \ + ((data->opt_inited |= SORT_OPTIMIZABLE_BIT(type)), \ + rb_method_basic_definition_p(TOKEN_PASTE(rb_c,type), id_cmp) && \ + (data->opt_methods |= SORT_OPTIMIZABLE_BIT(type)))) + +/* Raises RuntimeError "sort reentered" when ary's klass field is non-zero; returns Qnil otherwise. */ static VALUE +sort_reentered(VALUE ary) +{ + if (RBASIC(ary)->klass) { + rb_raise(rb_eRuntimeError, "sort reentered"); + } + return Qnil; +} + +/* Comparator used when a block is given: yields the pair to the block and converts the result with rb_cmpint(). sort_reentered() is checked before and after. */ static int +sort_1(const void *ap, const void *bp, void *dummy) +{ + struct ary_sort_data *data = dummy; + VALUE retval = sort_reentered(data->ary); + VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp; + int n; + + retval = rb_yield_values(2, a, b); + n = rb_cmpint(retval, a, b); + sort_reentered(data->ary); + return n; +} + +/* Comparator used without a block: compares two Fixnums or two plain Strings directly when SORT_OPTIMIZABLE allows it, and otherwise calls the id_cmp method on a. */ static int +sort_2(const void *ap, const void *bp, void *dummy) +{ + struct ary_sort_data *data = dummy; + VALUE retval = sort_reentered(data->ary); + VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp; + int n; + + if (FIXNUM_P(a) && FIXNUM_P(b) && SORT_OPTIMIZABLE(data, Fixnum)) { + if ((long)a > (long)b) return 1; + if ((long)a < (long)b) return -1; + return 0; + } + if (STRING_P(a) && STRING_P(b) && SORT_OPTIMIZABLE(data, String)) { + return 
rb_str_cmp(a, b); + } + + retval = rb_funcall(a, id_cmp, 1, b); + n = rb_cmpint(retval, a, b); + sort_reentered(data->ary); + + return n; +} + +/* + * call-seq: + * array.sort! -> array + * array.sort! {| a,b | block } -> array + * + * Sorts _self_. Comparisons for + * the sort will be done using the <=> operator or using + * an optional code block. The block implements a comparison between + * a and b, returning -1, 0, or +1. See also + * Enumerable#sort_by. + * + * a = [ "d", "a", "e", "c", "b" ] + * a.sort #=> ["a", "b", "c", "d", "e"] + * a.sort {|x,y| y <=> x } #=> ["e", "d", "c", "b", "a"] + */ + +VALUE +rb_ary_sort_bang(VALUE ary) +{ + rb_ary_modify(ary); + assert(!ARY_SHARED_P(ary)); + if (RARRAY_LEN(ary) > 1) { + VALUE tmp = ary_make_substitution(ary); /* only ary refers tmp */ + struct ary_sort_data data; + + /* tmp keeps a zero klass while it is being sorted; the comparators call sort_reentered(), which raises as soon as the klass is non-zero */ RBASIC(tmp)->klass = 0; + data.ary = tmp; + data.opt_methods = 0; + data.opt_inited = 0; + ruby_qsort(RARRAY_PTR(tmp), RARRAY_LEN(tmp), sizeof(VALUE), + rb_block_given_p()?sort_1:sort_2, &data); + + /* move the sorted contents of tmp back into ary */ if (ARY_EMBED_P(tmp)) { + assert(ARY_EMBED_P(tmp)); + if (ARY_SHARED_P(ary)) { /* ary might be destructively operated in the given block */ + rb_ary_unshare(ary); + } + FL_SET_EMBED(ary); + MEMCPY(RARRAY_PTR(ary), ARY_EMBED_PTR(tmp), VALUE, ARY_EMBED_LEN(tmp)); + ARY_SET_LEN(ary, ARY_EMBED_LEN(tmp)); + } + else { + assert(!ARY_EMBED_P(tmp)); + if (ARY_HEAP_PTR(ary) == ARY_HEAP_PTR(tmp)) { + /* ary still points at the buffer that was sorted: just drop its shared flag */ assert(!ARY_EMBED_P(ary)); + FL_UNSET_SHARED(ary); + ARY_SET_CAPA(ary, ARY_CAPA(tmp)); + } + else { + /* ary's storage differs from tmp's: release whatever ary holds and take over tmp's buffer */ assert(!ARY_SHARED_P(tmp)); + if (ARY_EMBED_P(ary)) { + FL_UNSET_EMBED(ary); + } + else if (ARY_SHARED_P(ary)) { + /* ary might be destructively operated in the given block */ + rb_ary_unshare(ary); + } + else { + xfree(ARY_HEAP_PTR(ary)); + } + ARY_SET_PTR(ary, RARRAY_PTR(tmp)); + ARY_SET_HEAP_LEN(ary, RARRAY_LEN(tmp)); + ARY_SET_CAPA(ary, ARY_CAPA(tmp)); + } + /* tmp was lost ownership for the ptr */ + FL_UNSET(tmp, FL_FREEZE); + FL_SET_EMBED(tmp); + 
ARY_SET_EMBED_LEN(tmp, 0); + FL_SET(tmp, FL_FREEZE); + } + /* tmp will be GC'ed. */ + RBASIC(tmp)->klass = rb_cArray; + } + return ary; +} + +/* + * call-seq: + * array.sort -> an_array + * array.sort {| a,b | block } -> an_array + * + * Returns a new array created by sorting self. Comparisons for + * the sort will be done using the <=> operator or using + * an optional code block. The block implements a comparison between + * a and b, returning -1, 0, or +1. See also + * Enumerable#sort_by. + * + * a = [ "d", "a", "e", "c", "b" ] + * a.sort #=> ["a", "b", "c", "d", "e"] + * a.sort {|x,y| y <=> x } #=> ["e", "d", "c", "b", "a"] + */ + +VALUE +rb_ary_sort(VALUE ary) +{ + /* sort a copy in place so the receiver keeps its order */ ary = rb_ary_dup(ary); + rb_ary_sort_bang(ary); + return ary; +} + + +/* + * call-seq: + * array.collect {|item| block } -> an_array + * array.map {|item| block } -> an_array + * + * Invokes block once for each element of self. Creates a + * new array containing the values returned by the block. + * See also Enumerable#collect. + * + * a = [ "a", "b", "c", "d" ] + * a.collect {|x| x + "!" } #=> ["a!", "b!", "c!", "d!"] + * a #=> ["a", "b", "c", "d"] + */ + +static VALUE +rb_ary_collect(VALUE ary) +{ + long i; + VALUE collect; + + RETURN_ENUMERATOR(ary, 0, 0); + collect = rb_ary_new2(RARRAY_LEN(ary)); + /* the length is re-read on every pass, so a change made by the block is taken into account */ for (i = 0; i < RARRAY_LEN(ary); i++) { + rb_ary_push(collect, rb_yield(RARRAY_PTR(ary)[i])); + } + return collect; +} + + +/* + * call-seq: + * array.collect! {|item| block } -> array + * array.map! {|item| block } -> array + * + * Invokes the block once for each element of _self_, replacing the + * element with the value returned by _block_. + * See also Enumerable#collect. + * + * a = [ "a", "b", "c", "d" ] + * a.collect! {|x| x + "!" } + * a #=> [ "a!", "b!", "c!", "d!" 
] + */ + +static VALUE +rb_ary_collect_bang(VALUE ary) +{ + long i; + + RETURN_ENUMERATOR(ary, 0, 0); + rb_ary_modify(ary); + for (i = 0; i < RARRAY_LEN(ary); i++) { + /* stored through rb_ary_store() rather than by direct assignment */ rb_ary_store(ary, i, rb_yield(RARRAY_PTR(ary)[i])); + } + return ary; +} + +VALUE +rb_get_values_at(VALUE obj, long olen, int argc, VALUE *argv, VALUE (*func) (VALUE, long)) +{ + VALUE result = rb_ary_new2(argc); + long beg, len, i, j; + + for (i=0; i an_array + * + * Returns an array containing the elements in + * _self_ corresponding to the given selector(s). The selectors + * may be either integer indices or ranges. + * See also Array#select. + * + * a = %w{ a b c d e f } + * a.values_at(1, 3, 5) + * a.values_at(1, 3, 5, 7) + * a.values_at(-1, -3, -5, -7) + * a.values_at(1..3, 2...5) + */ + +static VALUE +rb_ary_values_at(int argc, VALUE *argv, VALUE ary) +{ + return rb_get_values_at(ary, RARRAY_LEN(ary), argc, argv, rb_ary_entry); +} + + +/* + * call-seq: + * array.select {|item| block } -> an_array + * + * Invokes the block passing in successive elements from array, + * returning an array containing those elements for which the block + * returns a true value (equivalent to Enumerable#select). + * + * a = %w{ a b c d e f } + * a.select {|v| v =~ /[aeiou]/} #=> ["a", "e"] + */ + +static VALUE +rb_ary_select(VALUE ary) +{ + VALUE result; + long i; + + RETURN_ENUMERATOR(ary, 0, 0); + result = rb_ary_new2(RARRAY_LEN(ary)); + for (i = 0; i < RARRAY_LEN(ary); i++) { + if (RTEST(rb_yield(RARRAY_PTR(ary)[i]))) { + /* rb_ary_elt() bounds-checks i again after the yield and gives Qnil when i is no longer inside the array */ rb_ary_push(result, rb_ary_elt(ary, i)); + } + } + return result; +} + +/* + * call-seq: + * array.delete(obj) -> obj or nil + * array.delete(obj) { block } -> obj or nil + * + * Deletes items from self that are equal to obj. If + * the item is not found, returns nil. If the optional + * code block is given, returns the result of block if the item + * is not found. 
+ * + * a = [ "a", "b", "b", "b", "c" ] + * a.delete("b") #=> "b" + * a #=> ["a", "c"] + * a.delete("z") #=> nil + * a.delete("z") { "not found" } #=> "not found" + */ + +VALUE +rb_ary_delete(VALUE ary, VALUE item) +{ + VALUE v = item; + long i1, i2; + + /* i1 scans every element, i2 is where the next kept element goes */ for (i1 = i2 = 0; i1 < RARRAY_LEN(ary); i1++) { + VALUE e = RARRAY_PTR(ary)[i1]; + + if (rb_equal(e, item)) { + /* remember the matching element; it is what gets returned */ v = e; + continue; + } + if (i1 != i2) { + rb_ary_store(ary, i2, e); + } + i2++; + } + if (RARRAY_LEN(ary) == i2) { + /* every element was kept, so nothing matched */ if (rb_block_given_p()) { + return rb_yield(item); + } + return Qnil; + } + + rb_ary_modify(ary); + if (RARRAY_LEN(ary) > i2) { + ARY_SET_LEN(ary, i2); + /* resize to twice the new length when that is still below the capacity and the capacity exceeds ARY_DEFAULT_SIZE */ if (i2 * 2 < ARY_CAPA(ary) && + ARY_CAPA(ary) > ARY_DEFAULT_SIZE) { + ary_resize_capa(ary, i2*2); + } + } + + return v; +} + +/* Removes and returns the element at pos, where a negative pos counts from the end. Returns Qnil, before rb_ary_modify() is reached, when pos is outside the array. */ VALUE +rb_ary_delete_at(VALUE ary, long pos) +{ + long len = RARRAY_LEN(ary); + VALUE del; + + if (pos >= len) return Qnil; + if (pos < 0) { + pos += len; + if (pos < 0) return Qnil; + } + + rb_ary_modify(ary); + del = RARRAY_PTR(ary)[pos]; + MEMMOVE(RARRAY_PTR(ary)+pos, RARRAY_PTR(ary)+pos+1, VALUE, + RARRAY_LEN(ary)-pos-1); + ARY_INCREASE_LEN(ary, -1); + + return del; +} + +/* + * call-seq: + * array.delete_at(index) -> obj or nil + * + * Deletes the element at the specified index, returning that element, + * or nil if the index is out of range. See also + * Array#slice!. + * + * a = %w( ant bat cat dog ) + * a.delete_at(2) #=> "cat" + * a #=> ["ant", "bat", "dog"] + * a.delete_at(99) #=> nil + */ + +static VALUE +rb_ary_delete_at_m(VALUE ary, VALUE pos) +{ + return rb_ary_delete_at(ary, NUM2LONG(pos)); +} + +/* + * call-seq: + * array.slice!(index) -> obj or nil + * array.slice!(start, length) -> sub_array or nil + * array.slice!(range) -> sub_array or nil + * + * Deletes the element(s) given by an index (optionally with a length) + * or by a range. Returns the deleted object, subarray, or + * nil if the index is out of range. 
+ * + * a = [ "a", "b", "c" ] + * a.slice!(1) #=> "b" + * a #=> ["a", "c"] + * a.slice!(-1) #=> "c" + * a #=> ["a"] + * a.slice!(100) #=> nil + * a #=> ["a"] + */ + +static VALUE +rb_ary_slice_bang(int argc, VALUE *argv, VALUE ary) +{ + VALUE arg1, arg2; + long pos, len, orig_len; + + rb_ary_modify_check(ary); + if (rb_scan_args(argc, argv, "11", &arg1, &arg2) == 2) { + pos = NUM2LONG(arg1); + len = NUM2LONG(arg2); + /* the Range case below jumps here once pos and len are known */ delete_pos_len: + if (len < 0) return Qnil; + orig_len = RARRAY_LEN(ary); + if (pos < 0) { + pos += orig_len; + if (pos < 0) return Qnil; + } + else if (orig_len < pos) return Qnil; + if (orig_len < pos + len) { + len = orig_len - pos; + } + if (len == 0) return rb_ary_new2(0); + /* copy the removed elements out first, give the copy the receiver's class, then cut them from the receiver */ arg2 = rb_ary_new4(len, RARRAY_PTR(ary)+pos); + RBASIC(arg2)->klass = rb_obj_class(ary); + rb_ary_splice(ary, pos, len, Qundef); + return arg2; + } + + if (!FIXNUM_P(arg1)) { + switch (rb_range_beg_len(arg1, &pos, &len, RARRAY_LEN(ary), 0)) { + case Qtrue: + /* valid range */ + goto delete_pos_len; + case Qnil: + /* invalid range */ + return Qnil; + default: + /* not a range */ + break; + } + } + + return rb_ary_delete_at(ary, NUM2LONG(arg1)); +} + +/* + * call-seq: + * array.reject! {|item| block } -> array or nil + * + * Equivalent to Array#delete_if, deleting elements from + * _self_ for which the block evaluates to true, but returns + * nil if no changes were made. Also see + * Enumerable#reject. + */ + +static VALUE +rb_ary_reject_bang(VALUE ary) +{ + long i1, i2; + + RETURN_ENUMERATOR(ary, 0, 0); + rb_ary_modify(ary); + /* i1 scans every element, i2 is where the next kept element goes */ for (i1 = i2 = 0; i1 < RARRAY_LEN(ary); i1++) { + VALUE v = RARRAY_PTR(ary)[i1]; + if (RTEST(rb_yield(v))) continue; + if (i1 != i2) { + rb_ary_store(ary, i2, v); + } + i2++; + } + + /* nothing was rejected */ if (RARRAY_LEN(ary) == i2) return Qnil; + if (i2 < RARRAY_LEN(ary)) + ARY_SET_LEN(ary, i2); + return ary; +} + +/* + * call-seq: + * array.reject {|item| block } -> an_array + * + * Returns a new array containing the items in _self_ + * for which the block is not true. 
+ */ + +static VALUE +rb_ary_reject(VALUE ary) +{ + RETURN_ENUMERATOR(ary, 0, 0); + ary = rb_ary_dup(ary); + rb_ary_reject_bang(ary); + return ary; +} + +/* + * call-seq: + * array.delete_if {|item| block } -> array + * + * Deletes every element of self for which block evaluates + * to true. + * + * a = [ "a", "b", "c" ] + * a.delete_if {|x| x >= "b" } #=> ["a"] + */ + +static VALUE +rb_ary_delete_if(VALUE ary) +{ + RETURN_ENUMERATOR(ary, 0, 0); + rb_ary_reject_bang(ary); + return ary; +} + +static VALUE +take_i(VALUE val, VALUE *args, int argc, VALUE *argv) +{ + if (args[1]-- == 0) rb_iter_break(); + if (argc > 1) val = rb_ary_new4(argc, argv); + rb_ary_push(args[0], val); + return Qnil; +} + +static VALUE +take_items(VALUE obj, long n) +{ + VALUE result = rb_check_array_type(obj); + VALUE args[2]; + + if (!NIL_P(result)) return rb_ary_subseq(result, 0, n); + result = rb_ary_new2(n); + args[0] = result; args[1] = (VALUE)n; + rb_block_call(obj, rb_intern("each"), 0, 0, take_i, (VALUE)args); + return result; +} + + +/* + * call-seq: + * array.zip(arg, ...) -> an_array + * array.zip(arg, ...) {| arr | block } -> nil + * + * Converts any arguments to arrays, then merges elements of + * self with corresponding elements from each argument. This + * generates a sequence of self.size n-element + * arrays, where n is one more that the count of arguments. If + * the size of any argument is less than enumObj.size, + * nil values are supplied. If a block given, it is + * invoked for each output array, otherwise an array of arrays is + * returned. 
+ * + * a = [ 4, 5, 6 ] + * b = [ 7, 8, 9 ] + * [1,2,3].zip(a, b) #=> [[1, 4, 7], [2, 5, 8], [3, 6, 9]] + * [1,2].zip(a,b) #=> [[1, 4, 7], [2, 5, 8]] + * a.zip([1,2],[8]) #=> [[4,1,8], [5,2,nil], [6,nil,nil]] + */ + +static VALUE +rb_ary_zip(int argc, VALUE *argv, VALUE ary) +{ + int i, j; + long len; + VALUE result = Qnil; + + len = RARRAY_LEN(ary); + for (i=0; i an_array + * + * Assumes that self is an array of arrays and transposes the + * rows and columns. + * + * a = [[1,2], [3,4], [5,6]] + * a.transpose #=> [[1, 3, 5], [2, 4, 6]] + */ + +static VALUE +rb_ary_transpose(VALUE ary) +{ + long elen = -1, alen, i, j; + VALUE tmp, result = 0; + + alen = RARRAY_LEN(ary); + if (alen == 0) return rb_ary_dup(ary); + for (i=0; i array + * + * Replaces the contents of self with the contents of + * other_array, truncating or expanding if necessary. + * + * a = [ "a", "b", "c", "d", "e" ] + * a.replace([ "x", "y", "z" ]) #=> ["x", "y", "z"] + * a #=> ["x", "y", "z"] + */ + +VALUE +rb_ary_replace(VALUE copy, VALUE orig) +{ + orig = to_ary(orig); + rb_ary_modify_check(copy); + if (copy == orig) return copy; + + if (RARRAY_LEN(orig) <= RARRAY_EMBED_LEN_MAX) { + VALUE *ptr; + VALUE shared = 0; + + if (ARY_OWNS_HEAP_P(copy)) { + xfree(RARRAY_PTR(copy)); + } + else if (ARY_SHARED_P(copy)) { + shared = ARY_SHARED(copy); + FL_UNSET_SHARED(copy); + } + FL_SET_EMBED(copy); + ptr = RARRAY_PTR(orig); + MEMCPY(RARRAY_PTR(copy), ptr, VALUE, RARRAY_LEN(orig)); + if (shared) { + rb_ary_decrement_share(shared); + } + ARY_SET_LEN(copy, RARRAY_LEN(orig)); + } + else { + VALUE shared = ary_make_shared(orig); + if (ARY_OWNS_HEAP_P(copy)) { + xfree(RARRAY_PTR(copy)); + } else { + rb_ary_unshare_safe(copy); + } + FL_UNSET_EMBED(copy); + ARY_SET_PTR(copy, RARRAY_PTR(orig)); + ARY_SET_LEN(copy, RARRAY_LEN(orig)); + rb_ary_set_shared(copy, shared); + } + return copy; +} + +/* + * call-seq: + * array.clear -> array + * + * Removes all elements from _self_. 
+ * + * a = [ "a", "b", "c", "d", "e" ] + * a.clear #=> [ ] + */ + +VALUE +rb_ary_clear(VALUE ary) +{ + rb_ary_modify(ary); + ARY_SET_LEN(ary, 0); + if (ARY_DEFAULT_SIZE * 2 < ARY_CAPA(ary)) { + ary_resize_capa(ary, ARY_DEFAULT_SIZE * 2); + } + return ary; +} + +/* + * call-seq: + * array.fill(obj) -> array + * array.fill(obj, start [, length]) -> array + * array.fill(obj, range ) -> array + * array.fill {|index| block } -> array + * array.fill(start [, length] ) {|index| block } -> array + * array.fill(range) {|index| block } -> array + * + * The first three forms set the selected elements of self (which + * may be the entire array) to obj. A start of + * nil is equivalent to zero. A length of + * nil is equivalent to self.length. The last three + * forms fill the array with the value of the block. The block is + * passed the absolute index of each element to be filled. + * + * a = [ "a", "b", "c", "d" ] + * a.fill("x") #=> ["x", "x", "x", "x"] + * a.fill("z", 2, 2) #=> ["x", "x", "z", "z"] + * a.fill("y", 0..1) #=> ["y", "y", "z", "z"] + * a.fill {|i| i*i} #=> [0, 1, 4, 9] + * a.fill(-2) {|i| i*i*i} #=> [0, 1, 8, 27] + */ + +static VALUE +rb_ary_fill(int argc, VALUE *argv, VALUE ary) +{ + VALUE item, arg1, arg2; + long beg = 0, end = 0, len = 0; + VALUE *p, *pend; + int block_p = Qfalse; + + if (rb_block_given_p()) { + block_p = Qtrue; + rb_scan_args(argc, argv, "02", &arg1, &arg2); + argc += 1; /* hackish */ + } + else { + rb_scan_args(argc, argv, "12", &item, &arg1, &arg2); + } + switch (argc) { + case 1: + beg = 0; + len = RARRAY_LEN(ary); + break; + case 2: + if (rb_range_beg_len(arg1, &beg, &len, RARRAY_LEN(ary), 1)) { + break; + } + /* fall through */ + case 3: + beg = NIL_P(arg1) ? 0 : NUM2LONG(arg1); + if (beg < 0) { + beg = RARRAY_LEN(ary) + beg; + if (beg < 0) beg = 0; + } + len = NIL_P(arg2) ? 
RARRAY_LEN(ary) - beg : NUM2LONG(arg2); + break; + } + rb_ary_modify(ary); + if (len < 0) { + return ary; + } + if (beg >= ARY_MAX_SIZE || len > ARY_MAX_SIZE - beg) { + rb_raise(rb_eArgError, "argument too big"); + } + end = beg + len; + if (RARRAY_LEN(ary) < end) { + if (end >= ARY_CAPA(ary)) { + ary_resize_capa(ary, end); + } + rb_mem_clear(RARRAY_PTR(ary) + RARRAY_LEN(ary), end - RARRAY_LEN(ary)); + ARY_SET_LEN(ary, end); + } + + if (block_p) { + VALUE v; + long i; + + for (i=beg; i=RARRAY_LEN(ary)) break; + RARRAY_PTR(ary)[i] = v; + } + } + else { + p = RARRAY_PTR(ary) + beg; + pend = p + len; + while (p < pend) { + *p++ = item; + } + } + return ary; +} + +/* + * call-seq: + * array + other_array -> an_array + * + * Concatenation---Returns a new array built by concatenating the + * two arrays together to produce a third array. + * + * [ 1, 2, 3 ] + [ 4, 5 ] #=> [ 1, 2, 3, 4, 5 ] + */ + +VALUE +rb_ary_plus(VALUE x, VALUE y) +{ + VALUE z; + long len; + + y = to_ary(y); + len = RARRAY_LEN(x) + RARRAY_LEN(y); + z = rb_ary_new2(len); + MEMCPY(RARRAY_PTR(z), RARRAY_PTR(x), VALUE, RARRAY_LEN(x)); + MEMCPY(RARRAY_PTR(z) + RARRAY_LEN(x), RARRAY_PTR(y), VALUE, RARRAY_LEN(y)); + ARY_SET_LEN(z, len); + return z; +} + +/* + * call-seq: + * array.concat(other_array) -> array + * + * Appends the elements in other_array to _self_. + * + * [ "a", "b" ].concat( ["c", "d"] ) #=> [ "a", "b", "c", "d" ] + */ + + +VALUE +rb_ary_concat(VALUE x, VALUE y) +{ + y = to_ary(y); + if (RARRAY_LEN(y) > 0) { + rb_ary_splice(x, RARRAY_LEN(x), 0, y); + } + return x; +} + + +/* + * call-seq: + * array * int -> an_array + * array * str -> a_string + * + * Repetition---With a String argument, equivalent to + * self.join(str). Otherwise, returns a new array + * built by concatenating the _int_ copies of _self_. 
+ * + * + * [ 1, 2, 3 ] * 3 #=> [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] + * [ 1, 2, 3 ] * "," #=> "1,2,3" + * + */ + +static VALUE +rb_ary_times(VALUE ary, VALUE times) +{ + VALUE ary2, tmp; + long i, len; + + tmp = rb_check_string_type(times); + if (!NIL_P(tmp)) { + return rb_ary_join(ary, tmp); + } + + len = NUM2LONG(times); + if (len == 0) { + ary2 = ary_new(rb_obj_class(ary), 0); + goto out; + } + if (len < 0) { + rb_raise(rb_eArgError, "negative argument"); + } + if (ARY_MAX_SIZE/len < RARRAY_LEN(ary)) { + rb_raise(rb_eArgError, "argument too big"); + } + len *= RARRAY_LEN(ary); + + ary2 = ary_new(rb_obj_class(ary), len); + ARY_SET_LEN(ary2, len); + + for (i=0; i an_array or nil + * + * Searches through an array whose elements are also arrays + * comparing _obj_ with the first element of each contained array + * using obj.==. + * Returns the first contained array that matches (that + * is, the first associated array), + * or +nil+ if no match is found. + * See also Array#rassoc. + * + * s1 = [ "colors", "red", "blue", "green" ] + * s2 = [ "letters", "a", "b", "c" ] + * s3 = "foo" + * a = [ s1, s2, s3 ] + * a.assoc("letters") #=> [ "letters", "a", "b", "c" ] + * a.assoc("foo") #=> nil + */ + +VALUE +rb_ary_assoc(VALUE ary, VALUE key) +{ + long i; + VALUE v; + + for (i = 0; i < RARRAY_LEN(ary); ++i) { + v = rb_check_array_type(RARRAY_PTR(ary)[i]); + if (!NIL_P(v) && RARRAY_LEN(v) > 0 && + rb_equal(RARRAY_PTR(v)[0], key)) + return v; + } + return Qnil; +} + +/* + * call-seq: + * array.rassoc(obj) -> an_array or nil + * + * Searches through the array whose elements are also arrays. Compares + * _obj_ with the second element of each contained array using + * ==. Returns the first contained array that matches. See + * also Array#assoc. 
+ * + * a = [ [ 1, "one"], [2, "two"], [3, "three"], ["ii", "two"] ] + * a.rassoc("two") #=> [2, "two"] + * a.rassoc("four") #=> nil + */ + +VALUE +rb_ary_rassoc(VALUE ary, VALUE value) +{ + long i; + VALUE v; + + for (i = 0; i < RARRAY_LEN(ary); ++i) { + v = RARRAY_PTR(ary)[i]; + if (TYPE(v) == T_ARRAY && + RARRAY_LEN(v) > 1 && + rb_equal(RARRAY_PTR(v)[1], value)) + return v; + } + return Qnil; +} + +static VALUE +recursive_equal(VALUE ary1, VALUE ary2, int recur) +{ + long i; + + if (recur) return Qtrue; /* Subtle! */ + for (i=0; i bool + * + * Equality---Two arrays are equal if they contain the same number + * of elements and if each element is equal to (according to + * Object.==) the corresponding element in the other array. + * + * [ "a", "c" ] == [ "a", "c", 7 ] #=> false + * [ "a", "c", 7 ] == [ "a", "c", 7 ] #=> true + * [ "a", "c", 7 ] == [ "a", "d", "f" ] #=> false + * + */ + +static VALUE +rb_ary_equal(VALUE ary1, VALUE ary2) +{ + if (ary1 == ary2) return Qtrue; + if (TYPE(ary2) != T_ARRAY) { + if (!rb_respond_to(ary2, rb_intern("to_ary"))) { + return Qfalse; + } + return rb_equal(ary2, ary1); + } + if (RARRAY_LEN(ary1) != RARRAY_LEN(ary2)) return Qfalse; + return rb_exec_recursive_paired(recursive_equal, ary1, ary2, ary2); +} + +static VALUE +recursive_eql(VALUE ary1, VALUE ary2, int recur) +{ + long i; + + if (recur) return Qtrue; /* Subtle! */ + for (i=0; i true or false + * + * Returns true if _array_ and _other_ are the same object, + * or are both arrays with the same content. 
+ */ + +static VALUE +rb_ary_eql(VALUE ary1, VALUE ary2) +{ + if (ary1 == ary2) return Qtrue; + if (TYPE(ary2) != T_ARRAY) return Qfalse; + if (RARRAY_LEN(ary1) != RARRAY_LEN(ary2)) return Qfalse; + return rb_exec_recursive_paired(recursive_eql, ary1, ary2, ary2); +} + +static VALUE +recursive_hash(VALUE ary, VALUE dummy, int recur) +{ + long i, h; + VALUE n; + + if (recur) { + return LONG2FIX(0); + } + h = RARRAY_LEN(ary); + for (i=0; i fixnum + * + * Compute a hash-code for this array. Two arrays with the same content + * will have the same hash code (and will compare using eql?). + */ + +static VALUE +rb_ary_hash(VALUE ary) +{ + return rb_exec_recursive(recursive_hash, ary, 0); +} + +/* + * call-seq: + * array.include?(obj) -> true or false + * + * Returns true if the given object is present in + * self (that is, if any object == anObject), + * false otherwise. + * + * a = [ "a", "b", "c" ] + * a.include?("b") #=> true + * a.include?("z") #=> false + */ + +VALUE +rb_ary_includes(VALUE ary, VALUE item) +{ + long i; + + for (i=0; i RARRAY_LEN(ary2)) { + len = RARRAY_LEN(ary2); + } + for (i=0; i other_array -> -1, 0, +1 + * + * Comparison---Returns an integer (-1, 0, + * or +1) if this array is less than, equal to, or greater than + * other_array. Each object in each array is compared + * (using <=>). If any value isn't + * equal, then that inequality is the return value. If all the + * values found are equal, then the return is based on a + * comparison of the array lengths. Thus, two arrays are + * ``equal'' according to Array#<=> if and only if they have + * the same length and the value of each element is equal to the + * value of the corresponding element in the other array. 
+ * + * [ "a", "a", "c" ] <=> [ "a", "b", "c" ] #=> -1 + * [ 1, 2, 3, 4, 5, 6 ] <=> [ 1, 2 ] #=> +1 + * + */ + +VALUE +rb_ary_cmp(VALUE ary1, VALUE ary2) +{ + long len; + VALUE v; + + ary2 = to_ary(ary2); + if (ary1 == ary2) return INT2FIX(0); + v = rb_exec_recursive_paired(recursive_cmp, ary1, ary2, ary2); + if (v != Qundef) return v; + len = RARRAY_LEN(ary1) - RARRAY_LEN(ary2); + if (len == 0) return INT2FIX(0); + if (len > 0) return INT2FIX(1); + return INT2FIX(-1); +} + +static VALUE +ary_make_hash(VALUE ary1, VALUE ary2) +{ + VALUE hash = rb_hash_new(); + long i; + + for (i=0; i an_array + * + * Array Difference---Returns a new array that is a copy of + * the original array, removing any items that also appear in + * other_array. (If you need set-like behavior, see the + * library class Set.) + * + * [ 1, 1, 2, 2, 3, 3, 4, 5 ] - [ 1, 2, 4 ] #=> [ 3, 3, 5 ] + */ + +static VALUE +rb_ary_diff(VALUE ary1, VALUE ary2) +{ + VALUE ary3; + volatile VALUE hash; + long i; + + hash = ary_make_hash(to_ary(ary2), 0); + ary3 = rb_ary_new(); + + for (i=0; i [ 1, 3 ] + */ + + +static VALUE +rb_ary_and(VALUE ary1, VALUE ary2) +{ + VALUE hash, ary3, v, vv; + long i; + + ary2 = to_ary(ary2); + ary3 = rb_ary_new2(RARRAY_LEN(ary1) < RARRAY_LEN(ary2) ? + RARRAY_LEN(ary1) : RARRAY_LEN(ary2)); + hash = ary_make_hash(ary2, 0); + + if (RHASH_EMPTY_P(hash)) + return ary3; + + for (i=0; i an_array + * + * Set Union---Returns a new array by joining this array with + * other_array, removing duplicates. + * + * [ "a", "b", "c" ] | [ "c", "d", "a" ] + * #=> [ "a", "b", "c", "d" ] + */ + +static VALUE +rb_ary_or(VALUE ary1, VALUE ary2) +{ + VALUE hash, ary3; + VALUE v, vv; + long i; + + ary2 = to_ary(ary2); + ary3 = rb_ary_new2(RARRAY_LEN(ary1)+RARRAY_LEN(ary2)); + hash = ary_make_hash(ary1, ary2); + + for (i=0; i array or nil + * + * Removes duplicate elements from _self_. + * Returns nil if no changes are made (that is, no + * duplicates are found). 
+ * + * a = [ "a", "a", "b", "b", "c" ] + * a.uniq! #=> ["a", "b", "c"] + * b = [ "a", "b", "c" ] + * b.uniq! #=> nil + */ + +static VALUE +rb_ary_uniq_bang(VALUE ary) +{ + VALUE hash, v, vv; + long i, j; + + hash = ary_make_hash(ary, 0); + + if (RARRAY_LEN(ary) == RHASH_SIZE(hash)) { + return Qnil; + } + for (i=j=0; i an_array + * + * Returns a new array by removing duplicate values in self. + * + * a = [ "a", "a", "b", "b", "c" ] + * a.uniq #=> ["a", "b", "c"] + */ + +static VALUE +rb_ary_uniq(VALUE ary) +{ + ary = rb_ary_dup(ary); + rb_ary_uniq_bang(ary); + return ary; +} + +/* + * call-seq: + * array.compact! -> array or nil + * + * Removes +nil+ elements from array. + * Returns +nil+ if no changes were made. + * + * [ "a", nil, "b", nil, "c" ].compact! #=> [ "a", "b", "c" ] + * [ "a", "b", "c" ].compact! #=> nil + */ + +static VALUE +rb_ary_compact_bang(VALUE ary) +{ + VALUE *p, *t, *end; + long n; + + rb_ary_modify(ary); + p = t = RARRAY_PTR(ary); + end = p + RARRAY_LEN(ary); + + while (t < end) { + if (NIL_P(*t)) t++; + else *p++ = *t++; + } + n = p - RARRAY_PTR(ary); + if (RARRAY_LEN(ary) == n) { + return Qnil; + } + ARY_SET_LEN(ary, n); + if (n * 2 < ARY_CAPA(ary) && ARY_DEFAULT_SIZE * 2 < ARY_CAPA(ary)) { + ary_resize_capa(ary, n * 2); + } + + return ary; +} + +/* + * call-seq: + * array.compact -> an_array + * + * Returns a copy of _self_ with all +nil+ elements removed. + * + * [ "a", nil, "b", nil, "c", nil ].compact + * #=> [ "a", "b", "c" ] + */ + +static VALUE +rb_ary_compact(VALUE ary) +{ + ary = rb_ary_dup(ary); + rb_ary_compact_bang(ary); + return ary; +} + +/* + * call-seq: + * array.count -> int + * array.count(obj) -> int + * array.count { |item| block } -> int + * + * Returns the number of elements. If an argument is given, counts + * the number of elements which equals to obj. If a block is + * given, counts the number of elements yielding a true value. 
+ * + * ary = [1, 2, 4, 2] + * ary.count # => 4 + * ary.count(2) # => 2 + * ary.count{|x|x%2==0} # => 3 + * + */ + +static VALUE +rb_ary_count(int argc, VALUE *argv, VALUE ary) +{ + long n = 0; + + if (argc == 0) { + VALUE *p, *pend; + + if (!rb_block_given_p()) + return LONG2NUM(RARRAY_LEN(ary)); + + for (p = RARRAY_PTR(ary), pend = p + RARRAY_LEN(ary); p < pend; p++) { + if (RTEST(rb_yield(*p))) n++; + } + } + else { + VALUE obj, *p, *pend; + + rb_scan_args(argc, argv, "1", &obj); + if (rb_block_given_p()) { + rb_warn("given block not used"); + } + for (p = RARRAY_PTR(ary), pend = p + RARRAY_LEN(ary); p < pend; p++) { + if (rb_equal(*p, obj)) n++; + } + } + + return LONG2NUM(n); +} + +static VALUE +flatten(VALUE ary, int level, int *modified) +{ + long i = 0; + VALUE stack, result, tmp, elt; + st_table *memo; + st_data_t id; + + stack = ary_new(0, ARY_DEFAULT_SIZE); + result = ary_new(0, RARRAY_LEN(ary)); + memo = st_init_numtable(); + st_insert(memo, (st_data_t)ary, (st_data_t)Qtrue); + *modified = 0; + + while (1) { + while (i < RARRAY_LEN(ary)) { + elt = RARRAY_PTR(ary)[i++]; + tmp = rb_check_array_type(elt); + if (RBASIC(result)->klass) { + rb_raise(rb_eRuntimeError, "flatten reentered"); + } + if (NIL_P(tmp) || (level >= 0 && RARRAY_LEN(stack) / 2 >= level)) { + rb_ary_push(result, elt); + } + else { + *modified = 1; + id = (st_data_t)tmp; + if (st_lookup(memo, id, 0)) { + st_free_table(memo); + rb_raise(rb_eArgError, "tried to flatten recursive array"); + } + st_insert(memo, id, (st_data_t)Qtrue); + rb_ary_push(stack, ary); + rb_ary_push(stack, LONG2NUM(i)); + ary = tmp; + i = 0; + } + } + if (RARRAY_LEN(stack) == 0) { + break; + } + id = (st_data_t)ary; + st_delete(memo, &id, 0); + tmp = rb_ary_pop(stack); + i = NUM2LONG(tmp); + ary = rb_ary_pop(stack); + } + + st_free_table(memo); + + RBASIC(result)->klass = rb_class_of(ary); + return result; +} + +/* + * call-seq: + * array.flatten! 
-> array or nil + * array.flatten!(level) -> array or nil + * + * Flattens _self_ in place. + * Returns nil if no modifications were made (i.e., + * array contains no subarrays.) If the optional level + * argument determines the level of recursion to flatten. + * + * a = [ 1, 2, [3, [4, 5] ] ] + * a.flatten! #=> [1, 2, 3, 4, 5] + * a.flatten! #=> nil + * a #=> [1, 2, 3, 4, 5] + * a = [ 1, 2, [3, [4, 5] ] ] + * a.flatten!(1) #=> [1, 2, 3, [4, 5]] + */ + +static VALUE +rb_ary_flatten_bang(int argc, VALUE *argv, VALUE ary) +{ + int mod = 0, level = -1; + VALUE result, lv; + + rb_scan_args(argc, argv, "01", &lv); + if (!NIL_P(lv)) level = NUM2INT(lv); + if (level == 0) return Qnil; + + result = flatten(ary, level, &mod); + if (mod == 0) return Qnil; + rb_ary_replace(ary, result); + + return ary; +} + +/* + * call-seq: + * array.flatten -> an_array + * array.flatten(level) -> an_array + * + * Returns a new array that is a one-dimensional flattening of this + * array (recursively). That is, for every element that is an array, + * extract its elements into the new array. If the optional + * level argument determines the level of recursion to flatten. + * + * s = [ 1, 2, 3 ] #=> [1, 2, 3] + * t = [ 4, 5, 6, [7, 8] ] #=> [4, 5, 6, [7, 8]] + * a = [ s, t, 9, 10 ] #=> [[1, 2, 3], [4, 5, 6, [7, 8]], 9, 10] + * a.flatten #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + * a = [ 1, 2, [3, [4, 5] ] ] + * a.flatten(1) #=> [1, 2, 3, [4, 5]] + */ + +static VALUE +rb_ary_flatten(int argc, VALUE *argv, VALUE ary) +{ + int mod = 0, level = -1; + VALUE result, lv; + + rb_scan_args(argc, argv, "01", &lv); + if (!NIL_P(lv)) level = NUM2INT(lv); + if (level == 0) return ary_make_shared_copy(ary); + + result = flatten(ary, level, &mod); + OBJ_INFECT(result, ary); + + return result; +} + +/* + * call-seq: + * array.shuffle! -> array + * + * Shuffles elements in _self_ in place. 
+ */ + + +static VALUE +rb_ary_shuffle_bang(VALUE ary) +{ + long i = RARRAY_LEN(ary); + + rb_ary_modify(ary); + while (i) { + long j = rb_genrand_real()*i; + VALUE tmp = RARRAY_PTR(ary)[--i]; + RARRAY_PTR(ary)[i] = RARRAY_PTR(ary)[j]; + RARRAY_PTR(ary)[j] = tmp; + } + return ary; +} + + +/* + * call-seq: + * array.shuffle -> an_array + * + * Returns a new array with elements of this array shuffled. + * + * a = [ 1, 2, 3 ] #=> [1, 2, 3] + * a.shuffle #=> [2, 3, 1] + */ + +static VALUE +rb_ary_shuffle(VALUE ary) +{ + ary = rb_ary_dup(ary); + rb_ary_shuffle_bang(ary); + return ary; +} + + +/* + * call-seq: + * array.sample -> obj + * array.sample(n) -> an_array + * + * Choose a random element, or the random +n+ elements, from the array. + * If the array is empty, the first form returns nil, and the + * second form returns an empty array. + * + */ + + +static VALUE +rb_ary_sample(int argc, VALUE *argv, VALUE ary) +{ + VALUE nv, result, *ptr; + long n, len, i, j, k, idx[10]; + + len = RARRAY_LEN(ary); + if (argc == 0) { + if (len == 0) return Qnil; + i = len == 1 ? 
0 : rb_genrand_real()*len; + return RARRAY_PTR(ary)[i]; + } + rb_scan_args(argc, argv, "1", &nv); + n = NUM2LONG(nv); + if (n < 0) rb_raise(rb_eArgError, "negative sample number"); + ptr = RARRAY_PTR(ary); + len = RARRAY_LEN(ary); + if (n > len) n = len; + switch (n) { + case 0: return rb_ary_new2(0); + case 1: + return rb_ary_new4(1, &ptr[(long)(rb_genrand_real()*len)]); + case 2: + i = rb_genrand_real()*len; + j = rb_genrand_real()*(len-1); + if (j >= i) j++; + return rb_ary_new3(2, ptr[i], ptr[j]); + case 3: + i = rb_genrand_real()*len; + j = rb_genrand_real()*(len-1); + k = rb_genrand_real()*(len-2); + { + long l = j, g = i; + if (j >= i) l = i, g = ++j; + if (k >= l && (++k >= g)) ++k; + } + return rb_ary_new3(3, ptr[i], ptr[j], ptr[k]); + } + if (n < sizeof(idx)/sizeof(idx[0])) { + long sorted[sizeof(idx)/sizeof(idx[0])]; + sorted[0] = idx[0] = rb_genrand_real()*len; + for (i=1; iblock for each element repeatedly _n_ times or + * forever if none or nil is given. If a non-positive number is + * given or the array is empty, does nothing. Returns nil if the + * loop has finished without getting interrupted. + * + * a = ["a", "b", "c"] + * a.cycle {|x| puts x } # print, a, b, c, a, b, c,.. forever. + * a.cycle(2) {|x| puts x } # print, a, b, c, a, b, c. 
+ * + */ + +static VALUE +rb_ary_cycle(int argc, VALUE *argv, VALUE ary) +{ + long n, i; + VALUE nv = Qnil; + + rb_scan_args(argc, argv, "01", &nv); + + RETURN_ENUMERATOR(ary, argc, argv); + if (NIL_P(nv)) { + n = -1; + } + else { + n = NUM2LONG(nv); + if (n <= 0) return Qnil; + } + + while (RARRAY_LEN(ary) > 0 && (n < 0 || 0 < n--)) { + for (i=0; iklass) { + rb_raise(rb_eRuntimeError, "permute reentered"); + } + } + } + } +} + +/* + * call-seq: + * ary.permutation { |p| block } -> array + * ary.permutation -> enumerator + * ary.permutation(n) { |p| block } -> array + * ary.permutation(n) -> enumerator + * + * When invoked with a block, yield all permutations of length n + * of the elements of ary, then return the array itself. + * If n is not specified, yield all permutations of all elements. + * The implementation makes no guarantees about the order in which + * the permutations are yielded. + * + * When invoked without a block, return an enumerator object instead. + * + * Examples: + * + * a = [1, 2, 3] + * a.permutation.to_a #=> [[1,2,3],[1,3,2],[2,1,3],[2,3,1],[3,1,2],[3,2,1]] + * a.permutation(1).to_a #=> [[1],[2],[3]] + * a.permutation(2).to_a #=> [[1,2],[1,3],[2,1],[2,3],[3,1],[3,2]] + * a.permutation(3).to_a #=> [[1,2,3],[1,3,2],[2,1,3],[2,3,1],[3,1,2],[3,2,1]] + * a.permutation(0).to_a #=> [[]] # one permutation of length 0 + * a.permutation(4).to_a #=> [] # no permutations of length 4 + */ + +static VALUE +rb_ary_permutation(int argc, VALUE *argv, VALUE ary) +{ + VALUE num; + long r, n, i; + + n = RARRAY_LEN(ary); /* Array length */ + RETURN_ENUMERATOR(ary, argc, argv); /* Return enumerator if no block */ + rb_scan_args(argc, argv, "01", &num); + r = NIL_P(num) ? 
n : NUM2LONG(num); /* Permutation size from argument */ + + if (r < 0 || n < r) { + /* no permutations: yield nothing */ + } + else if (r == 0) { /* exactly one permutation: the zero-length array */ + rb_yield(rb_ary_new2(0)); + } + else if (r == 1) { /* this is a special, easy case */ + for (i = 0; i < RARRAY_LEN(ary); i++) { + rb_yield(rb_ary_new3(1, RARRAY_PTR(ary)[i])); + } + } + else { /* this is the general case */ + volatile VALUE t0 = tmpbuf(n,sizeof(long)); + long *p = (long*)RSTRING_PTR(t0); + volatile VALUE t1 = tmpbuf(n,sizeof(int)); + int *used = (int*)RSTRING_PTR(t1); + VALUE ary0 = ary_make_substitution(ary); /* private defensive copy of ary */ + RBASIC(ary0)->klass = 0; + + for (i = 0; i < n; i++) used[i] = 0; /* initialize array */ + + permute0(n, r, p, 0, used, ary0); /* compute and yield permutations */ + RB_GC_GUARD(t0); + RB_GC_GUARD(t1); + RBASIC(ary0)->klass = rb_cArray; + } + return ary; +} + +static long +combi_len(long n, long k) +{ + long i, val = 1; + + if (k*2 > n) k = n-k; + if (k == 0) return 1; + if (k < 0) return 0; + val = 1; + for (i=1; i <= k; i++,n--) { + long m = val; + val *= n; + if (val < m) { + rb_raise(rb_eRangeError, "too big for combination"); + } + val /= i; + } + return val; +} + +/* + * call-seq: + * ary.combination(n) { |c| block } -> ary + * ary.combination(n) -> enumerator + * + * When invoked with a block, yields all combinations of length n + * of elements from ary and then returns ary itself. + * The implementation makes no guarantees about the order in which + * the combinations are yielded. + * + * When invoked without a block, returns an enumerator object instead. 
+ * + * Examples: + * + * a = [1, 2, 3, 4] + * a.combination(1).to_a #=> [[1],[2],[3],[4]] + * a.combination(2).to_a #=> [[1,2],[1,3],[1,4],[2,3],[2,4],[3,4]] + * a.combination(3).to_a #=> [[1,2,3],[1,2,4],[1,3,4],[2,3,4]] + * a.combination(4).to_a #=> [[1,2,3,4]] + * a.combination(0).to_a #=> [[]] # one combination of length 0 + * a.combination(5).to_a #=> [] # no combinations of length 5 + * + */ + +static VALUE +rb_ary_combination(VALUE ary, VALUE num) +{ + long n, i, len; + + n = NUM2LONG(num); + RETURN_ENUMERATOR(ary, 1, &num); + len = RARRAY_LEN(ary); + if (n < 0 || len < n) { + /* yield nothing */ + } + else if (n == 0) { + rb_yield(rb_ary_new2(0)); + } + else if (n == 1) { + for (i = 0; i < len; i++) { + rb_yield(rb_ary_new3(1, RARRAY_PTR(ary)[i])); + } + } + else { + volatile VALUE t0 = tmpbuf(n+1, sizeof(long)); + long *stack = (long*)RSTRING_PTR(t0); + long nlen = combi_len(len, n); + volatile VALUE cc = tmpary(n); + VALUE *chosen = RARRAY_PTR(cc); + long lev = 0; + + MEMZERO(stack, long, n); + stack[0] = -1; + for (i = 0; i < nlen; i++) { + chosen[lev] = RARRAY_PTR(ary)[stack[lev+1]]; + for (lev++; lev < n; lev++) { + chosen[lev] = RARRAY_PTR(ary)[stack[lev+1] = stack[lev]+1]; + } + rb_yield(rb_ary_new4(n, chosen)); + if (RBASIC(t0)->klass) { + rb_raise(rb_eRuntimeError, "combination reentered"); + } + do { + stack[lev--]++; + } while (lev && (stack[lev+1]+n == len+lev+1)); + } + } + return ary; +} + +/* + * call-seq: + * ary.product(other_ary, ...) + * + * Returns an array of all combinations of elements from all arrays. 
+ * The length of the returned array is the product of the length + * of ary and the argument arrays + * + * [1,2,3].product([4,5]) # => [[1,4],[1,5],[2,4],[2,5],[3,4],[3,5]] + * [1,2].product([1,2]) # => [[1,1],[1,2],[2,1],[2,2]] + * [1,2].product([3,4],[5,6]) # => [[1,3,5],[1,3,6],[1,4,5],[1,4,6], + * # [2,3,5],[2,3,6],[2,4,5],[2,4,6]] + * [1,2].product() # => [[1],[2]] + * [1,2].product([]) # => [] + */ + +static VALUE +rb_ary_product(int argc, VALUE *argv, VALUE ary) +{ + int n = argc+1; /* How many arrays we're operating on */ + volatile VALUE t0 = tmpbuf(n, sizeof(VALUE)); + volatile VALUE t1 = tmpbuf(n, sizeof(int)); + VALUE *arrays = (VALUE*)RSTRING_PTR(t0); /* The arrays we're computing the product of */ + int *counters = (int*)RSTRING_PTR(t1); /* The current position in each one */ + VALUE result; /* The array we'll be returning */ + long i,j; + long resultlen = 1; + + RBASIC(t0)->klass = 0; + RBASIC(t1)->klass = 0; + + /* initialize the arrays of arrays */ + arrays[0] = ary; + for (i = 1; i < n; i++) arrays[i] = to_ary(argv[i-1]); + + /* initialize the counters for the arrays */ + for (i = 0; i < n; i++) counters[i] = 0; + + /* Compute the length of the result array; return [] if any is empty */ + for (i = 0; i < n; i++) { + long k = RARRAY_LEN(arrays[i]), l = resultlen; + if (k == 0) return rb_ary_new2(0); + resultlen *= k; + if (resultlen < k || resultlen < l || resultlen / k != l) { + rb_raise(rb_eRangeError, "too big to product"); + } + } + + /* Otherwise, allocate and fill in an array of results */ + result = rb_ary_new2(resultlen); + for (i = 0; i < resultlen; i++) { + int m; + /* fill in one subarray */ + VALUE subarray = rb_ary_new2(n); + for (j = 0; j < n; j++) { + rb_ary_push(subarray, rb_ary_entry(arrays[j], counters[j])); + } + + /* put it on the result array */ + rb_ary_push(result, subarray); + + /* + * Increment the last counter. If it overflows, reset to 0 + * and increment the one before it. 
+ */ + m = n-1; + counters[m]++; + while (m > 0 && counters[m] == RARRAY_LEN(arrays[m])) { + counters[m] = 0; + m--; + counters[m]++; + } + } + + return result; +} + +/* + * call-seq: + * ary.take(n) => array + * + * Returns first n elements from ary. + * + * a = [1, 2, 3, 4, 5, 0] + * a.take(3) # => [1, 2, 3] + * + */ + +static VALUE +rb_ary_take(VALUE obj, VALUE n) +{ + long len = NUM2LONG(n); + if (len < 0) { + rb_raise(rb_eArgError, "attempt to take negative size"); + } + return rb_ary_subseq(obj, 0, len); +} + +/* + * call-seq: + * ary.take_while {|arr| block } => array + * + * Passes elements to the block until the block returns nil or false, + * then stops iterating and returns an array of all prior elements. + * + * a = [1, 2, 3, 4, 5, 0] + * a.take_while {|i| i < 3 } # => [1, 2] + * + */ + +static VALUE +rb_ary_take_while(VALUE ary) +{ + long i; + + RETURN_ENUMERATOR(ary, 0, 0); + for (i = 0; i < RARRAY_LEN(ary); i++) { + if (!RTEST(rb_yield(RARRAY_PTR(ary)[i]))) break; + } + return rb_ary_take(ary, LONG2FIX(i)); +} + +/* + * call-seq: + * ary.drop(n) => array + * + * Drops first n elements from ary, and returns rest elements + * in an array. + * + * a = [1, 2, 3, 4, 5, 0] + * a.drop(3) # => [4, 5, 0] + * + */ + +static VALUE +rb_ary_drop(VALUE ary, VALUE n) +{ + VALUE result; + long pos = NUM2LONG(n); + if (pos < 0) { + rb_raise(rb_eArgError, "attempt to drop negative size"); + } + + result = rb_ary_subseq(ary, pos, RARRAY_LEN(ary)); + if (result == Qnil) result = rb_ary_new(); + return result; +} + +/* + * call-seq: + * ary.drop_while {|arr| block } => array + * + * Drops elements up to, but not including, the first element for + * which the block returns nil or false and returns an array + * containing the remaining elements. 
+ * + * a = [1, 2, 3, 4, 5, 0] + * a.drop_while {|i| i < 3 } # => [3, 4, 5, 0] + * + */ + +static VALUE +rb_ary_drop_while(VALUE ary) +{ + long i; + + RETURN_ENUMERATOR(ary, 0, 0); + for (i = 0; i < RARRAY_LEN(ary); i++) { + if (!RTEST(rb_yield(RARRAY_PTR(ary)[i]))) break; + } + return rb_ary_drop(ary, LONG2FIX(i)); +} + + + +/* Arrays are ordered, integer-indexed collections of any object. + * Array indexing starts at 0, as in C or Java. A negative index is + * assumed to be relative to the end of the array---that is, an index of -1 + * indicates the last element of the array, -2 is the next to last + * element in the array, and so on. + */ + +void +Init_Array(void) +{ +#undef rb_intern +#define rb_intern(str) rb_intern_const(str) + + rb_cArray = rb_define_class("Array", rb_cObject); + rb_include_module(rb_cArray, rb_mEnumerable); + + rb_define_alloc_func(rb_cArray, ary_alloc); + rb_define_singleton_method(rb_cArray, "[]", rb_ary_s_create, -1); + rb_define_singleton_method(rb_cArray, "try_convert", rb_ary_s_try_convert, 1); + rb_define_method(rb_cArray, "initialize", rb_ary_initialize, -1); + rb_define_method(rb_cArray, "initialize_copy", rb_ary_replace, 1); + + rb_define_method(rb_cArray, "to_s", rb_ary_inspect, 0); + rb_define_method(rb_cArray, "inspect", rb_ary_inspect, 0); + rb_define_method(rb_cArray, "to_a", rb_ary_to_a, 0); + rb_define_method(rb_cArray, "to_ary", rb_ary_to_ary_m, 0); + rb_define_method(rb_cArray, "frozen?", rb_ary_frozen_p, 0); + + rb_define_method(rb_cArray, "==", rb_ary_equal, 1); + rb_define_method(rb_cArray, "eql?", rb_ary_eql, 1); + rb_define_method(rb_cArray, "hash", rb_ary_hash, 0); + + rb_define_method(rb_cArray, "[]", rb_ary_aref, -1); + rb_define_method(rb_cArray, "[]=", rb_ary_aset, -1); + rb_define_method(rb_cArray, "at", rb_ary_at, 1); + rb_define_method(rb_cArray, "fetch", rb_ary_fetch, -1); + rb_define_method(rb_cArray, "first", rb_ary_first, -1); + rb_define_method(rb_cArray, "last", rb_ary_last, -1); + 
rb_define_method(rb_cArray, "concat", rb_ary_concat, 1); + rb_define_method(rb_cArray, "<<", rb_ary_push, 1); + rb_define_method(rb_cArray, "push", rb_ary_push_m, -1); + rb_define_method(rb_cArray, "pop", rb_ary_pop_m, -1); + rb_define_method(rb_cArray, "shift", rb_ary_shift_m, -1); + rb_define_method(rb_cArray, "unshift", rb_ary_unshift_m, -1); + rb_define_method(rb_cArray, "insert", rb_ary_insert, -1); + rb_define_method(rb_cArray, "each", rb_ary_each, 0); + rb_define_method(rb_cArray, "each_index", rb_ary_each_index, 0); + rb_define_method(rb_cArray, "reverse_each", rb_ary_reverse_each, 0); + rb_define_method(rb_cArray, "length", rb_ary_length, 0); + rb_define_alias(rb_cArray, "size", "length"); + rb_define_method(rb_cArray, "empty?", rb_ary_empty_p, 0); + rb_define_method(rb_cArray, "find_index", rb_ary_index, -1); + rb_define_method(rb_cArray, "index", rb_ary_index, -1); + rb_define_method(rb_cArray, "rindex", rb_ary_rindex, -1); + rb_define_method(rb_cArray, "join", rb_ary_join_m, -1); + rb_define_method(rb_cArray, "reverse", rb_ary_reverse_m, 0); + rb_define_method(rb_cArray, "reverse!", rb_ary_reverse_bang, 0); + rb_define_method(rb_cArray, "sort", rb_ary_sort, 0); + rb_define_method(rb_cArray, "sort!", rb_ary_sort_bang, 0); + rb_define_method(rb_cArray, "collect", rb_ary_collect, 0); + rb_define_method(rb_cArray, "collect!", rb_ary_collect_bang, 0); + rb_define_method(rb_cArray, "map", rb_ary_collect, 0); + rb_define_method(rb_cArray, "map!", rb_ary_collect_bang, 0); + rb_define_method(rb_cArray, "select", rb_ary_select, 0); + rb_define_method(rb_cArray, "values_at", rb_ary_values_at, -1); + rb_define_method(rb_cArray, "delete", rb_ary_delete, 1); + rb_define_method(rb_cArray, "delete_at", rb_ary_delete_at_m, 1); + rb_define_method(rb_cArray, "delete_if", rb_ary_delete_if, 0); + rb_define_method(rb_cArray, "reject", rb_ary_reject, 0); + rb_define_method(rb_cArray, "reject!", rb_ary_reject_bang, 0); + rb_define_method(rb_cArray, "zip", rb_ary_zip, -1); + 
rb_define_method(rb_cArray, "transpose", rb_ary_transpose, 0); + rb_define_method(rb_cArray, "replace", rb_ary_replace, 1); + rb_define_method(rb_cArray, "clear", rb_ary_clear, 0); + rb_define_method(rb_cArray, "fill", rb_ary_fill, -1); + rb_define_method(rb_cArray, "include?", rb_ary_includes, 1); + rb_define_method(rb_cArray, "<=>", rb_ary_cmp, 1); + + rb_define_method(rb_cArray, "slice", rb_ary_aref, -1); + rb_define_method(rb_cArray, "slice!", rb_ary_slice_bang, -1); + + rb_define_method(rb_cArray, "assoc", rb_ary_assoc, 1); + rb_define_method(rb_cArray, "rassoc", rb_ary_rassoc, 1); + + rb_define_method(rb_cArray, "+", rb_ary_plus, 1); + rb_define_method(rb_cArray, "*", rb_ary_times, 1); + + rb_define_method(rb_cArray, "-", rb_ary_diff, 1); + rb_define_method(rb_cArray, "&", rb_ary_and, 1); + rb_define_method(rb_cArray, "|", rb_ary_or, 1); + + rb_define_method(rb_cArray, "uniq", rb_ary_uniq, 0); + rb_define_method(rb_cArray, "uniq!", rb_ary_uniq_bang, 0); + rb_define_method(rb_cArray, "compact", rb_ary_compact, 0); + rb_define_method(rb_cArray, "compact!", rb_ary_compact_bang, 0); + rb_define_method(rb_cArray, "flatten", rb_ary_flatten, -1); + rb_define_method(rb_cArray, "flatten!", rb_ary_flatten_bang, -1); + rb_define_method(rb_cArray, "count", rb_ary_count, -1); + rb_define_method(rb_cArray, "shuffle!", rb_ary_shuffle_bang, 0); + rb_define_method(rb_cArray, "shuffle", rb_ary_shuffle, 0); + rb_define_method(rb_cArray, "sample", rb_ary_sample, -1); + rb_define_method(rb_cArray, "cycle", rb_ary_cycle, -1); + rb_define_method(rb_cArray, "permutation", rb_ary_permutation, -1); + rb_define_method(rb_cArray, "combination", rb_ary_combination, 1); + rb_define_method(rb_cArray, "product", rb_ary_product, -1); + + rb_define_method(rb_cArray, "take", rb_ary_take, 1); + rb_define_method(rb_cArray, "take_while", rb_ary_take_while, 0); + rb_define_method(rb_cArray, "drop", rb_ary_drop, 1); + rb_define_method(rb_cArray, "drop_while", rb_ary_drop_while, 0); + + id_cmp = 
rb_intern("<=>"); +} diff --git a/bignum.c b/bignum.c new file mode 100644 index 0000000..b1ebbdd --- /dev/null +++ b/bignum.c @@ -0,0 +1,2723 @@ +/********************************************************************** + + bignum.c - + + $Author: yugui $ + created at: Fri Jun 10 00:48:55 JST 1994 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby/ruby.h" + +#include +#include +#include +#ifdef HAVE_IEEEFP_H +#include +#endif + +VALUE rb_cBignum; + +#if defined __MINGW32__ +#define USHORT _USHORT +#endif + +#define BDIGITS(x) (RBIGNUM_DIGITS(x)) +#define BITSPERDIG (SIZEOF_BDIGITS*CHAR_BIT) +#define BIGRAD ((BDIGIT_DBL)1 << BITSPERDIG) +#define DIGSPERLONG ((unsigned int)(SIZEOF_LONG/SIZEOF_BDIGITS)) +#if HAVE_LONG_LONG +# define DIGSPERLL ((unsigned int)(SIZEOF_LONG_LONG/SIZEOF_BDIGITS)) +#endif +#define BIGUP(x) ((BDIGIT_DBL)(x) << BITSPERDIG) +#define BIGDN(x) RSHIFT(x,BITSPERDIG) +#define BIGLO(x) ((BDIGIT)((x) & (BIGRAD-1))) +#define BDIGMAX ((BDIGIT)-1) + +#define BIGZEROP(x) (RBIGNUM_LEN(x) == 0 || \ + (BDIGITS(x)[0] == 0 && \ + (RBIGNUM_LEN(x) == 1 || bigzero_p(x)))) + +static int +bigzero_p(VALUE x) +{ + long i; + for (i = RBIGNUM_LEN(x) - 1; 0 <= i; i--) { + if (BDIGITS(x)[i]) return 0; + } + return 1; +} + +int +rb_cmpint(VALUE val, VALUE a, VALUE b) +{ + if (NIL_P(val)) { + rb_cmperr(a, b); + } + if (FIXNUM_P(val)) { + long l = FIX2LONG(val); + if (l > 0) return 1; + if (l < 0) return -1; + return 0; + } + if (TYPE(val) == T_BIGNUM) { + if (BIGZEROP(val)) return 0; + if (RBIGNUM_SIGN(val)) return 1; + return -1; + } + if (RTEST(rb_funcall(val, '>', 1, INT2FIX(0)))) return 1; + if (RTEST(rb_funcall(val, '<', 1, INT2FIX(0)))) return -1; + return 0; +} + +#define RBIGNUM_SET_LEN(b,l) \ + ((RBASIC(b)->flags & RBIGNUM_EMBED_FLAG) ? 
\ + (RBASIC(b)->flags = (RBASIC(b)->flags & ~RBIGNUM_EMBED_LEN_MASK) | \ + ((l) << RBIGNUM_EMBED_LEN_SHIFT)) : \ + (RBIGNUM(b)->as.heap.len = (l))) + +static void +rb_big_realloc(VALUE big, long len) +{ + BDIGIT *ds; + if (RBASIC(big)->flags & RBIGNUM_EMBED_FLAG) { + if (RBIGNUM_EMBED_LEN_MAX < len) { + ds = ALLOC_N(BDIGIT, len); + MEMCPY(ds, RBIGNUM(big)->as.ary, BDIGIT, RBIGNUM_EMBED_LEN_MAX); + RBIGNUM(big)->as.heap.len = RBIGNUM_LEN(big); + RBIGNUM(big)->as.heap.digits = ds; + RBASIC(big)->flags &= ~RBIGNUM_EMBED_FLAG; + } + } + else { + if (len <= RBIGNUM_EMBED_LEN_MAX) { + ds = RBIGNUM(big)->as.heap.digits; + RBASIC(big)->flags |= RBIGNUM_EMBED_FLAG; + RBIGNUM_SET_LEN(big, len); + if (ds) { + MEMCPY(RBIGNUM(big)->as.ary, ds, BDIGIT, len); + xfree(ds); + } + } + else { + if (RBIGNUM_LEN(big) == 0) { + RBIGNUM(big)->as.heap.digits = ALLOC_N(BDIGIT, len); + } + else { + REALLOC_N(RBIGNUM(big)->as.heap.digits, BDIGIT, len); + } + } + } +} + +void +rb_big_resize(VALUE big, long len) +{ + rb_big_realloc(big, len); + RBIGNUM_SET_LEN(big, len); +} + +static VALUE +bignew_1(VALUE klass, long len, int sign) +{ + NEWOBJ(big, struct RBignum); + OBJSETUP(big, klass, T_BIGNUM); + RBIGNUM_SET_SIGN(big, sign?1:0); + if (len <= RBIGNUM_EMBED_LEN_MAX) { + RBASIC(big)->flags |= RBIGNUM_EMBED_FLAG; + RBIGNUM_SET_LEN(big, len); + } + else { + rb_big_resize((VALUE)big, len); + } + + return (VALUE)big; +} + +#define bignew(len,sign) bignew_1(rb_cBignum,len,sign) + +VALUE +rb_big_clone(VALUE x) +{ + VALUE z = bignew_1(CLASS_OF(x), RBIGNUM_LEN(x), RBIGNUM_SIGN(x)); + + MEMCPY(BDIGITS(z), BDIGITS(x), BDIGIT, RBIGNUM_LEN(x)); + return z; +} + +/* modify a bignum by 2's complement */ +static void +get2comp(VALUE x) +{ + long i = RBIGNUM_LEN(x); + BDIGIT *ds = BDIGITS(x); + BDIGIT_DBL num; + + if (!i) return; + while (i--) ds[i] = ~ds[i]; + i = 0; num = 1; + do { + num += ds[i]; + ds[i++] = BIGLO(num); + num = BIGDN(num); + } while (i < RBIGNUM_LEN(x)); + if (num != 0) { + 
rb_big_resize(x, RBIGNUM_LEN(x)+1); + ds = BDIGITS(x); + ds[RBIGNUM_LEN(x)-1] = 1; + } +} + +void +rb_big_2comp(VALUE x) /* get 2's complement */ +{ + get2comp(x); +} + +static VALUE +bigtrunc(VALUE x) +{ + long len = RBIGNUM_LEN(x); + BDIGIT *ds = BDIGITS(x); + + if (len == 0) return x; + while (--len && !ds[len]); + rb_big_resize(x, len+1); + return x; +} + +static VALUE +bigfixize(VALUE x) +{ + long len = RBIGNUM_LEN(x); + BDIGIT *ds = BDIGITS(x); + + if (len*SIZEOF_BDIGITS <= sizeof(long)) { + long num = 0; + while (len--) { + num = BIGUP(num) + ds[len]; + } + if (num >= 0) { + if (RBIGNUM_SIGN(x)) { + if (POSFIXABLE(num)) return LONG2FIX(num); + } + else { + if (NEGFIXABLE(-(long)num)) return LONG2FIX(-(long)num); + } + } + } + return x; +} + +static VALUE +bignorm(VALUE x) +{ + if (!FIXNUM_P(x) && TYPE(x) == T_BIGNUM) { + x = bigfixize(bigtrunc(x)); + } + return x; +} + +VALUE +rb_big_norm(VALUE x) +{ + return bignorm(x); +} + +VALUE +rb_uint2big(VALUE n) +{ + BDIGIT_DBL num = n; + long i = 0; + BDIGIT *digits; + VALUE big; + + big = bignew(DIGSPERLONG, 1); + digits = BDIGITS(big); + while (i < DIGSPERLONG) { + digits[i++] = BIGLO(num); + num = BIGDN(num); + } + + i = DIGSPERLONG; + while (--i && !digits[i]) ; + RBIGNUM_SET_LEN(big, i+1); + return big; +} + +VALUE +rb_int2big(SIGNED_VALUE n) +{ + long neg = 0; + VALUE big; + + if (n < 0) { + n = -n; + neg = 1; + } + big = rb_uint2big(n); + if (neg) { + RBIGNUM_SET_SIGN(big, 0); + } + return big; +} + +VALUE +rb_uint2inum(VALUE n) +{ + if (POSFIXABLE(n)) return LONG2FIX(n); + return rb_uint2big(n); +} + +VALUE +rb_int2inum(SIGNED_VALUE n) +{ + if (FIXABLE(n)) return LONG2FIX(n); + return rb_int2big(n); +} + +#ifdef HAVE_LONG_LONG + +void +rb_quad_pack(char *buf, VALUE val) +{ + LONG_LONG q; + + val = rb_to_int(val); + if (FIXNUM_P(val)) { + q = FIX2LONG(val); + } + else { + long len = RBIGNUM_LEN(val); + BDIGIT *ds; + + if (len > SIZEOF_LONG_LONG/SIZEOF_BDIGITS) { + len = SIZEOF_LONG_LONG/SIZEOF_BDIGITS; + } + 
ds = BDIGITS(val); + q = 0; + while (len--) { + q = BIGUP(q); + q += ds[len]; + } + if (!RBIGNUM_SIGN(val)) q = -q; + } + memcpy(buf, (char*)&q, SIZEOF_LONG_LONG); +} + +VALUE +rb_quad_unpack(const char *buf, int sign) +{ + unsigned LONG_LONG q; + long neg = 0; + long i; + BDIGIT *digits; + VALUE big; + + memcpy(&q, buf, SIZEOF_LONG_LONG); + if (sign) { + if (FIXABLE((LONG_LONG)q)) return LONG2FIX((LONG_LONG)q); + if ((LONG_LONG)q < 0) { + q = -(LONG_LONG)q; + neg = 1; + } + } + else { + if (POSFIXABLE(q)) return LONG2FIX(q); + } + + i = 0; + big = bignew(DIGSPERLL, 1); + digits = BDIGITS(big); + while (i < DIGSPERLL) { + digits[i++] = BIGLO(q); + q = BIGDN(q); + } + + i = DIGSPERLL; + while (i-- && !digits[i]) ; + RBIGNUM_SET_LEN(big, i+1); + + if (neg) { + RBIGNUM_SET_SIGN(big, 0); + } + return bignorm(big); +} + +#else + +#define QUAD_SIZE 8 + +void +rb_quad_pack(char *buf, VALUE val) +{ + long len; + + memset(buf, 0, QUAD_SIZE); + val = rb_to_int(val); + if (FIXNUM_P(val)) { + val = rb_int2big(FIX2LONG(val)); + } + len = RBIGNUM_LEN(val) * SIZEOF_BDIGITS; + if (len > QUAD_SIZE) { + rb_raise(rb_eRangeError, "bignum too big to convert into `quad int'"); + } + memcpy(buf, (char*)BDIGITS(val), len); + if (!RBIGNUM_SIGN(val)) { + len = QUAD_SIZE; + while (len--) { + *buf = ~*buf; + buf++; + } + } +} + +#define BNEG(b) (RSHIFT(((BDIGIT*)b)[QUAD_SIZE/SIZEOF_BDIGITS-1],BITSPERDIG-1) != 0) + +VALUE +rb_quad_unpack(const char *buf, int sign) +{ + VALUE big = bignew(QUAD_SIZE/SIZEOF_BDIGITS, 1); + + memcpy((char*)BDIGITS(big), buf, QUAD_SIZE); + if (sign && BNEG(buf)) { + long len = QUAD_SIZE; + char *tmp = (char*)BDIGITS(big); + + RBIGNUM_SET_SIGN(big, 0); + while (len--) { + *tmp = ~*tmp; + tmp++; + } + } + + return bignorm(big); +} + +#endif + +VALUE +rb_cstr_to_inum(const char *str, int base, int badcheck) +{ + const char *s = str; + char *end; + char sign = 1, nondigit = 0; + int c; + BDIGIT_DBL num; + long len, blen = 1; + long i; + VALUE z; + BDIGIT *zds; + 
+#define conv_digit(c) \ + (!ISASCII(c) ? -1 : \ + ISDIGIT(c) ? ((c) - '0') : \ + ISLOWER(c) ? ((c) - 'a' + 10) : \ + ISUPPER(c) ? ((c) - 'A' + 10) : \ + -1) + + if (!str) { + if (badcheck) goto bad; + return INT2FIX(0); + } + while (ISSPACE(*str)) str++; + + if (str[0] == '+') { + str++; + } + else if (str[0] == '-') { + str++; + sign = 0; + } + if (str[0] == '+' || str[0] == '-') { + if (badcheck) goto bad; + return INT2FIX(0); + } + if (base <= 0) { + if (str[0] == '0') { + switch (str[1]) { + case 'x': case 'X': + base = 16; + break; + case 'b': case 'B': + base = 2; + break; + case 'o': case 'O': + base = 8; + break; + case 'd': case 'D': + base = 10; + break; + default: + base = 8; + } + } + else if (base < -1) { + base = -base; + } + else { + base = 10; + } + } + switch (base) { + case 2: + len = 1; + if (str[0] == '0' && (str[1] == 'b'||str[1] == 'B')) { + str += 2; + } + break; + case 3: + len = 2; + break; + case 8: + if (str[0] == '0' && (str[1] == 'o'||str[1] == 'O')) { + str += 2; + } + case 4: case 5: case 6: case 7: + len = 3; + break; + case 10: + if (str[0] == '0' && (str[1] == 'd'||str[1] == 'D')) { + str += 2; + } + case 9: case 11: case 12: case 13: case 14: case 15: + len = 4; + break; + case 16: + len = 4; + if (str[0] == '0' && (str[1] == 'x'||str[1] == 'X')) { + str += 2; + } + break; + default: + if (base < 2 || 36 < base) { + rb_raise(rb_eArgError, "invalid radix %d", base); + } + if (base <= 32) { + len = 5; + } + else { + len = 6; + } + break; + } + if (*str == '0') { /* squeeze preceding 0s */ + int us = 0; + while ((c = *++str) == '0' || c == '_') { + if (c == '_') { + if (++us >= 2) + break; + } else + us = 0; + } + if (!(c = *str) || ISSPACE(c)) --str; + } + c = *str; + c = conv_digit(c); + if (c < 0 || c >= base) { + if (badcheck) goto bad; + return INT2FIX(0); + } + len *= strlen(str)*sizeof(char); + + if (len <= (sizeof(long)*CHAR_BIT)) { + unsigned long val = STRTOUL(str, &end, base); + + if (str < end && *end == '_') goto 
bigparse; + if (badcheck) { + if (end == str) goto bad; /* no number */ + while (*end && ISSPACE(*end)) end++; + if (*end) goto bad; /* trailing garbage */ + } + + if (POSFIXABLE(val)) { + if (sign) return LONG2FIX(val); + else { + long result = -(long)val; + return LONG2FIX(result); + } + } + else { + VALUE big = rb_uint2big(val); + RBIGNUM_SET_SIGN(big, sign); + return bignorm(big); + } + } + bigparse: + len = (len/BITSPERDIG)+1; + if (badcheck && *str == '_') goto bad; + + z = bignew(len, sign); + zds = BDIGITS(z); + for (i=len;i--;) zds[i]=0; + while ((c = *str++) != 0) { + if (c == '_') { + if (nondigit) { + if (badcheck) goto bad; + break; + } + nondigit = c; + continue; + } + else if ((c = conv_digit(c)) < 0) { + break; + } + if (c >= base) break; + nondigit = 0; + i = 0; + num = c; + for (;;) { + while (i> 1) & MASK_55; + x = ((x >> 2) & MASK_33) + (x & MASK_33); + x = ((x >> 4) + x) & MASK_0f; + x += (x >> 8); + x += (x >> 16); +#if SIZEOF_LONG == 8 + x += (x >> 32); +#endif + return (int)(x & 0x7f); +#undef MASK_0f +#undef MASK_33 +#undef MASK_55 +} + +static inline unsigned long +next_pow2(register unsigned long x) +{ + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; +#if SIZEOF_LONG == 8 + x |= x >> 32; +#endif + return x + 1; +} + +static inline int +floor_log2(register unsigned long x) +{ + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; +#if SIZEOF_LONG == 8 + x |= x >> 32; +#endif + return (int)ones(x) - 1; +} + +static inline int +ceil_log2(register unsigned long x) +{ + return floor_log2(x) + !POW2_P(x); +} + +#define LOG2_KARATSUBA_DIGITS 7 +#define KARATSUBA_DIGITS (1L< KARATSUBA_DIGITS"); + + m = ceil_log2(n1); + if (m1) *m1 = 1 << m; + i = m - LOG2_KARATSUBA_DIGITS; + if (i >= MAX_BIG2STR_TABLE_ENTRIES) + i = MAX_BIG2STR_TABLE_ENTRIES - 1; + t = power_cache_get_power0(base, i); + + j = KARATSUBA_DIGITS*(1 << i); + while (n1 > j) { + t = bigsqr(t); + j *= 2; + } + return t; +} + +/* 
big2str_muraken_find_n1 + * + * Let a natural number x is given by: + * x = 2^0 * x_0 + 2^1 * x_1 + ... + 2^(B*n_0 - 1) * x_{B*n_0 - 1}, + * where B is BITSPERDIG (i.e. BDIGITS*CHAR_BIT) and n_0 is + * RBIGNUM_LEN(x). + * + * Now, we assume n_1 = min_n \{ n | 2^(B*n_0/2) <= b_1^(n_1) \}, so + * it is realized that 2^(B*n_0) <= {b_1}^{2*n_1}, where b_1 is a + * given radix number. And then, we have n_1 <= (B*n_0) / + * (2*log_2(b_1)), therefore n_1 is given by ceil((B*n_0) / + * (2*log_2(b_1))). + */ +static long +big2str_find_n1(VALUE x, int base) +{ + static const double log_2[] = { + 1.0, 1.58496250072116, 2.0, + 2.32192809488736, 2.58496250072116, 2.8073549220576, + 3.0, 3.16992500144231, 3.32192809488736, + 3.4594316186373, 3.58496250072116, 3.70043971814109, + 3.8073549220576, 3.90689059560852, 4.0, + 4.08746284125034, 4.16992500144231, 4.24792751344359, + 4.32192809488736, 4.39231742277876, 4.4594316186373, + 4.52356195605701, 4.58496250072116, 4.64385618977472, + 4.70043971814109, 4.75488750216347, 4.8073549220576, + 4.85798099512757, 4.90689059560852, 4.95419631038688, + 5.0, 5.04439411935845, 5.08746284125034, + 5.12928301694497, 5.16992500144231 + }; + long bits; + + if (base < 2 || 36 < base) + rb_bug("invalid radix %d", base); + + if (FIXNUM_P(x)) { + bits = (SIZEOF_LONG*CHAR_BIT - 1)/2 + 1; + } + else if (BIGZEROP(x)) { + return 0; + } + else if (RBIGNUM_LEN(x) >= LONG_MAX/BITSPERDIG) { + rb_raise(rb_eRangeError, "bignum too big to convert into `string'"); + } + else { + bits = BITSPERDIG*RBIGNUM_LEN(x); + } + + return (long)ceil(bits/log_2[base - 2]); +} + +static long +big2str_orig(VALUE x, int base, char* ptr, long len, long hbase, int trim) +{ + long i = RBIGNUM_LEN(x), j = len; + BDIGIT* ds = BDIGITS(x); + + while (i && j > 0) { + long k = i; + BDIGIT_DBL num = 0; + + while (k--) { /* x / hbase */ + num = BIGUP(num) + ds[k]; + ds[k] = (BDIGIT)(num / hbase); + num %= hbase; + } + if (trim && ds[i-1] == 0) i--; + k = SIZEOF_BDIGITS; + while (k--) { 
+ ptr[--j] = ruby_digitmap[num % base]; + num /= base; + if (j <= 0) break; + if (trim && i == 0 && num == 0) break; + } + } + if (trim) { + while (j < len && ptr[j] == '0') j++; + MEMMOVE(ptr, ptr + j, char, len - j); + len -= j; + } + return len; +} + +static long +big2str_karatsuba(VALUE x, int base, char* ptr, + long n1, long len, long hbase, int trim) +{ + long lh, ll, m1; + VALUE b, q, r; + + if (BIGZEROP(x)) { + if (trim) return 0; + else { + memset(ptr, '0', len); + return len; + } + } + + if (n1 <= KARATSUBA_DIGITS) { + return big2str_orig(x, base, ptr, len, hbase, trim); + } + + b = power_cache_get_power(base, n1, &m1); + bigdivmod(x, b, &q, &r); + lh = big2str_karatsuba(q, base, ptr, (len - m1)/2, + len - m1, hbase, trim); + rb_big_resize(q, 0); + ll = big2str_karatsuba(r, base, ptr + lh, m1/2, + m1, hbase, !lh && trim); + rb_big_resize(r, 0); + + return lh + ll; +} + +VALUE +rb_big2str0(VALUE x, int base, int trim) +{ + int off; + VALUE ss, xx; + long n1, n2, len, hbase; + char* ptr; + + if (FIXNUM_P(x)) { + return rb_fix2str(x, base); + } + if (BIGZEROP(x)) { + return rb_usascii_str_new2("0"); + } + + if (base < 2 || 36 < base) + rb_raise(rb_eArgError, "invalid radix %d", base); + + n2 = big2str_find_n1(x, base); + n1 = (n2 + 1) / 2; + ss = rb_usascii_str_new(0, n2 + 1); /* plus one for sign */ + ptr = RSTRING_PTR(ss); + ptr[0] = RBIGNUM_SIGN(x) ? 
'+' : '-'; + + hbase = base*base; +#if SIZEOF_BDIGITS > 2 + hbase *= hbase; +#endif + off = !(trim && RBIGNUM_SIGN(x)); /* erase plus sign if trim */ + xx = rb_big_clone(x); + RBIGNUM_SET_SIGN(xx, 1); + if (n1 <= KARATSUBA_DIGITS) { + len = off + big2str_orig(xx, base, ptr + off, n2, hbase, trim); + } + else { + len = off + big2str_karatsuba(xx, base, ptr + off, n1, + n2, hbase, trim); + } + rb_big_resize(xx, 0); + + ptr[len] = '\0'; + rb_str_resize(ss, len); + + return ss; +} + +VALUE +rb_big2str(VALUE x, int base) +{ + return rb_big2str0(x, base, 1); +} + +/* + * call-seq: + * big.to_s(base=10) => string + * + * Returns a string containing the representation of big radix + * base (2 through 36). + * + * 12345654321.to_s #=> "12345654321" + * 12345654321.to_s(2) #=> "1011011111110110111011110000110001" + * 12345654321.to_s(8) #=> "133766736061" + * 12345654321.to_s(16) #=> "2dfdbbc31" + * 78546939656932.to_s(36) #=> "rubyrules" + */ + +static VALUE +rb_big_to_s(int argc, VALUE *argv, VALUE x) +{ + int base; + + if (argc == 0) base = 10; + else { + VALUE b; + + rb_scan_args(argc, argv, "01", &b); + base = NUM2INT(b); + } + return rb_big2str(x, base); +} + +static VALUE +big2ulong(VALUE x, const char *type, int check) +{ + long len = RBIGNUM_LEN(x); + BDIGIT_DBL num; + BDIGIT *ds; + + if (len > DIGSPERLONG) { + if (check) + rb_raise(rb_eRangeError, "bignum too big to convert into `%s'", type); + len = DIGSPERLONG; + } + ds = BDIGITS(x); + num = 0; + while (len--) { + num = BIGUP(num); + num += ds[len]; + } + return num; +} + +VALUE +rb_big2ulong_pack(VALUE x) +{ + VALUE num = big2ulong(x, "unsigned long", Qfalse); + if (!RBIGNUM_SIGN(x)) { + return -num; + } + return num; +} + +VALUE +rb_big2ulong(VALUE x) +{ + VALUE num = big2ulong(x, "unsigned long", Qtrue); + + if (!RBIGNUM_SIGN(x)) { + if ((SIGNED_VALUE)num < 0) { + rb_raise(rb_eRangeError, "bignum out of range of unsigned long"); + } + return -num; + } + return num; +} + +SIGNED_VALUE +rb_big2long(VALUE x) +{ + 
VALUE num = big2ulong(x, "long", Qtrue); + + if ((SIGNED_VALUE)num < 0 && + (RBIGNUM_SIGN(x) || (SIGNED_VALUE)num != LONG_MIN)) { + rb_raise(rb_eRangeError, "bignum too big to convert into `long'"); + } + if (!RBIGNUM_SIGN(x)) return -(SIGNED_VALUE)num; + return num; +} + +#if HAVE_LONG_LONG + +static unsigned LONG_LONG +big2ull(VALUE x, const char *type) +{ + long len = RBIGNUM_LEN(x); + BDIGIT_DBL num; + BDIGIT *ds; + + if (len > SIZEOF_LONG_LONG/SIZEOF_BDIGITS) + rb_raise(rb_eRangeError, "bignum too big to convert into `%s'", type); + ds = BDIGITS(x); + num = 0; + while (len--) { + num = BIGUP(num); + num += ds[len]; + } + return num; +} + +unsigned LONG_LONG +rb_big2ull(VALUE x) +{ + unsigned LONG_LONG num = big2ull(x, "unsigned long long"); + + if (!RBIGNUM_SIGN(x)) return -num; + return num; +} + +LONG_LONG +rb_big2ll(VALUE x) +{ + unsigned LONG_LONG num = big2ull(x, "long long"); + + if ((LONG_LONG)num < 0 && (RBIGNUM_SIGN(x) + || (LONG_LONG)num != LLONG_MIN)) { + rb_raise(rb_eRangeError, "bignum too big to convert into `long long'"); + } + if (!RBIGNUM_SIGN(x)) return -(LONG_LONG)num; + return num; +} + +#endif /* HAVE_LONG_LONG */ + +static VALUE +dbl2big(double d) +{ + long i = 0; + BDIGIT c; + BDIGIT *digits; + VALUE z; + double u = (d < 0)?-d:d; + + if (isinf(d)) { + rb_raise(rb_eFloatDomainError, d < 0 ? 
"-Infinity" : "Infinity"); + } + if (isnan(d)) { + rb_raise(rb_eFloatDomainError, "NaN"); + } + + while (!POSFIXABLE(u) || 0 != (long)u) { + u /= (double)(BIGRAD); + i++; + } + z = bignew(i, d>=0); + digits = BDIGITS(z); + while (i--) { + u *= BIGRAD; + c = (BDIGIT)u; + u -= c; + digits[i] = c; + } + + return z; +} + +VALUE +rb_dbl2big(double d) +{ + return bignorm(dbl2big(d)); +} + +static int +nlz(BDIGIT x) +{ + BDIGIT y; + int n = BITSPERDIG; +#if BITSPERDIG > 64 + y = x >> 64; if (y) {n -= 64; x = y;} +#endif +#if BITSPERDIG > 32 + y = x >> 32; if (y) {n -= 32; x = y;} +#endif +#if BITSPERDIG > 16 + y = x >> 16; if (y) {n -= 16; x = y;} +#endif + y = x >> 8; if (y) {n -= 8; x = y;} + y = x >> 4; if (y) {n -= 4; x = y;} + y = x >> 2; if (y) {n -= 2; x = y;} + y = x >> 1; if (y) {return n - 2;} + return n - x; +} + +static double +big2dbl(VALUE x) +{ + double d = 0.0; + long i = RBIGNUM_LEN(x), lo = 0, bits; + BDIGIT *ds = BDIGITS(x), dl; + + if (i) { + bits = i * BITSPERDIG - nlz(ds[i-1]); + if (bits > DBL_MANT_DIG+DBL_MAX_EXP) { + d = HUGE_VAL; + } + else { + if (bits > DBL_MANT_DIG+1) + lo = (bits -= DBL_MANT_DIG+1) / BITSPERDIG; + else + bits = 0; + while (--i > lo) { + d = ds[i] + BIGRAD*d; + } + dl = ds[i]; + if (bits && (dl & (1UL << (bits %= BITSPERDIG)))) { + int carry = dl & ~(~0UL << bits); + if (!carry) { + while (i-- > 0) { + if ((carry = ds[i]) != 0) break; + } + } + if (carry) { + dl &= ~0UL << bits; + dl += 1UL << bits; + if (!dl) d += 1; + } + } + d = dl + BIGRAD*d; + if (lo) d = ldexp(d, lo * BITSPERDIG); + } + } + if (!RBIGNUM_SIGN(x)) d = -d; + return d; +} + +double +rb_big2dbl(VALUE x) +{ + double d = big2dbl(x); + + if (isinf(d)) { + rb_warning("Bignum out of Float range"); + if (d < 0.0) + d = -HUGE_VAL; + else + d = HUGE_VAL; + } + return d; +} + +/* + * call-seq: + * big.to_f -> float + * + * Converts big to a Float. If big doesn't + * fit in a Float, the result is infinity. 
+ * + */ + +static VALUE +rb_big_to_f(VALUE x) +{ + return DBL2NUM(rb_big2dbl(x)); +} + +/* + * call-seq: + * big <=> numeric => -1, 0, +1 + * + * Comparison---Returns -1, 0, or +1 depending on whether big is + * less than, equal to, or greater than numeric. This is the + * basis for the tests in Comparable. + * + */ + +VALUE +rb_big_cmp(VALUE x, VALUE y) +{ + long xlen = RBIGNUM_LEN(x); + + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + break; + + case T_BIGNUM: + break; + + case T_FLOAT: + { + double a = RFLOAT_VALUE(y); + + if (isinf(a)) { + if (a > 0.0) return INT2FIX(-1); + else return INT2FIX(1); + } + return rb_dbl_cmp(rb_big2dbl(x), a); + } + + default: + return rb_num_coerce_cmp(x, y, rb_intern("<=>")); + } + + if (RBIGNUM_SIGN(x) > RBIGNUM_SIGN(y)) return INT2FIX(1); + if (RBIGNUM_SIGN(x) < RBIGNUM_SIGN(y)) return INT2FIX(-1); + if (xlen < RBIGNUM_LEN(y)) + return (RBIGNUM_SIGN(x)) ? INT2FIX(-1) : INT2FIX(1); + if (xlen > RBIGNUM_LEN(y)) + return (RBIGNUM_SIGN(x)) ? INT2FIX(1) : INT2FIX(-1); + + while(xlen-- && (BDIGITS(x)[xlen]==BDIGITS(y)[xlen])); + if (-1 == xlen) return INT2FIX(0); + return (BDIGITS(x)[xlen] > BDIGITS(y)[xlen]) ? + (RBIGNUM_SIGN(x) ? INT2FIX(1) : INT2FIX(-1)) : + (RBIGNUM_SIGN(x) ? INT2FIX(-1) : INT2FIX(1)); +} + +/* + * call-seq: + * big == obj => true or false + * + * Returns true only if obj has the same value + * as big. Contrast this with Bignum#eql?, which + * requires obj to be a Bignum. 
+ * + * 68719476736 == 68719476736.0 #=> true + */ + +VALUE +rb_big_eq(VALUE x, VALUE y) +{ + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + break; + case T_BIGNUM: + break; + case T_FLOAT: + { + volatile double a, b; + + a = RFLOAT_VALUE(y); + if (isnan(a)) return Qfalse; + b = rb_big2dbl(x); + return (a == b)?Qtrue:Qfalse; + } + default: + return rb_equal(y, x); + } + if (RBIGNUM_SIGN(x) != RBIGNUM_SIGN(y)) return Qfalse; + if (RBIGNUM_LEN(x) != RBIGNUM_LEN(y)) return Qfalse; + if (MEMCMP(BDIGITS(x),BDIGITS(y),BDIGIT,RBIGNUM_LEN(y)) != 0) return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * big.eql?(obj) => true or false + * + * Returns true only if obj is a + * Bignum with the same value as big. Contrast this + * with Bignum#==, which performs type conversions. + * + * 68719476736.eql?(68719476736.0) #=> false + */ + +static VALUE +rb_big_eql(VALUE x, VALUE y) +{ + if (TYPE(y) != T_BIGNUM) return Qfalse; + if (RBIGNUM_SIGN(x) != RBIGNUM_SIGN(y)) return Qfalse; + if (RBIGNUM_LEN(x) != RBIGNUM_LEN(y)) return Qfalse; + if (MEMCMP(BDIGITS(x),BDIGITS(y),BDIGIT,RBIGNUM_LEN(y)) != 0) return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * -big => other_big + * + * Unary minus (returns a new Bignum whose value is 0-big) + */ + +static VALUE +rb_big_uminus(VALUE x) +{ + VALUE z = rb_big_clone(x); + + RBIGNUM_SET_SIGN(z, !RBIGNUM_SIGN(x)); + + return bignorm(z); +} + +/* + * call-seq: + * ~big => integer + * + * Inverts the bits in big. As Bignums are conceptually infinite + * length, the result acts as if it had an infinite number of one + * bits to the left. In hex representations, this is displayed + * as two periods to the left of the digits. 
+ * + * sprintf("%X", ~0x1122334455) #=> "..FEEDDCCBBAA" + */ + +static VALUE +rb_big_neg(VALUE x) +{ + VALUE z = rb_big_clone(x); + BDIGIT *ds; + long i; + + if (!RBIGNUM_SIGN(x)) get2comp(z); + ds = BDIGITS(z); + i = RBIGNUM_LEN(x); + if (!i) return INT2FIX(~(SIGNED_VALUE)0); + while (i--) { + ds[i] = ~ds[i]; + } + RBIGNUM_SET_SIGN(z, !RBIGNUM_SIGN(z)); + if (RBIGNUM_SIGN(x)) get2comp(z); + + return bignorm(z); +} + +static VALUE +bigsub(VALUE x, VALUE y) +{ + VALUE z = 0; + BDIGIT *zds; + BDIGIT_DBL_SIGNED num; + long i = RBIGNUM_LEN(x); + + /* if x is larger than y, swap */ + if (RBIGNUM_LEN(x) < RBIGNUM_LEN(y)) { + z = x; x = y; y = z; /* swap x y */ + } + else if (RBIGNUM_LEN(x) == RBIGNUM_LEN(y)) { + while (i > 0) { + i--; + if (BDIGITS(x)[i] > BDIGITS(y)[i]) { + break; + } + if (BDIGITS(x)[i] < BDIGITS(y)[i]) { + z = x; x = y; y = z; /* swap x y */ + break; + } + } + } + + z = bignew(RBIGNUM_LEN(x), z==0); + zds = BDIGITS(z); + + for (i = 0, num = 0; i < RBIGNUM_LEN(y); i++) { + num += (BDIGIT_DBL_SIGNED)BDIGITS(x)[i] - BDIGITS(y)[i]; + zds[i] = BIGLO(num); + num = BIGDN(num); + } + while (num && i < RBIGNUM_LEN(x)) { + num += BDIGITS(x)[i]; + zds[i++] = BIGLO(num); + num = BIGDN(num); + } + while (i < RBIGNUM_LEN(x)) { + zds[i] = BDIGITS(x)[i]; + i++; + } + + return z; +} + +static VALUE +bigadd(VALUE x, VALUE y, int sign) +{ + VALUE z; + BDIGIT_DBL num; + long i, len; + + sign = (sign == RBIGNUM_SIGN(y)); + if (RBIGNUM_SIGN(x) != sign) { + if (sign) return bigsub(y, x); + return bigsub(x, y); + } + + if (RBIGNUM_LEN(x) > RBIGNUM_LEN(y)) { + len = RBIGNUM_LEN(x) + 1; + z = x; x = y; y = z; + } + else { + len = RBIGNUM_LEN(y) + 1; + } + z = bignew(len, sign); + + len = RBIGNUM_LEN(x); + for (i = 0, num = 0; i < len; i++) { + num += (BDIGIT_DBL)BDIGITS(x)[i] + BDIGITS(y)[i]; + BDIGITS(z)[i] = BIGLO(num); + num = BIGDN(num); + } + len = RBIGNUM_LEN(y); + while (num && i < len) { + num += BDIGITS(y)[i]; + BDIGITS(z)[i++] = BIGLO(num); + num = BIGDN(num); + } + 
while (i < len) { + BDIGITS(z)[i] = BDIGITS(y)[i]; + i++; + } + BDIGITS(z)[i] = (BDIGIT)num; + + return z; +} + +/* + * call-seq: + * big + other => Numeric + * + * Adds big and other, returning the result. + */ + +VALUE +rb_big_plus(VALUE x, VALUE y) +{ + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + /* fall through */ + case T_BIGNUM: + return bignorm(bigadd(x, y, 1)); + + case T_FLOAT: + return DBL2NUM(rb_big2dbl(x) + RFLOAT_VALUE(y)); + + default: + return rb_num_coerce_bin(x, y, '+'); + } +} + +/* + * call-seq: + * big - other => Numeric + * + * Subtracts other from big, returning the result. + */ + +VALUE +rb_big_minus(VALUE x, VALUE y) +{ + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + /* fall through */ + case T_BIGNUM: + return bignorm(bigadd(x, y, 0)); + + case T_FLOAT: + return DBL2NUM(rb_big2dbl(x) - RFLOAT_VALUE(y)); + + default: + return rb_num_coerce_bin(x, y, '-'); + } +} + +static void +rb_big_stop(void *ptr) +{ + VALUE *stop = (VALUE*)ptr; + *stop = Qtrue; +} + +struct big_mul_struct { + VALUE x, y, z, stop; +}; + +static VALUE +bigmul1(void *ptr) +{ + struct big_mul_struct *bms = (struct big_mul_struct*)ptr; + long i, j; + BDIGIT_DBL n = 0; + VALUE x = bms->x, y = bms->y, z = bms->z; + BDIGIT *zds; + + j = RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1; + zds = BDIGITS(z); + while (j--) zds[j] = 0; + for (i = 0; i < RBIGNUM_LEN(x); i++) { + BDIGIT_DBL dd; + if (bms->stop) return Qnil; + dd = BDIGITS(x)[i]; + if (dd == 0) continue; + n = 0; + for (j = 0; j < RBIGNUM_LEN(y); j++) { + BDIGIT_DBL ee = n + (BDIGIT_DBL)dd * BDIGITS(y)[j]; + n = zds[i + j] + ee; + if (ee) zds[i + j] = BIGLO(n); + n = BIGDN(n); + } + if (n) { + zds[i + j] = n; + } + } + return z; +} + +static VALUE +rb_big_mul0(VALUE x, VALUE y) +{ + struct big_mul_struct bms; + volatile VALUE z; + + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + break; + + case T_BIGNUM: + break; + + case T_FLOAT: + return DBL2NUM(rb_big2dbl(x) * 
RFLOAT_VALUE(y)); + + default: + return rb_num_coerce_bin(x, y, '*'); + } + + bms.x = x; + bms.y = y; + bms.z = bignew(RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y)); + bms.stop = Qfalse; + + if (RBIGNUM_LEN(x) + RBIGNUM_LEN(y) > 10000) { + z = rb_thread_blocking_region(bigmul1, &bms, rb_big_stop, &bms.stop); + } + else { + z = bigmul1(&bms); + } + + return z; +} + +/* + * call-seq: + * big * other => Numeric + * + * Multiplies big and other, returning the result. + */ + +VALUE +rb_big_mul(VALUE x, VALUE y) +{ + return bignorm(rb_big_mul0(x, y)); +} + +struct big_div_struct { + long nx, ny; + BDIGIT *yds, *zds; + VALUE stop; +}; + +static VALUE +bigdivrem1(void *ptr) +{ + struct big_div_struct *bds = (struct big_div_struct*)ptr; + long nx = bds->nx, ny = bds->ny; + long i, j, nyzero; + BDIGIT *yds = bds->yds, *zds = bds->zds; + BDIGIT_DBL t2; + BDIGIT_DBL_SIGNED num; + BDIGIT q; + + j = nx==ny?nx+1:nx; + for (nyzero = 0; !yds[nyzero]; nyzero++); + do { + if (bds->stop) return Qnil; + if (zds[j] == yds[ny-1]) q = BIGRAD-1; + else q = (BDIGIT)((BIGUP(zds[j]) + zds[j-1])/yds[ny-1]); + if (q) { + i = nyzero; num = 0; t2 = 0; + do { /* multiply and subtract */ + BDIGIT_DBL ee; + t2 += (BDIGIT_DBL)yds[i] * q; + ee = num - BIGLO(t2); + num = (BDIGIT_DBL)zds[j - ny + i] + ee; + if (ee) zds[j - ny + i] = BIGLO(num); + num = BIGDN(num); + t2 = BIGDN(t2); + } while (++i < ny); + num += zds[j - ny + i] - t2;/* borrow from high digit; don't update */ + while (num) { /* "add back" required */ + i = 0; num = 0; q--; + do { + BDIGIT_DBL ee = num + yds[i]; + num = (BDIGIT_DBL)zds[j - ny + i] + ee; + if (ee) zds[j - ny + i] = BIGLO(num); + num = BIGDN(num); + } while (++i < ny); + num--; + } + } + zds[j] = q; + } while (--j >= ny); + return Qnil; +} + +static VALUE +bigdivrem(VALUE x, VALUE y, VALUE *divp, VALUE *modp) +{ + struct big_div_struct bds; + long nx = RBIGNUM_LEN(x), ny = RBIGNUM_LEN(y); + long i, j; + volatile VALUE yy, z; + BDIGIT *xds, *yds, 
*zds, *tds; + BDIGIT_DBL t2; + BDIGIT dd, q; + + if (BIGZEROP(y)) rb_num_zerodiv(); + yds = BDIGITS(y); + if (nx < ny || (nx == ny && BDIGITS(x)[nx - 1] < BDIGITS(y)[ny - 1])) { + if (divp) *divp = rb_int2big(0); + if (modp) *modp = x; + return Qnil; + } + xds = BDIGITS(x); + if (ny == 1) { + dd = yds[0]; + z = rb_big_clone(x); + zds = BDIGITS(z); + t2 = 0; i = nx; + while (i--) { + t2 = BIGUP(t2) + zds[i]; + zds[i] = (BDIGIT)(t2 / dd); + t2 %= dd; + } + RBIGNUM_SET_SIGN(z, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y)); + if (modp) { + *modp = rb_uint2big((VALUE)t2); + RBIGNUM_SET_SIGN(*modp, RBIGNUM_SIGN(x)); + } + if (divp) *divp = z; + return Qnil; + } + z = bignew(nx==ny?nx+2:nx+1, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y)); + zds = BDIGITS(z); + if (nx==ny) zds[nx+1] = 0; + while (!yds[ny-1]) ny--; + + dd = 0; + q = yds[ny-1]; + while ((q & (1UL<<(BITSPERDIG-1))) == 0) { + q <<= 1UL; + dd++; + } + if (dd) { + yy = rb_big_clone(y); + tds = BDIGITS(yy); + j = 0; + t2 = 0; + while (j 10000 || RBIGNUM_LEN(y) > 10000) { + rb_thread_blocking_region(bigdivrem1, &bds, rb_big_stop, &bds.stop); + } + else { + bigdivrem1(&bds); + } + + if (divp) { /* move quotient down in z */ + *divp = rb_big_clone(z); + zds = BDIGITS(*divp); + j = (nx==ny ? 
nx+2 : nx+1) - ny; + for (i = 0;i < j;i++) zds[i] = zds[i+ny]; + if (!zds[i-1]) i--; + RBIGNUM_SET_LEN(*divp, i); + } + if (modp) { /* normalize remainder */ + *modp = rb_big_clone(z); + zds = BDIGITS(*modp); + while (--ny && !zds[ny]); ++ny; + if (dd) { + t2 = 0; i = ny; + while(i--) { + t2 = (t2 | zds[i]) >> dd; + q = zds[i]; + zds[i] = BIGLO(t2); + t2 = BIGUP(q); + } + } + if (!zds[ny-1]) ny--; + RBIGNUM_SET_LEN(*modp, ny); + RBIGNUM_SET_SIGN(*modp, RBIGNUM_SIGN(x)); + } + return z; +} + +static void +bigdivmod(VALUE x, VALUE y, VALUE *divp, VALUE *modp) +{ + VALUE mod; + + bigdivrem(x, y, divp, &mod); + if (RBIGNUM_SIGN(x) != RBIGNUM_SIGN(y) && !BIGZEROP(mod)) { + if (divp) *divp = bigadd(*divp, rb_int2big(1), 0); + if (modp) *modp = bigadd(mod, y, 1); + } + else if (modp) { + *modp = mod; + } +} + + +static VALUE +rb_big_divide(VALUE x, VALUE y, ID op) +{ + VALUE z; + + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + break; + + case T_BIGNUM: + break; + + case T_FLOAT: + { + double div = rb_big2dbl(x) / RFLOAT_VALUE(y); + if (op == '/') { + return DBL2NUM(div); + } + else { + return rb_dbl2big(div); + } + } + + default: + return rb_num_coerce_bin(x, y, op); + } + bigdivmod(x, y, &z, 0); + + return bignorm(z); +} + +/* + * call-seq: + * big / other => Numeric + * + * Divides big by other, returning the result. + */ + +VALUE +rb_big_div(VALUE x, VALUE y) +{ + return rb_big_divide(x, y, '/'); +} + +VALUE +rb_big_idiv(VALUE x, VALUE y) +{ + return rb_big_divide(x, y, rb_intern("div")); +} + +/* + * call-seq: + * big % other => Numeric + * big.modulo(other) => Numeric + * + * Returns big modulo other. See Numeric.divmod for more + * information. 
+ */ + +VALUE +rb_big_modulo(VALUE x, VALUE y) +{ + VALUE z; + + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + break; + + case T_BIGNUM: + break; + + default: + return rb_num_coerce_bin(x, y, '%'); + } + bigdivmod(x, y, 0, &z); + + return bignorm(z); +} + +/* + * call-seq: + * big.remainder(numeric) => number + * + * Returns the remainder after dividing big by numeric. + * + * -1234567890987654321.remainder(13731) #=> -6966 + * -1234567890987654321.remainder(13731.24) #=> -9906.22531493148 + */ +static VALUE +rb_big_remainder(VALUE x, VALUE y) +{ + VALUE z; + + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + break; + + case T_BIGNUM: + break; + + default: + return rb_num_coerce_bin(x, y, rb_intern("remainder")); + } + bigdivrem(x, y, 0, &z); + + return bignorm(z); +} + +/* + * call-seq: + * big.divmod(numeric) => array + * + * See Numeric#divmod. + * + */ +VALUE +rb_big_divmod(VALUE x, VALUE y) +{ + VALUE div, mod; + + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + break; + + case T_BIGNUM: + break; + + default: + return rb_num_coerce_bin(x, y, rb_intern("divmod")); + } + bigdivmod(x, y, &div, &mod); + + return rb_assoc_new(bignorm(div), bignorm(mod)); +} + +static int +bdigbitsize(BDIGIT x) +{ + int size = 1; + int nb = BITSPERDIG / 2; + BDIGIT bits = (~0 << nb); + + if (!x) return 0; + while (x > 1) { + if (x & bits) { + size += nb; + x >>= nb; + } + x &= ~bits; + nb /= 2; + bits >>= nb; + } + + return size; +} + +static VALUE big_lshift(VALUE, unsigned long); +static VALUE big_rshift(VALUE, unsigned long); + +static VALUE big_shift(VALUE x, int n) +{ + if (n < 0) + return big_lshift(x, (unsigned int)-n); + else if (n > 0) + return big_rshift(x, (unsigned int)n); + return x; +} + +/* + * call-seq: + * big.fdiv(numeric) -> float + * + * Returns the floating point result of dividing big by + * numeric. 
+ * + * -1234567890987654321.fdiv(13731) #=> -89910996357705.5 + * -1234567890987654321.fdiv(13731.24) #=> -89909424858035.7 + * + */ + +static VALUE +rb_big_fdiv(VALUE x, VALUE y) +{ + double dx = big2dbl(x); + double dy; + + if (isinf(dx)) { +#define DBL_BIGDIG ((DBL_MANT_DIG + BITSPERDIG) / BITSPERDIG) + VALUE z; + int ex, ey; + + ex = (RBIGNUM_LEN(bigtrunc(x)) - 1) * BITSPERDIG; + ex += bdigbitsize(BDIGITS(x)[RBIGNUM_LEN(x) - 1]); + ex -= 2 * DBL_BIGDIG * BITSPERDIG; + if (ex) x = big_shift(x, ex); + + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + case T_BIGNUM: { + ey = (RBIGNUM_LEN(bigtrunc(y)) - 1) * BITSPERDIG; + ey += bdigbitsize(BDIGITS(y)[RBIGNUM_LEN(y) - 1]); + ey -= DBL_BIGDIG * BITSPERDIG; + if (ey) y = big_shift(y, ey); + bignum: + bigdivrem(x, y, &z, 0); + return DBL2NUM(ldexp(big2dbl(z), ex - ey)); + } + case T_FLOAT: + if (isnan(RFLOAT_VALUE(y))) return y; + y = dbl2big(ldexp(frexp(RFLOAT_VALUE(y), &ey), DBL_MANT_DIG)); + ey -= DBL_MANT_DIG; + goto bignum; + } + } + switch (TYPE(y)) { + case T_FIXNUM: + dy = (double)FIX2LONG(y); + break; + + case T_BIGNUM: + dy = rb_big2dbl(y); + break; + + case T_FLOAT: + dy = RFLOAT_VALUE(y); + break; + + default: + return rb_num_coerce_bin(x, y, rb_intern("fdiv")); + } + return DBL2NUM(dx / dy); +} + +static VALUE +bigsqr(VALUE x) +{ + long len = RBIGNUM_LEN(x), k = len / 2, i; + VALUE a, b, a2, z; + BDIGIT_DBL num; + + if (len < 4000 / BITSPERDIG) { + return bigtrunc(rb_big_mul0(x, x)); + } + + a = bignew(len - k, 1); + MEMCPY(BDIGITS(a), BDIGITS(x) + k, BDIGIT, len - k); + b = bignew(k, 1); + MEMCPY(BDIGITS(b), BDIGITS(x), BDIGIT, k); + + a2 = bigtrunc(bigsqr(a)); + z = bigsqr(b); + rb_big_realloc(z, (len = 2 * k + RBIGNUM_LEN(a2)) + 1); + while (RBIGNUM_LEN(z) < 2 * k) { + BDIGITS(z)[RBIGNUM_LEN(z)] = 0; + RBIGNUM_SET_LEN(z, RBIGNUM_LEN(z)+1); + } + MEMCPY(BDIGITS(z) + 2 * k, BDIGITS(a2), BDIGIT, RBIGNUM_LEN(a2)); + RBIGNUM_SET_LEN(z, len); + a2 = bigtrunc(rb_big_mul0(a, b)); + len = 
RBIGNUM_LEN(a2); + for (i = 0, num = 0; i < len; i++) { + num += (BDIGIT_DBL)BDIGITS(z)[i + k] + ((BDIGIT_DBL)BDIGITS(a2)[i] << 1); + BDIGITS(z)[i + k] = BIGLO(num); + num = BIGDN(num); + } + if (num) { + len = RBIGNUM_LEN(z); + for (i += k; i < len && num; ++i) { + num += (BDIGIT_DBL)BDIGITS(z)[i]; + BDIGITS(z)[i] = BIGLO(num); + num = BIGDN(num); + } + if (num) { + BDIGITS(z)[RBIGNUM_LEN(z)] = BIGLO(num); + RBIGNUM_SET_LEN(z, RBIGNUM_LEN(z)+1); + } + } + return bigtrunc(z); +} + +/* + * call-seq: + * big ** exponent => numeric + * + * Raises _big_ to the _exponent_ power (which may be an integer, float, + * or anything that will coerce to a number). The result may be + * a Fixnum, Bignum, or Float + * + * 123456789 ** 2 #=> 15241578750190521 + * 123456789 ** 1.2 #=> 5126464716.09932 + * 123456789 ** -2 #=> 6.5610001194102e-17 + */ + +VALUE +rb_big_pow(VALUE x, VALUE y) +{ + double d; + SIGNED_VALUE yy; + + if (y == INT2FIX(0)) return INT2FIX(1); + switch (TYPE(y)) { + case T_FLOAT: + d = RFLOAT_VALUE(y); + break; + + case T_BIGNUM: + rb_warn("in a**b, b may be too big"); + d = rb_big2dbl(y); + break; + + case T_FIXNUM: + yy = FIX2LONG(y); + + if (yy < 0) + return rb_funcall(rb_rational_raw1(x), rb_intern("**"), 1, y); + else { + VALUE z = 0; + SIGNED_VALUE mask; + const long BIGLEN_LIMIT = 1024*1024 / SIZEOF_BDIGITS; + + if ((RBIGNUM_LEN(x) > BIGLEN_LIMIT) || + (RBIGNUM_LEN(x) > BIGLEN_LIMIT / yy)) { + rb_warn("in a**b, b may be too big"); + d = (double)yy; + break; + } + for (mask = FIXNUM_MAX + 1; mask; mask >>= 1) { + if (z) z = bigtrunc(bigsqr(z)); + if (yy & mask) { + z = z ? 
bigtrunc(rb_big_mul0(z, x)) : x; + } + } + return bignorm(z); + } + /* NOTREACHED */ + break; + + default: + return rb_num_coerce_bin(x, y, rb_intern("**")); + } + return DBL2NUM(pow(rb_big2dbl(x), d)); +} + +static VALUE +bit_coerce(VALUE x) +{ + while (!FIXNUM_P(x) && TYPE(x) != T_BIGNUM) { + if (TYPE(x) == T_FLOAT) { + rb_raise(rb_eTypeError, "can't convert Float into Integer"); + } + x = rb_to_int(x); + } + return x; +} + +/* + * call-seq: + * big & numeric => integer + * + * Performs bitwise +and+ between _big_ and _numeric_. + */ + +VALUE +rb_big_and(VALUE xx, VALUE yy) +{ + volatile VALUE x, y, z; + BDIGIT *ds1, *ds2, *zds; + long i, l1, l2; + char sign; + + x = xx; + y = bit_coerce(yy); + if (FIXNUM_P(y)) { + y = rb_int2big(FIX2LONG(y)); + } + if (!RBIGNUM_SIGN(y)) { + y = rb_big_clone(y); + get2comp(y); + } + if (!RBIGNUM_SIGN(x)) { + x = rb_big_clone(x); + get2comp(x); + } + if (RBIGNUM_LEN(x) > RBIGNUM_LEN(y)) { + l1 = RBIGNUM_LEN(y); + l2 = RBIGNUM_LEN(x); + ds1 = BDIGITS(y); + ds2 = BDIGITS(x); + sign = RBIGNUM_SIGN(y); + } + else { + l1 = RBIGNUM_LEN(x); + l2 = RBIGNUM_LEN(y); + ds1 = BDIGITS(x); + ds2 = BDIGITS(y); + sign = RBIGNUM_SIGN(x); + } + z = bignew(l2, RBIGNUM_SIGN(x) || RBIGNUM_SIGN(y)); + zds = BDIGITS(z); + + for (i=0; i integer + * + * Performs bitwise +or+ between _big_ and _numeric_. 
+ */ + +VALUE +rb_big_or(VALUE xx, VALUE yy) +{ + volatile VALUE x, y, z; + BDIGIT *ds1, *ds2, *zds; + long i, l1, l2; + char sign; + + x = xx; + y = bit_coerce(yy); + if (FIXNUM_P(y)) { + y = rb_int2big(FIX2LONG(y)); + } + + if (!RBIGNUM_SIGN(y)) { + y = rb_big_clone(y); + get2comp(y); + } + if (!RBIGNUM_SIGN(x)) { + x = rb_big_clone(x); + get2comp(x); + } + if (RBIGNUM_LEN(x) > RBIGNUM_LEN(y)) { + l1 = RBIGNUM_LEN(y); + l2 = RBIGNUM_LEN(x); + ds1 = BDIGITS(y); + ds2 = BDIGITS(x); + sign = RBIGNUM_SIGN(y); + } + else { + l1 = RBIGNUM_LEN(x); + l2 = RBIGNUM_LEN(y); + ds1 = BDIGITS(x); + ds2 = BDIGITS(y); + sign = RBIGNUM_SIGN(x); + } + z = bignew(l2, RBIGNUM_SIGN(x) && RBIGNUM_SIGN(y)); + zds = BDIGITS(z); + + for (i=0; i integer + * + * Performs bitwise +exclusive or+ between _big_ and _numeric_. + */ + +VALUE +rb_big_xor(VALUE xx, VALUE yy) +{ + volatile VALUE x, y; + VALUE z; + BDIGIT *ds1, *ds2, *zds; + long i, l1, l2; + char sign; + + x = xx; + y = bit_coerce(yy); + if (FIXNUM_P(y)) { + y = rb_int2big(FIX2LONG(y)); + } + + if (!RBIGNUM_SIGN(y)) { + y = rb_big_clone(y); + get2comp(y); + } + if (!RBIGNUM_SIGN(x)) { + x = rb_big_clone(x); + get2comp(x); + } + if (RBIGNUM_LEN(x) > RBIGNUM_LEN(y)) { + l1 = RBIGNUM_LEN(y); + l2 = RBIGNUM_LEN(x); + ds1 = BDIGITS(y); + ds2 = BDIGITS(x); + sign = RBIGNUM_SIGN(y); + } + else { + l1 = RBIGNUM_LEN(x); + l2 = RBIGNUM_LEN(y); + ds1 = BDIGITS(x); + ds2 = BDIGITS(y); + sign = RBIGNUM_SIGN(x); + } + RBIGNUM_SET_SIGN(x, RBIGNUM_SIGN(x)?1:0); + RBIGNUM_SET_SIGN(y, RBIGNUM_SIGN(y)?1:0); + z = bignew(l2, !(RBIGNUM_SIGN(x) ^ RBIGNUM_SIGN(y))); + zds = BDIGITS(z); + + for (i=0; i SIZEOF_LONG / SIZEOF_BDIGITS) { + return RBIGNUM_SIGN(x) ? INT2FIX(0) : INT2FIX(-1); + } + return Qnil; +} + +/* + * call-seq: + * big << numeric => integer + * + * Shifts big left _numeric_ positions (right if _numeric_ is negative). 
+ */ + +VALUE +rb_big_lshift(VALUE x, VALUE y) +{ + long shift; + int neg = 0; + + for (;;) { + if (FIXNUM_P(y)) { + shift = FIX2LONG(y); + if (shift < 0) { + neg = 1; + shift = -shift; + } + break; + } + else if (TYPE(y) == T_BIGNUM) { + if (!RBIGNUM_SIGN(y)) { + VALUE t = check_shiftdown(y, x); + if (!NIL_P(t)) return t; + neg = 1; + } + shift = big2ulong(y, "long", Qtrue); + break; + } + y = rb_to_int(y); + } + + x = neg ? big_rshift(x, shift) : big_lshift(x, shift); + return bignorm(x); +} + +static VALUE +big_lshift(VALUE x, unsigned long shift) +{ + BDIGIT *xds, *zds; + long s1 = shift/BITSPERDIG; + int s2 = shift%BITSPERDIG; + VALUE z; + BDIGIT_DBL num = 0; + long len, i; + + len = RBIGNUM_LEN(x); + z = bignew(len+s1+1, RBIGNUM_SIGN(x)); + zds = BDIGITS(z); + for (i=0; i> numeric => integer + * + * Shifts big right _numeric_ positions (left if _numeric_ is negative). + */ + +VALUE +rb_big_rshift(VALUE x, VALUE y) +{ + long shift; + int neg = 0; + + for (;;) { + if (FIXNUM_P(y)) { + shift = FIX2LONG(y); + if (shift < 0) { + neg = 1; + shift = -shift; + } + break; + } + else if (TYPE(y) == T_BIGNUM) { + if (RBIGNUM_SIGN(y)) { + VALUE t = check_shiftdown(y, x); + if (!NIL_P(t)) return t; + } + else { + neg = 1; + } + shift = big2ulong(y, "long", Qtrue); + break; + } + y = rb_to_int(y); + } + + x = neg ? 
big_lshift(x, shift) : big_rshift(x, shift); + return bignorm(x); +} + +static VALUE +big_rshift(VALUE x, unsigned long shift) +{ + BDIGIT *xds, *zds; + long s1 = shift/BITSPERDIG; + int s2 = shift%BITSPERDIG; + VALUE z; + BDIGIT_DBL num = 0; + long i, j; + volatile VALUE save_x; + + if (s1 > RBIGNUM_LEN(x)) { + if (RBIGNUM_SIGN(x)) + return INT2FIX(0); + else + return INT2FIX(-1); + } + if (!RBIGNUM_SIGN(x)) { + save_x = x = rb_big_clone(x); + get2comp(x); + } + xds = BDIGITS(x); + i = RBIGNUM_LEN(x); j = i - s1; + if (j == 0) { + if (RBIGNUM_SIGN(x)) return INT2FIX(0); + else return INT2FIX(-1); + } + z = bignew(j, RBIGNUM_SIGN(x)); + if (!RBIGNUM_SIGN(x)) { + num = ((BDIGIT_DBL)~0) << BITSPERDIG; + } + zds = BDIGITS(z); + while (i--, j--) { + num = (num | xds[i]) >> s2; + zds[j] = BIGLO(num); + num = BIGUP(xds[i]); + } + if (!RBIGNUM_SIGN(x)) { + get2comp(z); + } + return z; +} + +/* + * call-seq: + * big[n] -> 0, 1 + * + * Bit Reference---Returns the nth bit in the (assumed) binary + * representation of big, where big[0] is the least + * significant bit. + * + * a = 9**15 + * 50.downto(0) do |n| + * print a[n] + * end + * + * produces: + * + * 000101110110100000111000011110010100111100010111001 + * + */ + +static VALUE +rb_big_aref(VALUE x, VALUE y) +{ + BDIGIT *xds; + BDIGIT_DBL num; + VALUE shift; + long i, s1, s2; + + if (TYPE(y) == T_BIGNUM) { + if (!RBIGNUM_SIGN(y)) + return INT2FIX(0); + if (RBIGNUM_LEN(bigtrunc(y)) > DIGSPERLONG) { + out_of_range: + return RBIGNUM_SIGN(x) ? 
INT2FIX(0) : INT2FIX(1); + } + shift = big2ulong(y, "long", Qfalse); + } + else { + i = NUM2LONG(y); + if (i < 0) return INT2FIX(0); + shift = (VALUE)i; + } + s1 = shift/BITSPERDIG; + s2 = shift%BITSPERDIG; + + if (s1 >= RBIGNUM_LEN(x)) goto out_of_range; + if (!RBIGNUM_SIGN(x)) { + xds = BDIGITS(x); + i = 0; num = 1; + while (num += ~xds[i], ++i <= s1) { + num = BIGDN(num); + } + } + else { + num = BDIGITS(x)[s1]; + } + if (num & ((BDIGIT_DBL)1< fixnum + * + * Compute a hash based on the value of _big_. + */ + +static VALUE +rb_big_hash(VALUE x) +{ + int hash; + + hash = rb_memhash(BDIGITS(x), sizeof(BDIGIT)*RBIGNUM_LEN(x)) ^ RBIGNUM_SIGN(x); + return INT2FIX(hash); +} + +/* + * MISSING: documentation + */ + +static VALUE +rb_big_coerce(VALUE x, VALUE y) +{ + if (FIXNUM_P(y)) { + return rb_assoc_new(rb_int2big(FIX2LONG(y)), x); + } + else if (TYPE(y) == T_BIGNUM) { + return rb_assoc_new(y, x); + } + else { + rb_raise(rb_eTypeError, "can't coerce %s to Bignum", + rb_obj_classname(y)); + } + /* not reached */ + return Qnil; +} + +/* + * call-seq: + * big.abs -> aBignum + * + * Returns the absolute value of big. + * + * -1234567890987654321.abs #=> 1234567890987654321 + */ + +static VALUE +rb_big_abs(VALUE x) +{ + if (!RBIGNUM_SIGN(x)) { + x = rb_big_clone(x); + RBIGNUM_SET_SIGN(x, 1); + } + return x; +} + +/* + * call-seq: + * big.size -> integer + * + * Returns the number of bytes in the machine representation of + * big. + * + * (256**10 - 1).size #=> 12 + * (256**20 - 1).size #=> 20 + * (256**40 - 1).size #=> 40 + */ + +static VALUE +rb_big_size(VALUE big) +{ + return LONG2FIX(RBIGNUM_LEN(big)*SIZEOF_BDIGITS); +} + +/* + * call-seq: + * big.odd? -> true or false + * + * Returns true if big is an odd number. + */ + +static VALUE +rb_big_odd_p(VALUE num) +{ + if (BDIGITS(num)[0] & 1) { + return Qtrue; + } + return Qfalse; +} + +/* + * call-seq: + * big.even? -> true or false + * + * Returns true if big is an even number. 
+ */ + +static VALUE +rb_big_even_p(VALUE num) +{ + if (BDIGITS(num)[0] & 1) { + return Qfalse; + } + return Qtrue; +} + +/* + * Bignum objects hold integers outside the range of + * Fixnum. Bignum objects are created + * automatically when integer calculations would otherwise overflow a + * Fixnum. When a calculation involving + * Bignum objects returns a result that will fit in a + * Fixnum, the result is automatically converted. + * + * For the purposes of the bitwise operations and [], a + * Bignum is treated as if it were an infinite-length + * bitstring with 2's complement representation. + * + * While Fixnum values are immediate, Bignum + * objects are not---assignment and parameter passing work with + * references to objects, not the objects themselves. + * + */ + +void +Init_Bignum(void) +{ + rb_cBignum = rb_define_class("Bignum", rb_cInteger); + + rb_define_method(rb_cBignum, "to_s", rb_big_to_s, -1); + rb_define_method(rb_cBignum, "coerce", rb_big_coerce, 1); + rb_define_method(rb_cBignum, "-@", rb_big_uminus, 0); + rb_define_method(rb_cBignum, "+", rb_big_plus, 1); + rb_define_method(rb_cBignum, "-", rb_big_minus, 1); + rb_define_method(rb_cBignum, "*", rb_big_mul, 1); + rb_define_method(rb_cBignum, "/", rb_big_div, 1); + rb_define_method(rb_cBignum, "%", rb_big_modulo, 1); + rb_define_method(rb_cBignum, "div", rb_big_idiv, 1); + rb_define_method(rb_cBignum, "divmod", rb_big_divmod, 1); + rb_define_method(rb_cBignum, "modulo", rb_big_modulo, 1); + rb_define_method(rb_cBignum, "remainder", rb_big_remainder, 1); + rb_define_method(rb_cBignum, "fdiv", rb_big_fdiv, 1); + rb_define_method(rb_cBignum, "**", rb_big_pow, 1); + rb_define_method(rb_cBignum, "&", rb_big_and, 1); + rb_define_method(rb_cBignum, "|", rb_big_or, 1); + rb_define_method(rb_cBignum, "^", rb_big_xor, 1); + rb_define_method(rb_cBignum, "~", rb_big_neg, 0); + rb_define_method(rb_cBignum, "<<", rb_big_lshift, 1); + rb_define_method(rb_cBignum, ">>", rb_big_rshift, 1); + 
rb_define_method(rb_cBignum, "[]", rb_big_aref, 1); + + rb_define_method(rb_cBignum, "<=>", rb_big_cmp, 1); + rb_define_method(rb_cBignum, "==", rb_big_eq, 1); + rb_define_method(rb_cBignum, "===", rb_big_eq, 1); + rb_define_method(rb_cBignum, "eql?", rb_big_eql, 1); + rb_define_method(rb_cBignum, "hash", rb_big_hash, 0); + rb_define_method(rb_cBignum, "to_f", rb_big_to_f, 0); + rb_define_method(rb_cBignum, "abs", rb_big_abs, 0); + rb_define_method(rb_cBignum, "magnitude", rb_big_abs, 0); + rb_define_method(rb_cBignum, "size", rb_big_size, 0); + rb_define_method(rb_cBignum, "odd?", rb_big_odd_p, 0); + rb_define_method(rb_cBignum, "even?", rb_big_even_p, 0); + + power_cache_init(); +} diff --git a/blockinlining.c b/blockinlining.c new file mode 100644 index 0000000..1a6cca3 --- /dev/null +++ b/blockinlining.c @@ -0,0 +1,464 @@ +/********************************************************************** + + blockinlining.c - + + $Author: yugui $ + + Copyright (C) 2004-2007 Koichi Sasada + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "vm_core.h" + +static VALUE +iseq_special_block(rb_iseq_t *iseq, void *builder) +{ +#if OPT_BLOCKINLINING + VALUE parent = Qfalse; + VALUE iseqval; + + if (iseq->argc > 1 || iseq->arg_simple == 0) { + /* argument check */ + return 0; + } + + if (iseq->cached_special_block_builder) { + if (iseq->cached_special_block_builder == builder) { + return iseq->cached_special_block; + } + else { + return 0; + } + } + else { + iseq->cached_special_block_builder = (void *)1; + } + + if (iseq->parent_iseq) { + parent = iseq->parent_iseq->self; + } + iseqval = rb_iseq_new_with_bopt(iseq->node, iseq->name, iseq->filename, + parent, iseq->type, + GC_GUARDED_PTR(builder)); + if (0) { + printf("%s\n", RSTRING_PTR(rb_iseq_disasm(iseqval))); + } + iseq->cached_special_block = iseqval; + iseq->cached_special_block_builder = builder; + return iseqval; +#else + return 0; +#endif +} + +static NODE * 
+new_block(NODE * head, NODE * tail) +{ + head = NEW_BLOCK(head); + tail = NEW_BLOCK(tail); + head->nd_next = tail; + return head; +} + +static NODE * +new_ary(NODE * head, NODE * tail) +{ + head = NEW_ARRAY(head); + head->nd_next = tail; + return head; +} + +static NODE * +new_assign(NODE * lnode, NODE * rhs) +{ + switch (nd_type(lnode)) { + case NODE_LASGN:{ + return NEW_NODE(NODE_LASGN, lnode->nd_vid, rhs, lnode->nd_cnt); + /* NEW_LASGN(lnode->nd_vid, rhs); */ + } + case NODE_GASGN:{ + return NEW_GASGN(lnode->nd_vid, rhs); + } + case NODE_DASGN:{ + return NEW_DASGN(lnode->nd_vid, rhs); + } + case NODE_ATTRASGN:{ + NODE *args = 0; + if (lnode->nd_args) { + args = NEW_ARRAY(lnode->nd_args->nd_head); + args->nd_next = NEW_ARRAY(rhs); + args->nd_alen = 2; + } + else { + args = NEW_ARRAY(rhs); + } + + return NEW_ATTRASGN(lnode->nd_recv, + lnode->nd_mid, + args); + } + default: + rb_bug("unimplemented (block inlining): %s", ruby_node_name(nd_type(lnode))); + } + return 0; +} + +static NODE * +build_Integer_times_node(rb_iseq_t *iseq, NODE * node, NODE * lnode, + VALUE param_vars, VALUE local_vars) +{ + /* Special Block for Integer#times + {|e, _self| + _e = e + while(e < _self) + e = _e + redo_point: + BODY + next_point: + _e = _e.succ + end + } + + {|e, _self| + while(e < _self) + BODY + next_point: + e = e.succ + end + } + */ + ID _self; + CONST_ID(_self, "#_self"); + if (iseq->argc == 0) { + ID e; + CONST_ID(e, "#e"); + rb_ary_push(param_vars, ID2SYM(e)); + rb_ary_push(param_vars, ID2SYM(_self)); + iseq->argc += 2; + + node = + NEW_WHILE(NEW_CALL + (NEW_DVAR(e), idLT, new_ary(NEW_DVAR(_self), 0)), + new_block(NEW_OPTBLOCK(node), + NEW_DASGN(e, + NEW_CALL(NEW_DVAR(e), idSucc, 0))), + Qundef); + } + else { + ID _e; + ID e = SYM2ID(rb_ary_entry(param_vars, 0)); + NODE *assign; + + CONST_ID(_e, "#_e"); + rb_ary_push(param_vars, ID2SYM(_self)); + rb_ary_push(local_vars, ID2SYM(_e)); + iseq->argc++; + + if (nd_type(lnode) == NODE_DASGN_CURR) { + assign = NEW_DASGN(e, 
NEW_DVAR(_e)); + } + else { + assign = new_assign(lnode, NEW_DVAR(_e)); + } + + node = + new_block(NEW_DASGN(_e, NEW_DVAR(e)), + NEW_WHILE(NEW_CALL + (NEW_DVAR(_e), idLT, + new_ary(NEW_DVAR(_self), 0)), + new_block(assign, + new_block(NEW_OPTBLOCK(node), + NEW_DASGN(_e, + NEW_CALL + (NEW_DVAR(_e), + idSucc, 0)))), + Qundef)); + } + return node; +} + +VALUE +invoke_Integer_times_special_block(VALUE num) +{ + rb_thread_t *th = GET_THREAD(); + rb_block_t *orig_block = GC_GUARDED_PTR_REF(th->cfp->lfp[0]); + + if (orig_block && BUILTIN_TYPE(orig_block->iseq) != T_NODE) { + VALUE tsiseqval = iseq_special_block(orig_block->iseq, + build_Integer_times_node); + rb_iseq_t *tsiseq; + VALUE argv[2], val; + + if (tsiseqval) { + rb_block_t block = *orig_block; + GetISeqPtr(tsiseqval, tsiseq); + block.iseq = tsiseq; + th->cfp->lfp[0] = GC_GUARDED_PTR(&block); + argv[0] = INT2FIX(0); + argv[1] = num; + val = rb_yield_values(2, argv); + if (val == Qundef) { + return num; + } + else { + return val; + } + } + } + return Qundef; +} + +static NODE * +build_Range_each_node(rb_iseq_t *iseq, NODE * node, NODE * lnode, + VALUE param_vars, VALUE local_vars, ID mid) +{ + /* Special Block for Range#each + {|e, _last| + _e = e + while _e < _last + e = _e + next_point: + BODY + redo_point: + _e = _e.succ + end + } + {|e, _last| + while e < _last + BODY + redo_point: + e = e.succ + end + } + */ + ID _last; + CONST_ID(_last, "#_last"); + if (iseq->argc == 0) { + ID e; + CONST_ID(e, "#e"); + rb_ary_push(param_vars, ID2SYM(e)); + rb_ary_push(param_vars, ID2SYM(_last)); + iseq->argc += 2; + + node = + NEW_WHILE(NEW_CALL(NEW_DVAR(e), mid, new_ary(NEW_DVAR(_last), 0)), + new_block(NEW_OPTBLOCK(node), + NEW_DASGN(e, + NEW_CALL(NEW_DVAR(e), idSucc, 0))), + Qundef); + } + else { + ID _e; + ID e = SYM2ID(rb_ary_entry(param_vars, 0)); + NODE *assign; + + CONST_ID(_e, "#_e"); + rb_ary_push(param_vars, ID2SYM(_last)); + rb_ary_push(local_vars, ID2SYM(_e)); + iseq->argc++; + + if (nd_type(lnode) == 
NODE_DASGN_CURR) { + assign = NEW_DASGN(e, NEW_DVAR(_e)); + } + else { + assign = new_assign(lnode, NEW_DVAR(_e)); + } + + node = + new_block(NEW_DASGN(_e, NEW_DVAR(e)), + NEW_WHILE(NEW_CALL + (NEW_DVAR(_e), mid, + new_ary(NEW_DVAR(_last), 0)), + new_block(assign, + new_block(NEW_OPTBLOCK(node), + NEW_DASGN(_e, + NEW_CALL + (NEW_DVAR(_e), + idSucc, 0)))), + Qundef)); + } + return node; +} + +static NODE * +build_Range_each_node_LE(rb_iseq_t *iseq, NODE * node, NODE * lnode, + VALUE param_vars, VALUE local_vars) +{ + return build_Range_each_node(iseq, node, lnode, + param_vars, local_vars, idLE); +} + +static NODE * +build_Range_each_node_LT(rb_iseq_t *iseq, NODE * node, NODE * lnode, + VALUE param_vars, VALUE local_vars) +{ + return build_Range_each_node(iseq, node, lnode, + param_vars, local_vars, idLT); +} + +VALUE +invoke_Range_each_special_block(VALUE range, + VALUE beg, VALUE end, int excl) +{ + rb_thread_t *th = GET_THREAD(); + rb_block_t *orig_block = GC_GUARDED_PTR_REF(th->cfp->lfp[0]); + + if (BUILTIN_TYPE(orig_block->iseq) != T_NODE) { + void *builder = + excl ? 
build_Range_each_node_LT : build_Range_each_node_LE; + VALUE tsiseqval = iseq_special_block(orig_block->iseq, builder); + rb_iseq_t *tsiseq; + VALUE argv[2]; + + if (tsiseqval) { + VALUE val; + rb_block_t block = *orig_block; + GetISeqPtr(tsiseqval, tsiseq); + block.iseq = tsiseq; + th->cfp->lfp[0] = GC_GUARDED_PTR(&block); + argv[0] = beg; + argv[1] = end; + val = rb_yield_values(2, argv); + if (val == Qundef) { + return range; + } + else { + return val; + } + } + } + return Qundef; +} + + +static NODE * +build_Array_each_node(rb_iseq_t *iseq, NODE * node, NODE * lnode, + VALUE param_vars, VALUE local_vars) +{ + /* Special block for Array#each + ary.each{|e| + BODY + } + => + {|e, _self| + _i = 0 + while _i < _self.length + e = _self[_i] + redo_point: + BODY + next_point: + _i = _i.succ + end + } + + ary.each{ + BODY + } + => + {|_i, _self| + _i = 0 + while _i < _self.length + redo_point: + BODY + next_point: + _i = _i.succ + end + } + */ + + ID _self, _i; + + CONST_ID(_self, "#_self"); + CONST_ID(_i, "#_i"); + if (iseq->argc == 0) { + ID _e; + CONST_ID(_e, "#_e"); + rb_ary_push(param_vars, ID2SYM(_e)); + rb_ary_push(param_vars, ID2SYM(_self)); + iseq->argc += 2; + rb_ary_push(local_vars, ID2SYM(_i)); + + node = + new_block(NEW_DASGN(_i, NEW_LIT(INT2FIX(0))), + NEW_WHILE(NEW_CALL(NEW_DVAR(_i), idLT, + new_ary(NEW_CALL + (NEW_DVAR(_self), idLength, + 0), 0)), + new_block(NEW_OPTBLOCK(node), + NEW_DASGN(_i, + NEW_CALL(NEW_DVAR(_i), + idSucc, 0))), + Qundef)); + } + else { + ID e = SYM2ID(rb_ary_entry(param_vars, 0)); + NODE *assign; + + rb_ary_push(param_vars, ID2SYM(_self)); + iseq->argc++; + rb_ary_push(local_vars, ID2SYM(_i)); + + if (nd_type(lnode) == NODE_DASGN_CURR) { + assign = NEW_DASGN(e, + NEW_CALL(NEW_DVAR(_self), idAREF, + new_ary(NEW_DVAR(_i), 0))); + } + else { + assign = new_assign(lnode, + NEW_CALL(NEW_DVAR(_self), idAREF, + new_ary(NEW_DVAR(_i), 0))); + } + + node = + new_block(NEW_DASGN(_i, NEW_LIT(INT2FIX(0))), + NEW_WHILE(NEW_CALL(NEW_DVAR(_i), 
idLT, + new_ary(NEW_CALL + (NEW_DVAR(_self), idLength, + 0), 0)), new_block(assign, + new_block + (NEW_OPTBLOCK + (node), + NEW_DASGN + (_i, + NEW_CALL + (NEW_DVAR + (_i), + idSucc, + 0)))), + Qundef)); + } + return node; +} + +VALUE +invoke_Array_each_special_block(VALUE ary) +{ + rb_thread_t *th = GET_THREAD(); + rb_block_t *orig_block = GC_GUARDED_PTR_REF(th->cfp->lfp[0]); + + if (BUILTIN_TYPE(orig_block->iseq) != T_NODE) { + VALUE tsiseqval = iseq_special_block(orig_block->iseq, + build_Array_each_node); + rb_iseq_t *tsiseq; + VALUE argv[2]; + + if (tsiseqval) { + VALUE val; + rb_block_t block = *orig_block; + GetISeqPtr(tsiseqval, tsiseq); + block.iseq = tsiseq; + th->cfp->lfp[0] = GC_GUARDED_PTR(&block); + argv[0] = 0; + argv[1] = ary; + val = rb_yield_values(2, argv); + if (val == Qundef) { + return ary; + } + else { + return val; + } + } + } + return Qundef; +} diff --git a/class.c b/class.c new file mode 100644 index 0000000..4cda7ae --- /dev/null +++ b/class.c @@ -0,0 +1,1006 @@ +/********************************************************************** + + class.c - + + $Author: yugui $ + created at: Tue Aug 10 15:05:44 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/st.h" +#include "node.h" +#include + +extern st_table *rb_class_tbl; + +static VALUE +class_alloc(VALUE flags, VALUE klass) +{ + rb_classext_t *ext = ALLOC(rb_classext_t); + NEWOBJ(obj, struct RClass); + OBJSETUP(obj, klass, flags); + obj->ptr = ext; + RCLASS_IV_TBL(obj) = 0; + RCLASS_M_TBL(obj) = 0; + RCLASS_SUPER(obj) = 0; + RCLASS_IV_INDEX_TBL(obj) = 0; + return (VALUE)obj; +} + +VALUE +rb_class_boot(VALUE super) +{ + VALUE klass = class_alloc(T_CLASS, rb_cClass); + + RCLASS_SUPER(klass) = super; + RCLASS_M_TBL(klass) = st_init_numtable(); + + OBJ_INFECT(klass, super); + return (VALUE)klass; +} + +void +rb_check_inheritable(VALUE super) +{ + if (TYPE(super) != T_CLASS) 
{ + rb_raise(rb_eTypeError, "superclass must be a Class (%s given)", + rb_obj_classname(super)); + } + if (RBASIC(super)->flags & FL_SINGLETON) { + rb_raise(rb_eTypeError, "can't make subclass of singleton class"); + } +} + +VALUE +rb_class_new(VALUE super) +{ + Check_Type(super, T_CLASS); + rb_check_inheritable(super); + if (super == rb_cClass) { + rb_raise(rb_eTypeError, "can't make subclass of Class"); + } + return rb_class_boot(super); +} + +struct clone_method_data { + st_table *tbl; + VALUE klass; +}; + +VALUE rb_iseq_clone(VALUE iseqval, VALUE newcbase); + +static int +clone_method(ID mid, NODE *body, struct clone_method_data *data) +{ + if (body == 0) { + st_insert(data->tbl, mid, 0); + } + else { + NODE *fbody = body->nd_body->nd_body; + + if (nd_type(fbody) == RUBY_VM_METHOD_NODE) { + fbody = NEW_NODE(RUBY_VM_METHOD_NODE, 0, + rb_iseq_clone((VALUE)fbody->nd_body, data->klass), + 0); + } + st_insert(data->tbl, mid, + (st_data_t) + NEW_FBODY( + NEW_METHOD(fbody, + data->klass, /* TODO */ + body->nd_body->nd_noex), + 0)); + } + return ST_CONTINUE; +} + +/* :nodoc: */ +VALUE +rb_mod_init_copy(VALUE clone, VALUE orig) +{ + rb_obj_init_copy(clone, orig); + if (!FL_TEST(CLASS_OF(clone), FL_SINGLETON)) { + RBASIC(clone)->klass = rb_singleton_class_clone(orig); + } + RCLASS_SUPER(clone) = RCLASS_SUPER(orig); + if (RCLASS_IV_TBL(orig)) { + ID id; + + RCLASS_IV_TBL(clone) = st_copy(RCLASS_IV_TBL(orig)); + CONST_ID(id, "__classpath__"); + st_delete(RCLASS_IV_TBL(clone), (st_data_t*)&id, 0); + CONST_ID(id, "__classid__"); + st_delete(RCLASS_IV_TBL(clone), (st_data_t*)&id, 0); + } + if (RCLASS_M_TBL(orig)) { + struct clone_method_data data; + data.tbl = RCLASS_M_TBL(clone) = st_init_numtable(); + data.klass = clone; + st_foreach(RCLASS_M_TBL(orig), clone_method, + (st_data_t)&data); + } + + return clone; +} + +/* :nodoc: */ +VALUE +rb_class_init_copy(VALUE clone, VALUE orig) +{ + if (RCLASS_SUPER(clone) != 0) { + rb_raise(rb_eTypeError, "already initialized class"); + 
} + if (FL_TEST(orig, FL_SINGLETON)) { + rb_raise(rb_eTypeError, "can't copy singleton class"); + } + return rb_mod_init_copy(clone, orig); +} + +VALUE +rb_singleton_class_clone(VALUE obj) +{ + VALUE klass = RBASIC(obj)->klass; + + if (!FL_TEST(klass, FL_SINGLETON)) + return klass; + else { + struct clone_method_data data; + /* copy singleton(unnamed) class */ + VALUE clone = class_alloc(RBASIC(klass)->flags, 0); + + if (BUILTIN_TYPE(obj) == T_CLASS) { + RBASIC(clone)->klass = (VALUE)clone; + } + else { + RBASIC(clone)->klass = rb_singleton_class_clone(klass); + } + + RCLASS_SUPER(clone) = RCLASS_SUPER(klass); + if (RCLASS_IV_TBL(klass)) { + RCLASS_IV_TBL(clone) = st_copy(RCLASS_IV_TBL(klass)); + } + RCLASS_M_TBL(clone) = st_init_numtable(); + data.tbl = RCLASS_M_TBL(clone); + data.klass = (VALUE)clone; + st_foreach(RCLASS_M_TBL(klass), clone_method, + (st_data_t)&data); + rb_singleton_class_attached(RBASIC(clone)->klass, (VALUE)clone); + FL_SET(clone, FL_SINGLETON); + return (VALUE)clone; + } +} + +void +rb_singleton_class_attached(VALUE klass, VALUE obj) +{ + if (FL_TEST(klass, FL_SINGLETON)) { + ID attached; + if (!RCLASS_IV_TBL(klass)) { + RCLASS_IV_TBL(klass) = st_init_numtable(); + } + CONST_ID(attached, "__attached__"); + st_insert(RCLASS_IV_TBL(klass), attached, obj); + } +} + + +static VALUE +make_metametaclass(VALUE metaclass) +{ + VALUE metametaclass, super_of_metaclass; + + if (RBASIC(metaclass)->klass == metaclass) { /* for meta^(n)-class of Class */ + metametaclass = rb_class_boot(Qnil); + RBASIC(metametaclass)->klass = metametaclass; + } + else { + metametaclass = rb_class_boot(Qnil); + RBASIC(metametaclass)->klass = + (RBASIC(RBASIC(metaclass)->klass)->klass == RBASIC(metaclass)->klass) + ? 
make_metametaclass(RBASIC(metaclass)->klass) + : RBASIC(RBASIC(metaclass)->klass)->klass; + } + + FL_SET(metametaclass, FL_SINGLETON); + rb_singleton_class_attached(metametaclass, metaclass); + RBASIC(metaclass)->klass = metametaclass; + + super_of_metaclass = RCLASS_SUPER(metaclass); + while (FL_TEST(super_of_metaclass, T_ICLASS)) { + super_of_metaclass = RCLASS_SUPER(super_of_metaclass); + } + RCLASS_SUPER(metametaclass) = + rb_iv_get(RBASIC(super_of_metaclass)->klass, "__attached__") == super_of_metaclass + ? RBASIC(super_of_metaclass)->klass + : make_metametaclass(super_of_metaclass); + OBJ_INFECT(metametaclass, RCLASS_SUPER(metametaclass)); + + return metametaclass; +} + + +VALUE +rb_make_metaclass(VALUE obj, VALUE super) +{ + if (BUILTIN_TYPE(obj) == T_CLASS && FL_TEST(obj, FL_SINGLETON)) { /* obj is a metaclass */ + return make_metametaclass(obj); + } + else { + VALUE metasuper; + VALUE klass = rb_class_boot(super); + + FL_SET(klass, FL_SINGLETON); + RBASIC(obj)->klass = klass; + rb_singleton_class_attached(klass, obj); + + metasuper = RBASIC(rb_class_real(super))->klass; + /* metaclass of a superclass may be NULL at boot time */ + if (metasuper) { + RBASIC(klass)->klass = metasuper; + } + return klass; + } +} + +VALUE +rb_define_class_id(ID id, VALUE super) +{ + VALUE klass; + + if (!super) super = rb_cObject; + klass = rb_class_new(super); + rb_make_metaclass(klass, RBASIC(super)->klass); + + return klass; +} + +VALUE +rb_class_inherited(VALUE super, VALUE klass) +{ + ID inherited; + if (!super) super = rb_cObject; + CONST_ID(inherited, "inherited"); + return rb_funcall(super, inherited, 1, klass); +} + +VALUE +rb_define_class(const char *name, VALUE super) +{ + VALUE klass; + ID id; + + id = rb_intern(name); + if (rb_const_defined(rb_cObject, id)) { + klass = rb_const_get(rb_cObject, id); + if (TYPE(klass) != T_CLASS) { + rb_raise(rb_eTypeError, "%s is not a class", name); + } + if (rb_class_real(RCLASS_SUPER(klass)) != super) { + rb_name_error(id, "%s is 
already defined", name); + } + return klass; + } + if (!super) { + rb_warn("no super class for `%s', Object assumed", name); + } + klass = rb_define_class_id(id, super); + st_add_direct(rb_class_tbl, id, klass); + rb_name_class(klass, id); + rb_const_set(rb_cObject, id, klass); + rb_class_inherited(super, klass); + + return klass; +} + +VALUE +rb_define_class_under(VALUE outer, const char *name, VALUE super) +{ + return rb_define_class_id_under(outer, rb_intern(name), super); +} + +VALUE +rb_define_class_id_under(VALUE outer, ID id, VALUE super) +{ + VALUE klass; + + if (rb_const_defined_at(outer, id)) { + klass = rb_const_get_at(outer, id); + if (TYPE(klass) != T_CLASS) { + rb_raise(rb_eTypeError, "%s is not a class", rb_id2name(id)); + } + if (rb_class_real(RCLASS_SUPER(klass)) != super) { + rb_name_error(id, "%s is already defined", rb_id2name(id)); + } + return klass; + } + if (!super) { + rb_warn("no super class for `%s::%s', Object assumed", + rb_class2name(outer), rb_id2name(id)); + } + klass = rb_define_class_id(id, super); + rb_set_class_path_string(klass, outer, rb_id2str(id)); + rb_const_set(outer, id, klass); + rb_class_inherited(super, klass); + + return klass; +} + +VALUE +rb_module_new(void) +{ + VALUE mdl = class_alloc(T_MODULE, rb_cModule); + + RCLASS_M_TBL(mdl) = st_init_numtable(); + + return (VALUE)mdl; +} + +VALUE +rb_define_module_id(ID id) +{ + VALUE mdl; + + mdl = rb_module_new(); + rb_name_class(mdl, id); + + return mdl; +} + +VALUE +rb_define_module(const char *name) +{ + VALUE module; + ID id; + + id = rb_intern(name); + if (rb_const_defined(rb_cObject, id)) { + module = rb_const_get(rb_cObject, id); + if (TYPE(module) == T_MODULE) + return module; + rb_raise(rb_eTypeError, "%s is not a module", rb_obj_classname(module)); + } + module = rb_define_module_id(id); + st_add_direct(rb_class_tbl, id, module); + rb_const_set(rb_cObject, id, module); + + return module; +} + +VALUE +rb_define_module_under(VALUE outer, const char *name) +{ + return 
rb_define_module_id_under(outer, rb_intern(name)); +} + +VALUE +rb_define_module_id_under(VALUE outer, ID id) +{ + VALUE module; + + if (rb_const_defined_at(outer, id)) { + module = rb_const_get_at(outer, id); + if (TYPE(module) == T_MODULE) + return module; + rb_raise(rb_eTypeError, "%s::%s is not a module", + rb_class2name(outer), rb_obj_classname(module)); + } + module = rb_define_module_id(id); + rb_const_set(outer, id, module); + rb_set_class_path_string(module, outer, rb_id2str(id)); + + return module; +} + +static VALUE +include_class_new(VALUE module, VALUE super) +{ + VALUE klass = class_alloc(T_ICLASS, rb_cClass); + + if (BUILTIN_TYPE(module) == T_ICLASS) { + module = RBASIC(module)->klass; + } + if (!RCLASS_IV_TBL(module)) { + RCLASS_IV_TBL(module) = st_init_numtable(); + } + RCLASS_IV_TBL(klass) = RCLASS_IV_TBL(module); + RCLASS_M_TBL(klass) = RCLASS_M_TBL(module); + RCLASS_SUPER(klass) = super; + if (TYPE(module) == T_ICLASS) { + RBASIC(klass)->klass = RBASIC(module)->klass; + } + else { + RBASIC(klass)->klass = module; + } + OBJ_INFECT(klass, module); + OBJ_INFECT(klass, super); + + return (VALUE)klass; +} + +void +rb_include_module(VALUE klass, VALUE module) +{ + VALUE p, c; + int changed = 0; + + rb_frozen_class_p(klass); + if (!OBJ_UNTRUSTED(klass)) { + rb_secure(4); + } + + if (TYPE(module) != T_MODULE) { + Check_Type(module, T_MODULE); + } + + OBJ_INFECT(klass, module); + c = klass; + while (module) { + int superclass_seen = Qfalse; + + if (RCLASS_M_TBL(klass) == RCLASS_M_TBL(module)) + rb_raise(rb_eArgError, "cyclic include detected"); + /* ignore if the module included already in superclasses */ + for (p = RCLASS_SUPER(klass); p; p = RCLASS_SUPER(p)) { + switch (BUILTIN_TYPE(p)) { + case T_ICLASS: + if (RCLASS_M_TBL(p) == RCLASS_M_TBL(module)) { + if (!superclass_seen) { + c = p; /* move insertion point */ + } + goto skip; + } + break; + case T_CLASS: + superclass_seen = Qtrue; + break; + } + } + c = RCLASS_SUPER(c) = include_class_new(module, 
RCLASS_SUPER(c)); + changed = 1; + skip: + module = RCLASS_SUPER(module); + } + if (changed) rb_clear_cache(); +} + +/* + * call-seq: + * mod.included_modules -> array + * + * Returns the list of modules included in mod. + * + * module Mixin + * end + * + * module Outer + * include Mixin + * end + * + * Mixin.included_modules #=> [] + * Outer.included_modules #=> [Mixin] + */ + +VALUE +rb_mod_included_modules(VALUE mod) +{ + VALUE ary = rb_ary_new(); + VALUE p; + + for (p = RCLASS_SUPER(mod); p; p = RCLASS_SUPER(p)) { + if (BUILTIN_TYPE(p) == T_ICLASS) { + rb_ary_push(ary, RBASIC(p)->klass); + } + } + return ary; +} + +/* + * call-seq: + * mod.include?(module) => true or false + * + * Returns true if module is included in + * mod or one of mod's ancestors. + * + * module A + * end + * class B + * include A + * end + * class C < B + * end + * B.include?(A) #=> true + * C.include?(A) #=> true + * A.include?(A) #=> false + */ + +VALUE +rb_mod_include_p(VALUE mod, VALUE mod2) +{ + VALUE p; + + Check_Type(mod2, T_MODULE); + for (p = RCLASS_SUPER(mod); p; p = RCLASS_SUPER(p)) { + if (BUILTIN_TYPE(p) == T_ICLASS) { + if (RBASIC(p)->klass == mod2) return Qtrue; + } + } + return Qfalse; +} + +/* + * call-seq: + * mod.ancestors -> array + * + * Returns a list of modules included in mod (including + * mod itself). 
+ * + * module Mod + * include Math + * include Comparable + * end + * + * Mod.ancestors #=> [Mod, Comparable, Math] + * Math.ancestors #=> [Math] + */ + +VALUE +rb_mod_ancestors(VALUE mod) +{ + VALUE p, ary = rb_ary_new(); + + for (p = mod; p; p = RCLASS_SUPER(p)) { + if (FL_TEST(p, FL_SINGLETON)) + continue; + if (BUILTIN_TYPE(p) == T_ICLASS) { + rb_ary_push(ary, RBASIC(p)->klass); + } + else { + rb_ary_push(ary, p); + } + } + return ary; +} + +#define VISI(x) ((x)&NOEX_MASK) +#define VISI_CHECK(x,f) (VISI(x) == (f)) + +static int +ins_methods_push(ID name, long type, VALUE ary, long visi) +{ + if (type == -1) return ST_CONTINUE; + + switch (visi) { + case NOEX_PRIVATE: + case NOEX_PROTECTED: + case NOEX_PUBLIC: + visi = (type == visi); + break; + default: + visi = (type != NOEX_PRIVATE); + break; + } + if (visi) { + rb_ary_push(ary, ID2SYM(name)); + } + return ST_CONTINUE; +} + +static int +ins_methods_i(ID name, long type, VALUE ary) +{ + return ins_methods_push(name, type, ary, -1); /* everything but private */ +} + +static int +ins_methods_prot_i(ID name, long type, VALUE ary) +{ + return ins_methods_push(name, type, ary, NOEX_PROTECTED); +} + +static int +ins_methods_priv_i(ID name, long type, VALUE ary) +{ + return ins_methods_push(name, type, ary, NOEX_PRIVATE); +} + +static int +ins_methods_pub_i(ID name, long type, VALUE ary) +{ + return ins_methods_push(name, type, ary, NOEX_PUBLIC); +} + +static int +method_entry(ID key, NODE *body, st_table *list) +{ + long type; + + if (key == ID_ALLOCATOR) { + return ST_CONTINUE; + } + + if (!st_lookup(list, key, 0)) { + if (body ==0 || !body->nd_body->nd_body) { + type = -1; /* none */ + } + else { + type = VISI(body->nd_body->nd_noex); + } + st_add_direct(list, key, type); + } + return ST_CONTINUE; +} + +static VALUE +class_instance_method_list(int argc, VALUE *argv, VALUE mod, int (*func) (ID, long, VALUE)) +{ + VALUE ary; + int recur; + st_table *list; + + if (argc == 0) { + recur = Qtrue; + } + else { + VALUE 
r; + rb_scan_args(argc, argv, "01", &r); + recur = RTEST(r); + } + + list = st_init_numtable(); + for (; mod; mod = RCLASS_SUPER(mod)) { + st_foreach(RCLASS_M_TBL(mod), method_entry, (st_data_t)list); + if (BUILTIN_TYPE(mod) == T_ICLASS) continue; + if (FL_TEST(mod, FL_SINGLETON)) continue; + if (!recur) break; /* reached only for an ordinary class/module: ICLASS and singleton levels never end the walk */ + } + ary = rb_ary_new(); + st_foreach(list, func, ary); + st_free_table(list); + + return ary; +} + +/* + * call-seq: + * mod.instance_methods(include_super=true) => array + * + * Returns an array containing the names of the instance methods of the + * receiver that are callable from outside, i.e. every method that is not + * private. For a class, these are the instance (not singleton) methods. + * If the optional argument is false, only the instance methods + * defined in mod itself are returned; otherwise (the default) the methods + * of mod and mod's superclasses are returned as well. + * + * module A + * def method1() end + * end + * class B + * def method2() end + * end + * class C < B + * def method3() end + * end + * + * A.instance_methods #=> [:method1] + * B.instance_methods(false) #=> [:method2] + * C.instance_methods(false) #=> [:method3] + * C.instance_methods(true).length #=> 43 + */ + +VALUE +rb_class_instance_methods(int argc, VALUE *argv, VALUE mod) +{ + return class_instance_method_list(argc, argv, mod, ins_methods_i); +} + +/* + * call-seq: + * mod.protected_instance_methods(include_super=true) => array + * + * Returns a list of the protected instance methods defined in + * mod. If the optional parameter is not false, the + * methods of any ancestors are included. + */ + +VALUE +rb_class_protected_instance_methods(int argc, VALUE *argv, VALUE mod) +{ + return class_instance_method_list(argc, argv, mod, ins_methods_prot_i); +} + +/* + * call-seq: + * mod.private_instance_methods(include_super=true) => array + * + * Returns a list of the private instance methods defined in + * mod.
If the optional parameter is not false, the + * methods of any ancestors are included. + * + * module Mod + * def method1() end + * private :method1 + * def method2() end + * end + * Mod.instance_methods #=> [:method2] + * Mod.private_instance_methods #=> [:method1] + */ + +VALUE +rb_class_private_instance_methods(int argc, VALUE *argv, VALUE mod) +{ + return class_instance_method_list(argc, argv, mod, ins_methods_priv_i); +} + +/* + * call-seq: + * mod.public_instance_methods(include_super=true) => array + * + * Returns a list of the public instance methods defined in mod. + * If the optional parameter is not false, the methods of + * any ancestors are included. + */ + +VALUE +rb_class_public_instance_methods(int argc, VALUE *argv, VALUE mod) +{ + return class_instance_method_list(argc, argv, mod, ins_methods_pub_i); +} + +/* + * call-seq: + * obj.singleton_methods(all=true) => array + * + * Returns an array of the names of singleton methods for obj. + * If the optional all parameter is true, the list will include + * methods in modules included in obj. 
+ * + * module Other + * def three() end + * end + * + * class Single + * def Single.four() end + * end + * + * a = Single.new + * + * def a.one() + * end + * + * class << a + * include Other + * def two() + * end + * end + * + * Single.singleton_methods #=> [:four] + * a.singleton_methods(false) #=> [:two, :one] + * a.singleton_methods #=> [:two, :one, :three] + */ + +VALUE +rb_obj_singleton_methods(int argc, VALUE *argv, VALUE obj) +{ + VALUE recur, ary, klass; + st_table *list; + + if (argc == 0) { + recur = Qtrue; + } + else { + rb_scan_args(argc, argv, "01", &recur); + } + klass = CLASS_OF(obj); + list = st_init_numtable(); + if (klass && FL_TEST(klass, FL_SINGLETON)) { + st_foreach(RCLASS_M_TBL(klass), method_entry, (st_data_t)list); + klass = RCLASS_SUPER(klass); + } + if (RTEST(recur)) { + while (klass && (FL_TEST(klass, FL_SINGLETON) || TYPE(klass) == T_ICLASS)) { + st_foreach(RCLASS_M_TBL(klass), method_entry, (st_data_t)list); + klass = RCLASS_SUPER(klass); + } + } + ary = rb_ary_new(); + st_foreach(list, ins_methods_i, ary); + st_free_table(list); + + return ary; +} + +void +rb_define_method_id(VALUE klass, ID name, VALUE (*func)(ANYARGS), int argc) +{ + rb_add_method(klass, name, NEW_CFUNC(func,argc), NOEX_PUBLIC); +} + +void +rb_define_method(VALUE klass, const char *name, VALUE (*func)(ANYARGS), int argc) +{ + rb_add_method(klass, rb_intern(name), NEW_CFUNC(func, argc), NOEX_PUBLIC); +} + +void +rb_define_protected_method(VALUE klass, const char *name, VALUE (*func)(ANYARGS), int argc) +{ + rb_add_method(klass, rb_intern(name), NEW_CFUNC(func, argc), NOEX_PROTECTED); +} + +void +rb_define_private_method(VALUE klass, const char *name, VALUE (*func)(ANYARGS), int argc) +{ + rb_add_method(klass, rb_intern(name), NEW_CFUNC(func, argc), NOEX_PRIVATE); +} + +void +rb_undef_method(VALUE klass, const char *name) +{ + rb_add_method(klass, rb_intern(name), 0, NOEX_UNDEF); +} + +#define SPECIAL_SINGLETON(x,c) do {\ + if (obj == (x)) {\ + return c;\ + }\ +} 
while (0) + +VALUE +rb_singleton_class(VALUE obj) +{ + VALUE klass; + + if (FIXNUM_P(obj) || SYMBOL_P(obj)) { + rb_raise(rb_eTypeError, "can't define singleton"); + } + if (rb_special_const_p(obj)) { + SPECIAL_SINGLETON(Qnil, rb_cNilClass); + SPECIAL_SINGLETON(Qfalse, rb_cFalseClass); + SPECIAL_SINGLETON(Qtrue, rb_cTrueClass); + rb_bug("unknown immediate %ld", obj); + } + + if (FL_TEST(RBASIC(obj)->klass, FL_SINGLETON) && + rb_iv_get(RBASIC(obj)->klass, "__attached__") == obj) { + klass = RBASIC(obj)->klass; + } + else { + klass = rb_make_metaclass(obj, RBASIC(obj)->klass); + } + + if (BUILTIN_TYPE(obj) == T_CLASS) { + if (rb_iv_get(RBASIC(klass)->klass, "__attached__") != klass) + make_metametaclass(klass); + } + if (OBJ_TAINTED(obj)) { + OBJ_TAINT(klass); + } + else { + FL_UNSET(klass, FL_TAINT); + } + if (OBJ_UNTRUSTED(obj)) { + OBJ_UNTRUST(klass); + } + else { + FL_UNSET(klass, FL_UNTRUSTED); + } + if (OBJ_FROZEN(obj)) OBJ_FREEZE(klass); + + return klass; +} + +void +rb_define_singleton_method(VALUE obj, const char *name, VALUE (*func)(ANYARGS), int argc) +{ + rb_define_method(rb_singleton_class(obj), name, func, argc); +} + +void +rb_define_module_function(VALUE module, const char *name, VALUE (*func)(ANYARGS), int argc) +{ + rb_define_private_method(module, name, func, argc); + rb_define_singleton_method(module, name, func, argc); +} + +void +rb_define_global_function(const char *name, VALUE (*func)(ANYARGS), int argc) +{ + rb_define_module_function(rb_mKernel, name, func, argc); +} + +void +rb_define_alias(VALUE klass, const char *name1, const char *name2) +{ + rb_alias(klass, rb_intern(name1), rb_intern(name2)); +} + +void +rb_define_attr(VALUE klass, const char *name, int read, int write) +{ + rb_attr(klass, rb_intern(name), read, write, Qfalse); +} + +#include <stdarg.h> +/* Scan argv per fmt = [required-count digit][optional-count digit]['*' rest array]['&' block], storing through the VALUE* varargs (NULL skips a slot); raises ArgumentError on a count mismatch, rb_fatal on a bad fmt, returns argc. NOTE(review): the two counting loops were rebuilt after '<...>' text was stripped from this copy -- confirm against upstream class.c. */ +int +rb_scan_args(int argc, const VALUE *argv, const char *fmt, ...)
+{ + int n, i = 0; + const char *p = fmt; + VALUE *var; + va_list vargs; + + va_start(vargs, fmt); + + if (*p == '*') goto rest_arg; + + if (ISDIGIT(*p)) { + n = *p - '0'; + if (n > argc) + rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, n); + for (i=0; i<n; i++) { + var = va_arg(vargs, VALUE*); + if (var) *var = argv[i]; + } + p++; + } + else { + goto error; + } + + if (ISDIGIT(*p)) { + n = i + *p - '0'; + for (; i<n; i++) { + var = va_arg(vargs, VALUE*); + if (argc > i) { + if (var) *var = argv[i]; + } + else { + if (var) *var = Qnil; + } + } + p++; + } + + if(*p == '*') { + rest_arg: + var = va_arg(vargs, VALUE*); + if (argc > i) { + if (var) *var = rb_ary_new4(argc-i, argv+i); + i = argc; + } + else { + if (var) *var = rb_ary_new(); + } + p++; + } + + if (*p == '&') { + var = va_arg(vargs, VALUE*); + if (rb_block_given_p()) { + *var = rb_block_proc(); + } + else { + *var = Qnil; + } + p++; + } + va_end(vargs); + + if (*p != '\0') { + goto error; + } + + if (argc > i) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, i); + } + + return argc; + + error: + rb_fatal("bad scan arg format: %s", fmt); + return 0; +} diff --git a/common.mk b/common.mk new file mode 100644 index 0000000..5f3309c --- /dev/null +++ b/common.mk @@ -0,0 +1,760 @@ +bin: $(PROGRAM) $(WPROGRAM) +lib: $(LIBRUBY) +dll: $(LIBRUBY_SO) + +.SUFFIXES: .inc .h .c .y + +RUBYLIB = - +RUBYOPT = - + +SPEC_GIT_BASE = git://github.com/rubyspec +MSPEC_GIT_URL = $(SPEC_GIT_BASE)/mspec.git +RUBYSPEC_GIT_URL = $(SPEC_GIT_BASE)/rubyspec.git + +STATIC_RUBY = static-ruby + +EXTCONF = extconf.rb +RBCONFIG = ./.rbconfig.time +LIBRUBY_EXTS = ./.libruby-with-ext.time +RDOCOUT = $(EXTOUT)/rdoc +ID_H_TARGET = -id.h- + +DMYEXT = dmyext.$(OBJEXT) +NORMALMAINOBJ = main.$(OBJEXT) +MAINOBJ = $(NORMALMAINOBJ) +EXTOBJS = +DLDOBJS = $(DMYEXT) +MINIOBJS = $(ARCHMINIOBJS) dmyencoding.$(OBJEXT) miniprelude.$(OBJEXT) +ENC_MK = enc.mk + +COMMONOBJS = array.$(OBJEXT) \ + bignum.$(OBJEXT) \ + class.$(OBJEXT) \ + compar.$(OBJEXT) \ + complex.$(OBJEXT) \ + dir.$(OBJEXT) \ + enum.$(OBJEXT) \ + enumerator.$(OBJEXT) \ + error.$(OBJEXT) \ + eval.$(OBJEXT) \ + load.$(OBJEXT) \ + proc.$(OBJEXT) \ +
file.$(OBJEXT) \ + gc.$(OBJEXT) \ + hash.$(OBJEXT) \ + inits.$(OBJEXT) \ + io.$(OBJEXT) \ + marshal.$(OBJEXT) \ + math.$(OBJEXT) \ + numeric.$(OBJEXT) \ + object.$(OBJEXT) \ + pack.$(OBJEXT) \ + parse.$(OBJEXT) \ + process.$(OBJEXT) \ + random.$(OBJEXT) \ + range.$(OBJEXT) \ + rational.$(OBJEXT) \ + re.$(OBJEXT) \ + regcomp.$(OBJEXT) \ + regenc.$(OBJEXT) \ + regerror.$(OBJEXT) \ + regexec.$(OBJEXT) \ + regparse.$(OBJEXT) \ + regsyntax.$(OBJEXT) \ + ruby.$(OBJEXT) \ + safe.$(OBJEXT) \ + signal.$(OBJEXT) \ + sprintf.$(OBJEXT) \ + st.$(OBJEXT) \ + strftime.$(OBJEXT) \ + string.$(OBJEXT) \ + struct.$(OBJEXT) \ + time.$(OBJEXT) \ + transcode.$(OBJEXT) \ + util.$(OBJEXT) \ + variable.$(OBJEXT) \ + version.$(OBJEXT) \ + compile.$(OBJEXT) \ + debug.$(OBJEXT) \ + iseq.$(OBJEXT) \ + vm.$(OBJEXT) \ + vm_dump.$(OBJEXT) \ + thread.$(OBJEXT) \ + cont.$(OBJEXT) \ + $(BUILTIN_ENCOBJS) \ + $(BUILTIN_TRANSOBJS) \ + $(MISSING) + +EXPORTOBJS = dln.$(OBJEXT) \ + encoding.$(OBJEXT) \ + $(COMMONOBJS) + +OBJS = $(EXPORTOBJS) prelude.$(OBJEXT) + +GOLFOBJS = goruby.$(OBJEXT) golf_prelude.$(OBJEXT) + +PRELUDE_SCRIPTS = $(srcdir)/prelude.rb $(srcdir)/enc/prelude.rb $(srcdir)/gem_prelude.rb +PRELUDES = prelude.c miniprelude.c +GOLFPRELUDES = golf_prelude.c + +SCRIPT_ARGS = --dest-dir="$(DESTDIR)" \ + --extout="$(EXTOUT)" \ + --mflags="$(MFLAGS)" \ + --make-flags="$(MAKEFLAGS)" +EXTMK_ARGS = $(SCRIPT_ARGS) --extension $(EXTS) --extstatic $(EXTSTATIC) \ + --make-flags="MINIRUBY='$(MINIRUBY)'" -- +INSTRUBY = $(MINIRUBY) $(srcdir)/instruby.rb +INSTRUBY_ARGS = $(SCRIPT_ARGS) \ + --data-mode=$(INSTALL_DATA_MODE) \ + --prog-mode=$(INSTALL_PROG_MODE) \ + --installed-list $(INSTALLED_LIST) +INSTALL_PROG_MODE = 0755 +INSTALL_DATA_MODE = 0644 + +PRE_LIBRUBY_UPDATE = $(MINIRUBY) -e 'ARGV[1] or File.unlink(ARGV[0]) rescue nil' -- \ + $(LIBRUBY_EXTS) $(LIBRUBY_SO_UPDATE) + +TESTSDIR = $(srcdir)/test +TESTWORKDIR = testwork + +TESTRUN_SCRIPT = $(srcdir)/test.rb + +BOOTSTRAPRUBY = $(BASERUBY) + 
+COMPILE_PRELUDE = $(MINIRUBY) -I$(srcdir) -rrbconfig $(srcdir)/tool/compile_prelude.rb + +all: encs exts main + +main: exts + @$(RUNCMD) $(MKMAIN_CMD) MAKE=$(MAKE) + +exts: $(MKMAIN_CMD) + +$(MKMAIN_CMD): $(MKFILES) incs $(PREP) $(RBCONFIG) $(LIBRUBY) + @$(MINIRUBY) $(srcdir)/ext/extmk.rb --make="$(MAKE)" --command-output=$@ $(EXTMK_ARGS) + +prog: $(PROGRAM) $(WPROGRAM) + +loadpath: $(PREP) + $(MINIRUBY) -e 'p $$:' + +$(PREP): $(MKFILES) + +miniruby$(EXEEXT): config.status $(NORMALMAINOBJ) $(MINIOBJS) $(COMMONOBJS) $(DMYEXT) $(ARCHFILE) + +GORUBY = go$(RUBY_INSTALL_NAME) +golf: $(LIBRUBY) $(GOLFOBJS) + $(MAKE) $(MFLAGS) MAINOBJ="$(GOLFOBJS)" PROGRAM=$(GORUBY)$(EXEEXT) program + +program: $(PROGRAM) + +$(PROGRAM): $(LIBRUBY) $(MAINOBJ) $(OBJS) $(EXTOBJS) $(SETUP) $(PREP) + +$(LIBRUBY_A): $(OBJS) $(DMYEXT) $(ARCHFILE) + +$(LIBRUBY_SO): $(OBJS) $(DLDOBJS) $(LIBRUBY_A) $(PREP) $(LIBRUBY_SO_UPDATE) $(BUILTIN_ENCOBJS) + +$(LIBRUBY_EXTS): + @exit > $@ + +$(STATIC_RUBY)$(EXEEXT): $(MAINOBJ) $(DLDOBJS) $(EXTOBJS) $(LIBRUBY_A) + @$(RM) $@ + $(PURIFY) $(CC) $(MAINOBJ) $(DLDOBJS) $(EXTOBJS) $(LIBRUBY_A) $(MAINLIBS) $(EXTLIBS) $(LIBS) $(OUTFLAG)$@ $(LDFLAGS) $(XLDFLAGS) + +ruby.imp: $(EXPORTOBJS) + @$(NM) -Pgp $(EXPORTOBJS) | \ + awk 'BEGIN{print "#!"}; $$2~/^[BDT]$$/&&$$1!~/^(Init_|\.)/{print $$1}' | \ + sort -u -o $@ + +install: install-nodoc $(RDOCTARGET) +install-all: install-nodoc install-doc + +install-nodoc: pre-install-nodoc do-install-nodoc post-install-nodoc +pre-install-nodoc:: pre-install-local pre-install-ext +do-install-nodoc: $(PREP) + $(INSTRUBY) --make="$(MAKE)" $(INSTRUBY_ARGS) --mantype="$(MANTYPE)" +post-install-nodoc:: post-install-local post-install-ext + +install-local: pre-install-local do-install-local post-install-local +pre-install-local:: pre-install-bin pre-install-lib pre-install-man +do-install-local: $(PREP) + $(INSTRUBY) --make="$(MAKE)" $(INSTRUBY_ARGS) --install=local --mantype="$(MANTYPE)" +post-install-local:: post-install-bin 
post-install-lib post-install-man + +install-ext: pre-install-ext do-install-ext post-install-ext +pre-install-ext:: pre-install-ext-arch pre-install-ext-comm +do-install-ext: $(PREP) + $(INSTRUBY) --make="$(MAKE)" $(INSTRUBY_ARGS) --install=ext +post-install-ext:: post-install-ext-arch post-install-ext-comm + +install-arch: pre-install-arch do-install-arch post-install-arch +pre-install-arch:: pre-install-bin pre-install-ext-arch +do-install-arch: $(PREP) + $(INSTRUBY) --make="$(MAKE)" $(INSTRUBY_ARGS) --install=bin --install=ext-arch +post-install-arch:: post-install-bin post-install-ext-arch + +install-comm: pre-install-comm do-install-comm post-install-comm +pre-install-comm:: pre-install-lib pre-install-ext-comm pre-install-man +do-install-comm: $(PREP) + $(INSTRUBY) --make="$(MAKE)" $(INSTRUBY_ARGS) --install=lib --install=ext-comm --install=man +post-install-comm:: post-install-lib post-install-ext-comm post-install-man + +install-bin: pre-install-bin do-install-bin post-install-bin +pre-install-bin:: install-prereq +do-install-bin: $(PREP) + $(INSTRUBY) --make="$(MAKE)" $(INSTRUBY_ARGS) --install=bin +post-install-bin:: + @$(NULLCMD) + +install-lib: pre-install-lib do-install-lib post-install-lib +pre-install-lib:: install-prereq +do-install-lib: $(PREP) + $(INSTRUBY) --make="$(MAKE)" $(INSTRUBY_ARGS) --install=lib +post-install-lib:: + @$(NULLCMD) + +install-ext-comm: pre-install-ext-comm do-install-ext-comm post-install-ext-comm +pre-install-ext-comm:: install-prereq +do-install-ext-comm: $(PREP) + $(INSTRUBY) --make="$(MAKE)" $(INSTRUBY_ARGS) --install=ext-comm +post-install-ext-comm:: + @$(NULLCMD) + +install-ext-arch: pre-install-ext-arch do-install-ext-arch post-install-ext-arch +pre-install-ext-arch:: install-prereq +do-install-ext-arch: $(PREP) + $(INSTRUBY) --make="$(MAKE)" $(INSTRUBY_ARGS) --install=ext-arch +post-install-ext-arch:: + @$(NULLCMD) + +install-man: pre-install-man do-install-man post-install-man +pre-install-man:: install-prereq 
+do-install-man: $(PREP) + $(INSTRUBY) --make="$(MAKE)" $(INSTRUBY_ARGS) --install=man --mantype="$(MANTYPE)" +post-install-man:: + @$(NULLCMD) + +what-where: no-install +no-install: no-install-nodoc no-install-doc +what-where-all: no-install-all +no-install-all: no-install-nodoc + +what-where-nodoc: no-install-nodoc +no-install-nodoc: pre-no-install-nodoc dont-install-nodoc post-no-install-nodoc +pre-no-install-nodoc:: pre-no-install-local pre-no-install-ext +dont-install-nodoc: $(PREP) + $(INSTRUBY) -n --make="$(MAKE)" $(INSTRUBY_ARGS) --mantype="$(MANTYPE)" +post-no-install-nodoc:: post-no-install-local post-no-install-ext + +what-where-local: no-install-local +no-install-local: pre-no-install-local dont-install-local post-no-install-local +pre-no-install-local:: pre-no-install-bin pre-no-install-lib pre-no-install-man +dont-install-local: $(PREP) + $(INSTRUBY) -n --make="$(MAKE)" $(INSTRUBY_ARGS) --install=local --mantype="$(MANTYPE)" +post-no-install-local:: post-no-install-bin post-no-install-lib post-no-install-man + +what-where-ext: no-install-ext +no-install-ext: pre-no-install-ext dont-install-ext post-no-install-ext +pre-no-install-ext:: pre-no-install-ext-arch pre-no-install-ext-comm +dont-install-ext: $(PREP) + $(INSTRUBY) -n --make="$(MAKE)" $(INSTRUBY_ARGS) --install=ext +post-no-install-ext:: post-no-install-ext-arch post-no-install-ext-comm + +what-where-arch: no-install-arch +no-install-arch: pre-no-install-arch dont-install-arch post-no-install-arch +pre-no-install-arch:: pre-no-install-bin pre-no-install-ext-arch +dont-install-arch: $(PREP) + $(INSTRUBY) -n --make="$(MAKE)" $(INSTRUBY_ARGS) --install=bin --install=ext-arch +post-no-install-arch:: post-no-install-lib post-no-install-man post-no-install-ext-arch + +what-where-comm: no-install-comm +no-install-comm: pre-no-install-comm dont-install-comm post-no-install-comm +pre-no-install-comm:: pre-no-install-lib pre-no-install-ext-comm pre-no-install-man +dont-install-comm: $(PREP) + $(INSTRUBY) 
-n --make="$(MAKE)" $(INSTRUBY_ARGS) --install=lib --install=ext-comm --install=man +post-no-install-comm:: post-no-install-lib post-no-install-ext-comm post-no-install-man + +what-where-bin: no-install-bin +no-install-bin: pre-no-install-bin dont-install-bin post-no-install-bin +pre-no-install-bin:: install-prereq +dont-install-bin: $(PREP) + $(INSTRUBY) -n --make="$(MAKE)" $(INSTRUBY_ARGS) --install=bin +post-no-install-bin:: + @$(NULLCMD) + +what-where-lib: no-install-lib +no-install-lib: pre-no-install-lib dont-install-lib post-no-install-lib +pre-no-install-lib:: install-prereq +dont-install-lib: $(PREP) + $(INSTRUBY) -n --make="$(MAKE)" $(INSTRUBY_ARGS) --install=lib +post-no-install-lib:: + @$(NULLCMD) + +what-where-ext-comm: no-install-ext-comm +no-install-ext-comm: pre-no-install-ext-comm dont-install-ext-comm post-no-install-ext-comm +pre-no-install-ext-comm:: install-prereq +dont-install-ext-comm: $(PREP) + $(INSTRUBY) -n --make="$(MAKE)" $(INSTRUBY_ARGS) --install=ext-comm +post-no-install-ext-comm:: + @$(NULLCMD) + +what-where-ext-arch: no-install-ext-arch +no-install-ext-arch: pre-no-install-ext-arch dont-install-ext-arch post-no-install-ext-arch +pre-no-install-ext-arch:: install-prereq +dont-install-ext-arch: $(PREP) + $(INSTRUBY) -n --make="$(MAKE)" $(INSTRUBY_ARGS) --install=ext-arch +post-no-install-ext-arch:: + @$(NULLCMD) + +what-where-man: no-install-man +no-install-man: pre-no-install-man dont-install-man post-no-install-man +pre-no-install-man:: install-prereq +dont-install-man: $(PREP) + $(INSTRUBY) -n --make="$(MAKE)" $(INSTRUBY_ARGS) --install=man --mantype="$(MANTYPE)" +post-no-install-man:: + @$(NULLCMD) + +install-doc: rdoc pre-install-doc do-install-doc post-install-doc +pre-install-doc:: install-prereq +do-install-doc: $(PROGRAM) + $(INSTRUBY) --make="$(MAKE)" $(INSTRUBY_ARGS) --install=rdoc --rdoc-output="$(RDOCOUT)" +post-install-doc:: + @$(NULLCMD) + +rdoc: $(PROGRAM) PHONY + @echo Generating RDoc documentation + $(RUNRUBY) 
"$(srcdir)/bin/rdoc" --all --ri --op "$(RDOCOUT)" "$(srcdir)" + +what-where-doc: no-install-doc +no-install-doc: pre-no-install-doc dont-install-doc post-no-install-doc +pre-no-install-doc:: install-prereq +dont-install-doc:: $(PREP) + $(INSTRUBY) -n --make="$(MAKE)" $(INSTRUBY_ARGS) --install=rdoc --rdoc-output="$(RDOCOUT)" +post-no-install-doc:: + @$(NULLCMD) + +CLEAR_INSTALLED_LIST = clear-installed-list + +install-prereq: $(CLEAR_INSTALLED_LIST) + +clear-installed-list: + @exit > $(INSTALLED_LIST) + +clean: clean-ext clean-local clean-enc clean-golf clean-rdoc clean-extout +clean-local:: + @$(RM) $(OBJS) $(MINIOBJS) $(MAINOBJ) $(LIBRUBY_A) $(LIBRUBY_SO) $(LIBRUBY) $(LIBRUBY_ALIASES) + @$(RM) $(PROGRAM) $(WPROGRAM) miniruby$(EXEEXT) dmyext.$(OBJEXT) $(ARCHFILE) .*.time + @$(RM) *.inc y.tab.c y.output encdb.h transdb.h +clean-ext:: +clean-enc: + @-$(MAKE) -f $(ENC_MK) $(MFLAGS) clean +clean-golf: + @$(RM) $(GORUBY)$(EXEEXT) $(GOLFOBJS) +clean-rdoc: +clean-extout: + @-$(RMDIRS) $(EXTOUT)/$(arch) + +distclean: distclean-ext distclean-local distclean-enc distclean-golf distclean-extout +distclean-local:: clean-local + @$(RM) $(MKFILES) rbconfig.rb yasmdata.rb encdb.h + @$(RM) config.cache config.log config.status config.status.lineno $(PRELUDES) + @$(RM) *~ *.bak *.stackdump core *.core gmon.out $(PREP) +distclean-ext:: +distclean-enc: clean-enc + @-$(MAKE) -f $(ENC_MK) $(MFLAGS) distclean +distclean-golf: clean-golf + @$(RM) $(GOLFPRELUDES) +distclean-rdoc: +distclean-extout: clean-extout + @-$(RMDIRS) $(EXTOUT) + +realclean:: realclean-ext realclean-local realclean-enc realclean-golf realclean-extout +realclean-local:: distclean-local + @$(RM) parse.c parse.h lex.c newline.c revision.h +realclean-ext:: +realclean-enc:: distclean-enc +realclean-golf: distclean-golf +realclean-extout: distclean-extout + +check: test test-all + +btest: miniruby$(EXEEXT) PHONY + $(BOOTSTRAPRUBY) "$(srcdir)/bootstraptest/runner.rb" --ruby="$(MINIRUBY)" $(OPTS) + +btest-ruby: 
miniruby$(EXEEXT) $(RBCONFIG) $(PROGRAM) PHONY + @$(RUNRUBY) "$(srcdir)/bootstraptest/runner.rb" --ruby="$(PROGRAM) -I$(srcdir)/lib" -q $(OPTS) + +test-sample: miniruby$(EXEEXT) $(RBCONFIG) $(PROGRAM) PHONY + @$(RUNRUBY) $(srcdir)/rubytest.rb + +test-knownbug: miniruby$(EXEEXT) $(PROGRAM) $(RBCONFIG) PHONY + $(MINIRUBY) "$(srcdir)/bootstraptest/runner.rb" --ruby="$(PROGRAM)" $(OPTS) $(srcdir)/KNOWNBUGS.rb + +test: test-sample btest-ruby test-knownbug + +test-all: + $(RUNRUBY) "$(srcdir)/test/runner.rb" $(TESTS) + +extconf: $(PREP) + $(MAKEDIRS) "$(EXTCONFDIR)" + $(RUNRUBY) -C "$(EXTCONFDIR)" $(EXTCONF) $(EXTCONFARGS) + +$(RBCONFIG): $(srcdir)/mkconfig.rb config.status $(PREP) + @$(MINIRUBY) $(srcdir)/mkconfig.rb -timestamp=$@ \ + -install_name=$(RUBY_INSTALL_NAME) \ + -so_name=$(RUBY_SO_NAME) rbconfig.rb + +encs: enc trans +encs enc trans: $(ENC_MK) $(LIBRUBY) $(PREP) + $(MAKE) -f $(ENC_MK) RUBY="$(MINIRUBY)" MINIRUBY="$(MINIRUBY)" $(MFLAGS) $@ + +enc: {$(VPATH)}encdb.h +trans: {$(VPATH)}transdb.h + +$(ENC_MK): $(srcdir)/enc/make_encmake.rb $(srcdir)/enc/Makefile.in $(srcdir)/enc/depend \ + $(srcdir)/lib/mkmf.rb $(RBCONFIG) + $(MINIRUBY) $(srcdir)/enc/make_encmake.rb --builtin-encs="$(BUILTIN_ENCOBJS)" --builtin-transes="$(BUILTIN_TRANSOBJS)" $@ $(ENCS) + +.PRECIOUS: $(MKFILES) + +.PHONY: test install install-nodoc install-doc dist + +PHONY: + +{$(VPATH)}parse.c: {$(VPATH)}parse.y $(srcdir)/tool/ytab.sed +parse.h {$(VPATH)}parse.h: {$(VPATH)}parse.c + +{$(srcdir)}.y.c: + $(YACC) -d $(YFLAGS) -o y.tab.c $(SRC_FILE) + sed -f $(srcdir)/tool/ytab.sed -e "/^#/s!y\.tab\.c!$@!" 
y.tab.c > $@.new + @$(MV) $@.new $@ + sed -e "/^#line.*y\.tab\.h/d;/^#line.*parse\.y/d" y.tab.h > $(@:.c=.h).new + @$(IFCHANGE) $(@:.c=.h) $(@:.c=.h).new + @$(RM) y.tab.c y.tab.h + +acosh.$(OBJEXT): {$(VPATH)}acosh.c +alloca.$(OBJEXT): {$(VPATH)}alloca.c {$(VPATH)}config.h +crypt.$(OBJEXT): {$(VPATH)}crypt.c +dup2.$(OBJEXT): {$(VPATH)}dup2.c +erf.$(OBJEXT): {$(VPATH)}erf.c +finite.$(OBJEXT): {$(VPATH)}finite.c +flock.$(OBJEXT): {$(VPATH)}flock.c +memcmp.$(OBJEXT): {$(VPATH)}memcmp.c +memmove.$(OBJEXT): {$(VPATH)}memmove.c +mkdir.$(OBJEXT): {$(VPATH)}mkdir.c +strchr.$(OBJEXT): {$(VPATH)}strchr.c +strdup.$(OBJEXT): {$(VPATH)}strdup.c +strerror.$(OBJEXT): {$(VPATH)}strerror.c +strstr.$(OBJEXT): {$(VPATH)}strstr.c +strtod.$(OBJEXT): {$(VPATH)}strtod.c +strtol.$(OBJEXT): {$(VPATH)}strtol.c +nt.$(OBJEXT): {$(VPATH)}nt.c +os2.$(OBJEXT): {$(VPATH)}os2.c +dl_os2.$(OBJEXT): {$(VPATH)}dl_os2.c +ia64.$(OBJEXT): {$(VPATH)}ia64.s + $(CC) $(CFLAGS) -c $< + +# when I use -I., there is a conflict with "OpenFile" +# so, set .
into environment variable "include" +win32.$(OBJEXT): {$(VPATH)}win32.c $(RUBY_H_INCLUDES) + +### + +RUBY_H_INCLUDES = {$(VPATH)}ruby.h {$(VPATH)}config.h {$(VPATH)}defines.h \ + {$(VPATH)}intern.h {$(VPATH)}missing.h {$(VPATH)}st.h +ENCODING_H_INCLUDES= {$(VPATH)}encoding.h {$(VPATH)}oniguruma.h +ID_H_INCLUDES = {$(VPATH)}id.h +VM_CORE_H_INCLUDES = {$(VPATH)}vm_core.h {$(VPATH)}vm_opts.h \ + {$(VPATH)}thread_$(THREAD_MODEL).h \ + {$(VPATH)}node.h $(ID_H_INCLUDES) + +array.$(OBJEXT): {$(VPATH)}array.c $(RUBY_H_INCLUDES) {$(VPATH)}util.h +bignum.$(OBJEXT): {$(VPATH)}bignum.c $(RUBY_H_INCLUDES) +class.$(OBJEXT): {$(VPATH)}class.c $(RUBY_H_INCLUDES) {$(VPATH)}node.h +compar.$(OBJEXT): {$(VPATH)}compar.c $(RUBY_H_INCLUDES) +complex.$(OBJEXT): {$(VPATH)}complex.c $(RUBY_H_INCLUDES) +dir.$(OBJEXT): {$(VPATH)}dir.c $(RUBY_H_INCLUDES) {$(VPATH)}util.h \ + $(ENCODING_H_INCLUDES) +dln.$(OBJEXT): {$(VPATH)}dln.c {$(VPATH)}dln.h $(RUBY_H_INCLUDES) +dmydln.$(OBJEXT): {$(VPATH)}dmydln.c dln.$(OBJEXT) +dmyext.$(OBJEXT): {$(VPATH)}dmyext.c +dmyencoding.$(OBJEXT): {$(VPATH)}dmyencoding.c $(RUBY_H_INCLUDES) \ + {$(VPATH)}regenc.h {$(VPATH)}util.h $(ENCODING_H_INCLUDES) \ + {$(VPATH)}encoding.c +encoding.$(OBJEXT): {$(VPATH)}encoding.c $(RUBY_H_INCLUDES) \ + $(ENCODING_H_INCLUDES) {$(VPATH)}regenc.h {$(VPATH)}util.h +enum.$(OBJEXT): {$(VPATH)}enum.c $(RUBY_H_INCLUDES) {$(VPATH)}node.h \ + {$(VPATH)}util.h +enumerator.$(OBJEXT): {$(VPATH)}enumerator.c $(RUBY_H_INCLUDES) +error.$(OBJEXT): {$(VPATH)}error.c {$(VPATH)}known_errors.inc $(RUBY_H_INCLUDES) \ + $(VM_CORE_H_INCLUDES) {$(VPATH)}debug.h +eval.$(OBJEXT): {$(VPATH)}eval.c {$(VPATH)}eval_intern.h \ + $(RUBY_H_INCLUDES) $(VM_CORE_H_INCLUDES) {$(VPATH)}eval_error.c \ + {$(VPATH)}eval_jump.c {$(VPATH)}debug.h {$(VPATH)}gc.h \ + {$(VPATH)}iseq.h +load.$(OBJEXT): {$(VPATH)}load.c {$(VPATH)}eval_intern.h \ + {$(VPATH)}util.h $(RUBY_H_INCLUDES) $(VM_CORE_H_INCLUDES) \ + {$(VPATH)}dln.h {$(VPATH)}debug.h +file.$(OBJEXT): {$(VPATH)}file.c
$(RUBY_H_INCLUDES) {$(VPATH)}io.h \ + $(ENCODING_H_INCLUDES) {$(VPATH)}util.h {$(VPATH)}dln.h +gc.$(OBJEXT): {$(VPATH)}gc.c $(RUBY_H_INCLUDES) {$(VPATH)}re.h \ + {$(VPATH)}regex.h $(ENCODING_H_INCLUDES) $(VM_CORE_H_INCLUDES) \ + {$(VPATH)}gc.h {$(VPATH)}io.h {$(VPATH)}eval_intern.h {$(VPATH)}util.h \ + {$(VPATH)}debug.h +hash.$(OBJEXT): {$(VPATH)}hash.c $(RUBY_H_INCLUDES) {$(VPATH)}util.h +inits.$(OBJEXT): {$(VPATH)}inits.c $(RUBY_H_INCLUDES) +io.$(OBJEXT): {$(VPATH)}io.c $(RUBY_H_INCLUDES) {$(VPATH)}io.h \ + {$(VPATH)}util.h $(ENCODING_H_INCLUDES) +main.$(OBJEXT): {$(VPATH)}main.c $(RUBY_H_INCLUDES) {$(VPATH)}debug.h \ + {$(VPATH)}node.h +marshal.$(OBJEXT): {$(VPATH)}marshal.c $(RUBY_H_INCLUDES) {$(VPATH)}io.h \ + $(ENCODING_H_INCLUDES) {$(VPATH)}util.h +math.$(OBJEXT): {$(VPATH)}math.c $(RUBY_H_INCLUDES) +numeric.$(OBJEXT): {$(VPATH)}numeric.c $(RUBY_H_INCLUDES) \ + $(ENCODING_H_INCLUDES) +object.$(OBJEXT): {$(VPATH)}object.c $(RUBY_H_INCLUDES) {$(VPATH)}util.h +pack.$(OBJEXT): {$(VPATH)}pack.c $(RUBY_H_INCLUDES) +parse.$(OBJEXT): {$(VPATH)}parse.c $(RUBY_H_INCLUDES) \ + {$(VPATH)}node.h $(ENCODING_H_INCLUDES) $(ID_H_INCLUDES) \ + {$(VPATH)}regenc.h {$(VPATH)}regex.h {$(VPATH)}util.h {$(VPATH)}lex.c \ + {$(VPATH)}defs/keywords {$(VPATH)}id.c +proc.$(OBJEXT): {$(VPATH)}proc.c {$(VPATH)}eval_intern.h \ + $(RUBY_H_INCLUDES) {$(VPATH)}gc.h $(VM_CORE_H_INCLUDES) \ + {$(VPATH)}debug.h +process.$(OBJEXT): {$(VPATH)}process.c $(RUBY_H_INCLUDES) \ + {$(VPATH)}util.h {$(VPATH)}io.h $(ENCODING_H_INCLUDES) {$(VPATH)}dln.h \ + $(VM_CORE_H_INCLUDES) {$(VPATH)}debug.h +random.$(OBJEXT): {$(VPATH)}random.c $(RUBY_H_INCLUDES) +range.$(OBJEXT): {$(VPATH)}range.c $(RUBY_H_INCLUDES) \ + $(ENCODING_H_INCLUDES) +rational.$(OBJEXT): {$(VPATH)}rational.c $(RUBY_H_INCLUDES) +re.$(OBJEXT): {$(VPATH)}re.c $(RUBY_H_INCLUDES) {$(VPATH)}re.h \ + {$(VPATH)}regex.h $(ENCODING_H_INCLUDES) {$(VPATH)}util.h \ + {$(VPATH)}regint.h {$(VPATH)}regenc.h +regcomp.$(OBJEXT): {$(VPATH)}regcomp.c 
{$(VPATH)}regparse.h \ + {$(VPATH)}regint.h {$(VPATH)}regenc.h {$(VPATH)}oniguruma.h \ + $(RUBY_H_INCLUDES) +regenc.$(OBJEXT): {$(VPATH)}regenc.c {$(VPATH)}regint.h \ + {$(VPATH)}regenc.h {$(VPATH)}oniguruma.h $(RUBY_H_INCLUDES) +regerror.$(OBJEXT): {$(VPATH)}regerror.c {$(VPATH)}regint.h \ + {$(VPATH)}regenc.h {$(VPATH)}oniguruma.h $(RUBY_H_INCLUDES) +regexec.$(OBJEXT): {$(VPATH)}regexec.c {$(VPATH)}regint.h \ + {$(VPATH)}regenc.h {$(VPATH)}oniguruma.h $(RUBY_H_INCLUDES) +regparse.$(OBJEXT): {$(VPATH)}regparse.c {$(VPATH)}regparse.h \ + {$(VPATH)}regint.h {$(VPATH)}regenc.h {$(VPATH)}oniguruma.h \ + $(RUBY_H_INCLUDES) +regsyntax.$(OBJEXT): {$(VPATH)}regsyntax.c {$(VPATH)}regint.h \ + {$(VPATH)}regenc.h {$(VPATH)}oniguruma.h $(RUBY_H_INCLUDES) +ruby.$(OBJEXT): {$(VPATH)}ruby.c $(RUBY_H_INCLUDES) {$(VPATH)}util.h \ + $(ENCODING_H_INCLUDES) {$(VPATH)}eval_intern.h $(VM_CORE_H_INCLUDES) \ + {$(VPATH)}dln.h {$(VPATH)}debug.h +safe.$(OBJEXT): {$(VPATH)}safe.c $(RUBY_H_INCLUDES) \ + $(VM_CORE_H_INCLUDES) {$(VPATH)}debug.h +signal.$(OBJEXT): {$(VPATH)}signal.c $(RUBY_H_INCLUDES) \ + $(VM_CORE_H_INCLUDES) {$(VPATH)}debug.h +sprintf.$(OBJEXT): {$(VPATH)}sprintf.c $(RUBY_H_INCLUDES) {$(VPATH)}re.h \ + {$(VPATH)}regex.h {$(VPATH)}vsnprintf.c $(ENCODING_H_INCLUDES) +st.$(OBJEXT): {$(VPATH)}st.c {$(VPATH)}config.h {$(VPATH)}defines.h \ + {$(VPATH)}st.h {$(VPATH)}ruby.h {$(VPATH)}missing.h {$(VPATH)}intern.h +strftime.$(OBJEXT): {$(VPATH)}strftime.c {$(VPATH)}ruby.h \ + {$(VPATH)}config.h {$(VPATH)}defines.h {$(VPATH)}missing.h \ + {$(VPATH)}intern.h {$(VPATH)}st.h +string.$(OBJEXT): {$(VPATH)}string.c $(RUBY_H_INCLUDES) {$(VPATH)}re.h \ + {$(VPATH)}regex.h $(ENCODING_H_INCLUDES) +struct.$(OBJEXT): {$(VPATH)}struct.c $(RUBY_H_INCLUDES) +thread.$(OBJEXT): {$(VPATH)}thread.c {$(VPATH)}eval_intern.h \ + $(RUBY_H_INCLUDES) {$(VPATH)}gc.h $(VM_CORE_H_INCLUDES) \ + {$(VPATH)}debug.h {$(VPATH)}thread_$(THREAD_MODEL).c +transcode.$(OBJEXT): {$(VPATH)}transcode.c $(RUBY_H_INCLUDES) \ + 
$(ENCODING_H_INCLUDES) {$(VPATH)}transcode_data.h +cont.$(OBJEXT): {$(VPATH)}cont.c $(RUBY_H_INCLUDES) \ + $(VM_CORE_H_INCLUDES) {$(VPATH)}gc.h {$(VPATH)}eval_intern.h \ + {$(VPATH)}debug.h +time.$(OBJEXT): {$(VPATH)}time.c $(RUBY_H_INCLUDES) \ + $(ENCODING_H_INCLUDES) +util.$(OBJEXT): {$(VPATH)}util.c $(RUBY_H_INCLUDES) {$(VPATH)}util.h +variable.$(OBJEXT): {$(VPATH)}variable.c $(RUBY_H_INCLUDES) \ + {$(VPATH)}node.h {$(VPATH)}util.h {$(VPATH)}encoding.h +version.$(OBJEXT): {$(VPATH)}version.c $(RUBY_H_INCLUDES) \ + {$(VPATH)}version.h $(srcdir)/revision.h + +compile.$(OBJEXT): {$(VPATH)}compile.c {$(VPATH)}iseq.h \ + $(RUBY_H_INCLUDES) $(VM_CORE_H_INCLUDES) {$(VPATH)}insns.inc \ + {$(VPATH)}insns_info.inc {$(VPATH)}optinsn.inc {$(VPATH)}debug.h +iseq.$(OBJEXT): {$(VPATH)}iseq.c {$(VPATH)}gc.h {$(VPATH)}iseq.h \ + $(RUBY_H_INCLUDES) $(VM_CORE_H_INCLUDES) {$(VPATH)}insns.inc \ + {$(VPATH)}insns_info.inc {$(VPATH)}node_name.inc {$(VPATH)}debug.h +vm.$(OBJEXT): {$(VPATH)}vm.c {$(VPATH)}gc.h {$(VPATH)}iseq.h \ + {$(VPATH)}eval_intern.h $(RUBY_H_INCLUDES) $(ENCODING_H_INCLUDES) \ + $(VM_CORE_H_INCLUDES) {$(VPATH)}vm_method.c {$(VPATH)}vm_eval.c \ + {$(VPATH)}vm_insnhelper.c {$(VPATH)}vm_insnhelper.h {$(VPATH)}vm_exec.c \ + {$(VPATH)}vm_exec.h {$(VPATH)}insns.def {$(VPATH)}vmtc.inc \ + {$(VPATH)}vm.inc {$(VPATH)}insns.inc {$(VPATH)}debug.h +vm_dump.$(OBJEXT): {$(VPATH)}vm_dump.c $(RUBY_H_INCLUDES) \ + $(VM_CORE_H_INCLUDES) {$(VPATH)}debug.h +debug.$(OBJEXT): {$(VPATH)}debug.c $(RUBY_H_INCLUDES) \ + $(ENCODING_H_INCLUDES) $(VM_CORE_H_INCLUDES) {$(VPATH)}eval_intern.h \ + {$(VPATH)}util.h {$(VPATH)}debug.h +id.$(OBJEXT): {$(VPATH)}id.c $(RUBY_H_INCLUDES) $(ID_H_INCLUDES) +miniprelude.$(OBJEXT): {$(VPATH)}miniprelude.c $(RUBY_H_INCLUDES) \ + $(VM_CORE_H_INCLUDES) {$(VPATH)}debug.h +prelude.$(OBJEXT): {$(VPATH)}prelude.c $(RUBY_H_INCLUDES) \ + $(VM_CORE_H_INCLUDES) {$(VPATH)}debug.h +golf_prelude.$(OBJEXT): {$(VPATH)}golf_prelude.c $(RUBY_H_INCLUDES) \ + 
$(VM_CORE_H_INCLUDES) {$(VPATH)}debug.h +goruby.$(OBJEXT): {$(VPATH)}goruby.c {$(VPATH)}main.c $(RUBY_H_INCLUDES) \ + {$(VPATH)}debug.h {$(VPATH)}node.h + +ascii.$(OBJEXT): {$(VPATH)}ascii.c {$(VPATH)}regenc.h {$(VPATH)}config.h \ + {$(VPATH)}defines.h {$(VPATH)}oniguruma.h +us_ascii.$(OBJEXT): {$(VPATH)}us_ascii.c {$(VPATH)}regenc.h \ + {$(VPATH)}config.h {$(VPATH)}defines.h {$(VPATH)}oniguruma.h +unicode.$(OBJEXT): {$(VPATH)}unicode.c {$(VPATH)}regint.h \ + {$(VPATH)}config.h {$(VPATH)}defines.h {$(VPATH)}regenc.h \ + {$(VPATH)}oniguruma.h {$(VPATH)}st.h {$(VPATH)}ruby.h \ + {$(VPATH)}missing.h {$(VPATH)}intern.h +utf_8.$(OBJEXT): {$(VPATH)}utf_8.c {$(VPATH)}regenc.h {$(VPATH)}config.h \ + {$(VPATH)}defines.h {$(VPATH)}oniguruma.h + +newline.c: $(srcdir)/enc/trans/newline.trans $(srcdir)/tool/transcode-tblgen.rb +newline.$(OBJEXT): {$(VPATH)}newline.c {$(VPATH)}defines.h \ + {$(VPATH)}intern.h {$(VPATH)}missing.h {$(VPATH)}st.h \ + {$(VPATH)}transcode_data.h {$(VPATH)}ruby.h {$(VPATH)}config.h + +INSNS2VMOPT = --srcdir="$(srcdir)" + +$(INSNS): $(srcdir)/insns.def {$(VPATH)}vm_opts.h $(srcdir)/defs/opt_operand.def $(srcdir)/defs/opt_insn_unif.def + +minsns.inc: $(srcdir)/template/minsns.inc.tmpl + +opt_sc.inc: $(srcdir)/template/opt_sc.inc.tmpl + +optinsn.inc: $(srcdir)/template/optinsn.inc.tmpl + +optunifs.inc: $(srcdir)/template/optunifs.inc.tmpl + +insns.inc: $(srcdir)/template/insns.inc.tmpl + +insns_info.inc: $(srcdir)/template/insns_info.inc.tmpl + +vmtc.inc: $(srcdir)/template/vmtc.inc.tmpl + +vm.inc: $(srcdir)/template/vm.inc.tmpl + +srcs: {$(VPATH)}parse.c {$(VPATH)}lex.c {$(VPATH)}newline.c $(srcdir)/ext/ripper/ripper.c srcs-enc + +srcs-enc: $(ENC_MK) + $(MAKE) -f $(ENC_MK) RUBY="$(MINIRUBY)" MINIRUBY="$(MINIRUBY)" $(MFLAGS) srcs + +incs: $(INSNS) {$(VPATH)}node_name.inc {$(VPATH)}encdb.h {$(VPATH)}transdb.h {$(VPATH)}known_errors.inc $(srcdir)/revision.h + +insns: $(INSNS) + +node_name.inc: {$(VPATH)}node.h + +encdb.h: $(PREP) 
$(srcdir)/tool/generic_erb.rb $(srcdir)/template/encdb.h.tmpl + $(MINIRUBY) $(srcdir)/tool/generic_erb.rb -c -o $@ $(srcdir)/template/encdb.h.tmpl $(srcdir)/enc enc + +transdb.h: $(PREP) srcs-enc $(srcdir)/tool/generic_erb.rb $(srcdir)/template/transdb.h.tmpl + $(MINIRUBY) $(srcdir)/tool/generic_erb.rb -c -o $@ $(srcdir)/template/transdb.h.tmpl $(srcdir)/enc/trans enc/trans + +# {$(VPATH)}id.h: {$(VPATH)}parse.h $(srcdir)/tool/generic_erb.rb $(srcdir)/template/id.h.tmpl +# $(BASERUBY) $(srcdir)/tool/generic_erb.rb --if-change --output=$@ \ +# $(srcdir)/template/id.h.tmpl --vpath=$(VPATH) parse.h + +known_errors.inc: $(srcdir)/template/known_errors.inc.tmpl $(srcdir)/defs/known_errors.def + +miniprelude.c: $(srcdir)/tool/compile_prelude.rb $(srcdir)/prelude.rb + +prelude.c: $(srcdir)/tool/compile_prelude.rb $(RBCONFIG) $(PRELUDE_SCRIPTS) $(PREP) + $(COMPILE_PRELUDE) $(PRELUDE_SCRIPTS) $@ + +golf_prelude.c: $(srcdir)/tool/compile_prelude.rb $(RBCONFIG) $(srcdir)/prelude.rb $(srcdir)/golf_prelude.rb $(PREP) + $(COMPILE_PRELUDE) $(srcdir)/golf_prelude.rb $@ + +prereq: incs srcs preludes + +preludes: {$(VPATH)}miniprelude.c +preludes: {$(srcdir)}golf_prelude.c + +$(srcdir)/revision.h: $(srcdir)/version.h $(srcdir)/ChangeLog $(srcdir)/tool/file2lastrev.rb $(REVISION_FORCE) + @-$(BASERUBY) $(srcdir)/tool/file2lastrev.rb --revision.h "$(@D)" > "$@.tmp" + @$(IFCHANGE) "$@" "$@.tmp" + +$(srcdir)/ext/ripper/ripper.c: + cd $(srcdir)/ext/ripper && $(exec) $(MAKE) -f depend $(MFLAGS) top_srcdir=../.. srcdir=. 
+ +## + +run: miniruby$(EXEEXT) PHONY + $(MINIRUBY) $(TESTRUN_SCRIPT) $(RUNOPT) + +runruby: $(PROGRAM) PHONY + $(RUNRUBY) $(TESTRUN_SCRIPT) + +parse: miniruby$(EXEEXT) PHONY + $(MINIRUBY) $(srcdir)/tool/parse.rb $(TESTRUN_SCRIPT) + +COMPARE_RUBY = $(BASERUBY) +ITEM = +OPTS = + +benchmark: $(PROGRAM) PHONY + $(BASERUBY) $(srcdir)/benchmark/driver.rb -v \ + --executables="$(COMPARE_RUBY); $(RUNRUBY)" \ + --pattern='bm_' --directory=$(srcdir)/benchmark $(OPTS) + +benchmark-each: $(PROGRAM) PHONY + $(BASERUBY) $(srcdir)/benchmark/driver.rb -v \ + --executables="$(COMPARE_RUBY); $(RUNRUBY)" \ + --pattern=$(ITEM) --directory=$(srcdir)/benchmark $(OPTS) + +tbench: $(PROGRAM) PHONY + $(BASERUBY) $(srcdir)/benchmark/driver.rb -v \ + --executables="$(COMPARE_RUBY); $(RUNRUBY)" \ + --pattern='bmx_' --directory=$(srcdir)/benchmark $(OPTS) + +run.gdb: + echo b ruby_debug_breakpoint > run.gdb + echo '# handle SIGINT nostop' >> run.gdb + echo '# handle SIGPIPE nostop' >> run.gdb + echo '# b rb_longjmp' >> run.gdb + echo source $(srcdir)/breakpoints.gdb >> run.gdb + echo source $(srcdir)/.gdbinit >> run.gdb + echo run >> run.gdb + +gdb: miniruby$(EXEEXT) run.gdb PHONY + gdb -x run.gdb --quiet --args $(MINIRUBY) $(TESTRUN_SCRIPT) + +gdb-ruby: $(PROGRAM) run.gdb PHONY + gdb -x run.gdb --quiet --args $(PROGRAM) $(TESTRUN_SCRIPT) + +dist: + $(BASERUBY) $(srcdir)/tool/make-snapshot tmp $(RELNAME) + +up:: + -@$(MAKE) $(MFLAGS) REVISION_FORCE=PHONY "$(srcdir)/revision.h" + +help: PHONY + @echo " Makefile of Ruby" + @echo "" + @echo "targets:" + @echo " all (default): builds ruby" + @echo " miniruby: builds only miniruby" + @echo " run: runs test.rb by miniruby" + @echo " runruby: runs test.rb by ruby you just built" + @echo " gdb: runs test.rb by miniruby under gdb" + @echo " gdb-ruby: runs test.rb by ruby under gdb" + @echo " all: compile ruby and extensions" + @echo " check: equals make test test-all" + @echo " test: ruby core tests" + @echo " test-all: all ruby tests" + @echo " 
test-rubyspec run RubySpec test suite" + @echo " update-rubyspec update local copy of RubySpec" + @echo " install: install all ruby distributions" + @echo " install-nodoc: install without rdoc" + @echo " clean: clean built objects" + @echo " golf: for golfers" + @echo + @echo "see DeveloperHowto for more detail: " + @echo " http://redmine.ruby-lang.org/wiki/ruby/DeveloperHowto" diff --git a/compar.c b/compar.c new file mode 100644 index 0000000..f232d4d --- /dev/null +++ b/compar.c @@ -0,0 +1,213 @@ +/********************************************************************** + + compar.c - + + $Author: nobu $ + created at: Thu Aug 26 14:39:48 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby/ruby.h" + +VALUE rb_mComparable; + +static ID cmp; + +void +rb_cmperr(VALUE x, VALUE y) +{ + const char *classname; + + if (SPECIAL_CONST_P(y)) { + y = rb_inspect(y); + classname = StringValuePtr(y); + } + else { + classname = rb_obj_classname(y); + } + rb_raise(rb_eArgError, "comparison of %s with %s failed", + rb_obj_classname(x), classname); +} + +static VALUE +cmp_eq(VALUE *a) +{ + VALUE c = rb_funcall(a[0], cmp, 1, a[1]); + + if (NIL_P(c)) return Qfalse; + if (rb_cmpint(c, a[0], a[1]) == 0) return Qtrue; + return Qfalse; +} + +static VALUE +cmp_failed(void) +{ + return Qfalse; +} + +/* + * call-seq: + * obj == other => true or false + * + * Compares two objects based on the receiver's <=> + * method, returning true if it returns 0. Also returns true if + * _obj_ and _other_ are the same object. + */ + +static VALUE +cmp_equal(VALUE x, VALUE y) +{ + VALUE a[2]; + + if (x == y) return Qtrue; + + a[0] = x; a[1] = y; + return rb_rescue(cmp_eq, (VALUE)a, cmp_failed, 0); +} + +/* + * call-seq: + * obj > other => true or false + * + * Compares two objects based on the receiver's <=> + * method, returning true if it returns 1. 
+ */
+
+static VALUE
+cmp_gt(VALUE x, VALUE y)
+{
+    /* call the receiver's <=> and test what rb_cmpint() makes of the result */
+    VALUE order = rb_funcall(x, cmp, 1, y);
+
+    return (rb_cmpint(order, x, y) > 0) ? Qtrue : Qfalse;
+}
+
+/*
+ *  call-seq:
+ *     obj >= other    => true or false
+ *
+ *  Compares two objects based on the receiver's <=>
+ *  method, returning true if it returns 0 or 1.
+ */
+
+static VALUE
+cmp_ge(VALUE x, VALUE y)
+{
+    /* same protocol as cmp_gt, but zero also counts as a match */
+    VALUE order = rb_funcall(x, cmp, 1, y);
+
+    return (rb_cmpint(order, x, y) >= 0) ? Qtrue : Qfalse;
+}
+
+/*
+ *  call-seq:
+ *     obj < other    => true or false
+ *
+ *  Compares two objects based on the receiver's <=>
+ *  method, returning true if it returns -1.
+ */
+
+static VALUE
+cmp_lt(VALUE x, VALUE y)
+{
+    /* true only when the comparison result is strictly below zero */
+    VALUE order = rb_funcall(x, cmp, 1, y);
+
+    return (rb_cmpint(order, x, y) < 0) ? Qtrue : Qfalse;
+}
+
+/*
+ *  call-seq:
+ *     obj <= other    => true or false
+ *
+ *  Compares two objects based on the receiver's <=>
+ *  method, returning true if it returns -1 or 0.
+ */
+
+static VALUE
+cmp_le(VALUE x, VALUE y)
+{
+    /* true when the comparison result is zero or below */
+    VALUE order = rb_funcall(x, cmp, 1, y);
+
+    return (rb_cmpint(order, x, y) <= 0) ? Qtrue : Qfalse;
+}
+
+/*
+ *  call-seq:
+ *     obj.between?(min, max)    => true or false
+ *
+ *  Returns false if obj <=>
+ *  min is less than zero or if anObject <=>
+ *  max is greater than zero, true otherwise.
+ *
+ *     3.between?(1, 5)               #=> true
+ *     6.between?(1, 5)               #=> false
+ *     'cat'.between?('ant', 'dog')   #=> true
+ *     'gnu'.between?('ant', 'dog')   #=> false
+ *
+ */
+
+static VALUE
+cmp_between(VALUE x, VALUE min, VALUE max)
+{
+    /*
+     * The lower bound is checked first; the upper bound is only
+     * consulted when x is not below min (|| short-circuits).
+     */
+    if (RTEST(cmp_lt(x, min)) || RTEST(cmp_gt(x, max))) {
+        return Qfalse;
+    }
+    return Qtrue;
+}
+
+/*
+ *  The Comparable mixin is used by classes whose objects
+ *  may be ordered. The class must define the <=> operator,
+ *  which compares the receiver against another object, returning -1, 0,
+ *  or +1 depending on whether the receiver is less than, equal to, or
+ *  greater than the other object.
Comparable uses + * <=> to implement the conventional comparison operators + * (<, <=, ==, >=, + * and >) and the method between?. + * + * class SizeMatters + * include Comparable + * attr :str + * def <=>(anOther) + * str.size <=> anOther.str.size + * end + * def initialize(str) + * @str = str + * end + * def inspect + * @str + * end + * end + * + * s1 = SizeMatters.new("Z") + * s2 = SizeMatters.new("YY") + * s3 = SizeMatters.new("XXX") + * s4 = SizeMatters.new("WWWW") + * s5 = SizeMatters.new("VVVVV") + * + * s1 < s2 #=> true + * s4.between?(s1, s3) #=> false + * s4.between?(s3, s5) #=> true + * [ s3, s2, s5, s4, s1 ].sort #=> [Z, YY, XXX, WWWW, VVVVV] + * + */ + +void +Init_Comparable(void) +{ +#undef rb_intern +#define rb_intern(str) rb_intern_const(str) + + rb_mComparable = rb_define_module("Comparable"); + rb_define_method(rb_mComparable, "==", cmp_equal, 1); + rb_define_method(rb_mComparable, ">", cmp_gt, 1); + rb_define_method(rb_mComparable, ">=", cmp_ge, 1); + rb_define_method(rb_mComparable, "<", cmp_lt, 1); + rb_define_method(rb_mComparable, "<=", cmp_le, 1); + rb_define_method(rb_mComparable, "between?", cmp_between, 2); + + cmp = rb_intern("<=>"); +} diff --git a/compile.c b/compile.c new file mode 100644 index 0000000..1261815 --- /dev/null +++ b/compile.c @@ -0,0 +1,5441 @@ +/********************************************************************** + + compile.c - ruby node tree -> VM instruction sequence + + $Author: yugui $ + created at: 04/01/01 03:42:15 JST + + Copyright (C) 2004-2007 Koichi Sasada + +**********************************************************************/ + +#include "ruby/ruby.h" + +#define USE_INSN_STACK_INCREASE 1 +#include "vm_core.h" +#include "iseq.h" +#include "insns.inc" +#include "insns_info.inc" + + +typedef struct iseq_link_element { + enum { + ISEQ_ELEMENT_NONE = INT2FIX(0x00), + ISEQ_ELEMENT_LABEL = INT2FIX(0x01), + ISEQ_ELEMENT_INSN = INT2FIX(0x02), + ISEQ_ELEMENT_ADJUST = INT2FIX(0x03) + } type; + struct 
iseq_link_element *next; + struct iseq_link_element *prev; +} LINK_ELEMENT; + +typedef struct iseq_link_anchor { + LINK_ELEMENT anchor; + LINK_ELEMENT *last; +} LINK_ANCHOR; + +typedef struct iseq_label_data { + LINK_ELEMENT link; + int label_no; + int position; + int sc_state; + int set; + int sp; +} LABEL; + +typedef struct iseq_insn_data { + LINK_ELEMENT link; + enum ruby_vminsn_type insn_id; + int line_no; + int operand_size; + int sc_state; + VALUE *operands; +} INSN; + +typedef struct iseq_adjust_data { + LINK_ELEMENT link; + LABEL *label; + int line_no; +} ADJUST; + +struct ensure_range { + LABEL *begin; + LABEL *end; + struct ensure_range *next; +}; + +struct iseq_compile_data_ensure_node_stack { + NODE *ensure_node; + struct iseq_compile_data_ensure_node_stack *prev; + struct ensure_range *erange; +}; + +/** + * debug function(macro) interface depend on CPDEBUG + * if it is less than 0, runtime option is in effect. + * + * debug level: + * 0: no debug output + * 1: show node type + * 2: show node important parameters + * ... 
+ * 5: show other parameters + * 10: show every AST array + */ + +#ifndef CPDEBUG +#define CPDEBUG 0 +#endif + +#if CPDEBUG >= 0 +#define compile_debug CPDEBUG +#else +#define compile_debug iseq->compile_data->option->debug_level +#endif + +NORETURN(PRINTF_ARGS(void rb_compile_bug(const char*, int, const char*, ...), 3, 4)); + +#if CPDEBUG + +#define compile_debug_print_indent(level) \ + ruby_debug_print_indent(level, compile_debug, gl_node_level * 2) + +#define debugp(header, value) (void) \ + (compile_debug_print_indent(1) && \ + ruby_debug_print_value(1, compile_debug, header, value)) + +#define debugi(header, id) (void) \ + (compile_debug_print_indent(1) && \ + ruby_debug_print_id(1, compile_debug, header, id)) + +#define debugp_param(header, value) (void) \ + (compile_debug_print_indent(1) && \ + ruby_debug_print_value(1, compile_debug, header, value)) + +#define debugp_verbose(header, value) (void) \ + (compile_debug_print_indent(2) && \ + ruby_debug_print_value(2, compile_debug, header, value)) + +#define debugp_verbose_node(header, value) (void) \ + (compile_debug_print_indent(10) && \ + ruby_debug_print_value(10, compile_debug, header, value)) + +#define debug_node_start(node) ((void) \ + (compile_debug_print_indent(1) && \ + (ruby_debug_print_node(1, CPDEBUG, "", (NODE *)node), gl_node_level)), \ + gl_node_level++) + +#define debug_node_end() gl_node_level --; + +#else + +static inline ID +r_id(ID id) +{ + return id; +} + +static inline VALUE +r_value(VALUE value) +{ + return value; +} + +#define debugi(header, id) r_id(id) +#define debugp(header, value) r_value(value) +#define debugp_verbose(header, value) r_value(value) +#define debugp_verbose_node(header, value) r_value(value) +#define debugp_param(header, value) r_value(value) +#define debug_node_start(node) ((void)0) +#define debug_node_end() ((void)0) +#endif + +#if CPDEBUG > 1 || CPDEBUG < 0 +PRINTF_ARGS(void ruby_debug_printf(const char*, ...), 1, 2); +#define debugs if 
(compile_debug_print_indent(1)) ruby_debug_printf +#define debug_compile(msg, v) ((void)(compile_debug_print_indent(1) && fputs(msg, stderr)), (v)) +#else +#define debugs if(0)printf +#define debug_compile(msg, v) (v) +#endif + + +/* create new label */ +#define NEW_LABEL(l) new_label_body(iseq, l) + +#define iseq_filename(iseq) \ + (((rb_iseq_t*)DATA_PTR(iseq))->filename) + +#define NEW_ISEQVAL(node, name, type, line_no) \ + new_child_iseq(iseq, node, name, 0, type, line_no) + +#define NEW_CHILD_ISEQVAL(node, name, type, line_no) \ + new_child_iseq(iseq, node, name, iseq->self, type, line_no) + +/* add instructions */ +#define ADD_SEQ(seq1, seq2) \ + APPEND_LIST(seq1, seq2) + +/* add an instruction */ +#define ADD_INSN(seq, line, insn) \ + ADD_ELEM(seq, (LINK_ELEMENT *) new_insn_body(iseq, line, BIN(insn), 0)) + +/* add an instruction with label operand */ +#define ADD_INSNL(seq, line, insn, label) \ + ADD_ELEM(seq, (LINK_ELEMENT *) \ + new_insn_body(iseq, line, BIN(insn), 1, (VALUE)label)) + +/* add an instruction with some operands (1, 2, 3, 5) */ +#define ADD_INSN1(seq, line, insn, op1) \ + ADD_ELEM(seq, (LINK_ELEMENT *) \ + new_insn_body(iseq, line, BIN(insn), 1, (VALUE)op1)) + +#define ADD_INSN2(seq, line, insn, op1, op2) \ + ADD_ELEM(seq, (LINK_ELEMENT *) \ + new_insn_body(iseq, line, BIN(insn), 2, (VALUE)op1, (VALUE)op2)) + +#define ADD_INSN3(seq, line, insn, op1, op2, op3) \ + ADD_ELEM(seq, (LINK_ELEMENT *) \ + new_insn_body(iseq, line, BIN(insn), 3, (VALUE)op1, (VALUE)op2, (VALUE)op3)) + +/* Specific Insn factory */ +#define ADD_SEND(seq, line, id, argc) \ + ADD_SEND_R(seq, line, id, argc, (VALUE)Qfalse, (VALUE)INT2FIX(0)) + +#define ADD_CALL_RECEIVER(seq, line) \ + ADD_INSN(seq, line, putnil) + +#define ADD_CALL(seq, line, id, argc) \ + ADD_SEND_R(seq, line, id, argc, (VALUE)Qfalse, (VALUE)INT2FIX(VM_CALL_FCALL_BIT)) + +#define ADD_CALL_WITH_BLOCK(seq, line, id, argc, block) \ + ADD_SEND_R(seq, line, id, argc, block, (VALUE)INT2FIX(VM_CALL_FCALL_BIT)) + 
+/* general form: append the send instruction built by new_insn_send() */
+#define ADD_SEND_R(seq, line, id, argc, block, flag) \
+  ADD_ELEM(seq, (LINK_ELEMENT *) \
+           new_insn_send(iseq, line, \
+                         (VALUE)id, (VALUE)argc, (VALUE)block, (VALUE)flag))
+
+/*
+ * Emit trace instructions for `event' at `line'.
+ *
+ * - For RUBY_EVENT_LINE, when iseq->coverage is set and the slot for
+ *   (line - 1) is still nil, the slot is initialised to 0 and a
+ *   RUBY_EVENT_COVERAGE trace instruction is added.
+ * - When the trace_instruction compile option is on, a trace
+ *   instruction for `event' itself is added.
+ *
+ * The expansion deliberately does NOT end in a semicolon: the caller
+ * supplies it, so the macro behaves like a single statement and is
+ * safe in an unbraced if/else.
+ */
+#define ADD_TRACE(seq, line, event) \
+  do { \
+      if ((event) == RUBY_EVENT_LINE && iseq->coverage && \
+          RARRAY_PTR(iseq->coverage)[(line) - 1] == Qnil) { \
+          RARRAY_PTR(iseq->coverage)[(line) - 1] = INT2FIX(0); \
+          ADD_INSN1(seq, line, trace, INT2FIX(RUBY_EVENT_COVERAGE)); \
+      } \
+      if (iseq->compile_data->option->trace_instruction) { \
+          ADD_INSN1(seq, line, trace, INT2FIX(event)); \
+      } \
+  } while (0)
+
+/* add label */
+#define ADD_LABEL(seq, label) \
+  ADD_ELEM(seq, (LINK_ELEMENT *) label)
+
+/* add an adjust element for `label', recorded with source line `line' */
+#define ADD_ADJUST(seq, line, label) \
+  ADD_ELEM(seq, (LINK_ELEMENT *) new_adjust_body(iseq, label, line))
+
+/* same as ADD_ADJUST, but with -1 passed as the line number */
+#define ADD_ADJUST_RESTORE(seq, label) \
+  ADD_ELEM(seq, (LINK_ELEMENT *) new_adjust_body(iseq, label, -1))
+
+/*
+ * Push a 5-element entry [type, ls, le, iseqv, lc] onto the catch table
+ * array of the iseq being compiled.  The three label arguments are
+ * or'ed with 1 before being stored.
+ * NOTE(review): presumably the low bit makes the raw pointers look like
+ * Fixnums to the GC - confirm.
+ */
+#define ADD_CATCH_ENTRY(type, ls, le, iseqv, lc) \
+    (rb_ary_push(iseq->compile_data->catch_table_ary, \
+                 rb_ary_new3(5, type, \
+                             (VALUE)(ls) | 1, (VALUE)(le) | 1, \
+                             iseqv, (VALUE)(lc) | 1)))
+
+/* compile node (last argument of iseq_compile_each is 0) */
+#define COMPILE(anchor, desc, node) \
+  (debug_compile("== " desc "\n", \
+                 iseq_compile_each(iseq, anchor, node, 0)))
+
+/* compile node, this node's value will be popped (last argument is 1) */
+#define COMPILE_POPED(anchor, desc, node) \
+  (debug_compile("== " desc "\n", \
+                 iseq_compile_each(iseq, anchor, node, 1)))
+
+/* compile node, whose value is popped when 'poped' is true */
+#define COMPILE_(anchor, desc, node, poped) \
+  (debug_compile("== " desc "\n", \
+                 iseq_compile_each(iseq, anchor, node, poped)))
+
+/* operand number `idx' of the instruction element `insn' */
+#define OPERAND_AT(insn, idx) \
+  (((INSN*)(insn))->operands[idx])
+
+/* instruction id of the instruction element `insn' */
+#define INSN_OF(insn) \
+  (((INSN*)(insn))->insn_id)
+
+/* error */
+#define COMPILE_ERROR(strs) \
+{ \
+  VALUE tmp = GET_THREAD()->errinfo; \
+  if (compile_debug) rb_compile_bug strs; \
+  GET_THREAD()->errinfo = iseq->compile_data->err_info; \
+  rb_compile_error strs; \
+  iseq->compile_data->err_info = 
GET_THREAD()->errinfo; \ + GET_THREAD()->errinfo = tmp; \ + ret = 0; \ + break; \ +} + +#define ERROR_ARGS ruby_sourcefile, nd_line(node), + + +#define COMPILE_OK 1 +#define COMPILE_NG 0 + + +/* leave name uninitialized so that compiler warn if INIT_ANCHOR is + * missing */ +#define DECL_ANCHOR(name) \ + LINK_ANCHOR *name, name##_body__ = {{0,},} +#define INIT_ANCHOR(name) \ + (name##_body__.last = &name##_body__.anchor, name = &name##_body__) + +#define hide_obj(obj) do {OBJ_FREEZE(obj); RBASIC(obj)->klass = 0;} while (0) + +#include "optinsn.inc" +#if OPT_INSTRUCTIONS_UNIFICATION +#include "optunifs.inc" +#endif + +/* for debug */ +#if CPDEBUG < 0 +#define ISEQ_ARG iseq, +#define ISEQ_ARG_DECLARE rb_iseq_t *iseq, +#else +#define ISEQ_ARG +#define ISEQ_ARG_DECLARE +#endif + +#if CPDEBUG +#define gl_node_level iseq->compile_data->node_level +#if 0 +static void debug_list(ISEQ_ARG_DECLARE LINK_ANCHOR *anchor); +#endif +#endif + +static void dump_disasm_list(LINK_ELEMENT *elem); + +static int insn_data_length(INSN *iobj); +static int insn_data_line_no(INSN *iobj); +static int calc_sp_depth(int depth, INSN *iobj); + +static void ADD_ELEM(ISEQ_ARG_DECLARE LINK_ANCHOR *anchor, LINK_ELEMENT *elem); + +static INSN *new_insn_body(rb_iseq_t *iseq, int line_no, int insn_id, int argc, ...); +static LABEL *new_label_body(rb_iseq_t *iseq, int line); +static ADJUST *new_adjust_body(rb_iseq_t *iseq, LABEL *label, int line); + +static int iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *anchor, NODE * n, int); +static int iseq_setup(rb_iseq_t *iseq, LINK_ANCHOR *anchor); +static int iseq_optimize(rb_iseq_t *iseq, LINK_ANCHOR *anchor); +static int iseq_insns_unification(rb_iseq_t *iseq, LINK_ANCHOR *anchor); + +static int iseq_set_local_table(rb_iseq_t *iseq, ID *tbl); +static int iseq_set_exception_local_table(rb_iseq_t *iseq); +static int iseq_set_arguments(rb_iseq_t *iseq, LINK_ANCHOR *anchor, NODE * node); + +static int iseq_set_sequence_stackcaching(rb_iseq_t *iseq, LINK_ANCHOR 
*anchor); +static int iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *anchor); +static int iseq_set_exception_table(rb_iseq_t *iseq); +static int iseq_set_optargs_table(rb_iseq_t *iseq); + +/* + * To make Array to LinkedList, use link_anchor + */ + +static void +verify_list(ISEQ_ARG_DECLARE const char *info, LINK_ANCHOR *anchor) +{ +#if CPDEBUG + int flag = 0; + LINK_ELEMENT *list, *plist; + + if (!compile_debug) return; + + list = anchor->anchor.next; + plist = &anchor->anchor; + while (list) { + if (plist != list->prev) { + flag += 1; + } + plist = list; + list = list->next; + } + + if (anchor->last != plist && anchor->last != 0) { + flag |= 0x70000; + } + + if (flag != 0) { + rb_bug("list verify error: %08x (%s)", flag, info); + } +#endif +} +#if CPDEBUG < 0 +#define verify_list(info, anchor) verify_list(iseq, info, anchor) +#endif + +/* + * elem1, elem2 => elem1, elem2, elem + */ +static void +ADD_ELEM(ISEQ_ARG_DECLARE LINK_ANCHOR *anchor, LINK_ELEMENT *elem) +{ + elem->prev = anchor->last; + anchor->last->next = elem; + anchor->last = elem; + verify_list("add", anchor); +} +#if CPDEBUG < 0 +#define ADD_ELEM(anchor, elem) ADD_ELEM(iseq, anchor, elem) +#endif + +static int +iseq_add_mark_object(rb_iseq_t *iseq, VALUE v) +{ + if (!SPECIAL_CONST_P(v)) { + rb_ary_push(iseq->mark_ary, v); + } + return COMPILE_OK; +} + +#define ruby_sourcefile RSTRING_PTR(iseq->filename) + +static int +iseq_add_mark_object_compile_time(rb_iseq_t *iseq, VALUE v) +{ + if (!SPECIAL_CONST_P(v)) { + rb_ary_push(iseq->compile_data->mark_ary, v); + } + return COMPILE_OK; +} + +VALUE +rb_iseq_compile_node(VALUE self, NODE *node) +{ + DECL_ANCHOR(ret); + rb_iseq_t *iseq; + INIT_ANCHOR(ret); + GetISeqPtr(self, iseq); + + if (node == 0) { + COMPILE(ret, "nil", node); + iseq_set_local_table(iseq, 0); + } + else if (nd_type(node) == NODE_SCOPE) { + /* iseq type of top, method, class, block */ + iseq_set_local_table(iseq, node->nd_tbl); + iseq_set_arguments(iseq, ret, node->nd_args); + + switch 
(iseq->type) { + case ISEQ_TYPE_BLOCK: { + LABEL *start = iseq->compile_data->start_label = NEW_LABEL(0); + LABEL *end = iseq->compile_data->end_label = NEW_LABEL(0); + + ADD_LABEL(ret, start); + COMPILE(ret, "block body", node->nd_body); + ADD_LABEL(ret, end); + + /* wide range catch handler must put at last */ + ADD_CATCH_ENTRY(CATCH_TYPE_REDO, start, end, 0, start); + ADD_CATCH_ENTRY(CATCH_TYPE_NEXT, start, end, 0, end); + break; + } + case ISEQ_TYPE_CLASS: { + ADD_TRACE(ret, FIX2INT(iseq->line_no), RUBY_EVENT_CLASS); + COMPILE(ret, "scoped node", node->nd_body); + ADD_TRACE(ret, nd_line(node), RUBY_EVENT_END); + break; + } + case ISEQ_TYPE_METHOD: { + ADD_TRACE(ret, FIX2INT(iseq->line_no), RUBY_EVENT_CALL); + COMPILE(ret, "scoped node", node->nd_body); + ADD_TRACE(ret, nd_line(node), RUBY_EVENT_RETURN); + break; + } + default: { + COMPILE(ret, "scoped node", node->nd_body); + break; + } + } + } + else { + switch (iseq->type) { + case ISEQ_TYPE_METHOD: + case ISEQ_TYPE_CLASS: + case ISEQ_TYPE_BLOCK: + case ISEQ_TYPE_EVAL: + case ISEQ_TYPE_MAIN: + case ISEQ_TYPE_TOP: + rb_compile_error(ERROR_ARGS "compile/should not be reached: %s:%d", + __FILE__, __LINE__); + break; + case ISEQ_TYPE_RESCUE: + iseq_set_exception_local_table(iseq); + COMPILE(ret, "rescue", node); + break; + case ISEQ_TYPE_ENSURE: + iseq_set_exception_local_table(iseq); + COMPILE_POPED(ret, "ensure", node); + break; + case ISEQ_TYPE_DEFINED_GUARD: + iseq_set_local_table(iseq, 0); + COMPILE(ret, "defined guard", node); + break; + default: + rb_bug("unknown scope"); + } + } + + if (iseq->type == ISEQ_TYPE_RESCUE || iseq->type == ISEQ_TYPE_ENSURE) { + ADD_INSN2(ret, 0, getdynamic, INT2FIX(2), INT2FIX(0)); + ADD_INSN1(ret, 0, throw, INT2FIX(0) /* continue throw */ ); + } + else { + ADD_INSN(ret, iseq->compile_data->last_line, leave); + } + + return iseq_setup(iseq, ret); +} + +int +rb_iseq_translate_threaded_code(rb_iseq_t *iseq) +{ +#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE + extern const 
void **rb_vm_get_insns_address_table(void);
+#if OPT_DIRECT_THREADED_CODE
+    const void * const *table = rb_vm_get_insns_address_table();
+#else /* both branches currently declare the identical table */
+    const void * const *table = rb_vm_get_insns_address_table();
+#endif
+    int i;
+
+    iseq->iseq_encoded = ALLOC_N(VALUE, iseq->iseq_size);
+    MEMCPY(iseq->iseq_encoded, iseq->iseq, VALUE, iseq->iseq_size);
+
+    for (i = 0; i < iseq->iseq_size; /* advanced by insn length below */ ) {
+        int insn = iseq->iseq_encoded[i];
+        int len = insn_len(insn);
+        iseq->iseq_encoded[i] = (VALUE)table[insn]; /* only the opcode slot is rewritten */
+        i += len; /* skip the operands */
+    }
+#else
+    iseq->iseq_encoded = iseq->iseq;
+#endif
+    return COMPILE_OK;
+}
+
+/*********************************************/
+/* definition of data structure for compiler */
+/*********************************************/
+/*
+ * Bump allocator over the chain of iseq_compile_data_storage chunks.
+ * Returns a pointer to `size` bytes inside the current chunk.  When the
+ * current chunk cannot hold the request, a new chunk is allocated (header
+ * and buffer in a single ALLOC_N) whose buffer is twice the previous
+ * chunk's size, doubled further until it is >= size, and it becomes
+ * storage_current.  No alignment padding is applied to storage->pos.
+ * NOTE(review): the retry loop cannot terminate if storage->size is 0;
+ * presumably the initial chunk size is non-zero -- confirm.
+ */
+static void *
+compile_data_alloc(rb_iseq_t *iseq, size_t size)
+{
+    void *ptr = 0;
+    struct iseq_compile_data_storage *storage =
+        iseq->compile_data->storage_current;
+
+    if (storage->pos + size > storage->size) {
+        unsigned long alloc_size = storage->size * 2;
+
+      retry:
+        if (alloc_size < size) {
+            alloc_size *= 2;
+            goto retry;
+        }
+        storage->next = (void *)ALLOC_N(char, alloc_size +
+                                        sizeof(struct
+                                               iseq_compile_data_storage));
+        storage = iseq->compile_data->storage_current = storage->next;
+        storage->next = 0;
+        storage->pos = 0;
+        storage->size = alloc_size;
+        storage->buff = (char *)(&storage->buff + 1); /* buffer starts right after the buff member */
+    }
+
+    ptr = (void *)&storage->buff[storage->pos];
+    storage->pos += size;
+    return ptr;
+}
+/* Allocate uninitialized storage for one INSN from the compile-data arena. */
+static INSN *
+compile_data_alloc_insn(rb_iseq_t *iseq)
+{
+    return (INSN *)compile_data_alloc(iseq, sizeof(INSN));
+}
+/* Allocate uninitialized storage for one LABEL from the compile-data arena. */
+static LABEL *
+compile_data_alloc_label(rb_iseq_t *iseq)
+{
+    return (LABEL *)compile_data_alloc(iseq, sizeof(LABEL));
+}
+/* Allocate uninitialized storage for one ADJUST from the compile-data arena. */
+static ADJUST *
+compile_data_alloc_adjust(rb_iseq_t *iseq)
+{
+    return (ADJUST *)compile_data_alloc(iseq, sizeof(ADJUST));
+}
+
+/*
+ * Insert elem2 directly after elem1:
+ * elem1, elemX => elem1, elem2, elemX
+ */
+static void
+INSERT_ELEM_NEXT(LINK_ELEMENT *elem1, LINK_ELEMENT *elem2)
+{
+    elem2->next = elem1->next;
+
elem2->prev = elem1;
+    elem1->next = elem2;
+    if (elem2->next) {
+        elem2->next->prev = elem2;
+    }
+}
+
+#if 0 /* unused */
+/*
+ * Insert elem2 directly before elem1:
+ * elemX, elem1 => elemX, elem2, elem1
+ */
+static void
+INSERT_ELEM_PREV(LINK_ELEMENT *elem1, LINK_ELEMENT *elem2)
+{
+    elem2->prev = elem1->prev;
+    elem2->next = elem1;
+    elem1->prev = elem2;
+    if (elem2->prev) {
+        elem2->prev->next = elem2;
+    }
+}
+#endif
+
+/*
+ * Put elem2 in elem1's place:
+ * elemX, elem1, elemY => elemX, elem2, elemY
+ *
+ * elem1's own prev/next are left untouched.  No LINK_ANCHOR is passed in,
+ * so an anchor's `last` pointer is not updated here.
+ */
+static void
+REPLACE_ELEM(LINK_ELEMENT *elem1, LINK_ELEMENT *elem2)
+{
+    elem2->prev = elem1->prev;
+    elem2->next = elem1->next;
+    if (elem1->prev) {
+        elem1->prev->next = elem2;
+    }
+    if (elem1->next) {
+        elem1->next->prev = elem2;
+    }
+}
+/*
+ * Unlink elem from its neighbours.  elem->prev is dereferenced without a
+ * check, and elem's own prev/next are left intact (callers in this file
+ * read elem->next after removal).  An anchor's `last` is not updated.
+ */
+static void
+REMOVE_ELEM(LINK_ELEMENT *elem)
+{
+    elem->prev->next = elem->next;
+    if (elem->next) {
+        elem->next->prev = elem->prev;
+    }
+}
+/* First real element of the list (the one after the anchor), or 0 if empty. */
+static LINK_ELEMENT *
+FIRST_ELEMENT(LINK_ANCHOR *anchor)
+{
+    return anchor->anchor.next;
+}
+
+#if 0 /* unused */
+static LINK_ELEMENT *
+LAST_ELEMENT(LINK_ANCHOR *anchor)
+{
+    return anchor->last;
+}
+#endif
+/*
+ * Detach and return the last element; the element before it becomes the
+ * new last.  anchor->last->prev is used unchecked.
+ */
+static LINK_ELEMENT *
+POP_ELEMENT(ISEQ_ARG_DECLARE LINK_ANCHOR *anchor)
+{
+    LINK_ELEMENT *elem = anchor->last;
+    anchor->last = anchor->last->prev;
+    anchor->last->next = 0;
+    verify_list("pop", anchor);
+    return elem;
+}
+#if CPDEBUG < 0
+#define POP_ELEMENT(anchor) POP_ELEMENT(iseq, anchor)
+#endif
+
+#if 0 /* unused */
+static LINK_ELEMENT *
+SHIFT_ELEMENT(LINK_ANCHOR *anchor)
+{
+    LINK_ELEMENT *elem = anchor->anchor.next;
+    if (elem) {
+        anchor->anchor.next = elem->next;
+    }
+    return elem;
+}
+#endif
+
+#if 0 /* unused */
+static int
+LIST_SIZE(LINK_ANCHOR *anchor)
+{
+    LINK_ELEMENT *elem = anchor->anchor.next;
+    int size = 0;
+    while (elem) {
+        size += 1;
+        elem = elem->next;
+    }
+    return size;
+}
+#endif
+/* Returns 1 when the list has no element after its anchor, 0 otherwise. */
+static int
+LIST_SIZE_ZERO(LINK_ANCHOR *anchor)
+{
+    if (anchor->anchor.next == 0) {
+        return 1;
+    }
+    else {
+        return 0;
+    }
+}
+
+/*
+ * anc1: e1, e2, e3
+ * anc2: e4, e5
+ *#=>
+ * anc1: e1, e2, e3, e4, e5
+ * anc2: e4, e5
(broken)
+ *
+ * anc2's anchor and `last` are not modified, but its first element's prev
+ * now points into anc1.
+ */
+static void
+APPEND_LIST(ISEQ_ARG_DECLARE LINK_ANCHOR *anc1, LINK_ANCHOR *anc2)
+{
+    if (anc2->anchor.next) {
+        anc1->last->next = anc2->anchor.next;
+        anc2->anchor.next->prev = anc1->last;
+        anc1->last = anc2->last;
+    }
+    verify_list("append", anc1);
+}
+#if CPDEBUG < 0
+#define APPEND_LIST(anc1, anc2) APPEND_LIST(iseq, anc1, anc2)
+#endif
+
+/*
+ * Splice anc2's elements in front of anc1's:
+ * anc1: e1, e2, e3
+ * anc2: e4, e5
+ *#=>
+ * anc1: e4, e5, e1, e2, e3
+ * anc2: e4, e5 (broken)
+ *
+ * When anc1 was empty, anc1->last becomes anc2->last.
+ */
+static void
+INSERT_LIST(ISEQ_ARG_DECLARE LINK_ANCHOR *anc1, LINK_ANCHOR *anc2)
+{
+    if (anc2->anchor.next) {
+        LINK_ELEMENT *first = anc1->anchor.next;
+        anc1->anchor.next = anc2->anchor.next;
+        anc1->anchor.next->prev = &anc1->anchor;
+        anc2->last->next = first;
+        if (first) {
+            first->prev = anc2->last;
+        }
+        else {
+            anc1->last = anc2->last;
+        }
+    }
+
+    verify_list("append", anc1); /* tag string is "append" here as well */
+}
+#if CPDEBUG < 0
+#define INSERT_LIST(anc1, anc2) INSERT_LIST(iseq, anc1, anc2)
+#endif
+
+#if 0 /* unused */
+/*
+ * anc1: e1, e2, e3
+ * anc2: e4, e5
+ *#=>
+ * anc1: e4, e5
+ * anc2: e1, e2, e3
+ */
+static void
+SWAP_LIST(ISEQ_ARG_DECLARE LINK_ANCHOR *anc1, LINK_ANCHOR *anc2)
+{
+    LINK_ANCHOR tmp = *anc2;
+
+    /*
+     * Flagged as buggy by the original author.
+     * NOTE(review): presumably because the first elements' prev pointers
+     * still refer to the other anchor after the struct copy -- confirm.
+     */
+    *anc2 = *anc1;
+    *anc1 = tmp;
+
+    verify_list("swap1", anc1);
+    verify_list("swap2", anc2);
+}
+#if CPDEBUG < 0
+#define SWAP_LIST(anc1, anc2) SWAP_LIST(iseq, anc1, anc2)
+#endif
+/* Reverse the list in place and return anc; an empty list is returned unchanged. */
+static LINK_ANCHOR *
+REVERSE_LIST(ISEQ_ARG_DECLARE LINK_ANCHOR *anc)
+{
+    LINK_ELEMENT *first, *last, *elem, *e;
+    first = &anc->anchor;
+    elem = first->next;
+    last = anc->last;
+
+    if (elem != 0) {
+        anc->anchor.next = last;
+        anc->last = elem;
+    }
+    else {
+        /* null list */
+        return anc;
+    }
+    while (elem) { /* swap prev/next on every element */
+        e = elem->next;
+        elem->next = elem->prev;
+        elem->prev = e;
+        elem = e;
+    }
+
+    first->next = last;
+    last->prev = first;
+    anc->last->next = 0;
+
+    verify_list("reverse", anc);
+    return anc;
+}
+#if CPDEBUG < 0
+#define REVERSE_LIST(anc) REVERSE_LIST(iseq, anc)
+#endif
+#endif
+
+#if CPDEBUG && 0
+static void
+debug_list(ISEQ_ARG_DECLARE LINK_ANCHOR *anchor)
+{ /* debugging aid: print every element's pointers and type, then disassemble and verify the list */
+    LINK_ELEMENT *list = FIRST_ELEMENT(anchor);
+    printf("----\n");
+    printf("anch: %p, frst: %p, last: %p\n", &anchor->anchor,
+           anchor->anchor.next, anchor->last);
+    while (list) {
+        printf("curr: %p, next: %p, prev: %p, type: %d\n", list, list->next,
+               list->prev, FIX2INT(list->type));
+        list = list->next;
+    }
+    printf("----\n");
+
+    dump_disasm_list(anchor->anchor.next);
+    verify_list("debug list", anchor);
+}
+#if CPDEBUG < 0
+#define debug_list(anc) debug_list(iseq, anc)
+#endif
+#endif
+/*
+ * Create an unlinked LABEL with the next sequential label_no, sc_state 0
+ * and sp -1 (which iseq_set_sequence treats as "stack depth not yet
+ * known").  `line` is not used, and link.prev is left unset.
+ */
+static LABEL *
+new_label_body(rb_iseq_t *iseq, int line)
+{
+    LABEL *labelobj = compile_data_alloc_label(iseq);
+
+    labelobj->link.type = ISEQ_ELEMENT_LABEL;
+    labelobj->link.next = 0;
+
+    labelobj->label_no = iseq->compile_data->label_no++;
+    labelobj->sc_state = 0;
+    labelobj->sp = -1;
+    return labelobj;
+}
+/*
+ * Create an unlinked ADJUST element referring to `label` (may be 0) and
+ * carrying `line` as its line_no.  link.prev is left unset.
+ */
+static ADJUST *
+new_adjust_body(rb_iseq_t *iseq, LABEL *label, int line)
+{
+    ADJUST *adjust = compile_data_alloc_adjust(iseq);
+    adjust->link.type = ISEQ_ELEMENT_ADJUST;
+    adjust->link.next = 0;
+    adjust->label = label;
+    adjust->line_no = line;
+    return adjust;
+}
+/*
+ * Create an unlinked INSN with opcode insn_id and `argc` operands.
+ * The argv pointer is stored as-is (not copied), so it must outlive the
+ * instruction.  link.prev is left unset.
+ */
+static INSN *
+new_insn_core(rb_iseq_t *iseq, int line_no,
+              int insn_id, int argc, VALUE *argv)
+{
+    INSN *iobj = compile_data_alloc_insn(iseq);
+
+    iobj->link.type = ISEQ_ELEMENT_INSN;
+    iobj->link.next = 0;
+    iobj->insn_id = insn_id;
+    iobj->line_no = line_no;
+    iobj->operands = argv;
+    iobj->operand_size = argc;
+    iobj->sc_state = 0;
+    return iobj;
+}
+/*
+ * Variadic front end of new_insn_core: the `argc` trailing arguments are
+ * read as VALUEs and copied into arena storage.
+ */
+static INSN *
+new_insn_body(rb_iseq_t *iseq, int line_no, int insn_id, int argc, ...)
+{
+    VALUE *operands = 0; /* stays 0 when argc <= 0 */
+    va_list argv;
+    if (argc > 0) {
+        int i;
+        va_init_list(argv, argc);
+        operands = (VALUE *)compile_data_alloc(iseq, sizeof(VALUE) * argc);
+        for (i = 0; i < argc; i++) {
+            VALUE v = va_arg(argv, VALUE);
+            operands[i] = v;
+        }
+        va_end(argv);
+    }
+    return new_insn_core(iseq, line_no, insn_id, argc, operands);
+}
+/*
+ * Build a `send` instruction with five operands:
+ * [0] id, [1] argc, [2] block, [3] flag, [4] 0.
+ * NOTE(review): slot 4 is presumably the inline-cache placeholder -- confirm.
+ */
+static INSN *
+new_insn_send(rb_iseq_t *iseq, int line_no,
+              VALUE id, VALUE argc, VALUE block, VALUE flag)
+{
+    INSN *iobj = 0;
+    VALUE *operands =
+        (VALUE *)compile_data_alloc(iseq, sizeof(VALUE) * 5);
+    operands[0] = id;
+    operands[1] = argc;
+    operands[2] = block;
+    operands[3] = flag;
+    operands[4] = 0;
+    iobj = new_insn_core(iseq, line_no, BIN(send), 5, operands);
+    return iobj;
+}
+/*
+ * Compile `node` into a new instruction sequence that shares this iseq's
+ * file name and compile options, and register the result as a mark object
+ * of `iseq` before returning it.
+ */
+static VALUE
+new_child_iseq(rb_iseq_t *iseq, NODE *node,
+               VALUE name, VALUE parent, VALUE type, int line_no)
+{
+    VALUE ret;
+
+    debugs("[new_child_iseq]> ---------------------------------------\n");
+    ret = rb_iseq_new_with_opt(node, name, iseq_filename(iseq->self), INT2FIX(line_no),
+                               parent, type, iseq->compile_data->option);
+    debugs("[new_child_iseq]< ---------------------------------------\n");
+    iseq_add_mark_object(iseq, ret);
+    return ret;
+}
+/*
+ * Back end of compilation: optimize the element list, optionally unify
+ * instructions and apply stack caching (per compile options), then emit
+ * the raw sequence, the exception table and the optional-argument table,
+ * and finally translate to threaded code.  The list is dumped between
+ * steps when compile_debug > 5.
+ */
+static int
+iseq_setup(rb_iseq_t *iseq, LINK_ANCHOR *anchor)
+{
+    /* debugs("[compile step 2] (iseq_array_to_linkedlist)\n"); */
+
+    if (compile_debug > 5)
+        dump_disasm_list(FIRST_ELEMENT(anchor));
+
+    debugs("[compile step 3.1 (iseq_optimize)]\n");
+    iseq_optimize(iseq, anchor);
+
+    if (compile_debug > 5)
+        dump_disasm_list(FIRST_ELEMENT(anchor));
+
+    if (iseq->compile_data->option->instructions_unification) {
+        debugs("[compile step 3.2 (iseq_insns_unification)]\n");
+        iseq_insns_unification(iseq, anchor);
+        if (compile_debug > 5)
+            dump_disasm_list(FIRST_ELEMENT(anchor));
+    }
+
+    if (iseq->compile_data->option->stack_caching) {
+        debugs("[compile step 3.3 (iseq_set_sequence_stackcaching)]\n");
+        iseq_set_sequence_stackcaching(iseq, anchor);
+        if (compile_debug > 5)
+
dump_disasm_list(FIRST_ELEMENT(anchor));
+    }
+
+    debugs("[compile step 4.1 (iseq_set_sequence)]\n");
+    iseq_set_sequence(iseq, anchor);
+    if (compile_debug > 5)
+        dump_disasm_list(FIRST_ELEMENT(anchor));
+
+    debugs("[compile step 4.2 (iseq_set_exception_table)]\n");
+    iseq_set_exception_table(iseq);
+
+    debugs("[compile step 4.3 (set_optargs_table)] \n");
+    iseq_set_optargs_table(iseq);
+
+    debugs("[compile step 5 (iseq_translate_threaded_code)] \n");
+    rb_iseq_translate_threaded_code(iseq);
+
+    if (compile_debug > 1) {
+        VALUE str = rb_iseq_disasm(iseq->self);
+        printf("%s\n", StringValueCStr(str));
+        fflush(stdout);
+    }
+    debugs("[compile step: finish]\n");
+
+    return 0; /* results of the individual steps are not propagated */
+}
+/*
+ * Install the one-entry local table used by rescue/ensure sequences:
+ * its single local is the ID for "#$!".  local_size is table size + 1.
+ */
+static int
+iseq_set_exception_local_table(rb_iseq_t *iseq)
+{
+    ID id_dollar_bang;
+
+    CONST_ID(id_dollar_bang, "#$!");
+    /* the table holds IDs, so allocate by element type ID (was ID *) */
+    iseq->local_table = (ID *)ALLOC_N(ID, 1);
+    iseq->local_table_size = 1;
+    iseq->local_size = iseq->local_table_size + 1;
+    iseq->local_table[0] = id_dollar_bang;
+    return COMPILE_OK;
+}
+/* Linear search of iseq's own local table; returns the index of id, or -1. */
+static int
+get_dyna_var_idx_at_raw(rb_iseq_t *iseq, ID id)
+{
+    int i;
+
+    for (i = 0; i < iseq->local_table_size; i++) {
+        if (iseq->local_table[i] == id) {
+            return i;
+        }
+    }
+    return -1;
+}
+/* Index of id in iseq->local_iseq's local table; rb_bug() if it is absent. */
+static int
+get_local_var_idx(rb_iseq_t *iseq, ID id)
+{
+    int idx = get_dyna_var_idx_at_raw(iseq->local_iseq, id);
+
+    if (idx < 0) {
+        rb_bug("get_local_var_idx: %d", idx);
+    }
+
+    return idx;
+}
+/*
+ * Look id up in iseq and then in each parent_iseq in turn.
+ * On success returns the index, stores the number of parent hops in
+ * *level and the owning iseq's local_size in *ls.  rb_bug() if no
+ * enclosing iseq defines id.
+ */
+static int
+get_dyna_var_idx(rb_iseq_t *iseq, ID id, int *level, int *ls)
+{
+    int lv = 0, idx = -1;
+
+    while (iseq) {
+        idx = get_dyna_var_idx_at_raw(iseq, id);
+        if (idx >= 0) {
+            break;
+        }
+        iseq = iseq->parent_iseq;
+        lv++;
+    }
+
+    if (idx < 0) {
+        rb_bug("get_dyna_var_idx: -1");
+    }
+
+    *level = lv;
+    *ls = iseq->local_size;
+    return idx;
+}
+/*
+ * Derive the argument layout of iseq (argc, arg_opts/arg_opt_table,
+ * arg_rest, arg_post_start/len, arg_block, arg_size, arg_simple) from a
+ * NODE_ARGS tree, and compile the default-value expressions and argument
+ * initializers into `optargs`.  With node_args == 0 only arg_simple = 1
+ * is set.  Always returns COMPILE_OK.
+ */
+static int
+iseq_set_arguments(rb_iseq_t *iseq, LINK_ANCHOR *optargs, NODE *node_args)
+{
+    debugs("iseq_set_arguments: %s\n", node_args ? "" : "0");
+
+    if (node_args) {
+        NODE *node_aux = node_args->nd_next;
+        NODE *node_opt = node_args->nd_opt;
+        ID rest_id = 0;
+        int last_comma = 0;
+        ID block_id = 0;
+        NODE *node_init = 0;
+
+        if (nd_type(node_args) != NODE_ARGS) {
+            rb_bug("iseq_set_arguments: NODE_ARGS is expected, but %s",
+                   ruby_node_name(nd_type(node_args)));
+        }
+
+        /*
+         * new argument information:
+         *   NODE_ARGS     [m: int,  o: NODE_OPT_ARG, ->]
+         *   NODE_ARGS_AUX [r: ID,   b: ID,           ->]
+         *   NODE_ARGS_AUX [Pst: id, Plen: int,       init: NODE*]
+         *  optarg information:
+         *   NODE_OPT_ARGS [idx,     expr,            next ->]
+         *  init arg:
+         *   NODE_AND(m_init, p_init)
+         *  if "r" is 1, it means a "{|x,|}" type block parameter.
+         */
+
+        iseq->argc = node_args->nd_frml;
+        debugs(" - argc: %d\n", iseq->argc);
+
+        if (node_aux) {
+            rest_id = node_aux->nd_rest;
+            if (rest_id == 1) {
+                /* marker value, not a real ID: trailing comma in block params */
+                last_comma = 1;
+                rest_id = 0;
+            }
+            block_id = (ID)node_aux->nd_body;
+            node_aux = node_aux->nd_next;
+
+            if (node_aux) {
+                ID post_start_id = node_aux->nd_pid;
+                iseq->arg_post_start = get_dyna_var_idx_at_raw(iseq, post_start_id);
+                iseq->arg_post_len = node_aux->nd_plen;
+                node_init = node_aux->nd_next;
+            }
+        }
+
+        if (node_opt) {
+            NODE *node = node_opt;
+            LABEL *label;
+            VALUE labels = rb_ary_tmp_new(1);
+            int i = 0, j;
+
+            while (node) {
+                label = NEW_LABEL(nd_line(node));
+                /* LABEL pointers are stored in the array with bit 0 set */
+                rb_ary_push(labels, (VALUE)label | 1);
+                ADD_LABEL(optargs, label);
+                COMPILE_POPED(optargs, "optarg", node->nd_body);
+                node = node->nd_next;
+                i += 1;
+            }
+
+            /* last label */
+            label = NEW_LABEL(nd_line(node_args));
+            rb_ary_push(labels, (VALUE)label | 1);
+            ADD_LABEL(optargs, label);
+            i += 1;
+
+            /* arg_opts counts the optional arguments plus the final label */
+            iseq->arg_opts = i;
+            iseq->arg_opt_table = ALLOC_N(VALUE, i);
+            MEMCPY(iseq->arg_opt_table, RARRAY_PTR(labels), VALUE, i);
+            for (j = 0; j < i; j++) {
+                iseq->arg_opt_table[j] &= ~1; /* strip the tag bit again */
+            }
+            rb_ary_clear(labels);
+        }
+        else {
+            iseq->arg_opts = 0;
+        }
+
+        if (node_init) {
+            if (node_init->nd_1st) { /* m_init */
+                COMPILE_POPED(optargs, "init arguments (m)", node_init->nd_1st);
+            }
+            if (node_init->nd_2nd) { /* p_init */
+                COMPILE_POPED(optargs, "init arguments (p)", node_init->nd_2nd);
+            }
+        }
+
+        if (rest_id) {
+            iseq->arg_rest = get_dyna_var_idx_at_raw(iseq, rest_id);
+
+            if (iseq->arg_rest == -1) {
+                rb_bug("arg_rest: -1");
+            }
+
+            if (iseq->arg_post_start == 0) {
+                iseq->arg_post_start = iseq->arg_rest + 1;
+            }
+        }
+
+        if (block_id) {
+            iseq->arg_block = get_dyna_var_idx_at_raw(iseq, block_id);
+        }
+
+        if (iseq->arg_opts != 0 || iseq->arg_post_len != 0 ||
+            iseq->arg_rest != -1 || iseq->arg_block != -1) {
+            iseq->arg_simple = 0;
+
+            /* set arg_size: size of arguments */
+            if (iseq->arg_block != -1) {
+                iseq->arg_size = iseq->arg_block + 1;
+            }
+            else if (iseq->arg_post_len) {
+                iseq->arg_size = iseq->arg_post_start + iseq->arg_post_len;
+            }
+            else if (iseq->arg_rest != -1) {
+                iseq->arg_size = iseq->arg_rest + 1;
+            }
+            else if (iseq->arg_opts) {
+                iseq->arg_size = iseq->argc + iseq->arg_opts - 1;
+            }
+            else {
+                iseq->arg_size = iseq->argc;
+            }
+        }
+        else {
+            iseq->arg_simple = 1;
+            iseq->arg_size = iseq->argc;
+        }
+
+        if (iseq->type == ISEQ_TYPE_BLOCK) {
+            if (iseq->arg_opts == 0 && iseq->arg_post_len == 0 && iseq->arg_rest == -1) {
+                if (iseq->argc == 1 && last_comma == 0) {
+                    /* {|a|} */
+                    iseq->arg_simple |= 0x02;
+                }
+            }
+        }
+    }
+    else {
+        iseq->arg_simple = 1;
+    }
+
+    return COMPILE_OK;
+}
+/*
+ * Install the local variable table of iseq.
+ * tbl is either 0 (no locals) or an array whose first entry is the number
+ * of IDs that follow.  The IDs are copied into a freshly allocated
+ * iseq->local_table (left untouched when the count is 0).  local_size is
+ * the count plus one extra slot.
+ */
+static int
+iseq_set_local_table(rb_iseq_t *iseq, ID *tbl)
+{
+    int size;
+
+    if (tbl) {
+        size = *tbl;
+        tbl++;
+    }
+    else {
+        size = 0;
+    }
+
+    if (size > 0) {
+        /* allocate and copy by element type ID (was ID *) */
+        iseq->local_table = (ID *)ALLOC_N(ID, size);
+        MEMCPY(iseq->local_table, tbl, ID, size);
+    }
+
+    iseq->local_size = iseq->local_table_size = size;
+    iseq->local_size += 1;
+    /*
+      if (lfp == dfp ) { // top, class, method
+          dfp[-1]: svar
+      else {             // block
+          dfp[-1]: cref
+      }
+     */
+
+    debugs("iseq_set_local_table: %d, %d\n", iseq->local_size, iseq->local_table_size);
+    return COMPILE_OK;
+}
+/*
+ * st_hash_type comparison callback for case/when dispatch hashes:
+ * returns 0 when val and lit are to be treated as the same key.
+ */
+static int
+cdhash_cmp(VALUE val, VALUE lit)
+{
+    if (val == lit) return 0;
+    if
(SPECIAL_CONST_P(lit)) {
+        return val != lit; /* special constants compare by identity only */
+    }
+    if (SPECIAL_CONST_P(val) || BUILTIN_TYPE(val) != BUILTIN_TYPE(lit)) {
+        return -1;
+    }
+    if (BUILTIN_TYPE(lit) == T_STRING) {
+        return rb_str_hash_cmp(lit, val);
+    }
+    return !rb_eql(lit, val);
+}
+/*
+ * st_hash_type hash callback matching cdhash_cmp: special constants hash
+ * to their own bits, strings via rb_str_hash, everything else via rb_hash.
+ */
+static int
+cdhash_hash(VALUE a)
+{
+    if (SPECIAL_CONST_P(a)) return (int)a;
+    if (TYPE(a) == T_STRING) return rb_str_hash(a);
+    {
+        VALUE hval = rb_hash(a);
+        return (int)FIX2LONG(hval);
+    }
+}
+
+static const struct st_hash_type cdhash_type = {
+    cdhash_cmp,
+    cdhash_hash,
+};
+
+/**
+  ruby insn object array -> raw instruction sequence
+
+  Two passes over the element list.  The first assigns every LABEL its
+  position and counts the words (pos) and table entries (k) needed.  The
+  second writes opcodes and operands into a freshly allocated VALUE array,
+  fills the per-instruction info table (line_no, position, sp) and tracks
+  the maximum stack depth.  On success the results are stored in
+  iseq->iseq, iseq_size, insn_info_table, insn_info_size and stack_max and
+  COMPILE_OK is returned.  On an operand-count mismatch or unknown operand
+  type both arrays are freed and 0 is returned.
+ */
+static int
+iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *anchor)
+{
+    LABEL *lobj;
+    INSN *iobj;
+    struct iseq_insn_info_entry *insn_info_table;
+    LINK_ELEMENT *list;
+    VALUE *generated_iseq;
+
+    int k, pos, sp, stack_max = 0, line = 0;
+
+    /* set label position */
+    list = FIRST_ELEMENT(anchor);
+    k = pos = 0;
+    while (list) {
+        switch (list->type) {
+          case ISEQ_ELEMENT_INSN:
+            {
+                iobj = (INSN *)list;
+                line = iobj->line_no;
+                pos += insn_data_length(iobj);
+                k++;
+                break;
+            }
+          case ISEQ_ELEMENT_LABEL:
+            {
+                lobj = (LABEL *)list;
+                lobj->position = pos;
+                lobj->set = Qtrue;
+                break;
+            }
+          case ISEQ_ELEMENT_NONE:
+            {
+                /* ignore */
+                break;
+            }
+          case ISEQ_ELEMENT_ADJUST:
+            {
+                ADJUST *adjust = (ADJUST *)list;
+                if (adjust->line_no != -1) {
+                    pos += 2 /* insn + 1 operand */;
+                    k++;
+                }
+                break;
+            }
+          default:
+            dump_disasm_list(FIRST_ELEMENT(anchor));
+            dump_disasm_list(list);
+            rb_compile_error(RSTRING_PTR(iseq->filename), line,
+                             "error: set_sequence");
+            break;
+        }
+        list = list->next;
+    }
+
+    /* make instruction sequence */
+    generated_iseq = ALLOC_N(VALUE, pos);
+    insn_info_table = ALLOC_N(struct iseq_insn_info_entry, k);
+
+    list = FIRST_ELEMENT(anchor);
+    k = pos = sp = 0;
+
+    while (list) {
+        switch (list->type) {
+          case ISEQ_ELEMENT_INSN:
+            {
+                int j, len, insn;
+                const char *types;
+                VALUE *operands;
+
+                iobj = (INSN *)list;
+
+                /* update sp */
+                sp = calc_sp_depth(sp, iobj);
+                if (sp > stack_max) {
+                    stack_max = sp;
+                }
+
+                /* fprintf(stderr, "insn: %-16s, sp: %d\n", insn_name(iobj->insn_id), sp); */
+
+                operands = iobj->operands;
+                insn = iobj->insn_id;
+                generated_iseq[pos] = insn;
+                types = insn_op_types(insn);
+                len = insn_len(insn);
+
+                /* operand check */
+                if (iobj->operand_size != len - 1) {
+                    dump_disasm_list(list);
+                    rb_compile_error(RSTRING_PTR(iseq->filename), iobj->line_no,
+                                     "operand size miss! (%d for %d)",
+                                     iobj->operand_size, len - 1);
+                    xfree(generated_iseq);
+                    xfree(insn_info_table);
+                    return 0;
+                }
+
+                for (j = 0; types[j]; j++) { /* one type character per operand */
+                    char type = types[j];
+                    /* printf("--> [%c - (%d-%d)]\n", type, k, j); */
+                    switch (type) {
+                      case TS_OFFSET:
+                        {
+                            /* label(destination position) */
+                            lobj = (LABEL *)operands[j];
+                            if (lobj->set != Qtrue) {
+                                rb_compile_error(RSTRING_PTR(iseq->filename), iobj->line_no,
+                                                 "unknown label");
+                            }
+                            if (lobj->sp == -1) {
+                                lobj->sp = sp;
+                            }
+                            generated_iseq[pos + 1 + j] =
+                                lobj->position - (pos + len); /* relative to the end of this insn */
+                            break;
+                        }
+                      case TS_CDHASH:
+                        {
+                            /*
+                             * [obj, label, ...]
+                             * A flat array of (key, tagged LABEL pointer)
+                             * pairs is turned into a hash from key to
+                             * relative jump offset.
+                             */
+                            int i;
+                            VALUE lits = operands[j];
+                            VALUE map = rb_hash_new();
+                            RHASH_TBL(map)->type = &cdhash_type;
+
+                            for (i=0; i < RARRAY_LEN(lits); i+=2) {
+                                VALUE obj = rb_ary_entry(lits, i);
+                                VALUE lv = rb_ary_entry(lits, i+1);
+                                lobj = (LABEL *)(lv & ~1);
+
+                                if (lobj->set != Qtrue) {
+                                    rb_compile_error(RSTRING_PTR(iseq->filename), iobj->line_no,
+                                                     "unknown label");
+                                }
+                                if (!st_lookup(rb_hash_tbl(map), obj, 0)) {
+                                    rb_hash_aset(map, obj, INT2FIX(lobj->position - (pos+len)));
+                                }
+                                else { /* the first occurrence of a key wins */
+                                    rb_compile_warning(RSTRING_PTR(iseq->filename), iobj->line_no,
+                                                       "duplicated when clause is ignored");
+                                }
+                            }
+                            generated_iseq[pos + 1 + j] = map;
+                            iseq_add_mark_object(iseq, map);
+                            break;
+                        }
+                      case TS_LINDEX:
+                      case TS_DINDEX:
+                      case TS_NUM:	/* ulong */
+                        generated_iseq[pos + 1 + j] = FIX2INT(operands[j]);
+                        break;
+                      case TS_ISEQ:	/* iseq */
+                        {
+                            VALUE v = operands[j];
+                            rb_iseq_t *block = 0;
+                            if (v) {
+                                GetISeqPtr(v, block);
+                            }
+                            generated_iseq[pos + 1 + j] = (VALUE)block;
+                            break;
+                        }
+                      case TS_VALUE:	/* VALUE */
+                        {
+                            VALUE v = operands[j];
+                            generated_iseq[pos + 1 + j] = v;
+                            /* to mark ruby object */
+                            iseq_add_mark_object(iseq, v);
+                            break;
+                        }
+                      case TS_IC: /* inline cache */
+                        {
+                            VALUE v = (VALUE)NEW_INLINE_CACHE_ENTRY();
+                            generated_iseq[pos + 1 + j] = v;
+                            iseq_add_mark_object(iseq, v);
+                            break;
+                        }
+                      case TS_ID: /* ID */
+                        generated_iseq[pos + 1 + j] = SYM2ID(operands[j]);
+                        break;
+                      case TS_GENTRY:
+                        {
+                            struct global_entry *entry =
+                                (struct global_entry *)(operands[j] & (~1));
+                            generated_iseq[pos + 1 + j] = (VALUE)entry;
+                        }
+                        break;
+                      default:
+                        rb_compile_error(RSTRING_PTR(iseq->filename), iobj->line_no,
+                                         "unknown operand type: %c", type);
+                        xfree(generated_iseq);
+                        xfree(insn_info_table);
+                        return 0;
+                    }
+                }
+                insn_info_table[k].line_no = iobj->line_no;
+                insn_info_table[k].position = pos;
+                insn_info_table[k].sp = sp;
+                pos += len;
+                k++;
+                break;
+            }
+          case ISEQ_ELEMENT_LABEL:
+            {
+                lobj = (LABEL *)list;
+                if (lobj->sp == -1) { /* first time seen: record the current depth */
+                    lobj->sp = sp;
+                }
+                else { /* already fixed by an earlier jump: adopt that depth */
+                    sp = lobj->sp;
+                }
+                break;
+            }
+          case ISEQ_ELEMENT_ADJUST:
+            {
+                ADJUST *adjust = (ADJUST *)list;
+                int orig_sp = sp;
+
+                if (adjust->label) {
+                    sp = adjust->label->sp;
+                }
+                else {
+                    sp = 0;
+                }
+
+                if (adjust->line_no != -1) { /* -1: nothing is emitted, matching pass one */
+                    if (orig_sp - sp > 0) {
+                        insn_info_table[k].line_no = adjust->line_no;
+                        insn_info_table[k].position = pos;
+                        insn_info_table[k].sp = sp;
+                        k++;
+                        generated_iseq[pos++] = BIN(adjuststack);
+                        generated_iseq[pos++] = orig_sp - sp;
+                    }
+                    else if (orig_sp - sp == 0) {
+                        /* jump to next insn */
+                        insn_info_table[k].line_no = adjust->line_no;
+                        insn_info_table[k].position = pos;
+                        insn_info_table[k].sp = sp;
+                        k++;
+                        generated_iseq[pos++] = BIN(jump);
+                        generated_iseq[pos++] = 0;
+                    }
+                    else {
+                        rb_bug("iseq_set_sequence: adjust bug");
+                    }
+                }
+                break;
+            }
+          default:
+            /* ignore */
+            break;
+        }
+        list = list->next;
+    }
+
+#if 0 /* XXX */
+    /* this check need dead code elimination */
+    if (sp != 1) {
+        rb_bug("SP is not 0 on %s (%d)\n", RSTRING_PTR(iseq->name), sp);
+    }
+#endif
+
+    iseq->iseq = (void *)generated_iseq;
+    iseq->iseq_size = pos;
+    iseq->insn_info_table = insn_info_table;
+    iseq->insn_info_size = k;
+    iseq->stack_max = stack_max;
+
+    return COMPILE_OK;
+}
+/* Position assigned to the label by iseq_set_sequence. */
+static int
+label_get_position(LABEL *lobj)
+{
+    return lobj->position;
+}
+/* Stack depth recorded for the label (-1 if none was recorded). */
+static int
+label_get_sp(LABEL *lobj)
+{
+    return lobj->sp;
+}
+/*
+ * Convert compile_data->catch_table_ary (an array of 5-element arrays:
+ * type, start label, end label, iseq, continuation label) into
+ * iseq->catch_table, resolving labels to positions.
+ */
+static int
+iseq_set_exception_table(rb_iseq_t *iseq)
+{
+    VALUE *tptr, *ptr;
+    int tlen, i;
+    struct iseq_catch_table_entry *entry;
+
+    tlen = RARRAY_LEN(iseq->compile_data->catch_table_ary);
+    tptr = RARRAY_PTR(iseq->compile_data->catch_table_ary);
+
+    iseq->catch_table = tlen ?
ALLOC_N(struct iseq_catch_table_entry, tlen) : 0;
+    iseq->catch_table_size = tlen;
+
+    for (i = 0; i < tlen; i++) {
+        ptr = RARRAY_PTR(tptr[i]);
+        entry = &iseq->catch_table[i];
+        entry->type = ptr[0] & 0xffff;
+        entry->start = label_get_position((LABEL *)(ptr[1] & ~1)); /* labels are stored with bit 0 set */
+        entry->end = label_get_position((LABEL *)(ptr[2] & ~1));
+        entry->iseq = ptr[3];
+
+        /* register iseq as mark object */
+        if (entry->iseq != 0) {
+            iseq_add_mark_object(iseq, entry->iseq);
+        }
+
+        /* stack depth */
+        if (ptr[4]) {
+            LABEL *lobj = (LABEL *)(ptr[4] & ~1);
+            entry->cont = label_get_position(lobj);
+            entry->sp = label_get_sp(lobj);
+
+            /* TODO: Dirty Hack! Fix me */
+            if (entry->type == CATCH_TYPE_RESCUE ||
+                entry->type == CATCH_TYPE_BREAK ||
+                entry->type == CATCH_TYPE_NEXT) {
+                entry->sp--;
+            }
+        }
+        else {
+            entry->cont = 0; /* entry->sp is not assigned on this branch */
+        }
+    }
+
+    iseq->compile_data->catch_table_ary = 0;	/* drop the reference to the temporary array */
+    return COMPILE_OK;
+}
+
+/*
+ * set optional argument table
+ *   def foo(a, b=expr1, c=expr2)
+ *   =>
+ *    b:
+ *      expr1
+ *    c:
+ *      expr2
+ *
+ * Each arg_opt_table slot holds a LABEL pointer on entry and is replaced
+ * in place by that label's position.
+ */
+static int
+iseq_set_optargs_table(rb_iseq_t *iseq)
+{
+    int i;
+
+    if (iseq->arg_opts != 0) {
+        for (i = 0; i < iseq->arg_opts; i++) {
+            iseq->arg_opt_table[i] =
+                label_get_position((LABEL *)iseq->arg_opt_table[i]);
+        }
+    }
+    return COMPILE_OK;
+}
+/*
+ * First INSN or ADJUST element after the label that is operand 0 of iobj,
+ * or 0 when the label is followed by neither.
+ */
+static LINK_ELEMENT *
+get_destination_insn(INSN *iobj)
+{
+    LABEL *lobj = (LABEL *)OPERAND_AT(iobj, 0);
+    LINK_ELEMENT *list;
+
+    list = lobj->link.next;
+    while (list) {
+        if (list->type == ISEQ_ELEMENT_INSN || list->type == ISEQ_ELEMENT_ADJUST) {
+            break;
+        }
+        list = list->next;
+    }
+    return list;
+}
+/* Next INSN or ADJUST element after iobj, skipping other element types; 0 if none. */
+static LINK_ELEMENT *
+get_next_insn(INSN *iobj)
+{
+    LINK_ELEMENT *list = iobj->link.next;
+
+    while (list) {
+        if (list->type == ISEQ_ELEMENT_INSN || list->type == ISEQ_ELEMENT_ADJUST) {
+            return list;
+        }
+        list = list->next;
+    }
+    return 0;
+}
+/* Nearest INSN or ADJUST element before iobj, skipping other element types; 0 if none. */
+static LINK_ELEMENT *
+get_prev_insn(INSN *iobj)
+{
+    LINK_ELEMENT *list = iobj->link.prev;
+
+    while (list) {
+        if (list->type == ISEQ_ELEMENT_INSN ||
list->type == ISEQ_ELEMENT_ADJUST) {
+            return list;
+        }
+        list = list->prev;
+    }
+    return 0;
+}
+/*
+ * Peephole optimizations applied to the single instruction `list`:
+ * jump threading and removal, jump-to-leave replacement, branch
+ * inversion around a jump, branch retargeting and (when do_tailcallopt
+ * is set) tail-call flagging of a send directly before leave.
+ * The results of get_destination_insn()/get_prev_insn() are cast to
+ * INSN * although those helpers may also return ADJUST elements, and the
+ * destination is dereferenced without a null check.
+ */
+static int
+iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcallopt)
+{
+    INSN *iobj = (INSN *)list;
+  again:
+    if (iobj->insn_id == BIN(jump)) {
+        INSN *niobj, *diobj, *piobj;
+        /*
+         * useless jump elimination:
+         *     jump LABEL1
+         *     ...
+         *   LABEL1:
+         *     jump LABEL2
+         *
+         * => in this case, first jump instruction should jump to
+         *    LABEL2 directly
+         */
+        diobj = (INSN *)get_destination_insn(iobj);
+        niobj = (INSN *)get_next_insn(iobj);
+
+        if (diobj == niobj) {
+            /*
+             *   jump LABEL
+             *  LABEL:
+             * =>
+             *   LABEL:
+             */
+            REMOVE_ELEM(&iobj->link);
+        }
+        else if (iobj != diobj && diobj->insn_id == BIN(jump)) {
+            if (OPERAND_AT(iobj, 0) != OPERAND_AT(diobj, 0)) {
+                OPERAND_AT(iobj, 0) = OPERAND_AT(diobj, 0);
+                goto again; /* the new target may itself be a jump */
+            }
+        }
+        else if (diobj->insn_id == BIN(leave)) {
+            /*
+             *  jump LABEL
+             *  ...
+             * LABEL:
+             *  leave
+             * =>
+             *  leave
+             *  ...
+             * LABEL:
+             *  leave
+             */
+            INSN *eiobj = new_insn_core(iseq, iobj->line_no, BIN(leave),
+                                        diobj->operand_size, diobj->operands);
+            INSN *popiobj = new_insn_core(iseq, iobj->line_no,
+                                          BIN(pop), 0, 0);
+            /* replace the jump with leave, followed by a pop */
+            REPLACE_ELEM((LINK_ELEMENT *)iobj, (LINK_ELEMENT *)eiobj);
+            INSERT_ELEM_NEXT((LINK_ELEMENT *)eiobj, (LINK_ELEMENT *)popiobj);
+            iobj = popiobj; /* the checks below now see the pop, not the original jump */
+        }
+        /*
+         * useless jump elimination (if/unless destination):
+         *   if   L1
+         *   jump L2
+         * L1:
+         *   ...
+         * L2:
+         *
+         * ==>
+         *   unless L2
+         * L1:
+         *   ...
+         * L2:
+         */
+        else if ((piobj = (INSN *)get_prev_insn(iobj)) != 0 &&
+                 (piobj->insn_id == BIN(branchif) ||
+                  piobj->insn_id == BIN(branchunless))) {
+            if (niobj == (INSN *)get_destination_insn(piobj)) {
+                piobj->insn_id = (piobj->insn_id == BIN(branchif))
+                  ? BIN(branchunless) : BIN(branchif);
+                OPERAND_AT(piobj, 0) = OPERAND_AT(iobj, 0);
+                REMOVE_ELEM(&iobj->link);
+            }
+        }
+    }
+
+    if (iobj->insn_id == BIN(branchif) ||
+        iobj->insn_id == BIN(branchunless)) {
+        /*
+         *   if L1
+         *   ...
+         * L1:
+         *   jump L2
+         * =>
+         *   if L2
+         */
+        INSN *nobj = (INSN *)get_destination_insn(iobj);
+        if (nobj->insn_id == BIN(jump)) {
+            OPERAND_AT(iobj, 0) = OPERAND_AT(nobj, 0);
+        }
+    }
+
+    if (do_tailcallopt && iobj->insn_id == BIN(leave)) {
+        /*
+         *  send ...
+         *  leave
+         * =>
+         *  send ..., ... | VM_CALL_TAILCALL_BIT, ...
+         *  leave # unreachable
+         */
+        /* looks up the predecessor of the original element `list`, not of iobj */
+        INSN *piobj = (INSN *)get_prev_insn((INSN *)list);
+
+        if (piobj->insn_id == BIN(send) &&
+            piobj->operands[2] == 0 /* block */
+            ) {
+            piobj->operands[3] = INT2FIX(FIX2INT(piobj->operands[3]) | VM_CALL_TAILCALL_BIT);
+        }
+    }
+    return COMPILE_OK;
+}
+/*
+ * Turn iobj into the operand-less specialized instruction insn_id.
+ * The operands array itself is kept; only operand_size is reset to 0.
+ */
+static int
+insn_set_specialized_instruction(INSN *iobj, int insn_id)
+{
+    iobj->insn_id = insn_id;
+    iobj->operand_size = 0;
+    return COMPILE_OK;
+}
+/*
+ * Turn iobj into the specialized instruction insn_id that carries n
+ * operands, each initialised to Qnil.  The existing operands array is
+ * reused, so n must not exceed its capacity.
+ */
+static int
+insn_set_specialized_instruction_with_ic(INSN *iobj, int insn_id, int n)
+{
+    int i;
+    iobj->insn_id = insn_id;
+    iobj->operand_size = n;
+
+    /* max of n is 4 */
+    for (i=0; i<n; i++) {
+        iobj->operands[i] = Qnil;
+    }
+
+    return COMPILE_OK;
+}
+
+/*
+ * Replace a plain `send` (no block, flag 0) of a well-known method with
+ * its specialized opt_* instruction, chosen by method id and argument
+ * count.
+ */
+static int
+iseq_specialized_instruction(rb_iseq_t *iseq, INSN *iobj)
+{
+    if (iobj->insn_id == BIN(send)) {
+        ID mid = SYM2ID(OPERAND_AT(iobj, 0));
+        int argc = FIX2INT(OPERAND_AT(iobj, 1));
+        VALUE block = OPERAND_AT(iobj, 2);
+        VALUE flag = OPERAND_AT(iobj, 3);
+
+        /* TODO: should be more sophisticated search */
+        if (block == 0 && flag == INT2FIX(0)) {
+            if (argc == 0) {
+                if (mid == idLength) {
+                    insn_set_specialized_instruction(iobj, BIN(opt_length));
+                }
+                else if (mid == idSucc) {
+                    insn_set_specialized_instruction(iobj, BIN(opt_succ));
+                }
+                else if (mid == idNot) {
+                    insn_set_specialized_instruction_with_ic(iobj, BIN(opt_not), 1);
+                }
+            }
+            else if (argc == 1) {
+                if (0) {
+                }
+                else if (mid == idPLUS) {
+                    insn_set_specialized_instruction(iobj, BIN(opt_plus));
+                }
+                else if (mid == idMINUS) {
+                    insn_set_specialized_instruction(iobj, BIN(opt_minus));
+                }
+                else if (mid == idMULT) {
+                    insn_set_specialized_instruction(iobj, BIN(opt_mult));
+                }
+                else if (mid == idDIV) {
+
insn_set_specialized_instruction(iobj, BIN(opt_div));
+                }
+                else if (mid == idMOD) {
+                    insn_set_specialized_instruction(iobj, BIN(opt_mod));
+                }
+                else if (mid == idEq) {
+                    insn_set_specialized_instruction_with_ic(iobj, BIN(opt_eq), 1);
+                }
+                else if (mid == idNeq) {
+                    insn_set_specialized_instruction_with_ic(iobj, BIN(opt_neq), 2);
+                }
+                else if (mid == idLT) {
+                    insn_set_specialized_instruction(iobj, BIN(opt_lt));
+                }
+                else if (mid == idLE) {
+                    insn_set_specialized_instruction(iobj, BIN(opt_le));
+                }
+                else if (mid == idGT) {
+                    insn_set_specialized_instruction(iobj, BIN(opt_gt));
+                }
+                else if (mid == idGE) {
+                    insn_set_specialized_instruction(iobj, BIN(opt_ge));
+                }
+                else if (mid == idLTLT) {
+                    insn_set_specialized_instruction(iobj, BIN(opt_ltlt));
+                }
+                else if (mid == idAREF) {
+                    insn_set_specialized_instruction(iobj, BIN(opt_aref));
+                }
+            }
+        }
+
+        if (argc > 0) { /* evaluated for every send, independent of the specialization above */
+            if (mid == idSend || mid == id__send__ ) {
+                OPERAND_AT(iobj, 3) |= INT2FIX(VM_CALL_SEND_BIT);
+            }
+        }
+    }
+    return COMPILE_OK;
+}
+/*
+ * Run the enabled per-instruction optimizations (peephole, specialized
+ * instructions, operand unification) over every INSN element of the list,
+ * in that order for each instruction.
+ */
+static int
+iseq_optimize(rb_iseq_t *iseq, LINK_ANCHOR *anchor)
+{
+    LINK_ELEMENT *list;
+    const int do_peepholeopt = iseq->compile_data->option->peephole_optimization;
+    const int do_tailcallopt = iseq->compile_data->option->tailcall_optimization;
+    const int do_si = iseq->compile_data->option->specialized_instruction;
+    const int do_ou = iseq->compile_data->option->operands_unification;
+    list = FIRST_ELEMENT(anchor);
+
+    while (list) {
+        if (list->type == ISEQ_ELEMENT_INSN) {
+            if (do_peepholeopt) {
+                iseq_peephole_optimize(iseq, list, do_tailcallopt);
+            }
+            if (do_si) {
+                iseq_specialized_instruction(iseq, (INSN *)list);
+            }
+            if (do_ou) {
+                insn_operands_unification((INSN *)list);
+            }
+        }
+        list = list->next;
+    }
+    return COMPILE_OK;
+}
+
+#if OPT_INSTRUCTIONS_UNIFICATION
+static INSN *
+new_unified_insn(rb_iseq_t *iseq,
+                 int insn_id, int size, LINK_ELEMENT *seq_list)
+{ /* build one insn_id instruction whose operands are those of the `size` elements from seq_list, concatenated */
+    INSN *iobj = 0;
+    LINK_ELEMENT *list = seq_list;
+    int i, argc = 0;
+    VALUE *operands = 0, *ptr = 0;
+
+
+    /* count argc */
+    for (i = 0; i < size; i++) {
+        iobj = (INSN *)list;
+        argc += iobj->operand_size;
+        list = list->next;
+    }
+
+    if (argc > 0) {
+        ptr = operands =
+            (VALUE *)compile_data_alloc(iseq, sizeof(VALUE) * argc);
+    }
+
+    /* copy operands */
+    list = seq_list;
+    for (i = 0; i < size; i++) {
+        iobj = (INSN *)list;
+        MEMCPY(ptr, iobj->operands, VALUE, iobj->operand_size);
+        ptr += iobj->operand_size;
+        list = list->next;
+    }
+
+    return new_insn_core(iseq, iobj->line_no, insn_id, argc, operands); /* line_no of the last source insn */
+}
+#endif
+
+/*
+ * Replace runs of instructions listed in unified_insns_data with a single
+ * unified instruction.  Compiles to a no-op unless
+ * OPT_INSTRUCTIONS_UNIFICATION is set.
+ *
+ * This scheme could perform better if the optimization were combined
+ * with label address resolving.
+ * That is future work (if compile time becomes a bottleneck).
+ *
+ * NOTE(review): `li` is dereferenced in the match loop without a null
+ * check; confirm a pattern can never extend past the end of the list.
+ */
+static int
+iseq_insns_unification(rb_iseq_t *iseq, LINK_ANCHOR *anchor)
+{
+#if OPT_INSTRUCTIONS_UNIFICATION
+    LINK_ELEMENT *list;
+    INSN *iobj, *niobj;
+    int id, j, k;
+
+    list = FIRST_ELEMENT(anchor);
+    while (list) {
+        if (list->type == ISEQ_ELEMENT_INSN) {
+            iobj = (INSN *)list;
+            id = iobj->insn_id;
+            if (unified_insns_data[id] != 0) {
+                const int *const *entry = unified_insns_data[id];
+                for (j = 1; j < (int)entry[0]; j++) { /* entry[0] is read as the loop bound */
+                    const int *unified = entry[j];
+                    LINK_ELEMENT *li = list->next;
+                    for (k = 2; k < unified[1]; k++) {
+                        if (li->type != ISEQ_ELEMENT_INSN ||
+                            ((INSN *)li)->insn_id != unified[k]) {
+                            goto miss;
+                        }
+                        li = li->next;
+                    }
+                    /* matched */
+                    niobj =
+                        new_unified_insn(iseq, unified[0], unified[1] - 1,
+                                         list);
+
+                    /* insert to list */
+                    niobj->link.prev = (LINK_ELEMENT *)iobj->link.prev;
+                    niobj->link.next = li;
+                    if (li) {
+                        li->prev = (LINK_ELEMENT *)niobj;
+                    }
+
+                    list->prev->next = (LINK_ELEMENT *)niobj;
+                    list = (LINK_ELEMENT *)niobj;
+                    break;
+                  miss:;
+                }
+            }
+        }
+        list = list->next;
+    }
+#endif
+    return COMPILE_OK;
+}
+
+#if OPT_STACK_CACHING
+
+#define SC_INSN(insn, stat) sc_insn_info[(insn)][(stat)]
+#define SC_NEXT(insn) sc_insn_next[insn]
+
+#include "opt_sc.inc"
+/*
+ * Rewrite iobj to its stack-caching variant for `state` and return the
+ * state that holds after it.  For jump/branch instructions the target
+ * label's sc_state is set, or checked against an existing one.  Returns 0
+ * after reporting a compile error when the states disagree.
+ */
+static int
+insn_set_sc_state(rb_iseq_t *iseq, INSN *iobj, int state)
+{
+
int nstate; + int insn_id; + + insn_id = iobj->insn_id; + iobj->insn_id = SC_INSN(insn_id, state); + nstate = SC_NEXT(iobj->insn_id); + + if (insn_id == BIN(jump) || + insn_id == BIN(branchif) || insn_id == BIN(branchunless)) { + LABEL *lobj = (LABEL *)OPERAND_AT(iobj, 0); + + if (lobj->sc_state != 0) { + if (lobj->sc_state != nstate) { + dump_disasm_list((LINK_ELEMENT *)iobj); + dump_disasm_list((LINK_ELEMENT *)lobj); + printf("\n-- %d, %d\n", lobj->sc_state, nstate); + rb_compile_error(RSTRING_PTR(iseq->filename), iobj->line_no, + "insn_set_sc_state error\n"); + return 0; + } + } + else { + lobj->sc_state = nstate; + } + if (insn_id == BIN(jump)) { + nstate = SCS_XX; + } + } + else if (insn_id == BIN(leave)) { + nstate = SCS_XX; + } + + return nstate; +} + +static int +label_set_sc_state(LABEL *lobj, int state) +{ + if (lobj->sc_state != 0) { + if (lobj->sc_state != state) { + state = lobj->sc_state; + } + } + else { + lobj->sc_state = state; + } + + return state; +} + + +#endif + +static int +iseq_set_sequence_stackcaching(rb_iseq_t *iseq, LINK_ANCHOR *anchor) +{ +#if OPT_STACK_CACHING + LINK_ELEMENT *list; + int state, insn_id; + + /* initialize */ + state = SCS_XX; + list = FIRST_ELEMENT(anchor); + /* dump_disasm_list(list); */ + + /* for each list element */ + while (list) { + redo_point: + switch (list->type) { + case ISEQ_ELEMENT_INSN: + { + INSN *iobj = (INSN *)list; + insn_id = iobj->insn_id; + + /* dump_disasm_list(list); */ + + switch (insn_id) { + case BIN(nop): + { + /* exception merge point */ + if (state != SCS_AX) { + INSN *rpobj = + new_insn_body(iseq, 0, BIN(reput), 0); + + /* replace this insn */ + REPLACE_ELEM(list, (LINK_ELEMENT *)rpobj); + list = (LINK_ELEMENT *)rpobj; + goto redo_point; + } + break; + } + case BIN(swap): + { + if (state == SCS_AB || state == SCS_BA) { + state = (state == SCS_AB ? 
SCS_BA : SCS_AB); + + REMOVE_ELEM(list); + list = list->next; + goto redo_point; + } + break; + } + case BIN(pop): + { + switch (state) { + case SCS_AX: + case SCS_BX: + state = SCS_XX; + break; + case SCS_AB: + state = SCS_AX; + break; + case SCS_BA: + state = SCS_BX; + break; + case SCS_XX: + goto normal_insn; + default: + rb_compile_error(RSTRING_PTR(iseq->filename), iobj->line_no, + "unreachable"); + } + /* remove useless pop */ + REMOVE_ELEM(list); + list = list->next; + goto redo_point; + } + default:; + /* none */ + } /* end of switch */ + normal_insn: + state = insn_set_sc_state(iseq, iobj, state); + break; + } + case ISEQ_ELEMENT_LABEL: + { + LABEL *lobj; + lobj = (LABEL *)list; + + state = label_set_sc_state(lobj, state); + } + default: + break; + } + list = list->next; + } +#endif + return COMPILE_OK; +} + + + +static int +compile_dstr_fragments(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int *cntp) +{ + NODE *list = node->nd_next; + VALUE lit = node->nd_lit; + int cnt = 1; + + debugp_param("nd_lit", lit); + ADD_INSN1(ret, nd_line(node), putobject, node->nd_lit); + + while (list) { + COMPILE(ret, "each string", list->nd_head); + cnt++; + list = list->nd_next; + } + *cntp = cnt; + + return COMPILE_OK; +} + +static int +compile_dstr(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node) +{ + int cnt; + compile_dstr_fragments(iseq, ret, node, &cnt); + ADD_INSN1(ret, nd_line(node), concatstrings, INT2FIX(cnt)); + return COMPILE_OK; +} + +static int +compile_dregx(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node) +{ + int cnt; + compile_dstr_fragments(iseq, ret, node, &cnt); + ADD_INSN2(ret, nd_line(node), toregexp, INT2FIX(node->nd_cflag), INT2FIX(cnt)); + return COMPILE_OK; +} + +static int +compile_branch_condition(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * cond, + LABEL *then_label, LABEL *else_label) +{ + switch (nd_type(cond)) { + case NODE_AND: + { + LABEL *label = NEW_LABEL(nd_line(cond)); + compile_branch_condition(iseq, ret, cond->nd_1st, label, + 
else_label); + ADD_LABEL(ret, label); + compile_branch_condition(iseq, ret, cond->nd_2nd, then_label, + else_label); + break; + } + case NODE_OR: + { + LABEL *label = NEW_LABEL(nd_line(cond)); + compile_branch_condition(iseq, ret, cond->nd_1st, then_label, + label); + ADD_LABEL(ret, label); + compile_branch_condition(iseq, ret, cond->nd_2nd, then_label, + else_label); + break; + } + case NODE_LIT: /* NODE_LIT is always not true */ + case NODE_TRUE: + case NODE_STR: + /* printf("useless conditon eliminate (%s)\n", ruby_node_name(nd_type(cond))); */ + ADD_INSNL(ret, nd_line(cond), jump, then_label); + break; + case NODE_FALSE: + case NODE_NIL: + /* printf("useless conditon eliminate (%s)\n", ruby_node_name(nd_type(cond))); */ + ADD_INSNL(ret, nd_line(cond), jump, else_label); + break; + default: + COMPILE(ret, "branch condition", cond); + ADD_INSNL(ret, nd_line(cond), branchunless, else_label); + ADD_INSNL(ret, nd_line(cond), jump, then_label); + break; + } + return COMPILE_OK; +} + +static int +compile_array_(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE* node_root, + VALUE opt_p, int poped) +{ + NODE *node = node_root; + int len = node->nd_alen, line = nd_line(node), i=0; + DECL_ANCHOR(anchor); + + INIT_ANCHOR(anchor); + if (nd_type(node) != NODE_ZARRAY) { + while (node) { + if (nd_type(node) != NODE_ARRAY) { + rb_bug("compile_array: This node is not NODE_ARRAY, but %s", + ruby_node_name(nd_type(node))); + } + + i++; + if (opt_p && nd_type(node->nd_head) != NODE_LIT) { + opt_p = Qfalse; + } + COMPILE_(anchor, "array element", node->nd_head, poped); + node = node->nd_next; + } + } + + if (len != i) { + if (0) { + rb_bug("node error: compile_array (%d: %d-%d)", + (int)nd_line(node_root), len, i); + } + len = i; + } + + if (opt_p == Qtrue) { + if (!poped) { + VALUE ary = rb_ary_tmp_new(len); + node = node_root; + while (node) { + rb_ary_push(ary, node->nd_head->nd_lit); + node = node->nd_next; + } + + iseq_add_mark_object_compile_time(iseq, ary); + ADD_INSN1(ret, 
nd_line(node_root), duparray, ary); + } + } + else { + if (!poped) { + ADD_INSN1(anchor, line, newarray, INT2FIX(len)); + } + APPEND_LIST(ret, anchor); + } + return len; +} + +static VALUE +compile_array(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE* node_root, VALUE opt_p) +{ + return compile_array_(iseq, ret, node_root, opt_p, 0); +} + +static VALUE +case_when_optimizable_literal(NODE * node) +{ + switch (nd_type(node)) { + case NODE_LIT: { + VALUE v = node->nd_lit; + if (SYMBOL_P(v) || rb_obj_is_kind_of(v, rb_cNumeric)) { + return v; + } + break; + } + case NODE_STR: + return node->nd_lit; + } + return Qfalse; +} + +static VALUE +when_vals(rb_iseq_t *iseq, LINK_ANCHOR *cond_seq, NODE *vals, LABEL *l1, VALUE special_literals) +{ + while (vals) { + VALUE lit; + NODE* val; + + val = vals->nd_head; + + if (special_literals && + (lit = case_when_optimizable_literal(val)) != Qfalse) { + rb_ary_push(special_literals, lit); + rb_ary_push(special_literals, (VALUE)(l1) | 1); + } + else { + special_literals = Qfalse; + } + + COMPILE(cond_seq, "when cond", val); + ADD_INSN1(cond_seq, nd_line(val), topn, INT2FIX(1)); + ADD_SEND(cond_seq, nd_line(val), ID2SYM(idEqq), INT2FIX(1)); + ADD_INSNL(cond_seq, nd_line(val), branchif, l1); + vals = vals->nd_next; + } + return special_literals; +} + +static int +compile_massign_lhs(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE *node) +{ + switch (nd_type(node)) { + case NODE_ATTRASGN: { + INSN *iobj; + VALUE dupidx; + + COMPILE_POPED(ret, "masgn lhs (NODE_ATTRASGN)", node); + POP_ELEMENT(ret); /* pop pop insn */ + iobj = (INSN *)POP_ELEMENT(ret); /* pop send insn */ + + dupidx = iobj->operands[1]; + dupidx = INT2FIX(FIX2INT(dupidx) + 1); + iobj->operands[1] = dupidx; + + ADD_INSN1(ret, nd_line(node), topn, dupidx); + ADD_ELEM(ret, (LINK_ELEMENT *)iobj); + ADD_INSN(ret, nd_line(node), pop); /* result */ + ADD_INSN(ret, nd_line(node), pop); /* rhs */ + break; + } + case NODE_MASGN: { + DECL_ANCHOR(anchor); + INIT_ANCHOR(anchor); + 
COMPILE_POPED(anchor, "nest masgn lhs", node); + REMOVE_ELEM(FIRST_ELEMENT(anchor)); + ADD_SEQ(ret, anchor); + break; + } + default: { + DECL_ANCHOR(anchor); + INIT_ANCHOR(anchor); + COMPILE_POPED(anchor, "masgn lhs", node); + REMOVE_ELEM(FIRST_ELEMENT(anchor)); + ADD_SEQ(ret, anchor); + } + } + + return COMPILE_OK; +} + +static void +compile_massign_opt_lhs(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE *lhsn) +{ + if (lhsn) { + compile_massign_opt_lhs(iseq, ret, lhsn->nd_next); + compile_massign_lhs(iseq, ret, lhsn->nd_head); + } +} + +static int +compile_massign_opt(rb_iseq_t *iseq, LINK_ANCHOR *ret, + NODE *rhsn, NODE *orig_lhsn) +{ + VALUE mem[64]; + const int memsize = sizeof(mem) / sizeof(mem[0]); + int memindex = 0; + int llen = 0, rlen = 0; + int i; + NODE *lhsn = orig_lhsn; + +#define MEMORY(v) { \ + int i; \ + if (memindex == memsize) return 0; \ + for (i=0; ind_head; + switch (nd_type(ln)) { + case NODE_LASGN: + MEMORY(ln->nd_vid); + break; + case NODE_DASGN: + case NODE_DASGN_CURR: + case NODE_IASGN: + case NODE_IASGN2: + case NODE_CVASGN: + MEMORY(ln->nd_vid); + break; + default: + return 0; + } + lhsn = lhsn->nd_next; + llen++; + } + + while (rhsn) { + if (llen <= rlen) { + COMPILE_POPED(ret, "masgn val (poped)", rhsn->nd_head); + } + else { + COMPILE(ret, "masgn val", rhsn->nd_head); + } + rhsn = rhsn->nd_next; + rlen++; + } + + if (llen > rlen) { + for (i=0; ind_value; + NODE *splatn = node->nd_args; + NODE *lhsn = node->nd_head; + int lhs_splat = (splatn && (VALUE)splatn != (VALUE)-1) ? 
1 : 0; + + if (!poped || splatn || !compile_massign_opt(iseq, ret, rhsn, lhsn)) { + int llen = 0; + DECL_ANCHOR(lhsseq); + + INIT_ANCHOR(lhsseq); + + while (lhsn) { + compile_massign_lhs(iseq, lhsseq, lhsn->nd_head); + llen += 1; + lhsn = lhsn->nd_next; + } + + COMPILE(ret, "normal masgn rhs", rhsn); + + if (!poped) { + ADD_INSN(ret, nd_line(node), dup); + } + + ADD_INSN2(ret, nd_line(node), expandarray, + INT2FIX(llen), INT2FIX(lhs_splat)); + ADD_SEQ(ret, lhsseq); + + if (lhs_splat) { + if (nd_type(splatn) == NODE_POSTARG) { + /*a, b, *r, p1, p2 */ + NODE *postn = splatn->nd_2nd; + NODE *restn = splatn->nd_1st; + int num = postn->nd_alen; + int flag = 0x02 | (((VALUE)restn == (VALUE)-1) ? 0x00 : 0x01); + + ADD_INSN2(ret, nd_line(splatn), expandarray, + INT2FIX(num), INT2FIX(flag)); + + if ((VALUE)restn != (VALUE)-1) { + compile_massign_lhs(iseq, ret, restn); + } + while (postn) { + compile_massign_lhs(iseq, ret, postn->nd_head); + postn = postn->nd_next; + } + } + else { + /* a, b, *r */ + compile_massign_lhs(iseq, ret, splatn); + } + } + } + return COMPILE_OK; +} + +static int +compile_colon2(rb_iseq_t *iseq, NODE * node, + LINK_ANCHOR *pref, LINK_ANCHOR *body) +{ + switch (nd_type(node)) { + case NODE_CONST: + debugi("compile_colon2 - colon", node->nd_vid); + ADD_INSN1(body, nd_line(node), getconstant, ID2SYM(node->nd_vid)); + break; + case NODE_COLON3: + debugi("compile_colon2 - colon3", node->nd_mid); + ADD_INSN(body, nd_line(node), pop); + ADD_INSN1(body, nd_line(node), putobject, rb_cObject); + ADD_INSN1(body, nd_line(node), getconstant, ID2SYM(node->nd_mid)); + break; + case NODE_COLON2: + compile_colon2(iseq, node->nd_head, pref, body); + debugi("compile_colon2 - colon2", node->nd_mid); + ADD_INSN1(body, nd_line(node), getconstant, ID2SYM(node->nd_mid)); + break; + default: + COMPILE(pref, "const colon2 prefix", node); + break; + } + return COMPILE_OK; +} + +static VALUE +compile_cpath(LINK_ANCHOR *ret, rb_iseq_t *iseq, NODE *cpath) +{ + if (nd_type(cpath) 
== NODE_COLON3) { + /* toplevel class ::Foo */ + ADD_INSN1(ret, nd_line(cpath), putobject, rb_cObject); + return Qfalse; + } + else if (cpath->nd_head) { + /* Bar::Foo */ + COMPILE(ret, "nd_else->nd_head", cpath->nd_head); + return Qfalse; + } + else { + /* class at cbase Foo */ + ADD_INSN1(ret, nd_line(cpath), putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CBASE)); + return Qtrue; + } +} + +static int +defined_expr(rb_iseq_t *iseq, LINK_ANCHOR *ret, + NODE *node, LABEL **lfinish, VALUE needstr) +{ + const char *estr = 0; + enum node_type type; + + switch (type = nd_type(node)) { + + /* easy literals */ + case NODE_NIL: + estr = "nil"; + break; + case NODE_SELF: + estr = "self"; + break; + case NODE_TRUE: + estr = "true"; + break; + case NODE_FALSE: + estr = "false"; + break; + + case NODE_ARRAY:{ + NODE *vals = node; + + do { + defined_expr(iseq, ret, vals->nd_head, lfinish, Qfalse); + + if (!lfinish[1]) { + lfinish[1] = NEW_LABEL(nd_line(node)); + } + ADD_INSNL(ret, nd_line(node), branchunless, lfinish[1]); + } while ((vals = vals->nd_next) != NULL); + } + case NODE_STR: + case NODE_LIT: + case NODE_ZARRAY: + case NODE_AND: + case NODE_OR: + default: + estr = "expression"; + break; + + /* variables */ + case NODE_LVAR: + case NODE_DVAR: + estr = "local-variable"; + break; + + case NODE_IVAR: + ADD_INSN(ret, nd_line(node), putnil); + ADD_INSN3(ret, nd_line(node), defined, INT2FIX(DEFINED_IVAR), + ID2SYM(node->nd_vid), needstr); + return 1; + + case NODE_GVAR: + ADD_INSN(ret, nd_line(node), putnil); + ADD_INSN3(ret, nd_line(node), defined, INT2FIX(DEFINED_GVAR), + ((VALUE)node->nd_entry) | 1, needstr); + return 1; + + case NODE_CVAR: + ADD_INSN(ret, nd_line(node), putnil); + ADD_INSN3(ret, nd_line(node), defined, INT2FIX(DEFINED_CVAR), + ID2SYM(node->nd_vid), needstr); + return 1; + + case NODE_CONST: + ADD_INSN(ret, nd_line(node), putnil); + ADD_INSN3(ret, nd_line(node), defined, INT2FIX(DEFINED_CONST), + ID2SYM(node->nd_vid), needstr); + return 1; + case 
NODE_COLON2: + if (!lfinish[1]) { + lfinish[1] = NEW_LABEL(nd_line(node)); + } + defined_expr(iseq, ret, node->nd_head, lfinish, Qfalse); + ADD_INSNL(ret, nd_line(node), branchunless, lfinish[1]); + + if (rb_is_const_id(node->nd_mid)) { + COMPILE(ret, "defined/colon2#nd_head", node->nd_head); + ADD_INSN3(ret, nd_line(node), defined, INT2FIX(DEFINED_CONST), + ID2SYM(node->nd_mid), needstr); + } + else { + COMPILE(ret, "defined/colon2#nd_head", node->nd_head); + ADD_INSN3(ret, nd_line(node), defined, INT2FIX(DEFINED_METHOD), + ID2SYM(node->nd_mid), needstr); + } + return 1; + case NODE_COLON3: + ADD_INSN1(ret, nd_line(node), putobject, rb_cObject); + ADD_INSN3(ret, nd_line(node), defined, + INT2FIX(DEFINED_CONST), ID2SYM(node->nd_mid), needstr); + return 1; + + /* method dispatch */ + case NODE_CALL: + case NODE_VCALL: + case NODE_FCALL: + case NODE_ATTRASGN:{ + int self = Qtrue; + + switch (type) { + case NODE_ATTRASGN: + if (node->nd_recv == (NODE *)1) break; + case NODE_CALL: + self = Qfalse; + break; + default: + /* through */; + } + if (!lfinish[1]) { + lfinish[1] = NEW_LABEL(nd_line(node)); + } + if (node->nd_args) { + defined_expr(iseq, ret, node->nd_args, lfinish, Qfalse); + ADD_INSNL(ret, nd_line(node), branchunless, lfinish[1]); + } + if (!self) { + LABEL *lstart = NEW_LABEL(nd_line(node)); + LABEL *lend = NEW_LABEL(nd_line(node)); + VALUE rescue = NEW_CHILD_ISEQVAL(NEW_NIL(), + rb_str_concat(rb_str_new2 + ("defined guard in "), + iseq->name), + ISEQ_TYPE_DEFINED_GUARD, 0); + + defined_expr(iseq, ret, node->nd_recv, lfinish, Qfalse); + ADD_INSNL(ret, nd_line(node), branchunless, lfinish[1]); + + ADD_LABEL(ret, lstart); + COMPILE(ret, "defined/recv", node->nd_recv); + ADD_LABEL(ret, lend); + ADD_CATCH_ENTRY(CATCH_TYPE_RESCUE, lstart, lend, rescue, lfinish[1]); + ADD_INSN3(ret, nd_line(node), defined, INT2FIX(DEFINED_METHOD), + ID2SYM(node->nd_mid), needstr); + } + else { + ADD_INSN(ret, nd_line(node), putself); + ADD_INSN3(ret, nd_line(node), defined, 
INT2FIX(DEFINED_FUNC), + ID2SYM(node->nd_mid), needstr); + } + return 1; + } + + case NODE_YIELD: + ADD_INSN(ret, nd_line(node), putnil); + ADD_INSN3(ret, nd_line(node), defined, INT2FIX(DEFINED_YIELD), 0, + needstr); + return 1; + + case NODE_BACK_REF: + case NODE_NTH_REF: + ADD_INSN(ret, nd_line(node), putnil); + ADD_INSN3(ret, nd_line(node), defined, INT2FIX(DEFINED_REF), + INT2FIX((node->nd_nth << 1) | (type == NODE_BACK_REF)), + needstr); + return 1; + + case NODE_SUPER: + case NODE_ZSUPER: + ADD_INSN(ret, nd_line(node), putnil); + ADD_INSN3(ret, nd_line(node), defined, INT2FIX(DEFINED_ZSUPER), 0, + needstr); + return 1; + + case NODE_OP_ASGN1: + case NODE_OP_ASGN2: + case NODE_OP_ASGN_OR: + case NODE_OP_ASGN_AND: + case NODE_MASGN: + case NODE_LASGN: + case NODE_DASGN: + case NODE_DASGN_CURR: + case NODE_GASGN: + case NODE_IASGN: + case NODE_CDECL: + case NODE_CVDECL: + case NODE_CVASGN: + estr = "assignment"; + break; + } + + if (estr != 0) { + if (needstr != Qfalse) { + VALUE str = rb_str_new2(estr); + hide_obj(str); + ADD_INSN1(ret, nd_line(node), putstring, str); + iseq_add_mark_object_compile_time(iseq, str); + } + else { + ADD_INSN1(ret, nd_line(node), putobject, Qtrue); + } + return 1; + } + return 0; +} + +#define BUFSIZE 0x100 + +static VALUE +make_name_for_block(rb_iseq_t *iseq) +{ + int level = 1; + rb_iseq_t *ip = iseq; + + if (iseq->parent_iseq != 0) { + while (ip->local_iseq != ip) { + if (ip->type == ISEQ_TYPE_BLOCK) { + level++; + } + ip = ip->parent_iseq; + } + } + + if (level == 1) { + return rb_sprintf("block in %s", RSTRING_PTR(ip->name)); + } + else { + return rb_sprintf("block (%d levels) in %s", level, RSTRING_PTR(ip->name)); + } +} + +static void +push_ensure_entry(rb_iseq_t *iseq, + struct iseq_compile_data_ensure_node_stack *enl, + struct ensure_range *er, NODE *node) +{ + enl->ensure_node = node; + enl->prev = iseq->compile_data->ensure_node_stack; /* prev */ + enl->erange = er; + iseq->compile_data->ensure_node_stack = enl; +} + 
+static void +add_ensure_range(rb_iseq_t *iseq, struct ensure_range *erange, + LABEL *lstart, LABEL *lend) +{ + struct ensure_range *ne = + compile_data_alloc(iseq, sizeof(struct ensure_range)); + + while (erange->next != 0) { + erange = erange->next; + } + ne->next = 0; + ne->begin = lend; + ne->end = erange->end; + erange->end = lstart; + + erange->next = ne; +} + +static void +add_ensure_iseq(LINK_ANCHOR *ret, rb_iseq_t *iseq, int is_return) +{ + struct iseq_compile_data_ensure_node_stack *enlp = + iseq->compile_data->ensure_node_stack; + struct iseq_compile_data_ensure_node_stack *prev_enlp = enlp; + DECL_ANCHOR(ensure); + + INIT_ANCHOR(ensure); + while (enlp) { + if (enlp->erange != 0) { + DECL_ANCHOR(ensure_part); + LABEL *lstart = NEW_LABEL(0); + LABEL *lend = NEW_LABEL(0); + INIT_ANCHOR(ensure_part); + + add_ensure_range(iseq, enlp->erange, lstart, lend); + + iseq->compile_data->ensure_node_stack = enlp->prev; + ADD_LABEL(ensure_part, lstart); + COMPILE_POPED(ensure_part, "ensure part", enlp->ensure_node); + ADD_LABEL(ensure_part, lend); + ADD_SEQ(ensure, ensure_part); + } + else { + if (!is_return) { + break; + } + } + enlp = enlp->prev; + } + iseq->compile_data->ensure_node_stack = prev_enlp; + ADD_SEQ(ret, ensure); +} + +static VALUE +setup_args(rb_iseq_t *iseq, LINK_ANCHOR *args, NODE *argn, unsigned long *flag) +{ + VALUE argc = INT2FIX(0); + int nsplat = 0; + DECL_ANCHOR(arg_block); + DECL_ANCHOR(args_splat); + + INIT_ANCHOR(arg_block); + INIT_ANCHOR(args_splat); + if (argn && nd_type(argn) == NODE_BLOCK_PASS) { + COMPILE(arg_block, "block", argn->nd_body); + *flag |= VM_CALL_ARGS_BLOCKARG_BIT; + argn = argn->nd_head; + } + + setup_argn: + if (argn) { + switch (nd_type(argn)) { + case NODE_SPLAT: { + COMPILE(args, "args (splat)", argn->nd_head); + argc = INT2FIX(1); + nsplat++; + *flag |= VM_CALL_ARGS_SPLAT_BIT; + break; + } + case NODE_ARGSCAT: + case NODE_ARGSPUSH: { + int next_is_array = (nd_type(argn->nd_head) == NODE_ARRAY); + DECL_ANCHOR(tmp); + 
+ INIT_ANCHOR(tmp); + COMPILE(tmp, "args (cat: splat)", argn->nd_body); + if (next_is_array && nsplat == 0) { + /* none */ + } + else { + if (nd_type(argn) == NODE_ARGSCAT) { + ADD_INSN1(tmp, nd_line(argn), splatarray, Qfalse); + } + else { + ADD_INSN1(tmp, nd_line(argn), newarray, INT2FIX(1)); + } + } + INSERT_LIST(args_splat, tmp); + nsplat++; + *flag |= VM_CALL_ARGS_SPLAT_BIT; + + if (next_is_array) { + argc = INT2FIX(compile_array(iseq, args, argn->nd_head, Qfalse) + 1); + POP_ELEMENT(args); + } + else { + argn = argn->nd_head; + goto setup_argn; + } + break; + } + case NODE_ARRAY: { + argc = INT2FIX(compile_array(iseq, args, argn, Qfalse)); + POP_ELEMENT(args); + break; + } + default: { + rb_bug("setup_arg: unknown node: %s\n", ruby_node_name(nd_type(argn))); + } + } + } + + if (nsplat > 1) { + int i; + for (i=1; icompile_data->last_line, putnil); + } + return COMPILE_OK; + } + + iseq->compile_data->last_line = nd_line(node); + debug_node_start(node); + + type = nd_type(node); + + if (node->flags & NODE_FL_NEWLINE) { + ADD_TRACE(ret, nd_line(node), RUBY_EVENT_LINE); + } + + switch (type) { + case NODE_BLOCK:{ + while (node && nd_type(node) == NODE_BLOCK) { + COMPILE_(ret, "BLOCK body", node->nd_head, + (node->nd_next == 0 && poped == 0) ? 
0 : 1); + node = node->nd_next; + } + if (node) { + COMPILE_(ret, "BLOCK next", node->nd_next, poped); + } + break; + } + case NODE_IF:{ + DECL_ANCHOR(cond_seq); + DECL_ANCHOR(then_seq); + DECL_ANCHOR(else_seq); + LABEL *then_label, *else_label, *end_label; + + INIT_ANCHOR(cond_seq); + INIT_ANCHOR(then_seq); + INIT_ANCHOR(else_seq); + then_label = NEW_LABEL(nd_line(node)); + else_label = NEW_LABEL(nd_line(node)); + end_label = NEW_LABEL(nd_line(node)); + + compile_branch_condition(iseq, cond_seq, node->nd_cond, + then_label, else_label); + COMPILE_(then_seq, "then", node->nd_body, poped); + COMPILE_(else_seq, "else", node->nd_else, poped); + + ADD_SEQ(ret, cond_seq); + + ADD_LABEL(ret, then_label); + ADD_SEQ(ret, then_seq); + ADD_INSNL(ret, nd_line(node), jump, end_label); + + ADD_LABEL(ret, else_label); + ADD_SEQ(ret, else_seq); + + ADD_LABEL(ret, end_label); + + break; + } + case NODE_CASE:{ + NODE *vals; + NODE *tempnode = node; + LABEL *endlabel, *elselabel; + DECL_ANCHOR(head); + DECL_ANCHOR(body_seq); + DECL_ANCHOR(cond_seq); + VALUE special_literals = rb_ary_tmp_new(1); + + INIT_ANCHOR(head); + INIT_ANCHOR(body_seq); + INIT_ANCHOR(cond_seq); + if (node->nd_head == 0) { + COMPILE_(ret, "when", node->nd_body, poped); + break; + } + COMPILE(head, "case base", node->nd_head); + + node = node->nd_body; + type = nd_type(node); + + if (type != NODE_WHEN) { + COMPILE_ERROR((ERROR_ARGS "NODE_CASE: unexpected node. 
must be NODE_WHEN, but %s", ruby_node_name(type))); + } + + endlabel = NEW_LABEL(nd_line(node)); + elselabel = NEW_LABEL(nd_line(node)); + + ADD_SEQ(ret, head); /* case VAL */ + + while (type == NODE_WHEN) { + LABEL *l1; + + l1 = NEW_LABEL(nd_line(node)); + ADD_LABEL(body_seq, l1); + ADD_INSN(body_seq, nd_line(node), pop); + COMPILE_(body_seq, "when body", node->nd_body, poped); + ADD_INSNL(body_seq, nd_line(node), jump, endlabel); + + vals = node->nd_head; + if (vals) { + switch (nd_type(vals)) { + case NODE_ARRAY: + special_literals = when_vals(iseq, cond_seq, vals, l1, special_literals); + break; + case NODE_SPLAT: + case NODE_ARGSCAT: + case NODE_ARGSPUSH: + special_literals = 0; + COMPILE(cond_seq, "when/cond splat", vals); + ADD_INSN1(cond_seq, nd_line(vals), checkincludearray, Qtrue); + ADD_INSNL(cond_seq, nd_line(vals), branchif, l1); + break; + default: + rb_bug("NODE_CASE: unknown node (%s)", + ruby_node_name(nd_type(vals))); + } + } + else { + rb_bug("NODE_CASE: must be NODE_ARRAY, but 0"); + } + + node = node->nd_next; + if (!node) { + break; + } + type = nd_type(node); + } + /* else */ + if (node) { + ADD_LABEL(cond_seq, elselabel); + ADD_INSN(cond_seq, nd_line(node), pop); + COMPILE_(cond_seq, "else", node, poped); + ADD_INSNL(cond_seq, nd_line(node), jump, endlabel); + } + else { + debugs("== else (implicit)\n"); + ADD_LABEL(cond_seq, elselabel); + ADD_INSN(cond_seq, nd_line(tempnode), pop); + if (!poped) { + ADD_INSN(cond_seq, nd_line(tempnode), putnil); + } + ADD_INSNL(cond_seq, nd_line(tempnode), jump, endlabel); + } + + if (special_literals) { + ADD_INSN(ret, nd_line(tempnode), dup); + ADD_INSN2(ret, nd_line(tempnode), opt_case_dispatch, + special_literals, elselabel); + iseq_add_mark_object_compile_time(iseq, special_literals); + } + + ADD_SEQ(ret, cond_seq); + ADD_SEQ(ret, body_seq); + ADD_LABEL(ret, endlabel); + break; + } + case NODE_WHEN:{ + NODE *vals; + NODE *val; + NODE *orig_node = node; + LABEL *endlabel; + DECL_ANCHOR(body_seq); + + 
INIT_ANCHOR(body_seq); + endlabel = NEW_LABEL(nd_line(node)); + + while (node && nd_type(node) == NODE_WHEN) { + LABEL *l1 = NEW_LABEL(nd_line(node)); + ADD_LABEL(body_seq, l1); + COMPILE_(body_seq, "when", node->nd_body, poped); + ADD_INSNL(body_seq, nd_line(node), jump, endlabel); + + vals = node->nd_head; + if (vals && nd_type(vals) == NODE_ARRAY) { + while (vals) { + val = vals->nd_head; + COMPILE(ret, "when2", val); + ADD_INSNL(ret, nd_line(val), branchif, l1); + vals = vals->nd_next; + } + } + else if (nd_type(vals) == NODE_SPLAT || + nd_type(vals) == NODE_ARGSCAT || + nd_type(vals) == NODE_ARGSPUSH) { + + NODE *val = vals->nd_head; + + if (nd_type(vals) == NODE_ARGSCAT || nd_type(vals) == NODE_ARGSPUSH) { + NODE *vs = vals->nd_head; + val = vals->nd_body; + + while (vs) { + NODE* val = vs->nd_head; + COMPILE(ret, "when/argscat", val); + ADD_INSNL(ret, nd_line(val), branchif, l1); + vs = vs->nd_next; + } + } + + ADD_INSN(ret, nd_line(val), putnil); + COMPILE(ret, "when2/splat", val); + ADD_INSN1(ret, nd_line(val), checkincludearray, Qfalse); + ADD_INSN(ret, nd_line(val), pop); + ADD_INSNL(ret, nd_line(val), branchif, l1); + } + else { + rb_bug("err"); + } + node = node->nd_next; + } + /* else */ + COMPILE_(ret, "else", node, poped); + ADD_INSNL(ret, nd_line(orig_node), jump, endlabel); + + ADD_SEQ(ret, body_seq); + ADD_LABEL(ret, endlabel); + + break; + } + case NODE_OPT_N: + case NODE_WHILE: + case NODE_UNTIL:{ + LABEL *prev_start_label = iseq->compile_data->start_label; + LABEL *prev_end_label = iseq->compile_data->end_label; + LABEL *prev_redo_label = iseq->compile_data->redo_label; + VALUE prev_loopval_popped = iseq->compile_data->loopval_popped; + + struct iseq_compile_data_ensure_node_stack enl; + + LABEL *next_label = iseq->compile_data->start_label = NEW_LABEL(nd_line(node)); /* next */ + LABEL *redo_label = iseq->compile_data->redo_label = NEW_LABEL(nd_line(node)); /* redo */ + LABEL *break_label = iseq->compile_data->end_label = 
NEW_LABEL(nd_line(node)); /* break */ + LABEL *end_label = NEW_LABEL(nd_line(node)); + + LABEL *next_catch_label = NEW_LABEL(nd_line(node)); + LABEL *tmp_label = NULL; + + iseq->compile_data->loopval_popped = 0; + push_ensure_entry(iseq, &enl, 0, 0); + + if (type == NODE_OPT_N || node->nd_state == 1) { + ADD_INSNL(ret, nd_line(node), jump, next_label); + } + else { + tmp_label = NEW_LABEL(nd_line(node)); + ADD_INSNL(ret, nd_line(node), jump, tmp_label); + } + ADD_INSN(ret, nd_line(node), putnil); + ADD_LABEL(ret, next_catch_label); + ADD_INSN(ret, nd_line(node), pop); + ADD_INSNL(ret, nd_line(node), jump, next_label); + if (tmp_label) ADD_LABEL(ret, tmp_label); + + ADD_LABEL(ret, redo_label); + COMPILE_POPED(ret, "while body", node->nd_body); + ADD_LABEL(ret, next_label); /* next */ + + if (type == NODE_WHILE) { + compile_branch_condition(iseq, ret, node->nd_cond, + redo_label, end_label); + } + else if (type == NODE_UNTIL) { + /* untile */ + compile_branch_condition(iseq, ret, node->nd_cond, + end_label, redo_label); + } + else { + ADD_CALL_RECEIVER(ret, nd_line(node)); + ADD_CALL(ret, nd_line(node), ID2SYM(idGets), INT2FIX(0)); + ADD_INSNL(ret, nd_line(node), branchif, redo_label); + /* opt_n */ + } + + ADD_LABEL(ret, end_label); + + if (node->nd_state == Qundef) { + /* ADD_INSN(ret, nd_line(node), putundef); */ + rb_bug("unsupported: putundef"); + } + else { + ADD_INSN(ret, nd_line(node), putnil); + } + + ADD_LABEL(ret, break_label); /* break */ + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + + ADD_CATCH_ENTRY(CATCH_TYPE_BREAK, redo_label, break_label, + 0, break_label); + ADD_CATCH_ENTRY(CATCH_TYPE_NEXT, redo_label, break_label, 0, + next_catch_label); + ADD_CATCH_ENTRY(CATCH_TYPE_REDO, redo_label, break_label, 0, + iseq->compile_data->redo_label); + + iseq->compile_data->start_label = prev_start_label; + iseq->compile_data->end_label = prev_end_label; + iseq->compile_data->redo_label = prev_redo_label; + iseq->compile_data->loopval_popped = 
prev_loopval_popped; + iseq->compile_data->ensure_node_stack = iseq->compile_data->ensure_node_stack->prev; + break; + } + case NODE_ITER: + case NODE_FOR:{ + VALUE prevblock = iseq->compile_data->current_block; + LABEL *retry_label = NEW_LABEL(nd_line(node)); + LABEL *retry_end_l = NEW_LABEL(nd_line(node)); + ID mid = 0; + + ADD_LABEL(ret, retry_label); + if (nd_type(node) == NODE_FOR) { + COMPILE(ret, "iter caller (for)", node->nd_iter); + + iseq->compile_data->current_block = + NEW_CHILD_ISEQVAL(node->nd_body, make_name_for_block(iseq), + ISEQ_TYPE_BLOCK, nd_line(node)); + + mid = idEach; + ADD_SEND_R(ret, nd_line(node), ID2SYM(idEach), INT2FIX(0), + iseq->compile_data->current_block, INT2FIX(0)); + } + else { + iseq->compile_data->current_block = + NEW_CHILD_ISEQVAL(node->nd_body, make_name_for_block(iseq), + ISEQ_TYPE_BLOCK, nd_line(node)); + COMPILE(ret, "iter caller", node->nd_iter); + } + ADD_LABEL(ret, retry_end_l); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + + iseq->compile_data->current_block = prevblock; + + ADD_CATCH_ENTRY(CATCH_TYPE_BREAK, retry_label, retry_end_l, 0, retry_end_l); + + break; + } + case NODE_BREAK:{ + unsigned long level = 0; + + if (iseq->compile_data->redo_label != 0) { + /* while/until */ + LABEL *splabel = NEW_LABEL(0); + ADD_LABEL(ret, splabel); + ADD_ADJUST(ret, nd_line(node), iseq->compile_data->redo_label); + COMPILE_(ret, "break val (while/until)", node->nd_stts, iseq->compile_data->loopval_popped); + add_ensure_iseq(ret, iseq, 0); + ADD_INSNL(ret, nd_line(node), jump, iseq->compile_data->end_label); + ADD_ADJUST_RESTORE(ret, splabel); + + if (!poped) { + ADD_INSN(ret, nd_line(node), putnil); + } + } + else if (iseq->type == ISEQ_TYPE_BLOCK) { + break_by_insn: + /* escape from block */ + COMPILE(ret, "break val (block)", node->nd_stts); + ADD_INSN1(ret, nd_line(node), throw, INT2FIX(level | 0x02) /* TAG_BREAK */ ); + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + } + else if (iseq->type == 
ISEQ_TYPE_EVAL) { + break_in_eval: + COMPILE_ERROR((ERROR_ARGS "Can't escape from eval with break")); + } + else { + rb_iseq_t *ip = iseq->parent_iseq; + while (ip) { + if (!ip->compile_data) { + ip = 0; + break; + } + + level++; + if (ip->compile_data->redo_label != 0) { + level = 0x8000; + if (ip->compile_data->loopval_popped == 0) { + /* need value */ + level |= 0x4000; + } + goto break_by_insn; + } + else if (ip->type == ISEQ_TYPE_BLOCK) { + level <<= 16; + goto break_by_insn; + } + else if (ip->type == ISEQ_TYPE_EVAL) { + goto break_in_eval; + } + + ip = ip->parent_iseq; + } + COMPILE_ERROR((ERROR_ARGS "Invalid break")); + } + break; + } + case NODE_NEXT:{ + unsigned long level = 0; + + if (iseq->compile_data->redo_label != 0) { + LABEL *splabel = NEW_LABEL(0); + debugs("next in while loop\n"); + ADD_LABEL(ret, splabel); + COMPILE(ret, "next val/valid syntax?", node->nd_stts); + add_ensure_iseq(ret, iseq, 0); + ADD_ADJUST(ret, nd_line(node), iseq->compile_data->redo_label); + ADD_INSNL(ret, nd_line(node), jump, iseq->compile_data->start_label); + ADD_ADJUST_RESTORE(ret, splabel); + } + else if (iseq->compile_data->end_label) { + LABEL *splabel = NEW_LABEL(0); + debugs("next in block\n"); + ADD_LABEL(ret, splabel); + ADD_ADJUST(ret, nd_line(node), iseq->compile_data->start_label); + COMPILE(ret, "next val", node->nd_stts); + add_ensure_iseq(ret, iseq, 0); + ADD_INSNL(ret, nd_line(node), jump, iseq->compile_data->end_label); + ADD_ADJUST_RESTORE(ret, splabel); + + if (!poped) { + ADD_INSN(ret, nd_line(node), putnil); + } + } + else if (iseq->type == ISEQ_TYPE_EVAL) { + next_in_eval: + COMPILE_ERROR((ERROR_ARGS "Can't escape from eval with next")); + } + else { + rb_iseq_t *ip; + ip = iseq; + while (ip) { + if (!ip->compile_data) { + ip = 0; + break; + } + + level = 0x8000 | 0x4000; + if (ip->compile_data->redo_label != 0) { + /* while loop */ + break; + } + else if (ip->type == ISEQ_TYPE_BLOCK) { + break; + } + else if (ip->type == ISEQ_TYPE_EVAL) { + goto 
next_in_eval; + } + + ip = ip->parent_iseq; + } + if (ip != 0) { + COMPILE(ret, "next val", node->nd_stts); + ADD_INSN1(ret, nd_line(node), throw, INT2FIX(level | 0x03) /* TAG_NEXT */ ); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + } + else { + COMPILE_ERROR((ERROR_ARGS "Invalid next")); + } + } + break; + } + case NODE_REDO:{ + if (iseq->compile_data->redo_label) { + LABEL *splabel = NEW_LABEL(0); + debugs("redo in while"); + ADD_LABEL(ret, splabel); + ADD_ADJUST(ret, nd_line(node), iseq->compile_data->redo_label); + add_ensure_iseq(ret, iseq, 0); + ADD_INSNL(ret, nd_line(node), jump, iseq->compile_data->redo_label); + ADD_ADJUST_RESTORE(ret, splabel); + } + else if (iseq->type == ISEQ_TYPE_EVAL) { + redo_in_eval: + COMPILE_ERROR((ERROR_ARGS "Can't escape from eval with redo")); + } + else if (iseq->compile_data->start_label) { + LABEL *splabel = NEW_LABEL(0); + + debugs("redo in block"); + ADD_LABEL(ret, splabel); + add_ensure_iseq(ret, iseq, 0); + ADD_ADJUST(ret, nd_line(node), iseq->compile_data->start_label); + ADD_INSNL(ret, nd_line(node), jump, iseq->compile_data->start_label); + ADD_ADJUST_RESTORE(ret, splabel); + + if (!poped) { + ADD_INSN(ret, nd_line(node), putnil); + } + } + else { + rb_iseq_t *ip; + unsigned long level; + level = 0x8000 | 0x4000; + ip = iseq; + while (ip) { + if (!ip->compile_data) { + ip = 0; + break; + } + + if (ip->compile_data->redo_label != 0) { + break; + } + else if (ip->type == ISEQ_TYPE_BLOCK) { + break; + } + else if (ip->type == ISEQ_TYPE_EVAL) { + goto redo_in_eval; + } + + ip = ip->parent_iseq; + } + if (ip != 0) { + ADD_INSN(ret, nd_line(node), putnil); + ADD_INSN1(ret, nd_line(node), throw, INT2FIX(level | 0x05) /* TAG_REDO */ ); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + } + else { + COMPILE_ERROR((ERROR_ARGS "Invalid redo")); + } + } + break; + } + case NODE_RETRY:{ + if (iseq->type == ISEQ_TYPE_RESCUE) { + ADD_INSN(ret, nd_line(node), putnil); + ADD_INSN1(ret, nd_line(node), throw, 
INT2FIX(0x04) /* TAG_RETRY */ ); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + } + else { + COMPILE_ERROR((ERROR_ARGS "Invalid retry")); + } + break; + } + case NODE_BEGIN:{ + COMPILE_(ret, "NODE_BEGIN", node->nd_body, poped); + break; + } + case NODE_RESCUE:{ + LABEL *lstart = NEW_LABEL(nd_line(node)); + LABEL *lend = NEW_LABEL(nd_line(node)); + LABEL *lcont = NEW_LABEL(nd_line(node)); + VALUE rescue = NEW_CHILD_ISEQVAL( + node->nd_resq, + rb_str_concat(rb_str_new2("rescue in "), iseq->name), + ISEQ_TYPE_RESCUE, nd_line(node)); + + ADD_LABEL(ret, lstart); + COMPILE(ret, "rescue head", node->nd_head); + ADD_LABEL(ret, lend); + if (node->nd_else) { + ADD_INSN(ret, nd_line(node), pop); + COMPILE(ret, "rescue else", node->nd_else); + } + ADD_INSN(ret, nd_line(node), nop); + ADD_LABEL(ret, lcont); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + + /* resgister catch entry */ + ADD_CATCH_ENTRY(CATCH_TYPE_RESCUE, lstart, lend, rescue, lcont); + ADD_CATCH_ENTRY(CATCH_TYPE_RETRY, lend, lcont, 0, lstart); + break; + } + case NODE_RESBODY:{ + NODE *resq = node; + NODE *narg; + LABEL *label_miss, *label_hit; + + while (resq) { + label_miss = NEW_LABEL(nd_line(node)); + label_hit = NEW_LABEL(nd_line(node)); + + narg = resq->nd_args; + if (narg) { + switch (nd_type(narg)) { + case NODE_ARRAY: + while (narg) { + COMPILE(ret, "rescue arg", narg->nd_head); + ADD_INSN2(ret, nd_line(node), getdynamic, INT2FIX(2), INT2FIX(0)); + ADD_SEND(ret, nd_line(node), ID2SYM(idEqq), INT2FIX(1)); + ADD_INSNL(ret, nd_line(node), branchif, label_hit); + narg = narg->nd_next; + } + break; + case NODE_SPLAT: + case NODE_ARGSCAT: + case NODE_ARGSPUSH: + ADD_INSN2(ret, nd_line(node), getdynamic, INT2FIX(2), INT2FIX(0)); + COMPILE(ret, "rescue/cond splat", narg); + ADD_INSN1(ret, nd_line(node), checkincludearray, Qtrue); + ADD_INSN(ret, nd_line(node), swap); + ADD_INSN(ret, nd_line(node), pop); + ADD_INSNL(ret, nd_line(node), branchif, label_hit); + break; + default: + 
rb_bug("NODE_RESBODY: unknown node (%s)", + ruby_node_name(nd_type(narg))); + } + } + else { + ADD_INSN1(ret, nd_line(node), putobject, + rb_eStandardError); + ADD_INSN2(ret, nd_line(node), getdynamic, INT2FIX(2), INT2FIX(0)); + ADD_SEND(ret, nd_line(node), ID2SYM(idEqq), INT2FIX(1)); + ADD_INSNL(ret, nd_line(node), branchif, label_hit); + } + ADD_INSNL(ret, nd_line(node), jump, label_miss); + ADD_LABEL(ret, label_hit); + COMPILE(ret, "resbody body", resq->nd_body); + if (iseq->compile_data->option->tailcall_optimization) { + ADD_INSN(ret, nd_line(node), nop); + } + ADD_INSN(ret, nd_line(node), leave); + ADD_LABEL(ret, label_miss); + resq = resq->nd_head; + } + break; + } + case NODE_ENSURE:{ + DECL_ANCHOR(ensr); + VALUE ensure = NEW_CHILD_ISEQVAL(node->nd_ensr, + rb_str_concat(rb_str_new2 + ("ensure in "), + iseq->name), + ISEQ_TYPE_ENSURE, nd_line(node)); + LABEL *lstart = NEW_LABEL(nd_line(node)); + LABEL *lend = NEW_LABEL(nd_line(node)); + LABEL *lcont = NEW_LABEL(nd_line(node)); + struct ensure_range er; + struct iseq_compile_data_ensure_node_stack enl; + struct ensure_range *erange; + + INIT_ANCHOR(ensr); + COMPILE_POPED(ensr, "ensure ensr", node->nd_ensr); + + er.begin = lstart; + er.end = lend; + er.next = 0; + push_ensure_entry(iseq, &enl, &er, node->nd_ensr); + + ADD_LABEL(ret, lstart); + COMPILE_(ret, "ensure head", node->nd_head, poped); + ADD_LABEL(ret, lend); + if (ensr->anchor.next == 0) { + ADD_INSN(ret, nd_line(node), nop); + } + else { + ADD_SEQ(ret, ensr); + } + ADD_LABEL(ret, lcont); + + erange = iseq->compile_data->ensure_node_stack->erange; + while (erange) { + ADD_CATCH_ENTRY(CATCH_TYPE_ENSURE, erange->begin, erange->end, + ensure, lcont); + erange = erange->next; + } + + iseq->compile_data->ensure_node_stack = enl.prev; + break; + } + + case NODE_AND: + case NODE_OR:{ + LABEL *end_label = NEW_LABEL(nd_line(node)); + COMPILE(ret, "nd_1st", node->nd_1st); + if (!poped) { + ADD_INSN(ret, nd_line(node), dup); + } + if (type == NODE_AND) { + 
ADD_INSNL(ret, nd_line(node), branchunless, end_label); + } + else { + ADD_INSNL(ret, nd_line(node), branchif, end_label); + } + if (!poped) { + ADD_INSN(ret, nd_line(node), pop); + } + COMPILE_(ret, "nd_2nd", node->nd_2nd, poped); + ADD_LABEL(ret, end_label); + break; + } + + case NODE_MASGN:{ + compile_massign(iseq, ret, node, poped); + break; + } + + case NODE_LASGN:{ + ID id = node->nd_vid; + int idx = iseq->local_iseq->local_size - get_local_var_idx(iseq, id); + + debugs("lvar: %s idx: %d\n", rb_id2name(id), idx); + COMPILE(ret, "rvalue", node->nd_value); + + if (!poped) { + ADD_INSN(ret, nd_line(node), dup); + } + ADD_INSN1(ret, nd_line(node), setlocal, INT2FIX(idx)); + + break; + } + case NODE_DASGN: + case NODE_DASGN_CURR:{ + int idx, lv, ls; + COMPILE(ret, "dvalue", node->nd_value); + debugp_param("dassn id", rb_str_new2(rb_id2name(node->nd_vid) ? rb_id2name(node->nd_vid) : "*")); + + if (!poped) { + ADD_INSN(ret, nd_line(node), dup); + } + + idx = get_dyna_var_idx(iseq, node->nd_vid, &lv, &ls); + + if (idx < 0) { + rb_bug("NODE_DASGN(_CURR): unknown id (%s)", rb_id2name(node->nd_vid)); + } + + ADD_INSN2(ret, nd_line(node), setdynamic, + INT2FIX(ls - idx), INT2FIX(lv)); + break; + } + case NODE_GASGN:{ + COMPILE(ret, "lvalue", node->nd_value); + + if (!poped) { + ADD_INSN(ret, nd_line(node), dup); + } + ADD_INSN1(ret, nd_line(node), setglobal, + (((long)node->nd_entry) | 1)); + break; + } + case NODE_IASGN: + case NODE_IASGN2:{ + COMPILE(ret, "lvalue", node->nd_value); + if (!poped) { + ADD_INSN(ret, nd_line(node), dup); + } + ADD_INSN1(ret, nd_line(node), setinstancevariable, + ID2SYM(node->nd_vid)); + break; + } + case NODE_CDECL:{ + COMPILE(ret, "lvalue", node->nd_value); + + if (!poped) { + ADD_INSN(ret, nd_line(node), dup); + } + + if (node->nd_vid) { + ADD_INSN1(ret, nd_line(node), putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CBASE)); + ADD_INSN1(ret, nd_line(node), setconstant, ID2SYM(node->nd_vid)); + } + else { + compile_cpath(ret, iseq, 
node->nd_else); + ADD_INSN1(ret, nd_line(node), setconstant, ID2SYM(node->nd_else->nd_mid)); + } + break; + } + case NODE_CVASGN:{ + COMPILE(ret, "cvasgn val", node->nd_value); + if (!poped) { + ADD_INSN(ret, nd_line(node), dup); + } + ADD_INSN1(ret, nd_line(node), setclassvariable, + ID2SYM(node->nd_vid)); + break; + } + case NODE_OP_ASGN1: { + DECL_ANCHOR(args); + VALUE argc; + unsigned long flag = 0; + ID id = node->nd_mid; + + /* + * a[x] (op)= y + * + * nil # nil + * eval a # nil a + * eval x # nil a x + * dupn 2 # nil a x a x + * send :[] # nil a x a[x] + * eval y # nil a x a[x] y + * send op # nil a x ret + * setn 3 # ret a x ret + * send []= # ret ? + * pop # ret + */ + + /* + * nd_recv[nd_args->nd_body] (nd_mid)= nd_args->nd_head; + * NODE_OP_ASGN nd_recv + * nd_args->nd_head + * nd_args->nd_body + * nd_mid + */ + + if (!poped) { + ADD_INSN(ret, nd_line(node), putnil); + } + COMPILE(ret, "NODE_OP_ASGN1 recv", node->nd_recv); + if (nd_type(node->nd_args->nd_head) != NODE_ZARRAY) { + INIT_ANCHOR(args); + argc = setup_args(iseq, args, node->nd_args->nd_head, &flag); + ADD_SEQ(ret, args); + } + else { + argc = INT2FIX(0); + } + ADD_INSN1(ret, nd_line(node), dupn, INT2FIX(FIX2INT(argc)+1)); + ADD_SEND_R(ret, nd_line(node), ID2SYM(idAREF), argc, Qfalse, LONG2FIX(flag)); + + if (id == 0 || id == 1) { + /* 0: or, 1: and + a[x] ||= y + + unless/if a[x] + a[x]= y + else + nil + end + */ + LABEL *label = NEW_LABEL(nd_line(node)); + LABEL *lfin = NEW_LABEL(nd_line(node)); + + if (id == 0) { + /* or */ + ADD_INSN(ret, nd_line(node), dup); + ADD_INSNL(ret, nd_line(node), branchif, label); + ADD_INSN(ret, nd_line(node), pop); + } + else { + /* and */ + ADD_INSN(ret, nd_line(node), dup); + ADD_INSNL(ret, nd_line(node), branchunless, label); + ADD_INSN(ret, nd_line(node), pop); + } + + COMPILE(ret, "NODE_OP_ASGN1 args->body: ", node->nd_args->nd_body); + if (!poped) { + ADD_INSN1(ret, nd_line(node), setn, INT2FIX(3)); + } + if (flag & VM_CALL_ARGS_SPLAT_BIT) { + 
ADD_INSN1(ret, nd_line(node), newarray, INT2FIX(1)); + ADD_INSN(ret, nd_line(node), concatarray); + ADD_SEND_R(ret, nd_line(node), ID2SYM(idASET), + argc, Qfalse, LONG2FIX(flag)); + } + else { + ADD_SEND_R(ret, nd_line(node), ID2SYM(idASET), + INT2FIX(FIX2INT(argc) + 1), Qfalse, LONG2FIX(flag)); + } + ADD_INSN(ret, nd_line(node), pop); + ADD_INSNL(ret, nd_line(node), jump, lfin); + ADD_LABEL(ret, label); + if (!poped) { + ADD_INSN1(ret, nd_line(node), setn, INT2FIX(FIX2INT(argc) + 2)); + } + ADD_INSN1(ret, nd_line(node), adjuststack, INT2FIX(FIX2INT(argc) + 2)); + ADD_LABEL(ret, lfin); + } + else { + COMPILE(ret, "NODE_OP_ASGN1 args->body: ", node->nd_args->nd_body); + ADD_SEND(ret, nd_line(node), ID2SYM(id), INT2FIX(1)); + if (!poped) { + ADD_INSN1(ret, nd_line(node), setn, INT2FIX(FIX2INT(argc) + 2)); + } + if (flag & VM_CALL_ARGS_SPLAT_BIT) { + ADD_INSN1(ret, nd_line(node), newarray, INT2FIX(1)); + ADD_INSN(ret, nd_line(node), concatarray); + ADD_SEND_R(ret, nd_line(node), ID2SYM(idASET), + argc, Qfalse, LONG2FIX(flag)); + } + else { + ADD_SEND_R(ret, nd_line(node), ID2SYM(idASET), + INT2FIX(FIX2INT(argc) + 1), Qfalse, LONG2FIX(flag)); + } + + ADD_INSN(ret, nd_line(node), pop); + } + + break; + } + case NODE_OP_ASGN2:{ + ID atype = node->nd_next->nd_mid; + LABEL *lfin = NEW_LABEL(nd_line(node)); + LABEL *lcfin = NEW_LABEL(nd_line(node)); + /* + class C; attr_accessor :c; end + r = C.new + r.a &&= v # asgn2 + + eval r # r + dup # r r + eval r.a # r o + + # or + dup # r o o + if lcfin # r o + pop # r + eval v # r v + swap # v r + topn 1 # v r v + send a= # v ? + jump lfin # v ? + + lcfin: # r o + swap # o r + + lfin: # o ? + pop # o + + # and + dup # r o o + unless lcfin + pop # r + eval v # r v + swap # v r + topn 1 # v r v + send a= # v ? + jump lfin # v ? + + # others + eval v # r o v + send ?? 
# r w + send a= # w + + */ + + COMPILE(ret, "NODE_OP_ASGN2#recv", node->nd_recv); + ADD_INSN(ret, nd_line(node), dup); + ADD_SEND(ret, nd_line(node), ID2SYM(node->nd_next->nd_vid), + INT2FIX(0)); + + if (atype == 0 || atype == 1) { /* 0: OR or 1: AND */ + ADD_INSN(ret, nd_line(node), dup); + if (atype == 0) { + ADD_INSNL(ret, nd_line(node), branchif, lcfin); + } + else { + ADD_INSNL(ret, nd_line(node), branchunless, lcfin); + } + ADD_INSN(ret, nd_line(node), pop); + COMPILE(ret, "NODE_OP_ASGN2 val", node->nd_value); + ADD_INSN(ret, nd_line(node), swap); + ADD_INSN1(ret, nd_line(node), topn, INT2FIX(1)); + ADD_SEND(ret, nd_line(node), ID2SYM(node->nd_next->nd_aid), + INT2FIX(1)); + ADD_INSNL(ret, nd_line(node), jump, lfin); + + ADD_LABEL(ret, lcfin); + ADD_INSN(ret, nd_line(node), swap); + + ADD_LABEL(ret, lfin); + ADD_INSN(ret, nd_line(node), pop); + if (poped) { + /* we can apply more optimize */ + ADD_INSN(ret, nd_line(node), pop); + } + } + else { + COMPILE(ret, "NODE_OP_ASGN2 val", node->nd_value); + ADD_SEND(ret, nd_line(node), ID2SYM(node->nd_next->nd_mid), + INT2FIX(1)); + if (!poped) { + ADD_INSN(ret, nd_line(node), swap); + ADD_INSN1(ret, nd_line(node), topn, INT2FIX(1)); + } + ADD_SEND(ret, nd_line(node), ID2SYM(node->nd_next->nd_aid), + INT2FIX(1)); + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_OP_ASGN_AND: + case NODE_OP_ASGN_OR:{ + LABEL *lfin = NEW_LABEL(nd_line(node)); + LABEL *lassign; + + if (nd_type(node) == NODE_OP_ASGN_OR) { + LABEL *lfinish[2]; + lfinish[0] = lfin; + lfinish[1] = 0; + defined_expr(iseq, ret, node->nd_head, lfinish, Qfalse); + lassign = lfinish[1]; + if (!lassign) { + lassign = NEW_LABEL(nd_line(node)); + } + ADD_INSNL(ret, nd_line(node), branchunless, lassign); + } + else { + lassign = NEW_LABEL(nd_line(node)); + } + + COMPILE(ret, "NODE_OP_ASGN_AND/OR#nd_head", node->nd_head); + ADD_INSN(ret, nd_line(node), dup); + + if (nd_type(node) == NODE_OP_ASGN_AND) { + ADD_INSNL(ret, nd_line(node), branchunless, 
lfin); + } + else { + ADD_INSNL(ret, nd_line(node), branchif, lfin); + } + + ADD_INSN(ret, nd_line(node), pop); + ADD_LABEL(ret, lassign); + COMPILE(ret, "NODE_OP_ASGN_AND/OR#nd_value", node->nd_value); + ADD_LABEL(ret, lfin); + + if (poped) { + /* we can apply more optimize */ + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_CALL: + case NODE_FCALL: + case NODE_VCALL:{ /* VCALL: variable or call */ + /* + call: obj.method(...) + fcall: func(...) + vcall: func + */ + DECL_ANCHOR(recv); + DECL_ANCHOR(args); + ID mid = node->nd_mid; + VALUE argc; + unsigned long flag = 0; + VALUE parent_block = iseq->compile_data->current_block; + iseq->compile_data->current_block = Qfalse; + + INIT_ANCHOR(recv); + INIT_ANCHOR(args); +#if SUPPORT_JOKE + if (nd_type(node) == NODE_VCALL) { + if (mid == idBitblt) { + ADD_INSN(ret, nd_line(node), bitblt); + break; + } + else if (mid == idAnswer) { + ADD_INSN(ret, nd_line(node), answer); + break; + } + } + /* only joke */ + { + ID goto_id; + ID label_id; + VALUE label; + VALUE label_sym; + + + CONST_ID(goto_id, "__goto__"); + CONST_ID(label_id, "__label__"); + + if (nd_type(node) == NODE_FCALL && + (mid == goto_id || mid == label_id)) { + if (nd_type(node->nd_args->nd_head) == NODE_LIT && + SYMBOL_P(node->nd_args->nd_head->nd_lit)) { + + label_sym = label = node->nd_args->nd_head->nd_lit; + if ((label = + rb_hash_aref(iseq->compile_data, + label_sym)) == Qnil) { + rb_hash_aset(iseq->compile_data, label_sym, + label = NEW_LABEL(nd_line(node))); + } + } + else { + rb_bug("invalid goto/label format"); + } + + + if (mid == goto_id) { + ADD_INSNL(ret, nd_line(node), jump, label); + } + else { + ADD_LABEL(ret, label); + } + break; + } + } +#endif + /* reciever */ + if (type == NODE_CALL) { + COMPILE(recv, "recv", node->nd_recv); + } + else if (type == NODE_FCALL || type == NODE_VCALL) { + ADD_CALL_RECEIVER(recv, nd_line(node)); + } + + /* args */ + if (nd_type(node) != NODE_VCALL) { + argc = setup_args(iseq, args, 
node->nd_args, &flag); + } + else { + argc = INT2FIX(0); + } + + ADD_SEQ(ret, recv); + ADD_SEQ(ret, args); + + debugp_param("call args argc", argc); + debugp_param("call method", ID2SYM(mid)); + + switch (nd_type(node)) { + case NODE_VCALL: + flag |= VM_CALL_VCALL_BIT; + /* VCALL is funcall, so fall through */ + case NODE_FCALL: + flag |= VM_CALL_FCALL_BIT; + } + + ADD_SEND_R(ret, nd_line(node), ID2SYM(mid), + argc, parent_block, LONG2FIX(flag)); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_SUPER: + case NODE_ZSUPER:{ + DECL_ANCHOR(args); + VALUE argc; + unsigned long flag = 0; + VALUE parent_block = iseq->compile_data->current_block; + + INIT_ANCHOR(args); + iseq->compile_data->current_block = Qfalse; + if (nd_type(node) == NODE_SUPER) { + argc = setup_args(iseq, args, node->nd_args, &flag); + } + else { + /* NODE_ZSUPER */ + int i; + rb_iseq_t *liseq = iseq->local_iseq; + + argc = INT2FIX(liseq->argc); + + /* normal arguments */ + for (i = 0; i < liseq->argc; i++) { + int idx = liseq->local_size - i; + ADD_INSN1(args, nd_line(node), getlocal, INT2FIX(idx)); + } + + if (!liseq->arg_simple) { + if (liseq->arg_opts) { + /* optional arguments */ + int j; + for (j = 0; j < liseq->arg_opts - 1; j++) { + int idx = liseq->local_size - (i + j); + ADD_INSN1(args, nd_line(node), getlocal, INT2FIX(idx)); + } + i += j; + argc = INT2FIX(i); + } + + if (liseq->arg_rest != -1) { + /* rest argument */ + int idx = liseq->local_size - liseq->arg_rest; + ADD_INSN1(args, nd_line(node), getlocal, INT2FIX(idx)); + argc = INT2FIX(liseq->arg_rest + 1); + flag |= VM_CALL_ARGS_SPLAT_BIT; + } + + if (liseq->arg_post_len) { + /* post arguments */ + int post_len = liseq->arg_post_len; + int post_start = liseq->arg_post_start; + + if (liseq->arg_rest != -1) { + int j; + for (j=0; jlocal_size - (post_start + j); + ADD_INSN1(args, nd_line(node), getlocal, INT2FIX(idx)); + } + ADD_INSN1(args, nd_line(node), newarray, INT2FIX(j)); + ADD_INSN (args, 
nd_line(node), concatarray); + /* argc is setteled at above */ + } + else { + int j; + for (j=0; jlocal_size - (post_start + j); + ADD_INSN1(args, nd_line(node), getlocal, INT2FIX(idx)); + } + argc = INT2FIX(post_len + post_start); + } + } + } + } + + /* dummy reciever */ + ADD_INSN1(ret, nd_line(node), putobject, + nd_type(node) == NODE_ZSUPER ? Qfalse : Qtrue); + ADD_SEQ(ret, args); + ADD_INSN3(ret, nd_line(node), invokesuper, + argc, parent_block, LONG2FIX(flag)); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_ARRAY:{ + compile_array_(iseq, ret, node, Qtrue, poped); + break; + } + case NODE_ZARRAY:{ + if (!poped) { + ADD_INSN1(ret, nd_line(node), newarray, INT2FIX(0)); + } + break; + } + case NODE_VALUES:{ + NODE *n = node; + while (n) { + COMPILE(ret, "values item", n->nd_head); + n = n->nd_next; + } + ADD_INSN1(ret, nd_line(node), newarray, INT2FIX(node->nd_alen)); + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_HASH:{ + DECL_ANCHOR(list); + VALUE size = 0; + int type = node->nd_head ? 
nd_type(node->nd_head) : NODE_ZARRAY; + + INIT_ANCHOR(list); + switch (type) { + case NODE_ARRAY:{ + compile_array(iseq, list, node->nd_head, Qfalse); + size = OPERAND_AT(POP_ELEMENT(list), 0); + ADD_SEQ(ret, list); + break; + } + case NODE_ZARRAY: + size = INT2FIX(0); + break; + + default: + rb_bug("can't make hash with this node: %s", ruby_node_name(type)); + } + + ADD_INSN1(ret, nd_line(node), newhash, size); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_RETURN:{ + rb_iseq_t *is = iseq; + + while (is) { + if (is->type == ISEQ_TYPE_TOP || is->type == ISEQ_TYPE_CLASS) { + COMPILE_ERROR((ERROR_ARGS "Invalid return")); + break; + } + else { + LABEL *splabel = 0; + + if (is->type == ISEQ_TYPE_METHOD) { + splabel = NEW_LABEL(0); + ADD_LABEL(ret, splabel); + ADD_ADJUST(ret, nd_line(node), 0); + } + + COMPILE(ret, "return nd_stts (return val)", node->nd_stts); + + if (is->type == ISEQ_TYPE_METHOD) { + add_ensure_iseq(ret, iseq, 1); + ADD_TRACE(ret, nd_line(node), RUBY_EVENT_RETURN); + ADD_INSN(ret, nd_line(node), leave); + ADD_ADJUST_RESTORE(ret, splabel); + + if (!poped) { + ADD_INSN(ret, nd_line(node), putnil); + } + } + else { + ADD_INSN1(ret, nd_line(node), throw, INT2FIX(0x01) /* TAG_RETURN */ ); + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + } + break; + } + } + break; + } + case NODE_YIELD:{ + DECL_ANCHOR(args); + VALUE argc; + unsigned long flag = 0; + + INIT_ANCHOR(args); + if (iseq->type == ISEQ_TYPE_TOP || iseq->type == ISEQ_TYPE_CLASS) { + COMPILE_ERROR((ERROR_ARGS "Invalid yield")); + } + + if (node->nd_head) { + argc = setup_args(iseq, args, node->nd_head, &flag); + } + else { + argc = INT2FIX(0); + } + + ADD_SEQ(ret, args); + ADD_INSN2(ret, nd_line(node), invokeblock, argc, LONG2FIX(flag)); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_LVAR:{ + if (!poped) { + ID id = node->nd_vid; + int idx = iseq->local_iseq->local_size - get_local_var_idx(iseq, id); + + debugs("id: %s 
idx: %d\n", rb_id2name(id), idx); + ADD_INSN1(ret, nd_line(node), getlocal, INT2FIX(idx)); + } + break; + } + case NODE_DVAR:{ + int lv, idx, ls; + debugi("nd_vid", node->nd_vid); + if (!poped) { + idx = get_dyna_var_idx(iseq, node->nd_vid, &lv, &ls); + if (idx < 0) { + rb_bug("unknown dvar (%s)", rb_id2name(node->nd_vid)); + } + ADD_INSN2(ret, nd_line(node), getdynamic, INT2FIX(ls - idx), INT2FIX(lv)); + } + break; + } + case NODE_GVAR:{ + ADD_INSN1(ret, nd_line(node), getglobal, + (((long)node->nd_entry) | 1)); + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_IVAR:{ + debugi("nd_vid", node->nd_vid); + if (!poped) { + ADD_INSN1(ret, nd_line(node), getinstancevariable, + ID2SYM(node->nd_vid)); + } + break; + } + case NODE_CONST:{ + debugi("nd_vid", node->nd_vid); + + if (iseq->compile_data->option->inline_const_cache) { + LABEL *lstart = NEW_LABEL(nd_line(node)); + LABEL *lend = NEW_LABEL(nd_line(node)); + + ADD_LABEL(ret, lstart); + ADD_INSN2(ret, nd_line(node), getinlinecache, 0, lend); + ADD_INSN1(ret, nd_line(node), getconstant, ID2SYM(node->nd_vid)); + ADD_INSN1(ret, nd_line(node), setinlinecache, lstart); + ADD_LABEL(ret, lend); + } + else { + ADD_INSN(ret, nd_line(node), putnil); + ADD_INSN1(ret, nd_line(node), getconstant, ID2SYM(node->nd_vid)); + } + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_CVAR:{ + if (!poped) { + ADD_INSN1(ret, nd_line(node), getclassvariable, + ID2SYM(node->nd_vid)); + } + break; + } + case NODE_NTH_REF:{ + if (!poped) { + ADD_INSN2(ret, nd_line(node), getspecial, INT2FIX(1) /* '~' */, + INT2FIX(node->nd_nth << 1)); + } + break; + } + case NODE_BACK_REF:{ + if (!poped) { + ADD_INSN2(ret, nd_line(node), getspecial, INT2FIX(1) /* '~' */, + INT2FIX(0x01 | (node->nd_nth << 1))); + } + break; + } + case NODE_MATCH: + case NODE_MATCH2: + case NODE_MATCH3:{ + DECL_ANCHOR(recv); + DECL_ANCHOR(val); + + INIT_ANCHOR(recv); + INIT_ANCHOR(val); + switch(nd_type(node)) { + case 
NODE_MATCH: + ADD_INSN1(recv, nd_line(node), putobject, node->nd_lit); + ADD_INSN2(val, nd_line(node), getspecial, INT2FIX(0), + INT2FIX(0)); + break; + case NODE_MATCH2: + COMPILE(recv, "reciever", node->nd_recv); + COMPILE(val, "value", node->nd_value); + break; + case NODE_MATCH3: + COMPILE(recv, "reciever", node->nd_value); + COMPILE(val, "value", node->nd_recv); + break; + } + + if (iseq->compile_data->option->specialized_instruction) { + /* TODO: detect by node */ + if (recv->last == recv->anchor.next && + INSN_OF(recv->last) == BIN(putobject) && + nd_type(node) == NODE_MATCH2) { + ADD_SEQ(ret, val); + ADD_INSN1(ret, nd_line(node), opt_regexpmatch1, + OPERAND_AT(recv->last, 0)); + } + else { + ADD_SEQ(ret, recv); + ADD_SEQ(ret, val); + ADD_INSN(ret, nd_line(node), opt_regexpmatch2); + } + } + else { + ADD_SEQ(ret, recv); + ADD_SEQ(ret, val); + ADD_SEND(ret, nd_line(node), ID2SYM(idEqTilde), INT2FIX(1)); + } + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_LIT:{ + debugp_param("lit", node->nd_lit); + if (!poped) { + ADD_INSN1(ret, nd_line(node), putobject, node->nd_lit); + } + break; + } + case NODE_STR:{ + debugp_param("nd_lit", node->nd_lit); + if (!poped) { + hide_obj(node->nd_lit); + ADD_INSN1(ret, nd_line(node), putstring, node->nd_lit); + } + break; + } + case NODE_DSTR:{ + compile_dstr(iseq, ret, node); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_XSTR:{ + ADD_CALL_RECEIVER(ret, nd_line(node)); + ADD_INSN1(ret, nd_line(node), putobject, node->nd_lit); + ADD_CALL(ret, nd_line(node), ID2SYM(idBackquote), INT2FIX(1)); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_DXSTR:{ + ADD_CALL_RECEIVER(ret, nd_line(node)); + compile_dstr(iseq, ret, node); + ADD_CALL(ret, nd_line(node), ID2SYM(idBackquote), INT2FIX(1)); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_EVSTR:{ + COMPILE(ret, "nd_body", node->nd_body); + + if 
(poped) { + ADD_INSN(ret, nd_line(node), pop); + } + else { + ADD_INSN(ret, nd_line(node), tostring); + } + break; + } + case NODE_DREGX:{ + compile_dregx(iseq, ret, node); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_DREGX_ONCE:{ + /* TODO: once? */ + LABEL *lstart = NEW_LABEL(nd_line(node)); + LABEL *lend = NEW_LABEL(nd_line(node)); + + ADD_LABEL(ret, lstart); + ADD_INSN2(ret, nd_line(node), onceinlinecache, 0, lend); + ADD_INSN(ret, nd_line(node), pop); + + compile_dregx(iseq, ret, node); + + ADD_INSN1(ret, nd_line(node), setinlinecache, lstart); + ADD_LABEL(ret, lend); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_ARGSCAT:{ + COMPILE(ret, "argscat head", node->nd_head); + COMPILE(ret, "argscat body", node->nd_body); + ADD_INSN(ret, nd_line(node), concatarray); + break; + } + case NODE_ARGSPUSH:{ + COMPILE(ret, "arsgpush head", node->nd_head); + COMPILE(ret, "argspush body", node->nd_body); + ADD_INSN1(ret, nd_line(node), newarray, INT2FIX(1)); + ADD_INSN(ret, nd_line(node), concatarray); + break; + } + case NODE_SPLAT:{ + COMPILE(ret, "splat", node->nd_head); + ADD_INSN1(ret, nd_line(node), splatarray, Qfalse); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_DEFN:{ + VALUE iseqval = NEW_ISEQVAL(node->nd_defn, + rb_str_dup(rb_id2str(node->nd_mid)), + ISEQ_TYPE_METHOD, nd_line(node)); + + debugp_param("defn/iseq", iseqval); + + ADD_INSN1(ret, nd_line(node), putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); + ADD_INSN1(ret, nd_line(node), putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CBASE)); + ADD_INSN1(ret, nd_line(node), putobject, ID2SYM(node->nd_mid)); + ADD_INSN1(ret, nd_line(node), putiseq, iseqval); + ADD_SEND (ret, nd_line(node), ID2SYM(id_core_define_method), INT2FIX(3)); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + + debugp_param("defn", iseqval); + break; + } + case NODE_DEFS:{ + VALUE iseqval = NEW_ISEQVAL(node->nd_defn, + 
rb_str_dup(rb_id2str(node->nd_mid)), + ISEQ_TYPE_METHOD, nd_line(node)); + + debugp_param("defs/iseq", iseqval); + + ADD_INSN1(ret, nd_line(node), putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); + COMPILE(ret, "defs: recv", node->nd_recv); + ADD_INSN1(ret, nd_line(node), putobject, ID2SYM(node->nd_mid)); + ADD_INSN1(ret, nd_line(node), putiseq, iseqval); + ADD_SEND (ret, nd_line(node), ID2SYM(id_core_define_singleton_method), INT2FIX(3)); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_ALIAS:{ + ADD_INSN1(ret, nd_line(node), putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); + ADD_INSN1(ret, nd_line(node), putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CBASE)); + COMPILE(ret, "alias arg1", node->u1.node); + COMPILE(ret, "alias arg2", node->u2.node); + ADD_SEND(ret, nd_line(node), ID2SYM(id_core_set_method_alias), INT2FIX(3)); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_VALIAS:{ + ADD_INSN1(ret, nd_line(node), putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); + ADD_INSN1(ret, nd_line(node), putobject, ID2SYM(node->u1.id)); + ADD_INSN1(ret, nd_line(node), putobject, ID2SYM(node->u2.id)); + ADD_SEND(ret, nd_line(node), ID2SYM(id_core_set_variable_alias), INT2FIX(2)); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_UNDEF:{ + ADD_INSN1(ret, nd_line(node), putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); + ADD_INSN1(ret, nd_line(node), putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CBASE)); + COMPILE(ret, "undef arg", node->u2.node); + ADD_SEND(ret, nd_line(node), ID2SYM(id_core_undef_method), INT2FIX(2)); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_CLASS:{ + VALUE iseqval = + NEW_CHILD_ISEQVAL( + node->nd_body, + rb_sprintf("", rb_id2name(node->nd_cpath->nd_mid)), + ISEQ_TYPE_CLASS, nd_line(node)); + compile_cpath(ret, iseq, node->nd_cpath); + COMPILE(ret, "super", node->nd_super); + ADD_INSN3(ret, nd_line(node), 
defineclass, + ID2SYM(node->nd_cpath->nd_mid), iseqval, INT2FIX(0)); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_MODULE:{ + VALUE iseqval = NEW_CHILD_ISEQVAL( + node->nd_body, + rb_sprintf("", rb_id2name(node->nd_cpath->nd_mid)), + ISEQ_TYPE_CLASS, nd_line(node)); + + compile_cpath(ret, iseq, node->nd_cpath); + ADD_INSN (ret, nd_line(node), putnil); /* dummy */ + ADD_INSN3(ret, nd_line(node), defineclass, + ID2SYM(node->nd_cpath->nd_mid), iseqval, INT2FIX(2)); + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_SCLASS:{ + ID singletonclass; + VALUE iseqval = + NEW_ISEQVAL(node->nd_body, rb_str_new2("singletonclass"), + ISEQ_TYPE_CLASS, nd_line(node)); + + COMPILE(ret, "sclass#recv", node->nd_recv); + ADD_INSN (ret, nd_line(node), putnil); + CONST_ID(singletonclass, "singletonclass"); + ADD_INSN3(ret, nd_line(node), defineclass, + ID2SYM(singletonclass), iseqval, INT2FIX(1)); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_COLON2:{ + if (rb_is_const_id(node->nd_mid)) { + /* constant */ + LABEL *lstart = NEW_LABEL(nd_line(node)); + LABEL *lend = NEW_LABEL(nd_line(node)); + DECL_ANCHOR(pref); + DECL_ANCHOR(body); + + INIT_ANCHOR(pref); + INIT_ANCHOR(body); + compile_colon2(iseq, node, pref, body); + if (LIST_SIZE_ZERO(pref)) { + if (iseq->compile_data->option->inline_const_cache) { + ADD_LABEL(ret, lstart); + ADD_INSN2(ret, nd_line(node), getinlinecache, 0, lend); + } + else { + ADD_INSN(ret, nd_line(node), putnil); + } + + ADD_SEQ(ret, body); + + if (iseq->compile_data->option->inline_const_cache) { + ADD_INSN1(ret, nd_line(node), setinlinecache, lstart); + ADD_LABEL(ret, lend); + } + } + else { + ADD_SEQ(ret, pref); + ADD_SEQ(ret, body); + } + } + else { + /* function call */ + ADD_CALL_RECEIVER(ret, nd_line(node)); + COMPILE(ret, "colon2#nd_head", node->nd_head); + ADD_CALL(ret, nd_line(node), ID2SYM(node->nd_mid), + INT2FIX(1)); + } + if (poped) { + ADD_INSN(ret, 
nd_line(node), pop); + } + break; + } + case NODE_COLON3:{ + LABEL *lstart = NEW_LABEL(nd_line(node)); + LABEL *lend = NEW_LABEL(nd_line(node)); + debugi("colon3#nd_mid", node->nd_mid); + + /* add cache insn */ + if (iseq->compile_data->option->inline_const_cache) { + ADD_LABEL(ret, lstart); + ADD_INSN2(ret, nd_line(node), getinlinecache, 0, lend); + ADD_INSN(ret, nd_line(node), pop); + } + + ADD_INSN1(ret, nd_line(node), putobject, rb_cObject); + ADD_INSN1(ret, nd_line(node), getconstant, ID2SYM(node->nd_mid)); + + if (iseq->compile_data->option->inline_const_cache) { + ADD_INSN1(ret, nd_line(node), setinlinecache, lstart); + ADD_LABEL(ret, lend); + } + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_DOT2: + case NODE_DOT3:{ + int flag = type == NODE_DOT2 ? INT2FIX(0) : INT2FIX(1); + COMPILE(ret, "min", (NODE *) node->nd_beg); + COMPILE(ret, "max", (NODE *) node->nd_end); + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + ADD_INSN(ret, nd_line(node), pop); + } + else { + ADD_INSN1(ret, nd_line(node), newrange, flag); + } + break; + } + case NODE_FLIP2: + case NODE_FLIP3:{ + LABEL *lend = NEW_LABEL(nd_line(node)); + LABEL *lfin = NEW_LABEL(nd_line(node)); + LABEL *ltrue = NEW_LABEL(nd_line(node)); + VALUE key = rb_sprintf("flipflag/%s-%p-%d", + RSTRING_PTR(iseq->name), (void *)iseq, + iseq->compile_data->flip_cnt++); + + iseq_add_mark_object_compile_time(iseq, key); + ADD_INSN2(ret, nd_line(node), getspecial, key, INT2FIX(0)); + ADD_INSNL(ret, nd_line(node), branchif, lend); + + /* *flip == 0 */ + COMPILE(ret, "flip2 beg", node->nd_beg); + ADD_INSN(ret, nd_line(node), dup); + ADD_INSNL(ret, nd_line(node), branchunless, lfin); + if (nd_type(node) == NODE_FLIP3) { + ADD_INSN(ret, nd_line(node), dup); + ADD_INSN1(ret, nd_line(node), setspecial, key); + ADD_INSNL(ret, nd_line(node), jump, lfin); + } + else { + ADD_INSN1(ret, nd_line(node), setspecial, key); + } + + /* *flip == 1 */ + ADD_LABEL(ret, lend); + COMPILE(ret, "flip2 end", 
node->nd_end); + ADD_INSNL(ret, nd_line(node), branchunless, ltrue); + ADD_INSN1(ret, nd_line(node), putobject, Qfalse); + ADD_INSN1(ret, nd_line(node), setspecial, key); + + ADD_LABEL(ret, ltrue); + ADD_INSN1(ret, nd_line(node), putobject, Qtrue); + + ADD_LABEL(ret, lfin); + break; + } + case NODE_SELF:{ + if (!poped) { + ADD_INSN(ret, nd_line(node), putself); + } + break; + } + case NODE_NIL:{ + if (!poped) { + ADD_INSN(ret, nd_line(node), putnil); + } + break; + } + case NODE_TRUE:{ + if (!poped) { + ADD_INSN1(ret, nd_line(node), putobject, Qtrue); + } + break; + } + case NODE_FALSE:{ + if (!poped) { + ADD_INSN1(ret, nd_line(node), putobject, Qfalse); + } + break; + } + case NODE_ERRINFO:{ + if (!poped) { + if (iseq->type == ISEQ_TYPE_RESCUE) { + ADD_INSN2(ret, nd_line(node), getdynamic, INT2FIX(2), INT2FIX(0)); + } + else { + rb_iseq_t *ip = iseq; + int level = 0; + while (ip) { + if (ip->type == ISEQ_TYPE_RESCUE) { + break; + } + ip = ip->parent_iseq; + level++; + } + if (ip) { + ADD_INSN2(ret, nd_line(node), getdynamic, INT2FIX(2), INT2FIX(level)); + } + else { + ADD_INSN(ret, nd_line(node), putnil); + } + } + } + break; + } + case NODE_DEFINED:{ + if (!poped) { + LABEL *lfinish[2]; + lfinish[0] = NEW_LABEL(nd_line(node)); + lfinish[1] = 0; + defined_expr(iseq, ret, node->nd_head, lfinish, Qtrue); + if (lfinish[1]) { + ADD_INSNL(ret, nd_line(node), jump, lfinish[0]); + ADD_LABEL(ret, lfinish[1]); + ADD_INSN(ret, nd_line(node), putnil); + } + ADD_LABEL(ret, lfinish[0]); + } + break; + } + case NODE_POSTEXE:{ + LABEL *lstart = NEW_LABEL(nd_line(node)); + LABEL *lend = NEW_LABEL(nd_line(node)); + VALUE block = NEW_CHILD_ISEQVAL(node->nd_body, make_name_for_block(iseq), ISEQ_TYPE_BLOCK, nd_line(node)); + + ADD_LABEL(ret, lstart); + ADD_INSN2(ret, nd_line(node), onceinlinecache, 0, lend); + ADD_INSN(ret, nd_line(node), pop); + + ADD_INSN1(ret, nd_line(node), putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); + ADD_INSN1(ret, nd_line(node), putiseq, block); + 
ADD_SEND (ret, nd_line(node), ID2SYM(id_core_set_postexe), INT2FIX(1)); + + ADD_INSN1(ret, nd_line(node), setinlinecache, lstart); + ADD_LABEL(ret, lend); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_DSYM:{ + compile_dstr(iseq, ret, node); + if (!poped) { + ADD_SEND(ret, nd_line(node), ID2SYM(idIntern), INT2FIX(0)); + } + else { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + case NODE_ATTRASGN:{ + DECL_ANCHOR(recv); + DECL_ANCHOR(args); + unsigned long flag = 0; + VALUE argc; + + INIT_ANCHOR(recv); + INIT_ANCHOR(args); + argc = setup_args(iseq, args, node->nd_args, &flag); + + if (node->nd_recv == (NODE *) 1) { + flag |= VM_CALL_FCALL_BIT; + ADD_INSN(recv, nd_line(node), putself); + } + else { + COMPILE(recv, "recv", node->nd_recv); + } + + debugp_param("argc", argc); + debugp_param("nd_mid", ID2SYM(node->nd_mid)); + + if (!poped) { + ADD_INSN(ret, nd_line(node), putnil); + ADD_SEQ(ret, recv); + ADD_SEQ(ret, args); + + if (flag & VM_CALL_ARGS_BLOCKARG_BIT) { + ADD_INSN1(ret, nd_line(node), topn, INT2FIX(1)); + ADD_INSN1(ret, nd_line(node), setn, INT2FIX(FIX2INT(argc) + 3)); + ADD_INSN (ret, nd_line(node), pop); + } + else { + ADD_INSN1(ret, nd_line(node), setn, INT2FIX(FIX2INT(argc) + 1)); + } + } + else { + ADD_SEQ(ret, recv); + ADD_SEQ(ret, args); + } + ADD_SEND_R(ret, nd_line(node), ID2SYM(node->nd_mid), argc, 0, LONG2FIX(flag)); + ADD_INSN(ret, nd_line(node), pop); + + break; + } + case NODE_OPTBLOCK:{ + /* for optimize */ + LABEL *redo_label = NEW_LABEL(0); + LABEL *next_label = NEW_LABEL(0); + + iseq->compile_data->start_label = next_label; + iseq->compile_data->redo_label = redo_label; + + ADD_LABEL(ret, redo_label); + COMPILE_(ret, "optblock body", node->nd_head, 1 /* pop */ ); + ADD_LABEL(ret, next_label); + ADD_INSN(ret, 0, opt_checkenv); + break; + } + case NODE_PRELUDE:{ + COMPILE_POPED(ret, "prelude", node->nd_head); + COMPILE_(ret, "body", node->nd_body, poped); + break; + } + case NODE_LAMBDA:{ + /* 
compile same as lambda{...} */ + VALUE block = NEW_CHILD_ISEQVAL(node->nd_body, make_name_for_block(iseq), ISEQ_TYPE_BLOCK, nd_line(node)); + VALUE argc = INT2FIX(0); + ADD_CALL_RECEIVER(ret, nd_line(node)); + ADD_CALL_WITH_BLOCK(ret, nd_line(node), ID2SYM(idLambda), argc, block); + + if (poped) { + ADD_INSN(ret, nd_line(node), pop); + } + break; + } + default: + rb_bug("iseq_compile_each: unknown node: %s", ruby_node_name(type)); + return Qnil; + } + + debug_node_end(); + return COMPILE_OK; +} + +/***************************/ +/* instruction information */ +/***************************/ + +static int +insn_data_length(INSN *iobj) +{ + return insn_len(iobj->insn_id); +} + +static int +calc_sp_depth(int depth, INSN *insn) +{ + return insn_stack_increase(depth, insn->insn_id, insn->operands); +} + +static int +insn_data_line_no(INSN *iobj) +{ + return insn_len(iobj->line_no); +} + +static VALUE +insn_data_to_s_detail(INSN *iobj) +{ + VALUE str = rb_str_new(0, 0); + + str = rb_sprintf("%-16s", insn_name(iobj->insn_id)); + if (iobj->operands) { + const char *types = insn_op_types(iobj->insn_id); + int j; + + for (j = 0; types[j]; j++) { + char type = types[j]; + + switch (type) { + case TS_OFFSET: /* label(destination position) */ + { + LABEL *lobj = (LABEL *)OPERAND_AT(iobj, j); + rb_str_catf(str, "", lobj->label_no); + break; + } + break; + case TS_ISEQ: /* iseq */ + { + rb_iseq_t *iseq = (rb_iseq_t *)OPERAND_AT(iobj, j); + VALUE val = Qnil; + if (iseq) { + val = iseq->self; + } + rb_str_concat(str, rb_inspect(val)); + } + break; + case TS_LINDEX: + case TS_DINDEX: + case TS_NUM: /* ulong */ + case TS_VALUE: /* VALUE */ + rb_str_concat(str, rb_inspect(OPERAND_AT(iobj, j))); + break; + case TS_ID: /* ID */ + rb_str_concat(str, rb_inspect(OPERAND_AT(iobj, j))); + break; + case TS_GENTRY: + { + struct global_entry *entry = (struct global_entry *) + (OPERAND_AT(iobj, j) & (~1)); + rb_str_cat2(str, rb_id2name(entry->id)); + } + case TS_IC: /* method cache */ + 
rb_str_cat2(str, ""); + break; + case TS_CDHASH: /* case/when condition cache */ + rb_str_cat2(str, ""); + break; + default:{ + rb_raise(rb_eSyntaxError, "unknown operand type: %c", type); + } + } + if (types[j + 1]) { + rb_str_cat2(str, ", "); + } + } + } + return str; +} + +static void +dump_disasm_list(struct iseq_link_element *link) +{ + int pos = 0; + INSN *iobj; + LABEL *lobj; + VALUE str; + + printf("-- raw disasm--------\n"); + + while (link) { + switch (link->type) { + case ISEQ_ELEMENT_INSN: + { + iobj = (INSN *)link; + str = insn_data_to_s_detail(iobj); + printf("%04d %-65s(%4d)\n", pos, StringValueCStr(str), + insn_data_line_no(iobj)); + pos += insn_data_length(iobj); + break; + } + case ISEQ_ELEMENT_LABEL: + { + lobj = (LABEL *)link; + printf("\n", lobj->label_no); + break; + } + case ISEQ_ELEMENT_NONE: + { + printf("[none]\n"); + break; + } + case ISEQ_ELEMENT_ADJUST: + { + ADJUST *adjust = (ADJUST *)link; + printf("adjust: [label: %d]\n", adjust->label->label_no); + break; + } + default: + /* ignore */ + rb_raise(rb_eSyntaxError, "dump_disasm_list error: %ld\n", FIX2LONG(link->type)); + } + link = link->next; + } + printf("---------------------\n"); +} + +VALUE +rb_insns_name_array(void) +{ + VALUE ary = rb_ary_new(); + int i; + for (i = 0; i < sizeof(insn_name_info) / sizeof(insn_name_info[0]); i++) { + rb_ary_push(ary, rb_obj_freeze(rb_str_new2(insn_name_info[i]))); + } + return rb_obj_freeze(ary); +} + +static LABEL * +register_label(rb_iseq_t *iseq, struct st_table *labels_table, VALUE obj) +{ + LABEL *label = 0; + st_data_t tmp; + obj = rb_convert_type(obj, T_SYMBOL, "Symbol", "to_sym"); + + if (st_lookup(labels_table, obj, &tmp) == 0) { + label = NEW_LABEL(0); + st_insert(labels_table, obj, (st_data_t)label); + } + else { + label = (LABEL *)tmp; + } + return label; +} + +static VALUE +get_exception_sym2type(VALUE sym) +{ +#undef rb_intern +#define rb_intern(str) rb_intern_const(str) + static VALUE symRescue, symEnsure, symRetry; + static VALUE 
symBreak, symRedo, symNext; + + if (symRescue == 0) { + symRescue = ID2SYM(rb_intern("rescue")); + symEnsure = ID2SYM(rb_intern("ensure")); + symRetry = ID2SYM(rb_intern("retry")); + symBreak = ID2SYM(rb_intern("break")); + symRedo = ID2SYM(rb_intern("redo")); + symNext = ID2SYM(rb_intern("next")); + } + + if (sym == symRescue) return CATCH_TYPE_RESCUE; + if (sym == symEnsure) return CATCH_TYPE_ENSURE; + if (sym == symRetry) return CATCH_TYPE_RETRY; + if (sym == symBreak) return CATCH_TYPE_BREAK; + if (sym == symRedo) return CATCH_TYPE_REDO; + if (sym == symNext) return CATCH_TYPE_NEXT; + rb_raise(rb_eSyntaxError, "invalid exception symbol: %s", + RSTRING_PTR(rb_inspect(sym))); + return 0; +} + +static int +iseq_build_exception(rb_iseq_t *iseq, struct st_table *labels_table, + VALUE exception) +{ + int i; + + for (i=0; iself, Qnil); + } + + lstart = register_label(iseq, labels_table, ptr[2]); + lend = register_label(iseq, labels_table, ptr[3]); + lcont = register_label(iseq, labels_table, ptr[4]); + sp = NUM2INT(ptr[5]); + + ADD_CATCH_ENTRY(type, lstart, lend, eiseqval, lcont); + } + return COMPILE_OK; +} + +static struct st_table * +insn_make_insn_table(void) +{ + struct st_table *table; + int i; + table = st_init_numtable(); + + for (i=0; i LABEL *label + */ + static struct st_table *insn_table; + + if (insn_table == 0) { + insn_table = insn_make_insn_table(); + } + + for (i=0; ifilename), line_no, + "unknown instruction: %s", RSTRING_PTR(insn)); + } + + if (argc != insn_len(insn_id)-1) { + rb_compile_error(RSTRING_PTR(iseq->filename), line_no, + "operand size mismatch"); + } + + if (argc > 0) { + argv = compile_data_alloc(iseq, sizeof(VALUE) * argc); + for (j=0; jself, Qnil); + } + else if (CLASS_OF(op) == rb_cISeq) { + argv[j] = op; + } + else { + rb_raise(rb_eSyntaxError, "ISEQ is required"); + } + iseq_add_mark_object(iseq, argv[j]); + } + else { + argv[j] = 0; + } + } + break; + case TS_GENTRY: + op = rb_convert_type(op, T_SYMBOL, "Symbol", "to_sym"); + 
argv[j] = (VALUE)rb_global_entry(SYM2ID(op)); + break; + case TS_IC: + argv[j] = (VALUE)NEW_INLINE_CACHE_ENTRY(); + iseq_add_mark_object(iseq, argv[j]); + break; + case TS_ID: + argv[j] = rb_convert_type(op, T_SYMBOL, + "Symbol", "to_sym"); + break; + case TS_CDHASH: + { + int i; + op = rb_convert_type(op, T_ARRAY, "Array", "to_ary"); + op = rb_ary_dup(op); + for (i=0; ilocal_table_size = RARRAY_LEN(locals); + iseq->local_table = tbl = (ID *)ALLOC_N(ID *, iseq->local_table_size); + iseq->local_size = iseq->local_table_size + 1; + + for (i=0; iarg_size = iseq->argc = FIX2INT(args); + iseq->arg_simple = 1; + } + else { + int i = 0; + VALUE argc = CHECK_INTEGER(rb_ary_entry(args, i++)); + VALUE arg_opt_labels = CHECK_ARRAY(rb_ary_entry(args, i++)); + VALUE arg_post_len = CHECK_INTEGER(rb_ary_entry(args, i++)); + VALUE arg_post_start = CHECK_INTEGER(rb_ary_entry(args, i++)); + VALUE arg_rest = CHECK_INTEGER(rb_ary_entry(args, i++)); + VALUE arg_block = CHECK_INTEGER(rb_ary_entry(args, i++)); + VALUE arg_simple = CHECK_INTEGER(rb_ary_entry(args, i++)); + + iseq->argc = FIX2INT(argc); + iseq->arg_rest = FIX2INT(arg_rest); + iseq->arg_post_len = FIX2INT(arg_post_len); + iseq->arg_post_start = FIX2INT(arg_post_start); + iseq->arg_block = FIX2INT(arg_block); + iseq->arg_opts = RARRAY_LEN(arg_opt_labels); + iseq->arg_opt_table = (VALUE *)ALLOC_N(VALUE, iseq->arg_opts); + + if (iseq->arg_block != -1) { + iseq->arg_size = iseq->arg_block + 1; + } + else if (iseq->arg_post_len) { + iseq->arg_size = iseq->arg_post_start + iseq->arg_post_len; + } + else if (iseq->arg_rest != -1) { + iseq->arg_size = iseq->arg_rest + 1; + } + else { + iseq->arg_size = iseq->argc + (iseq->arg_opts ? 
iseq->arg_opts - 1 : 0); + } + + for (i=0; iarg_opt_table[i] = + (VALUE)register_label(iseq, labels_table, + rb_ary_entry(arg_opt_labels, i)); + } + + iseq->arg_simple = NUM2INT(arg_simple); + } + + /* exception */ + iseq_build_exception(iseq, labels_table, exception); + + /* body */ + iseq_build_body(iseq, anchor, body, labels_table); + return iseq->self; +} + +/* for parser */ + +int +rb_dvar_defined(ID id) +{ + rb_thread_t *th = GET_THREAD(); + rb_iseq_t *iseq; + if (th->base_block && (iseq = th->base_block->iseq)) { + while (iseq->type == ISEQ_TYPE_BLOCK || + iseq->type == ISEQ_TYPE_RESCUE || + iseq->type == ISEQ_TYPE_ENSURE || + iseq->type == ISEQ_TYPE_EVAL || + iseq->type == ISEQ_TYPE_MAIN + ) { + int i; + + for (i = 0; i < iseq->local_table_size; i++) { + if (iseq->local_table[i] == id) { + return 1; + } + } + iseq = iseq->parent_iseq; + } + } + return 0; +} + +int +rb_local_defined(ID id) +{ + rb_thread_t *th = GET_THREAD(); + rb_iseq_t *iseq; + + if (th->base_block && th->base_block->iseq) { + int i; + iseq = th->base_block->iseq->local_iseq; + + for (i=0; ilocal_table_size; i++) { + if (iseq->local_table[i] == id) { + return 1; + } + } + } + return 0; +} + +int +rb_parse_in_eval(void) +{ + return GET_THREAD()->parse_in_eval > 0; +} + +int +rb_parse_in_main(void) +{ + return GET_THREAD()->parse_in_eval < 0; +} diff --git a/complex.c b/complex.c new file mode 100644 index 0000000..092289d --- /dev/null +++ b/complex.c @@ -0,0 +1,1584 @@ +/* + complex.c: Coded by Tadayoshi Funaba 2008 + + This implementation is based on Keiju Ishitsuka's Complex library + which is written in ruby. 
+*/ + +#include "ruby.h" +#include + +#define NDEBUG +#include + +#define ZERO INT2FIX(0) +#define ONE INT2FIX(1) +#define TWO INT2FIX(2) + +VALUE rb_cComplex; + +static ID id_abs, id_abs2, id_arg, id_cmp, id_conj, id_convert, + id_denominator, id_divmod, id_equal_p, id_expt, id_floor, id_hash, + id_idiv, id_inspect, id_negate, id_numerator, id_polar, id_quo, + id_real_p, id_to_f, id_to_i, id_to_r, id_to_s; + +#define f_boolcast(x) ((x) ? Qtrue : Qfalse) + +#define binop(n,op) \ +inline static VALUE \ +f_##n(VALUE x, VALUE y)\ +{\ + return rb_funcall(x, op, 1, y);\ +} + +#define fun1(n) \ +inline static VALUE \ +f_##n(VALUE x)\ +{\ + return rb_funcall(x, id_##n, 0);\ +} + +#define fun2(n) \ +inline static VALUE \ +f_##n(VALUE x, VALUE y)\ +{\ + return rb_funcall(x, id_##n, 1, y);\ +} + +#define math1(n) \ +inline static VALUE \ +m_##n(VALUE x)\ +{\ + return rb_funcall(rb_mMath, id_##n, 1, x);\ +} + +#define math2(n) \ +inline static VALUE \ +m_##n(VALUE x, VALUE y)\ +{\ + return rb_funcall(rb_mMath, id_##n, 2, x, y);\ +} + +#define PRESERVE_SIGNEDZERO + +inline static VALUE +f_add(VALUE x, VALUE y) +{ +#ifndef PRESERVE_SIGNEDZERO + if (FIXNUM_P(y) && FIX2LONG(y) == 0) + return x; + else if (FIXNUM_P(x) && FIX2LONG(x) == 0) + return y; +#endif + return rb_funcall(x, '+', 1, y); +} + +inline static VALUE +f_cmp(VALUE x, VALUE y) +{ + if (FIXNUM_P(x) && FIXNUM_P(y)) { + long c = FIX2LONG(x) - FIX2LONG(y); + if (c > 0) + c = 1; + else if (c < 0) + c = -1; + return INT2FIX(c); + } + return rb_funcall(x, id_cmp, 1, y); +} + +inline static VALUE +f_div(VALUE x, VALUE y) +{ + if (FIXNUM_P(y) && FIX2LONG(y) == 1) + return x; + return rb_funcall(x, '/', 1, y); +} + +inline static VALUE +f_gt_p(VALUE x, VALUE y) +{ + if (FIXNUM_P(x) && FIXNUM_P(y)) + return f_boolcast(FIX2LONG(x) > FIX2LONG(y)); + return rb_funcall(x, '>', 1, y); +} + +inline static VALUE +f_lt_p(VALUE x, VALUE y) +{ + if (FIXNUM_P(x) && FIXNUM_P(y)) + return f_boolcast(FIX2LONG(x) < FIX2LONG(y)); + return 
rb_funcall(x, '<', 1, y); +} + +binop(mod, '%') + +inline static VALUE +f_mul(VALUE x, VALUE y) +{ +#ifndef PRESERVE_SIGNEDZERO + if (FIXNUM_P(y)) { + long iy = FIX2LONG(y); + if (iy == 0) { + if (FIXNUM_P(x) || TYPE(x) == T_BIGNUM) + return ZERO; + } + else if (iy == 1) + return x; + } + else if (FIXNUM_P(x)) { + long ix = FIX2LONG(x); + if (ix == 0) { + if (FIXNUM_P(y) || TYPE(y) == T_BIGNUM) + return ZERO; + } + else if (ix == 1) + return y; + } +#endif + return rb_funcall(x, '*', 1, y); +} + +inline static VALUE +f_sub(VALUE x, VALUE y) +{ +#ifndef PRESERVE_SIGNEDZERO + if (FIXNUM_P(y) && FIX2LONG(y) == 0) + return x; +#endif + return rb_funcall(x, '-', 1, y); +} + +binop(xor, '^') + +fun1(abs) +fun1(abs2) +fun1(arg) +fun1(conj) +fun1(denominator) +fun1(floor) +fun1(hash) +fun1(inspect) +fun1(negate) +fun1(numerator) +fun1(polar) +fun1(real_p) + +fun1(to_f) +fun1(to_i) +fun1(to_r) +fun1(to_s) + +fun2(divmod) + +inline static VALUE +f_equal_p(VALUE x, VALUE y) +{ + if (FIXNUM_P(x) && FIXNUM_P(y)) + return f_boolcast(FIX2LONG(x) == FIX2LONG(y)); + return rb_funcall(x, id_equal_p, 1, y); +} + +fun2(expt) +fun2(idiv) +fun2(quo) + +inline static VALUE +f_negative_p(VALUE x) +{ + if (FIXNUM_P(x)) + return f_boolcast(FIX2LONG(x) < 0); + return rb_funcall(x, '<', 1, ZERO); +} + +#define f_positive_p(x) (!f_negative_p(x)) + +inline static VALUE +f_zero_p(VALUE x) +{ + if (FIXNUM_P(x)) + return f_boolcast(FIX2LONG(x) == 0); + return rb_funcall(x, id_equal_p, 1, ZERO); +} + +#define f_nonzero_p(x) (!f_zero_p(x)) + +inline static VALUE +f_one_p(VALUE x) +{ + if (FIXNUM_P(x)) + return f_boolcast(FIX2LONG(x) == 1); + return rb_funcall(x, id_equal_p, 1, ONE); +} + +inline static VALUE +f_kind_of_p(VALUE x, VALUE c) +{ + return rb_obj_is_kind_of(x, c); +} + +inline static VALUE +k_numeric_p(VALUE x) +{ + return f_kind_of_p(x, rb_cNumeric); +} + +inline static VALUE +k_integer_p(VALUE x) +{ + return f_kind_of_p(x, rb_cInteger); +} + +inline static VALUE +k_float_p(VALUE x) +{ + 
return f_kind_of_p(x, rb_cFloat); +} + +inline static VALUE +k_rational_p(VALUE x) +{ + return f_kind_of_p(x, rb_cRational); +} + +inline static VALUE +k_complex_p(VALUE x) +{ + return f_kind_of_p(x, rb_cComplex); +} + +#define k_exact_p(x) (!k_float_p(x)) +#define k_inexact_p(x) k_float_p(x) + +#define get_dat1(x) \ + struct RComplex *dat;\ + dat = ((struct RComplex *)(x)) + +#define get_dat2(x,y) \ + struct RComplex *adat, *bdat;\ + adat = ((struct RComplex *)(x));\ + bdat = ((struct RComplex *)(y)) + +inline static VALUE +nucomp_s_new_internal(VALUE klass, VALUE real, VALUE imag) +{ + NEWOBJ(obj, struct RComplex); + OBJSETUP(obj, klass, T_COMPLEX); + + obj->real = real; + obj->imag = imag; + + return (VALUE)obj; +} + +static VALUE +nucomp_s_alloc(VALUE klass) +{ + return nucomp_s_new_internal(klass, ZERO, ZERO); +} + +#if 0 +static VALUE +nucomp_s_new_bang(int argc, VALUE *argv, VALUE klass) +{ + VALUE real, imag; + + switch (rb_scan_args(argc, argv, "11", &real, &imag)) { + case 1: + if (!k_numeric_p(real)) + real = f_to_i(real); + imag = ZERO; + break; + default: + if (!k_numeric_p(real)) + real = f_to_i(real); + if (!k_numeric_p(imag)) + imag = f_to_i(imag); + break; + } + + return nucomp_s_new_internal(klass, real, imag); +} +#endif + +inline static VALUE +f_complex_new_bang1(VALUE klass, VALUE x) +{ + assert(!k_complex_p(x)); + return nucomp_s_new_internal(klass, x, ZERO); +} + +inline static VALUE +f_complex_new_bang2(VALUE klass, VALUE x, VALUE y) +{ + assert(!k_complex_p(x)); + assert(!k_complex_p(y)); + return nucomp_s_new_internal(klass, x, y); +} + +#ifdef CANONICALIZATION_FOR_MATHN +#define CANON +#endif + +#ifdef CANON +static int canonicalization = 0; + +void +nucomp_canonicalization(int f) +{ + canonicalization = f; +} +#endif + +inline static void +nucomp_real_check(VALUE num) +{ + switch (TYPE(num)) { + case T_FIXNUM: + case T_BIGNUM: + case T_FLOAT: + case T_RATIONAL: + break; + default: + if (!k_numeric_p(num) || !f_real_p(num)) + 
rb_raise(rb_eArgError, "not a real"); + } +} + +inline static VALUE +nucomp_s_canonicalize_internal(VALUE klass, VALUE real, VALUE imag) +{ +#ifdef CANON +#define CL_CANON +#ifdef CL_CANON + if (f_zero_p(imag) && k_exact_p(imag) && canonicalization) + return real; +#else + if (f_zero_p(imag) && canonicalization) + return real; +#endif +#endif + if (f_real_p(real) && f_real_p(imag)) + return nucomp_s_new_internal(klass, real, imag); + else if (f_real_p(real)) { + get_dat1(imag); + + return nucomp_s_new_internal(klass, + f_sub(real, dat->imag), + f_add(ZERO, dat->real)); + } + else if (f_real_p(imag)) { + get_dat1(real); + + return nucomp_s_new_internal(klass, + dat->real, + f_add(dat->imag, imag)); + } + else { + get_dat2(real, imag); + + return nucomp_s_new_internal(klass, + f_sub(adat->real, bdat->imag), + f_add(adat->imag, bdat->real)); + } +} + +static VALUE +nucomp_s_new(int argc, VALUE *argv, VALUE klass) +{ + VALUE real, imag; + + switch (rb_scan_args(argc, argv, "11", &real, &imag)) { + case 1: + nucomp_real_check(real); + imag = ZERO; + break; + default: + nucomp_real_check(real); + nucomp_real_check(imag); + break; + } + + return nucomp_s_canonicalize_internal(klass, real, imag); +} + +inline static VALUE +f_complex_new1(VALUE klass, VALUE x) +{ + assert(!k_complex_p(x)); + return nucomp_s_canonicalize_internal(klass, x, ZERO); +} + +inline static VALUE +f_complex_new2(VALUE klass, VALUE x, VALUE y) +{ + assert(!k_complex_p(x)); + return nucomp_s_canonicalize_internal(klass, x, y); +} + +static VALUE +nucomp_f_complex(int argc, VALUE *argv, VALUE klass) +{ + return rb_funcall2(rb_cComplex, id_convert, argc, argv); +} + +#define imp1(n) \ +extern VALUE rb_math_##n(VALUE x);\ +inline static VALUE \ +m_##n##_bang(VALUE x)\ +{\ + return rb_math_##n(x);\ +} + +#define imp2(n) \ +extern VALUE rb_math_##n(VALUE x, VALUE y);\ +inline static VALUE \ +m_##n##_bang(VALUE x, VALUE y)\ +{\ + return rb_math_##n(x, y);\ +} + +imp2(atan2) +imp1(cos) +imp1(cosh) +imp1(exp) 
+imp2(hypot) + +#define m_hypot(x,y) m_hypot_bang(x,y) + +extern VALUE rb_math_log(int argc, VALUE *argv); + +static VALUE +m_log_bang(VALUE x) +{ + return rb_math_log(1, &x); +} + +imp1(sin) +imp1(sinh) +imp1(sqrt) + +static VALUE +m_cos(VALUE x) +{ + if (f_real_p(x)) + return m_cos_bang(x); + { + get_dat1(x); + return f_complex_new2(rb_cComplex, + f_mul(m_cos_bang(dat->real), + m_cosh_bang(dat->imag)), + f_mul(f_negate(m_sin_bang(dat->real)), + m_sinh_bang(dat->imag))); + } +} + +static VALUE +m_sin(VALUE x) +{ + if (f_real_p(x)) + return m_sin_bang(x); + { + get_dat1(x); + return f_complex_new2(rb_cComplex, + f_mul(m_sin_bang(dat->real), + m_cosh_bang(dat->imag)), + f_mul(m_cos_bang(dat->real), + m_sinh_bang(dat->imag))); + } +} + +#if 0 +static VALUE +m_sqrt(VALUE x) +{ + if (f_real_p(x)) { + if (f_positive_p(x)) + return m_sqrt_bang(x); + return f_complex_new2(rb_cComplex, ZERO, m_sqrt_bang(f_negate(x))); + } + else { + get_dat1(x); + + if (f_negative_p(dat->imag)) + return f_conj(m_sqrt(f_conj(x))); + else { + VALUE a = f_abs(x); + return f_complex_new2(rb_cComplex, + m_sqrt_bang(f_div(f_add(a, dat->real), TWO)), + m_sqrt_bang(f_div(f_sub(a, dat->real), TWO))); + } + } +} +#endif + +inline static VALUE +f_complex_polar(VALUE klass, VALUE x, VALUE y) +{ + assert(!k_complex_p(x)); + assert(!k_complex_p(y)); + return nucomp_s_canonicalize_internal(klass, + f_mul(x, m_cos(y)), + f_mul(x, m_sin(y))); +} + +static VALUE +nucomp_s_polar(VALUE klass, VALUE abs, VALUE arg) +{ + return f_complex_polar(klass, abs, arg); +} + +static VALUE +nucomp_real(VALUE self) +{ + get_dat1(self); + return dat->real; +} + +static VALUE +nucomp_imag(VALUE self) +{ + get_dat1(self); + return dat->imag; +} + +static VALUE +nucomp_negate(VALUE self) +{ + get_dat1(self); + return f_complex_new2(CLASS_OF(self), + f_negate(dat->real), f_negate(dat->imag)); +} + +static VALUE +nucomp_add(VALUE self, VALUE other) +{ + if (k_complex_p(other)) { + VALUE real, imag; + + get_dat2(self, other); + 
+ real = f_add(adat->real, bdat->real); + imag = f_add(adat->imag, bdat->imag); + + return f_complex_new2(CLASS_OF(self), real, imag); + } + if (k_numeric_p(other) && f_real_p(other)) { + get_dat1(self); + + return f_complex_new2(CLASS_OF(self), + f_add(dat->real, other), dat->imag); + } + return rb_num_coerce_bin(self, other, '+'); +} + +static VALUE +nucomp_sub(VALUE self, VALUE other) +{ + if (k_complex_p(other)) { + VALUE real, imag; + + get_dat2(self, other); + + real = f_sub(adat->real, bdat->real); + imag = f_sub(adat->imag, bdat->imag); + + return f_complex_new2(CLASS_OF(self), real, imag); + } + if (k_numeric_p(other) && f_real_p(other)) { + get_dat1(self); + + return f_complex_new2(CLASS_OF(self), + f_sub(dat->real, other), dat->imag); + } + return rb_num_coerce_bin(self, other, '-'); +} + +static VALUE +nucomp_mul(VALUE self, VALUE other) +{ + if (k_complex_p(other)) { + VALUE real, imag; + + get_dat2(self, other); + + real = f_sub(f_mul(adat->real, bdat->real), + f_mul(adat->imag, bdat->imag)); + imag = f_add(f_mul(adat->real, bdat->imag), + f_mul(adat->imag, bdat->real)); + + return f_complex_new2(CLASS_OF(self), real, imag); + } + if (k_numeric_p(other) && f_real_p(other)) { + get_dat1(self); + + return f_complex_new2(CLASS_OF(self), + f_mul(dat->real, other), + f_mul(dat->imag, other)); + } + return rb_num_coerce_bin(self, other, '*'); +} + +#define f_div f_quo + +static VALUE +nucomp_div(VALUE self, VALUE other) +{ + if (k_complex_p(other)) { + get_dat2(self, other); + + if (TYPE(adat->real) == T_FLOAT || + TYPE(adat->imag) == T_FLOAT || + TYPE(bdat->real) == T_FLOAT || + TYPE(bdat->imag) == T_FLOAT) { + VALUE magn = m_hypot(bdat->real, bdat->imag); + VALUE tmp = f_complex_new_bang2(CLASS_OF(self), + f_div(bdat->real, magn), + f_div(bdat->imag, magn)); + return f_div(f_mul(self, f_conj(tmp)), magn); + } + return f_div(f_mul(self, f_conj(other)), f_abs2(other)); + } + if (k_numeric_p(other) && f_real_p(other)) { + get_dat1(self); + + return 
f_complex_new2(CLASS_OF(self), + f_div(dat->real, other), + f_div(dat->imag, other)); + } + return rb_num_coerce_bin(self, other, '/'); +} + +#undef f_div +#define nucomp_quo nucomp_div + +static VALUE +nucomp_fdiv(VALUE self, VALUE other) +{ + get_dat1(self); + + return f_div(f_complex_new2(CLASS_OF(self), + f_to_f(dat->real), + f_to_f(dat->imag)), other); +} + +static VALUE +nucomp_expt(VALUE self, VALUE other) +{ + if (k_exact_p(other) && f_zero_p(other)) + return f_complex_new_bang1(CLASS_OF(self), ONE); + + if (k_rational_p(other) && f_one_p(f_denominator(other))) + other = f_numerator(other); /* good? */ + + if (k_complex_p(other)) { + VALUE a, r, theta, ore, oim, nr, ntheta; + + get_dat1(other); + + a = f_polar(self); + r = RARRAY_PTR(a)[0]; + theta = RARRAY_PTR(a)[1]; + + ore = dat->real; + oim = dat->imag; + nr = m_exp_bang(f_sub(f_mul(ore, m_log_bang(r)), + f_mul(oim, theta))); + ntheta = f_add(f_mul(theta, ore), f_mul(oim, m_log_bang(r))); + return f_complex_polar(CLASS_OF(self), nr, ntheta); + } + if (k_integer_p(other)) { + if (f_gt_p(other, ZERO)) { + VALUE x, z, n; + + x = self; + z = x; + n = f_sub(other, ONE); + + while (f_nonzero_p(n)) { + VALUE a; + + while (a = f_divmod(n, TWO), + f_zero_p(RARRAY_PTR(a)[1])) { + get_dat1(x); + + x = f_complex_new2(CLASS_OF(self), + f_sub(f_mul(dat->real, dat->real), + f_mul(dat->imag, dat->imag)), + f_mul(f_mul(TWO, dat->real), dat->imag)); + n = RARRAY_PTR(a)[0]; + } + z = f_mul(z, x); + n = f_sub(n, ONE); + } + return z; + } + return f_expt(f_div(f_to_r(ONE), self), f_negate(other)); + } + if (k_numeric_p(other) && f_real_p(other)) { + VALUE a, r, theta; + + a = f_polar(self); + r = RARRAY_PTR(a)[0]; + theta = RARRAY_PTR(a)[1]; + return f_complex_polar(CLASS_OF(self), f_expt(r, other), + f_mul(theta, other)); + } + return rb_num_coerce_bin(self, other, id_expt); +} + +static VALUE +nucomp_equal_p(VALUE self, VALUE other) +{ + if (k_complex_p(other)) { + get_dat2(self, other); + + return 
f_boolcast(f_equal_p(adat->real, bdat->real) && + f_equal_p(adat->imag, bdat->imag)); + } + if (k_numeric_p(other) && f_real_p(other)) { + get_dat1(self); + + return f_boolcast(f_equal_p(dat->real, other) && f_zero_p(dat->imag)); + } + return f_equal_p(other, self); +} + +static VALUE +nucomp_coerce(VALUE self, VALUE other) +{ + if (k_numeric_p(other) && f_real_p(other)) + return rb_assoc_new(f_complex_new_bang1(CLASS_OF(self), other), self); + if (TYPE(other) == T_COMPLEX) + return rb_assoc_new(other, self); + + rb_raise(rb_eTypeError, "%s can't be coerced into %s", + rb_obj_classname(other), rb_obj_classname(self)); + return Qnil; +} + +static VALUE +nucomp_abs(VALUE self) +{ + get_dat1(self); + return m_hypot(dat->real, dat->imag); +} + +static VALUE +nucomp_abs2(VALUE self) +{ + get_dat1(self); + return f_add(f_mul(dat->real, dat->real), + f_mul(dat->imag, dat->imag)); +} + +static VALUE +nucomp_arg(VALUE self) +{ + get_dat1(self); + return m_atan2_bang(dat->imag, dat->real); +} + +static VALUE +nucomp_rect(VALUE self) +{ + get_dat1(self); + return rb_assoc_new(dat->real, dat->imag); +} + +static VALUE +nucomp_polar(VALUE self) +{ + return rb_assoc_new(f_abs(self), f_arg(self)); +} + +static VALUE +nucomp_conj(VALUE self) +{ + get_dat1(self); + return f_complex_new2(CLASS_OF(self), dat->real, f_negate(dat->imag)); +} + +#if 0 +static VALUE +nucomp_true(VALUE self) +{ + return Qtrue; +} +#endif + +static VALUE +nucomp_false(VALUE self) +{ + return Qfalse; +} + +#if 0 +static VALUE +nucomp_exact_p(VALUE self) +{ + get_dat1(self); + return f_boolcast(f_exact_p(dat->real) && f_exact_p(dat->imag)); +} + +static VALUE +nucomp_inexact_p(VALUE self) +{ + return f_boolcast(!nucomp_exact_p(self)); +} +#endif + +extern VALUE rb_lcm(VALUE x, VALUE y); + +static VALUE +nucomp_denominator(VALUE self) +{ + get_dat1(self); + return rb_lcm(f_denominator(dat->real), f_denominator(dat->imag)); +} + +static VALUE +nucomp_numerator(VALUE self) +{ + VALUE cd; + + get_dat1(self); + + 
cd = f_denominator(self); + return f_complex_new2(CLASS_OF(self), + f_mul(f_numerator(dat->real), + f_div(cd, f_denominator(dat->real))), + f_mul(f_numerator(dat->imag), + f_div(cd, f_denominator(dat->imag)))); +} + +static VALUE +nucomp_hash(VALUE self) +{ + get_dat1(self); + return f_xor(f_hash(dat->real), f_hash(dat->imag)); +} + +static VALUE +nucomp_eql_p(VALUE self, VALUE other) +{ + if (k_complex_p(other)) { + get_dat2(self, other); + + return f_boolcast((CLASS_OF(adat->real) == CLASS_OF(bdat->real)) && + (CLASS_OF(adat->imag) == CLASS_OF(bdat->imag)) && + f_equal_p(self, other)); + + } + return Qfalse; +} + +#ifndef HAVE_SIGNBIT +#ifdef signbit +#define HAVE_SIGNBIT 1 +#endif +#endif + +inline static VALUE +f_signbit(VALUE x) +{ + switch (TYPE(x)) { + case T_FLOAT: +#ifdef HAVE_SIGNBIT + { + double f = RFLOAT_VALUE(x); + return f_boolcast(!isnan(f) && signbit(f)); + } +#else + { + char s[2]; + double f = RFLOAT_VALUE(x); + + if (isnan(f)) return Qfalse; + (void)snprintf(s, sizeof s, "%.0f", f); + return f_boolcast(s[0] == '-'); + } +#endif + } + return f_negative_p(x); +} + +inline static VALUE +f_tpositive_p(VALUE x) +{ + return f_boolcast(!f_signbit(x)); +} + +static VALUE +nucomp_format(VALUE self, VALUE (*func)(VALUE)) +{ + VALUE s, impos; + + get_dat1(self); + + impos = f_tpositive_p(dat->imag); + + s = (*func)(dat->real); + rb_str_cat2(s, !impos ? 
"-" : "+"); + + rb_str_concat(s, (*func)(f_abs(dat->imag))); + if (!rb_isdigit(RSTRING_PTR(s)[RSTRING_LEN(s) - 1])) + rb_str_cat2(s, "*"); + rb_str_cat2(s, "i"); + + return s; +} + +static VALUE +nucomp_to_s(VALUE self) +{ + return nucomp_format(self, f_to_s); +} + +static VALUE +nucomp_inspect(VALUE self) +{ + VALUE s; + + s = rb_usascii_str_new2("("); + rb_str_concat(s, nucomp_format(self, f_inspect)); + rb_str_cat2(s, ")"); + + return s; +} + +static VALUE +nucomp_marshal_dump(VALUE self) +{ + VALUE a; + get_dat1(self); + + a = rb_assoc_new(dat->real, dat->imag); + rb_copy_generic_ivar(a, self); + return a; +} + +static VALUE +nucomp_marshal_load(VALUE self, VALUE a) +{ + get_dat1(self); + dat->real = RARRAY_PTR(a)[0]; + dat->imag = RARRAY_PTR(a)[1]; + rb_copy_generic_ivar(self, a); + return self; +} + +/* --- */ + +VALUE +rb_complex_raw(VALUE x, VALUE y) +{ + return nucomp_s_new_internal(rb_cComplex, x, y); +} + +VALUE +rb_complex_new(VALUE x, VALUE y) +{ + return nucomp_s_canonicalize_internal(rb_cComplex, x, y); +} + +VALUE +rb_complex_polar(VALUE x, VALUE y) +{ + return nucomp_s_polar(rb_cComplex, x, y); +} + +static VALUE nucomp_s_convert(int argc, VALUE *argv, VALUE klass); + +VALUE +rb_Complex(VALUE x, VALUE y) +{ + VALUE a[2]; + a[0] = x; + a[1] = y; + return nucomp_s_convert(2, a, rb_cComplex); +} + +static VALUE +nucomp_to_i(VALUE self) +{ + get_dat1(self); + + if (k_inexact_p(dat->imag) || f_nonzero_p(dat->imag)) { + VALUE s = f_to_s(self); + rb_raise(rb_eRangeError, "can't convert %s into Integer", + StringValuePtr(s)); + } + return f_to_i(dat->real); +} + +static VALUE +nucomp_to_f(VALUE self) +{ + get_dat1(self); + + if (k_inexact_p(dat->imag) || f_nonzero_p(dat->imag)) { + VALUE s = f_to_s(self); + rb_raise(rb_eRangeError, "can't convert %s into Float", + StringValuePtr(s)); + } + return f_to_f(dat->real); +} + +static VALUE +nucomp_to_r(VALUE self) +{ + get_dat1(self); + + if (k_inexact_p(dat->imag) || f_nonzero_p(dat->imag)) { + VALUE s = 
f_to_s(self); + rb_raise(rb_eRangeError, "can't convert %s into Rational", + StringValuePtr(s)); + } + return f_to_r(dat->real); +} + +static VALUE +nilclass_to_c(VALUE self) +{ + return rb_complex_new1(INT2FIX(0)); +} + +static VALUE +numeric_to_c(VALUE self) +{ + return rb_complex_new1(self); +} + +static VALUE comp_pat0, comp_pat1, comp_pat2, a_slash, a_dot_and_an_e, + null_string, underscores_pat, an_underscore; + +#define WS "\\s*" +#define DIGITS "(?:\\d(?:_\\d|\\d)*)" +#define NUMERATOR "(?:" DIGITS "?\\.)?" DIGITS "(?:[eE][-+]?" DIGITS ")?" +#define DENOMINATOR DIGITS +#define NUMBER "[-+]?" NUMERATOR "(?:\\/" DENOMINATOR ")?" +#define NUMBERNOS NUMERATOR "(?:\\/" DENOMINATOR ")?" +#define PATTERN0 "\\A" WS "(" NUMBER ")@(" NUMBER ")" WS +#define PATTERN1 "\\A" WS "([-+])?(" NUMBER ")?[iIjJ]" WS +#define PATTERN2 "\\A" WS "(" NUMBER ")(([-+])(" NUMBERNOS ")?[iIjJ])?" WS + +static void +make_patterns(void) +{ + static const char comp_pat0_source[] = PATTERN0; + static const char comp_pat1_source[] = PATTERN1; + static const char comp_pat2_source[] = PATTERN2; + static const char underscores_pat_source[] = "_+"; + + if (comp_pat0) return; + + comp_pat0 = rb_reg_new(comp_pat0_source, sizeof comp_pat0_source - 1, 0); + rb_gc_register_mark_object(comp_pat0); + + comp_pat1 = rb_reg_new(comp_pat1_source, sizeof comp_pat1_source - 1, 0); + rb_gc_register_mark_object(comp_pat1); + + comp_pat2 = rb_reg_new(comp_pat2_source, sizeof comp_pat2_source - 1, 0); + rb_gc_register_mark_object(comp_pat2); + + a_slash = rb_usascii_str_new2("/"); + rb_gc_register_mark_object(a_slash); + + a_dot_and_an_e = rb_usascii_str_new2(".eE"); + rb_gc_register_mark_object(a_dot_and_an_e); + + null_string = rb_usascii_str_new2(""); + rb_gc_register_mark_object(null_string); + + underscores_pat = rb_reg_new(underscores_pat_source, + sizeof underscores_pat_source - 1, 0); + rb_gc_register_mark_object(underscores_pat); + + an_underscore = rb_usascii_str_new2("_"); + 
rb_gc_register_mark_object(an_underscore); +} + +#define id_match rb_intern("match") +#define f_match(x,y) rb_funcall(x, id_match, 1, y) + +#define id_aref rb_intern("[]") +#define f_aref(x,y) rb_funcall(x, id_aref, 1, y) + +#define id_post_match rb_intern("post_match") +#define f_post_match(x) rb_funcall(x, id_post_match, 0) + +#define id_split rb_intern("split") +#define f_split(x,y) rb_funcall(x, id_split, 1, y) + +#define id_include_p rb_intern("include?") +#define f_include_p(x,y) rb_funcall(x, id_include_p, 1, y) + +#define id_count rb_intern("count") +#define f_count(x,y) rb_funcall(x, id_count, 1, y) + +#define id_gsub_bang rb_intern("gsub!") +#define f_gsub_bang(x,y,z) rb_funcall(x, id_gsub_bang, 2, y, z) + +static VALUE +string_to_c_internal(VALUE self) +{ + VALUE s; + + s = self; + + if (RSTRING_LEN(s) == 0) + return rb_assoc_new(Qnil, self); + + { + VALUE m, sr, si, re, r, i; + int po; + + m = f_match(comp_pat0, s); + if (!NIL_P(m)) { + sr = f_aref(m, INT2FIX(1)); + si = f_aref(m, INT2FIX(2)); + re = f_post_match(m); + po = 1; + } + if (NIL_P(m)) { + m = f_match(comp_pat1, s); + if (!NIL_P(m)) { + sr = Qnil; + si = f_aref(m, INT2FIX(1)); + if (NIL_P(si)) + si = rb_usascii_str_new2(""); + { + VALUE t; + + t = f_aref(m, INT2FIX(2)); + if (NIL_P(t)) + t = rb_usascii_str_new2("1"); + rb_str_concat(si, t); + } + re = f_post_match(m); + po = 0; + } + } + if (NIL_P(m)) { + m = f_match(comp_pat2, s); + if (NIL_P(m)) + return rb_assoc_new(Qnil, self); + sr = f_aref(m, INT2FIX(1)); + if (NIL_P(f_aref(m, INT2FIX(2)))) + si = Qnil; + else { + VALUE t; + + si = f_aref(m, INT2FIX(3)); + t = f_aref(m, INT2FIX(4)); + if (NIL_P(t)) + t = rb_usascii_str_new2("1"); + rb_str_concat(si, t); + } + re = f_post_match(m); + po = 0; + } + r = INT2FIX(0); + i = INT2FIX(0); + if (!NIL_P(sr)) { + if (f_include_p(sr, a_slash)) + r = f_to_r(sr); + else if (f_gt_p(f_count(sr, a_dot_and_an_e), INT2FIX(0))) + r = f_to_f(sr); + else + r = f_to_i(sr); + } + if (!NIL_P(si)) { + if 
(f_include_p(si, a_slash)) + i = f_to_r(si); + else if (f_gt_p(f_count(si, a_dot_and_an_e), INT2FIX(0))) + i = f_to_f(si); + else + i = f_to_i(si); + } + if (po) + return rb_assoc_new(rb_complex_polar(r, i), re); + else + return rb_assoc_new(rb_complex_new2(r, i), re); + } +} + +static VALUE +string_to_c_strict(VALUE self) +{ + VALUE a = string_to_c_internal(self); + if (NIL_P(RARRAY_PTR(a)[0]) || RSTRING_LEN(RARRAY_PTR(a)[1]) > 0) { + VALUE s = f_inspect(self); + rb_raise(rb_eArgError, "invalid value for Complex: %s", + StringValuePtr(s)); + } + return RARRAY_PTR(a)[0]; +} + +#define id_gsub rb_intern("gsub") +#define f_gsub(x,y,z) rb_funcall(x, id_gsub, 2, y, z) + +static VALUE +string_to_c(VALUE self) +{ + VALUE s, a, backref; + + backref = rb_backref_get(); + rb_match_busy(backref); + + s = f_gsub(self, underscores_pat, an_underscore); + a = string_to_c_internal(s); + + rb_backref_set(backref); + + if (!NIL_P(RARRAY_PTR(a)[0])) + return RARRAY_PTR(a)[0]; + return rb_complex_new1(INT2FIX(0)); +} + +static VALUE +nucomp_s_convert(int argc, VALUE *argv, VALUE klass) +{ + VALUE a1, a2, backref; + + rb_scan_args(argc, argv, "11", &a1, &a2); + + backref = rb_backref_get(); + rb_match_busy(backref); + + switch (TYPE(a1)) { + case T_FIXNUM: + case T_BIGNUM: + case T_FLOAT: + break; + case T_STRING: + a1 = string_to_c_strict(a1); + break; + } + + switch (TYPE(a2)) { + case T_FIXNUM: + case T_BIGNUM: + case T_FLOAT: + break; + case T_STRING: + a2 = string_to_c_strict(a2); + break; + } + + rb_backref_set(backref); + + switch (TYPE(a1)) { + case T_COMPLEX: + { + get_dat1(a1); + + if (k_exact_p(dat->imag) && f_zero_p(dat->imag)) + a1 = dat->real; + } + } + + switch (TYPE(a2)) { + case T_COMPLEX: + { + get_dat1(a2); + + if (k_exact_p(dat->imag) && f_zero_p(dat->imag)) + a2 = dat->real; + } + } + + switch (TYPE(a1)) { + case T_COMPLEX: + if (argc == 1 || (k_exact_p(a2) && f_zero_p(a2))) + return a1; + } + + if (argc == 1) { + if (k_numeric_p(a1) && !f_real_p(a1)) + return a1; 
+ } + else { + if ((k_numeric_p(a1) && k_numeric_p(a2)) && + (!f_real_p(a1) || !f_real_p(a2))) + return f_add(a1, + f_mul(a2, + f_complex_new_bang2(rb_cComplex, ZERO, ONE))); + } + + { + VALUE argv2[2]; + argv2[0] = a1; + argv2[1] = a2; + return nucomp_s_new(argc, argv2, klass); + } +} + +/* --- */ + +static VALUE +numeric_real(VALUE self) +{ + return self; +} + +static VALUE +numeric_imag(VALUE self) +{ + return INT2FIX(0); +} + +static VALUE +numeric_abs2(VALUE self) +{ + return f_mul(self, self); +} + +#define id_PI rb_intern("PI") + +/* + * call-seq: + * num.arg -> 0 or float + * num.angle -> 0 or float + * num.phase -> 0 or float + * + * Returns 0 if the value is positive, pi otherwise. + */ +static VALUE +numeric_arg(VALUE self) +{ + if (f_positive_p(self)) + return INT2FIX(0); + return rb_const_get(rb_mMath, id_PI); +} + +static VALUE +numeric_rect(VALUE self) +{ + return rb_assoc_new(self, INT2FIX(0)); +} + +static VALUE +numeric_polar(VALUE self) +{ + return rb_assoc_new(f_abs(self), f_arg(self)); +} + +static VALUE +numeric_conj(VALUE self) +{ + return self; +} + +/* + * call-seq: + * flo.arg -> 0 or float + * flo.angle -> 0 or float + * flo.phase -> 0 or float + * + * Returns 0 if the value is positive, pi otherwise. + */ +static VALUE +float_arg(VALUE self) +{ + if (isnan(RFLOAT_VALUE(self))) + return self; + return rb_call_super(0, 0); +} + +/* + * A complex number can be represented as a paired real number with + * imaginary unit; a+bi. Where a is real part, b is imaginary part + * and i is imaginary unit. Real a equals complex a+0i + * mathematically. + * + * In ruby, you can create complex object with Complex, Complex::rect, + * Complex::polar or to_c method. + * + * Complex(1) #=> (1+0i) + * Complex(2, 3) #=> (2+3i) + * Complex.polar(2, 3) #=> (-1.9799849932008908+0.2822400161197344i) + * 3.to_c #=> (3+0i) + * + * You can also create complex object from floating-point numbers or + * strings. 
+ *
+ *    Complex(0.3)         #=> (0.3+0i)
+ *    Complex('0.3-0.5i')  #=> (0.3-0.5i)
+ *    Complex('2/3+3/4i')  #=> ((2/3)+(3/4)*i)
+ *    Complex('1@2')       #=> (-0.4161468365471424+0.9092974268256817i)
+ *
+ *    0.3.to_c             #=> (0.3+0i)
+ *    '0.3-0.5i'.to_c      #=> (0.3-0.5i)
+ *    '2/3+3/4i'.to_c      #=> ((2/3)+(3/4)*i)
+ *    '1@2'.to_c           #=> (-0.4161468365471424+0.9092974268256817i)
+ *
+ * A complex object is either an exact or an inexact number.
+ *
+ *    Complex(1, 1) / 2    #=> ((1/2)+(1/2)*i)
+ *    Complex(1, 1) / 2.0  #=> (0.5+0.5i)
+ */
+void
+Init_Complex(void)
+{
+    /*
+     * Sets up the Complex class: interns the method IDs assigned below,
+     * defines the class with its class-level and instance methods, and
+     * adds the related conversion/accessor methods to NilClass, Numeric,
+     * String and Float.
+     */
+#undef rb_intern
+#define rb_intern(str) rb_intern_const(str)
+    /* After this redefinition rb_intern(...) expands to rb_intern_const(...). */
+
+    /*
+     * The fprintf() call is the argument of assert(), so the notice is
+     * only written to stderr when assertions are compiled in (NDEBUG not
+     * defined).
+     */
+    assert(fprintf(stderr, "assert() is now active\n"));
+
+    /* Method-name IDs; the id_* variables are declared outside this function. */
+    id_abs = rb_intern("abs");
+    id_abs2 = rb_intern("abs2");
+    id_arg = rb_intern("arg");
+    id_cmp = rb_intern("<=>");
+    id_conj = rb_intern("conj");
+    id_convert = rb_intern("convert");
+    id_denominator = rb_intern("denominator");
+    id_divmod = rb_intern("divmod");
+    id_equal_p = rb_intern("==");
+    id_expt = rb_intern("**");
+    id_floor = rb_intern("floor");
+    id_hash = rb_intern("hash");
+    id_idiv = rb_intern("div");
+    id_inspect = rb_intern("inspect");
+    id_negate = rb_intern("-@");
+    id_numerator = rb_intern("numerator");
+    id_polar = rb_intern("polar");
+    id_quo = rb_intern("quo");
+    id_real_p = rb_intern("real?");
+    id_to_f = rb_intern("to_f");
+    id_to_i = rb_intern("to_i");
+    id_to_r = rb_intern("to_r");
+    id_to_s = rb_intern("to_s");
+
+    /* Complex is defined as a subclass of Numeric. */
+    rb_cComplex = rb_define_class("Complex", rb_cNumeric);
+
+    /* Register the allocator, then undefine "allocate" at class level. */
+    rb_define_alloc_func(rb_cComplex, nucomp_s_alloc);
+    rb_undef_method(CLASS_OF(rb_cComplex), "allocate");
+
+    /*
+     * The private "new!"/"new" definitions are compiled out; the branch
+     * that is built undefines "new" at class level instead.
+     */
+#if 0
+    rb_define_private_method(CLASS_OF(rb_cComplex), "new!", nucomp_s_new_bang, -1);
+    rb_define_private_method(CLASS_OF(rb_cComplex), "new", nucomp_s_new, -1);
+#else
+    rb_undef_method(CLASS_OF(rb_cComplex), "new");
+#endif
+
+    /* Class-level constructors; "rectangular" and "rect" share nucomp_s_new. */
+    rb_define_singleton_method(rb_cComplex, "rectangular", nucomp_s_new, -1);
+    rb_define_singleton_method(rb_cComplex, "rect", nucomp_s_new, -1);
+    rb_define_singleton_method(rb_cComplex, "polar", nucomp_s_polar, 2);
+
+    /* Global function Complex(...). */
+    rb_define_global_function("Complex", nucomp_f_complex, -1);
+
+    /* Methods explicitly undefined for Complex instances. */
+    rb_undef_method(rb_cComplex, "%");
+    rb_undef_method(rb_cComplex, "<");
+    rb_undef_method(rb_cComplex, "<=");
+    rb_undef_method(rb_cComplex, "<=>");
+    rb_undef_method(rb_cComplex, ">");
+    rb_undef_method(rb_cComplex, ">=");
+    rb_undef_method(rb_cComplex, "between?");
+    rb_undef_method(rb_cComplex, "div");
+    rb_undef_method(rb_cComplex, "divmod");
+    rb_undef_method(rb_cComplex, "floor");
+    rb_undef_method(rb_cComplex, "ceil");
+    rb_undef_method(rb_cComplex, "modulo");
+    rb_undef_method(rb_cComplex, "remainder");
+    rb_undef_method(rb_cComplex, "round");
+    rb_undef_method(rb_cComplex, "step");
+    rb_undef_method(rb_cComplex, "truncate");
+
+#if 0 /* NUBY */
+    rb_undef_method(rb_cComplex, "//");
+#endif
+
+    /* Component accessors; "imaginary" and "imag" share nucomp_imag. */
+    rb_define_method(rb_cComplex, "real", nucomp_real, 0);
+    rb_define_method(rb_cComplex, "imaginary", nucomp_imag, 0);
+    rb_define_method(rb_cComplex, "imag", nucomp_imag, 0);
+
+    /* Arithmetic operators. */
+    rb_define_method(rb_cComplex, "-@", nucomp_negate, 0);
+    rb_define_method(rb_cComplex, "+", nucomp_add, 1);
+    rb_define_method(rb_cComplex, "-", nucomp_sub, 1);
+    rb_define_method(rb_cComplex, "*", nucomp_mul, 1);
+    rb_define_method(rb_cComplex, "/", nucomp_div, 1);
+    rb_define_method(rb_cComplex, "quo", nucomp_quo, 1);
+    rb_define_method(rb_cComplex, "fdiv", nucomp_fdiv, 1);
+    rb_define_method(rb_cComplex, "**", nucomp_expt, 1);
+
+    /* Equality and coercion. */
+    rb_define_method(rb_cComplex, "==", nucomp_equal_p, 1);
+    rb_define_method(rb_cComplex, "coerce", nucomp_coerce, 1);
+
+    /*
+     * Magnitude, argument, rectangular/polar and conjugate methods.
+     * Several names are bound to the same implementation.
+     */
+    rb_define_method(rb_cComplex, "abs", nucomp_abs, 0);
+    rb_define_method(rb_cComplex, "magnitude", nucomp_abs, 0);
+    rb_define_method(rb_cComplex, "abs2", nucomp_abs2, 0);
+    rb_define_method(rb_cComplex, "arg", nucomp_arg, 0);
+    rb_define_method(rb_cComplex, "angle", nucomp_arg, 0);
+    rb_define_method(rb_cComplex, "phase", nucomp_arg, 0);
+    rb_define_method(rb_cComplex, "rectangular", nucomp_rect, 0);
+    rb_define_method(rb_cComplex, "rect", nucomp_rect, 0);
+    rb_define_method(rb_cComplex, "polar", nucomp_polar, 0);
+    rb_define_method(rb_cComplex, "conjugate", nucomp_conj, 0);
+    rb_define_method(rb_cComplex, "conj", nucomp_conj, 0);
+#if 0
+    rb_define_method(rb_cComplex, "~", nucomp_conj, 0); /* gcc */
+#endif
+
+    /* "real?" is bound to nucomp_false (presumably always false; body not shown here). */
+    rb_define_method(rb_cComplex, "real?", nucomp_false, 0);
+#if 0
+    rb_define_method(rb_cComplex, "complex?", nucomp_true, 0);
+    rb_define_method(rb_cComplex, "exact?", nucomp_exact_p, 0);
+    rb_define_method(rb_cComplex, "inexact?", nucomp_inexact_p, 0);
+#endif
+
+    rb_define_method(rb_cComplex, "numerator", nucomp_numerator, 0);
+    rb_define_method(rb_cComplex, "denominator", nucomp_denominator, 0);
+
+    /* Hashing and eql?. */
+    rb_define_method(rb_cComplex, "hash", nucomp_hash, 0);
+    rb_define_method(rb_cComplex, "eql?", nucomp_eql_p, 1);
+
+    /* String representations. */
+    rb_define_method(rb_cComplex, "to_s", nucomp_to_s, 0);
+    rb_define_method(rb_cComplex, "inspect", nucomp_inspect, 0);
+
+    /* Marshal support. */
+    rb_define_method(rb_cComplex, "marshal_dump", nucomp_marshal_dump, 0);
+    rb_define_method(rb_cComplex, "marshal_load", nucomp_marshal_load, 1);
+
+    /* --- */
+
+    /* Conversions out of Complex, plus to_c on NilClass and Numeric. */
+    rb_define_method(rb_cComplex, "to_i", nucomp_to_i, 0);
+    rb_define_method(rb_cComplex, "to_f", nucomp_to_f, 0);
+    rb_define_method(rb_cComplex, "to_r", nucomp_to_r, 0);
+    rb_define_method(rb_cNilClass, "to_c", nilclass_to_c, 0);
+    rb_define_method(rb_cNumeric, "to_c", numeric_to_c, 0);
+
+    /*
+     * make_patterns() creates the regexps and helper strings that the
+     * string parsing code matches against, so it is called before
+     * String#to_c is registered.
+     */
+    make_patterns();
+
+    rb_define_method(rb_cString, "to_c", string_to_c, 0);
+
+    /* "convert" is defined as a private class-level method of Complex. */
+    rb_define_private_method(CLASS_OF(rb_cComplex), "convert", nucomp_s_convert, -1);
+
+    /* --- */
+
+    /* Complex-style accessors defined on Numeric itself. */
+    rb_define_method(rb_cNumeric, "real", numeric_real, 0);
+    rb_define_method(rb_cNumeric, "imaginary", numeric_imag, 0);
+    rb_define_method(rb_cNumeric, "imag", numeric_imag, 0);
+    rb_define_method(rb_cNumeric, "abs2", numeric_abs2, 0);
+    rb_define_method(rb_cNumeric, "arg", numeric_arg, 0);
+    rb_define_method(rb_cNumeric, "angle", numeric_arg, 0);
+    rb_define_method(rb_cNumeric, "phase", numeric_arg, 0);
+    rb_define_method(rb_cNumeric, "rectangular", numeric_rect, 0);
+    rb_define_method(rb_cNumeric, "rect", numeric_rect, 0);
+    rb_define_method(rb_cNumeric, "polar", numeric_polar, 0);
+    rb_define_method(rb_cNumeric, "conjugate", numeric_conj, 0);
+    rb_define_method(rb_cNumeric, "conj", numeric_conj, 0);
+
+    /*
+     * Float gets its own arg/angle/phase: float_arg returns self for NaN
+     * and otherwise calls the superclass method.
+     */
+    rb_define_method(rb_cFloat, "arg", float_arg, 0);
+    rb_define_method(rb_cFloat, "angle", float_arg, 0);
+    rb_define_method(rb_cFloat, "phase", float_arg, 0);
+
+    /*
+     * Complex::I is built from ZERO and ONE, presumably 0+1i; both
+     * constants are defined outside this function (TODO confirm).
+     */
+    rb_define_const(rb_cComplex, "I",
+                    f_complex_new_bang2(rb_cComplex, ZERO, ONE));
+}
+
+/*
+Local variables:
+c-file-style: "ruby"
+End:
+*/
diff --git a/config.guess b/config.guess
new file mode 100644
index 0000000..5186e37
--- /dev/null
+++ b/config.guess
@@ -0,0 +1,1532 @@
+#! /bin/sh
+# Attempt to guess a canonical system name.
+#   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
+#   Free Software Foundation, Inc.
+
+timestamp='2008-08-10'
+
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+# 02110-1301, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+
+# Originally written by Per Bothner .
+# Please send patches to <config-patches@gnu.org>. Submit a context
+# diff and a properly formatted ChangeLog entry.
+#
+# This script attempts to guess a canonical system name similar to
+# config.sub. If it succeeds, it prints the system name on stdout, and
+# exits with 0. Otherwise, it exits with 1.
+#
+# The plan is that this can be called by configure scripts if you
+# don't specify an explicit build system type.
+
+# Basename of the path this script was invoked as; used in the messages below.
+me=`echo "$0" | sed -e 's,.*/,,'`
+
+# Text printed for --help.
+usage="\
+Usage: $0 [OPTION]
+
+Output the configuration name of the system \`$me' is run on.
+
+Operation modes:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+# Text printed for --version; $timestamp is set near the top of the script.
+version="\
+GNU config.guess ($timestamp)
+
+Originally written by Per Bothner.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+# Suffix appended to usage-error messages; it starts with a newline so the
+# hint appears on its own line.
+help="
+Try \`$me --help' for more information."
+
+# Parse command line
+# The informational options print and exit with status 0; an unknown
+# option is reported on stderr and exits with status 1.
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit ;;
+    --version | -v )
+       echo "$version" ; exit ;;
+    --help | --h* | -h )
+       echo "$usage"; exit ;;
+    -- )     # Stop option processing
+       shift; break ;;
+    - )      # Use stdin as input.
+       break ;;
+    -* )
+       echo "$me: invalid option $1$help" >&2
+       exit 1 ;;
+    * )
+       break ;;
+  esac
+done
+
+# Anything left after option processing is an error: the script takes no
+# positional arguments.
+if test $# != 0; then
+  echo "$me: too many arguments$help" >&2
+  exit 1
+fi
+
+# Exit with status 1 on signals 1, 2 and 15.
+trap 'exit 1' 1 2 15
+
+# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
+# compiler to aid in system detection is discouraged as it requires
+# temporary files to be created and, as you can see below, it is a
+# headache to deal with in a portable fashion.
+
+# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'.
We still +# use `HOST_CC' if defined, but it is deprecated. + +# Portable tmp directory creation inspired by the Autoconf team. + +set_cc_for_build=' +trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; +trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; +: ${TMPDIR=/tmp} ; + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; +dummy=$tmp/dummy ; +tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; +case $CC_FOR_BUILD,$HOST_CC,$CC in + ,,) echo "int x;" > $dummy.c ; + for c in cc gcc c89 c99 ; do + if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then + CC_FOR_BUILD="$c"; break ; + fi ; + done ; + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found ; + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; +esac ; set_cc_for_build= ;' + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 1994-08-24) +if (test -f /.attbin/uname) >/dev/null 2>&1 ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +# Note: order is significant - the case branches are not exclusive. + +case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. 
For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". + sysctl="sysctl -n hw.machine_arch" + UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || echo unknown)` + case "${UNAME_MACHINE_ARCH}" in + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; + *) machine=${UNAME_MACHINE_ARCH}-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently, or will in the future. + case "${UNAME_MACHINE_ARCH}" in + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + eval $set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep __ELF__ >/dev/null + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case "${UNAME_VERSION}" in + Debian*) + release='-gnu' + ;; + *) + release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
+ echo "${machine}-${os}${release}" + exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + exit ;; + *:ekkoBSD:*:*) + echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} + exit ;; + *:SolidBSD:*:*) + echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} + exit ;; + macppc:MirBSD:*:*) + echo powerpc-unknown-mirbsd${UNAME_RELEASE} + exit ;; + *:MirBSD:*:*) + echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} + exit ;; + alpha:OSF1:*:*) + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. + ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case "$ALPHA_CPU_TYPE" in + "EV4 (21064)") + UNAME_MACHINE="alpha" ;; + "EV4.5 (21064)") + UNAME_MACHINE="alpha" ;; + "LCA4 (21066/21068)") + UNAME_MACHINE="alpha" ;; + "EV5 (21164)") + UNAME_MACHINE="alphaev5" ;; + "EV5.6 (21164A)") + UNAME_MACHINE="alphaev56" ;; + "EV5.6 (21164PC)") + UNAME_MACHINE="alphapca56" ;; + "EV5.7 (21164PC)") + UNAME_MACHINE="alphapca57" ;; + "EV6 (21264)") + UNAME_MACHINE="alphaev6" ;; + "EV6.7 (21264A)") + UNAME_MACHINE="alphaev67" ;; + "EV6.8CB (21264C)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8AL (21264B)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8CX (21264D)") + UNAME_MACHINE="alphaev68" ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE="alphaev69" ;; + "EV7 (21364)") + UNAME_MACHINE="alphaev7" ;; + "EV7.9 (21364A)") + UNAME_MACHINE="alphaev79" ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. 
+ # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + exit ;; + Alpha\ *:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # Should we change UNAME_MACHINE based on the output of uname instead + # of the specific Alpha model? + echo alpha-pc-interix + exit ;; + 21064:Windows_NT:50:3) + echo alpha-dec-winnt3.5 + exit ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-unknown-sysv4 + exit ;; + *:[Aa]miga[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-amigaos + exit ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-morphos + exit ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix${UNAME_RELEASE} + exit ;; + arm:riscos:*:*|arm:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. 
+ if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit ;; + DRS?6000:unix:4.0:6*) + echo sparc-icl-nx6 + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7; exit ;; + esac ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. + echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` + exit ;; + sun3*:SunOS:*:*) + echo m68k-sun-sunos${UNAME_RELEASE} + exit ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 + case "`/bin/arch`" in + sun3) + echo m68k-sun-sunos${UNAME_RELEASE} + ;; + sun4) + echo sparc-sun-sunos${UNAME_RELEASE} + ;; + esac + exit ;; + aushp:SunOS:*:*) + echo sparc-auspex-sunos${UNAME_RELEASE} + exit ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). 
Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. + atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint${UNAME_RELEASE} + exit ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint${UNAME_RELEASE} + exit ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint${UNAME_RELEASE} + exit ;; + m68k:machten:*:*) + echo m68k-apple-machten${UNAME_RELEASE} + exit ;; + powerpc:machten:*:*) + echo powerpc-apple-machten${UNAME_RELEASE} + exit ;; + RISC*:Mach:*:*) + echo mips-dec-mach_bsd4.3 + exit ;; + RISC*:ULTRIX:*:*) + echo mips-dec-ultrix${UNAME_RELEASE} + exit ;; + VAX*:ULTRIX*:*:*) + echo vax-dec-ultrix${UNAME_RELEASE} + exit ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + echo clipper-intergraph-clix${UNAME_RELEASE} + exit ;; + mips:*:*:UMIPS | mips:*:*:RISCos) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c +#ifdef __cplusplus +#include /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && + dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && + 
SYSTEM_NAME=`$dummy $dummyarg` && + { echo "$SYSTEM_NAME"; exit; } + echo mips-mips-riscos${UNAME_RELEASE} + exit ;; + Motorola:PowerMAX_OS:*:*) + echo powerpc-motorola-powermax + exit ;; + Motorola:*:4.3:PL8-*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:Power_UNIX:*:*) + echo powerpc-harris-powerunix + exit ;; + m88k:CX/UX:7*:*) + echo m88k-harris-cxux7 + exit ;; + m88k:*:4*:R4*) + echo m88k-motorola-sysv4 + exit ;; + m88k:*:3*:R3*) + echo m88k-motorola-sysv3 + exit ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + then + if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ + [ ${TARGET_BINARY_INTERFACE}x = x ] + then + echo m88k-dg-dgux${UNAME_RELEASE} + else + echo m88k-dg-dguxbcs${UNAME_RELEASE} + fi + else + echo i586-dg-dgux${UNAME_RELEASE} + fi + exit ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + echo m88k-motorola-sysv3 + exit ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + echo m88k-tektronix-sysv3 + exit ;; + Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) + echo m68k-tektronix-bsd + exit ;; + *:IRIX*:*:*) + echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` + exit ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
+ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + echo i386-ibm-aix + exit ;; + ia64:AIX:*:*) + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} + exit ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + echo rs6000-ibm-aix3.2.4 + else + echo rs6000-ibm-aix3.2 + fi + exit ;; + *:AIX:*:[456]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${IBM_ARCH}-ibm-aix${IBM_REV} + exit ;; + *:AIX:*:*) + echo rs6000-ibm-aix + exit ;; + ibmrt:4.4BSD:*|romp-ibm:BSD:*) + echo romp-ibm-bsd4.4 + exit ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to + exit ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + echo rs6000-bull-bosx + exit ;; + DPX/2?00:B.O.S.:*:*) + echo m68k-bull-sysv3 + exit ;; + 9000/[34]??:4.3bsd:1.*:*) + echo m68k-hp-bsd + exit ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + echo m68k-hp-bsd4.4 + exit ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + case "${UNAME_MACHINE}" in + 9000/31? ) HP_ARCH=m68000 ;; + 9000/[34]?? 
) HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "${sc_cpu_version}" in + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "${sc_kernel_bits}" in + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; + '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + esac ;; + esac + fi + if [ "${HP_ARCH}" = "" ]; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + + #define _HPUX_SOURCE + #include + #include + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if [ ${HP_ARCH} = "hppa2.0w" ] + then + eval $set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. 
GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + grep __LP64__ >/dev/null + then + HP_ARCH="hppa2.0w" + else + HP_ARCH="hppa64" + fi + fi + echo ${HP_ARCH}-hp-hpux${HPUX_REV} + exit ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux${HPUX_REV} + exit ;; + 3050*:HI-UX:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. */ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + echo unknown-hitachi-hiuxwe2 + exit ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) + echo hppa1.1-hp-bsd + exit ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) + echo hppa1.1-hp-osf + exit ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit ;; + i*86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo ${UNAME_MACHINE}-unknown-osf1mk + else + echo ${UNAME_MACHINE}-unknown-osf1 + fi + exit ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit ;; + 
C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*[A-Z]90:*:*:*) + echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + *:UNICOS/mp:*:*) + echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} + exit ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:BSD/OS:*:*) + echo 
${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:FreeBSD:*:*) + case ${UNAME_MACHINE} in + pc98) + echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + amd64) + echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + *) + echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + esac + exit ;; + i*:CYGWIN*:*) + echo ${UNAME_MACHINE}-pc-cygwin + exit ;; + *:MINGW*:*) + echo ${UNAME_MACHINE}-pc-mingw32 + exit ;; + i*:windows32*:*) + # uname -m includes "-pc" on this system. + echo ${UNAME_MACHINE}-mingw32 + exit ;; + i*:PW*:*) + echo ${UNAME_MACHINE}-pc-pw32 + exit ;; + *:Interix*:[3456]*) + case ${UNAME_MACHINE} in + x86) + echo i586-pc-interix${UNAME_RELEASE} + exit ;; + EM64T | authenticamd) + echo x86_64-unknown-interix${UNAME_RELEASE} + exit ;; + IA64) + echo ia64-unknown-interix${UNAME_RELEASE} + exit ;; + esac ;; + [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) + echo i${UNAME_MACHINE}-pc-mks + exit ;; + i*:Windows_NT*:* | Pentium*:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we + # UNAME_MACHINE based on the output of uname instead of i386? 
+ echo i586-pc-interix + exit ;; + i*:UWIN*:*) + echo ${UNAME_MACHINE}-pc-uwin + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; + p*:CYGWIN*:*) + echo powerpcle-unknown-cygwin + exit ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + *:GNU:*:*) + # the GNU system + echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu + exit ;; + i*86:Minix:*:*) + echo ${UNAME_MACHINE}-pc-minix + exit ;; + arm*:Linux:*:*) + eval $set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + echo ${UNAME_MACHINE}-unknown-linux-gnu + else + echo ${UNAME_MACHINE}-unknown-linux-gnueabi + fi + exit ;; + avr32*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + cris:Linux:*:*) + echo cris-axis-linux-gnu + exit ;; + crisv32:Linux:*:*) + echo crisv32-axis-linux-gnu + exit ;; + frv:Linux:*:*) + echo frv-unknown-linux-gnu + exit ;; + ia64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m32r*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m68*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + mips:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef mips + #undef mipsel + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=mipsel + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=mips + #else + CPU= + #endif + #endif +EOF + eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' + /^CPU/{ + s: ::g + p + }'`" + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + mips64:Linux:*:*) + 
eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef mips64 + #undef mips64el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=mips64el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=mips64 + #else + CPU= + #endif + #endif +EOF + eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' + /^CPU/{ + s: ::g + p + }'`" + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + or32:Linux:*:*) + echo or32-unknown-linux-gnu + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-gnu + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-gnu + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null + if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-gnu ;; + PA8*) echo hppa2.0-unknown-linux-gnu ;; + *) echo hppa-unknown-linux-gnu ;; + esac + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-gnu + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo ${UNAME_MACHINE}-ibm-linux + exit ;; + sh64*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sh*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + vax:Linux:*:*) + echo ${UNAME_MACHINE}-dec-linux-gnu + exit ;; + x86_64:Linux:*:*) + echo x86_64-unknown-linux-gnu + exit ;; + xtensa*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + i*86:Linux:*:*) + # The BFD linker knows what the default object file format is, so + # first see if it will tell us. cd to the root directory to prevent + # problems with other programs or directories called `ld' in the path. + # Set LC_ALL=C to ensure ld outputs messages in English. + ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ + | sed -ne '/supported targets:/!d + s/[ ][ ]*/ /g + s/.*supported targets: *// + s/ .*// + p'` + case "$ld_supported_targets" in + elf32-i386) + TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" + ;; + a.out-i386-linux) + echo "${UNAME_MACHINE}-pc-linux-gnuaout" + exit ;; + coff-i386) + echo "${UNAME_MACHINE}-pc-linux-gnucoff" + exit ;; + "") + # Either a pre-BFD a.out linker (linux-gnuoldld) or + # one that does not give us useful --help. 
+                echo "${UNAME_MACHINE}-pc-linux-gnuoldld"
+                exit ;;
+        esac
+        # Determine whether the default compiler is a.out or elf
+        eval $set_cc_for_build
+        # The probe is only preprocessed, never compiled: whichever LIBC=...
+        # line survives the #if logic is eval'ed below.  <features.h> is what
+        # defines __GLIBC__, so it has to be included first.
+        sed 's/^ //' << EOF >$dummy.c
+ #include <features.h>
+ #ifdef __ELF__
+ # ifdef __GLIBC__
+ # if __GLIBC__ >= 2
+ LIBC=gnu
+ # else
+ LIBC=gnulibc1
+ # endif
+ # else
+ LIBC=gnulibc1
+ # endif
+ #else
+ #if defined(__INTEL_COMPILER) || defined(__PGI) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
+ LIBC=gnu
+ #else
+ LIBC=gnuaout
+ #endif
+ #endif
+ #ifdef __dietlibc__
+ LIBC=dietlibc
+ #endif
+EOF
+        eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
+            /^LIBC/{
+                s: ::g
+                p
+            }'`"
+        test x"${LIBC}" != x && {
+                echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
+                exit
+        }
+        test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; }
+        ;;
+    i*86:DYNIX/ptx:4*:*)
+        # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
+        # earlier versions are messed up and put the nodename in both
+        # sysname and nodename.
+        echo i386-sequent-sysv4
+        exit ;;
+    i*86:UNIX_SV:4.2MP:2.*)
+        # Unixware is an offshoot of SVR4, but it has its own version
+        # number series starting with 2...
+        # I am not positive that other SVR4 systems won't match this,
+        # I just have to hope. -- rms.
+        # Use sysv4.2uw... so that sysv4* matches it.
+        echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
+        exit ;;
+    i*86:OS/2:*:*)
+        # If we were able to find `uname', then EMX Unix compatibility
+        # is probably installed.
+        echo ${UNAME_MACHINE}-pc-os2-emx
+        exit ;;
+    i*86:XTS-300:*:STOP)
+        echo ${UNAME_MACHINE}-unknown-stop
+        exit ;;
+    i*86:atheos:*:*)
+        echo ${UNAME_MACHINE}-unknown-atheos
+        exit ;;
+    i*86:syllable:*:*)
+        echo ${UNAME_MACHINE}-pc-syllable
+        exit ;;
+    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*)
+        echo i386-unknown-lynxos${UNAME_RELEASE}
+        exit ;;
+    i*86:*DOS:*:*)
+        echo ${UNAME_MACHINE}-pc-msdosdjgpp
+        exit ;;
+    i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)
+        UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
+        if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
+            echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL}
+        else
+            echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL}
+        fi
+        exit ;;
+    i*86:*:5:[678]*)
+        # UnixWare 7.x, OpenUNIX and OpenServer 6.
+        case `/bin/uname -X | grep "^Machine"` in
+            *486*) UNAME_MACHINE=i486 ;;
+            *Pentium) UNAME_MACHINE=i586 ;;
+            *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
+        esac
+        echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
+        exit ;;
+    i*86:*:3.2:*)
+        # Three cases, tried in order: /usr/options/cb.name exists (reported
+        # as -pc-isc), "uname -X" works (reported as -pc-sco, with the CPU
+        # refined from its output), otherwise a generic -pc-sysv32.
+        if test -f /usr/options/cb.name; then
+            UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
+            echo ${UNAME_MACHINE}-pc-isc$UNAME_REL
+        elif /bin/uname -X 2>/dev/null >/dev/null ; then
+            UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
+            (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
+            (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
+                && UNAME_MACHINE=i586
+            (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \
+                && UNAME_MACHINE=i686
+            (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
+                && UNAME_MACHINE=i686
+            echo ${UNAME_MACHINE}-pc-sco$UNAME_REL
+        else
+            echo ${UNAME_MACHINE}-pc-sysv32
+        fi
+        exit ;;
+    pc:*:*:*)
+        # Left here for compatibility:
+        # uname -m prints for DJGPP always 'pc', but it prints nothing about
+        # the processor, so we play safe by assuming i386.
+ echo i386-pc-msdosdjgpp + exit ;; + Intel:Mach:3*:*) + echo i386-pc-mach3 + exit ;; + paragon:*:*:*) + echo i860-intel-osf1 + exit ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. + echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 + fi + exit ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit ;; + mc68k:UNIX:SYSTEM5:3.51m) + echo m68k-convergent-sysv + exit ;; + M680?0:D-NIX:5.3:*) + echo m68k-diab-dnix + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos${UNAME_RELEASE} + exit ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit ;; + TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos${UNAME_RELEASE} + exit ;; + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos${UNAME_RELEASE} + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) + echo powerpc-unknown-lynxos${UNAME_RELEASE} + exit ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv${UNAME_RELEASE} + exit ;; + RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + 
*:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo ${UNAME_MACHINE}-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + echo i586-unisys-sysv4 + exit ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes . + # How about differentiating between stratus architectures? -djm + echo hppa1.1-stratus-sysv4 + exit ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + echo i860-stratus-sysv4 + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo ${UNAME_MACHINE}-stratus-vos + exit ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux${UNAME_RELEASE} + exit ;; + news*:NEWS-OS:6*:*) + echo mips-sony-newsos6 + exit ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv${UNAME_RELEASE} + else + echo mips-unknown-sysv${UNAME_RELEASE} + fi + exit ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. 
+ echo i586-pc-beos + exit ;; + BeMac:Haiku:*:*) + echo powerpc-apple-haiku + exit ;; + BePC:Haiku:*:*) + echo i586-pc-haiku + exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux${UNAME_RELEASE} + exit ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux${UNAME_RELEASE} + exit ;; + SX-6:SUPER-UX:*:*) + echo sx6-nec-superux${UNAME_RELEASE} + exit ;; + SX-7:SUPER-UX:*:*) + echo sx7-nec-superux${UNAME_RELEASE} + exit ;; + SX-8:SUPER-UX:*:*) + echo sx8-nec-superux${UNAME_RELEASE} + exit ;; + SX-8R:SUPER-UX:*:*) + echo sx8r-nec-superux${UNAME_RELEASE} + exit ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Rhapsody:*:*) + echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + case $UNAME_PROCESSOR in + unknown) UNAME_PROCESSOR=powerpc ;; + esac + echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = "x86"; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} + exit ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit ;; + NSE-?:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk${UNAME_RELEASE} + exit ;; + NSR-?:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk${UNAME_RELEASE} + exit ;; + *:NonStop-UX:*:*) + echo mips-compaq-nonstopux + exit ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit ;; + DS/*:UNIX_System_V:*:*) + echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} + exit ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. 
+        if test "$cputype" = "386"; then
+            UNAME_MACHINE=i386
+        else
+            UNAME_MACHINE="$cputype"
+        fi
+        echo ${UNAME_MACHINE}-unknown-plan9
+        exit ;;
+    *:TOPS-10:*:*)
+        echo pdp10-unknown-tops10
+        exit ;;
+    *:TENEX:*:*)
+        echo pdp10-unknown-tenex
+        exit ;;
+    KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
+        echo pdp10-dec-tops20
+        exit ;;
+    XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
+        echo pdp10-xkl-tops20
+        exit ;;
+    *:TOPS-20:*:*)
+        echo pdp10-unknown-tops20
+        exit ;;
+    *:ITS:*:*)
+        echo pdp10-unknown-its
+        exit ;;
+    SEI:*:*:SEIUX)
+        echo mips-sei-seiux${UNAME_RELEASE}
+        exit ;;
+    *:DragonFly:*:*)
+        echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
+        exit ;;
+    *:*VMS:*:*)
+        UNAME_MACHINE=`(uname -p) 2>/dev/null`
+        case "${UNAME_MACHINE}" in
+            A*) echo alpha-dec-vms ; exit ;;
+            I*) echo ia64-dec-vms ; exit ;;
+            V*) echo vax-dec-vms ; exit ;;
+        esac ;;
+    *:XENIX:*:SysV)
+        echo i386-pc-xenix
+        exit ;;
+    i*86:skyos:*:*)
+        echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//'
+        exit ;;
+    i*86:rdos:*:*)
+        echo ${UNAME_MACHINE}-pc-rdos
+        exit ;;
+esac
+
+#echo '(No uname command or uname output not recognized.)' 1>&2
+#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2
+
+# No uname pattern matched: fall back to a C program that identifies the
+# system from the compiler's predefined macros.  The here-document is
+# unquoted, so the shell expands backquotes and variables inside it.
+eval $set_cc_for_build
+cat >$dummy.c <<EOF
+#ifdef _SEQUENT_
+# include <sys/types.h>
+# include <sys/utsname.h>
+#endif
+main ()
+{
+#if defined (sony)
+#if defined (MIPSEB)
+  /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed,
+     I don't know....
*/
+  printf ("mips-sony-bsd\n"); exit (0);
+#else
+#include <sys/param.h>
+  printf ("m68k-sony-newsos%s\n",
+#ifdef NEWSOS4
+          "4"
+#else
+          ""
+#endif
+         ); exit (0);
+#endif
+#endif
+
+#if defined (__arm) && defined (__acorn) && defined (__unix)
+  printf ("arm-acorn-riscix\n"); exit (0);
+#endif
+
+#if defined (hp300) && !defined (hpux)
+  printf ("m68k-hp-bsd\n"); exit (0);
+#endif
+
+#if defined (NeXT)
+#if !defined (__ARCHITECTURE__)
+#define __ARCHITECTURE__ "m68k"
+#endif
+  int version;
+  /* The backquoted command below is run by the shell while the
+     here-document is written, so the C source receives its output.  */
+  version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
+  if (version < 4)
+    printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
+  else
+    printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
+  exit (0);
+#endif
+
+#if defined (MULTIMAX) || defined (n16)
+#if defined (UMAXV)
+  printf ("ns32k-encore-sysv\n"); exit (0);
+#else
+#if defined (CMU)
+  printf ("ns32k-encore-mach\n"); exit (0);
+#else
+  printf ("ns32k-encore-bsd\n"); exit (0);
+#endif
+#endif
+#endif
+
+#if defined (__386BSD__)
+  printf ("i386-pc-bsd\n"); exit (0);
+#endif
+
+#if defined (sequent)
+#if defined (i386)
+  printf ("i386-sequent-dynix\n"); exit (0);
+#endif
+#if defined (ns32000)
+  printf ("ns32k-sequent-dynix\n"); exit (0);
+#endif
+#endif
+
+#if defined (_SEQUENT_)
+    struct utsname un;
+
+    uname(&un);
+
+    if (strncmp(un.version, "V2", 2) == 0) {
+        printf ("i386-sequent-ptx2\n"); exit (0);
+    }
+    if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct?
*/
+        printf ("i386-sequent-ptx1\n"); exit (0);
+    }
+    printf ("i386-sequent-ptx\n"); exit (0);
+
+#endif
+
+#if defined (vax)
+# if !defined (ultrix)
+#  include <sys/param.h>
+#  if defined (BSD)
+#   if BSD == 43
+      printf ("vax-dec-bsd4.3\n"); exit (0);
+#   else
+#    if BSD == 199006
+      printf ("vax-dec-bsd4.3reno\n"); exit (0);
+#    else
+      printf ("vax-dec-bsd\n"); exit (0);
+#    endif
+#   endif
+#  else
+    printf ("vax-dec-bsd\n"); exit (0);
+#  endif
+# else
+    printf ("vax-dec-ultrix\n"); exit (0);
+# endif
+#endif
+
+#if defined (alliant) && defined (i860)
+  printf ("i860-alliant-bsd\n"); exit (0);
+#endif
+
+  exit (1);
+}
+EOF
+
+# Build and run the probe; anything it prints is the answer.
+$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` &&
+        { echo "$SYSTEM_NAME"; exit; }
+
+# Apollos put the system type in the environment.
+
+test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; }
+
+# Convex versions that predate uname can use getsysinfo(1)
+
+if [ -x /usr/convex/getsysinfo ]
+then
+    case `getsysinfo -f cpu_type` in
+    c1*)
+        echo c1-convex-bsd
+        exit ;;
+    c2*)
+        if getsysinfo -f scalar_acc
+        then echo c32-convex-bsd
+        else echo c2-convex-bsd
+        fi
+        exit ;;
+    c34*)
+        echo c34-convex-bsd
+        exit ;;
+    c38*)
+        echo c38-convex-bsd
+        exit ;;
+    c4*)
+        echo c4-convex-bsd
+        exit ;;
+    esac
+fi
+
+# Nothing matched: print a diagnostic with the collected facts and fail.
+# NOTE(review): the opening of this message (from the here-document operator
+# up to the bug-report address) was lost when the patch text was extracted;
+# the wording and URLs below are a reconstruction - confirm against the
+# pristine config.guess before applying.
+cat >&2 <<EOF
+$0: unable to guess system type
+
+This script, last modified $timestamp, has failed to recognize
+the operating system you are using. It is advised that you
+download the most up to date version of the config scripts from
+
+  https://git.savannah.gnu.org/cgit/config.git/plain/config.guess
+and
+  https://git.savannah.gnu.org/cgit/config.git/plain/config.sub
+
+If the version you run ($0) is already up to date, please
+send the following data and any information you think might be
+pertinent to <config-patches@gnu.org> in order to provide the needed
+information to handle your system.
+ +config.guess timestamp = $timestamp + +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = ${UNAME_MACHINE} +UNAME_RELEASE = ${UNAME_RELEASE} +UNAME_SYSTEM = ${UNAME_SYSTEM} +UNAME_VERSION = ${UNAME_VERSION} +EOF + +exit 1 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/config.sub b/config.sub new file mode 100644 index 0000000..6759825 --- /dev/null +++ b/config.sub @@ -0,0 +1,1658 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 +# Free Software Foundation, Inc. + +timestamp='2008-01-16' + +# This file is (in principle) common to ALL GNU software. +# The presence of a machine in this file suggests that SOME GNU software +# can handle that machine. It does not imply ALL GNU software can. +# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +# 02110-1301, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + + +# Please send patches to <config-patches@gnu.org>. Submit a context +# diff and a properly formatted ChangeLog entry. +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. + +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS + $0 [OPTION] ALIAS + +Canonicalize a configuration name. 
+
+Operation modes:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+version="\
+GNU config.sub ($timestamp)
+
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+help="
+Try \`$me --help' for more information."
+
+# Parse command line
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit ;;
+    --version | -v )
+       echo "$version" ; exit ;;
+    --help | --h* | -h )
+       echo "$usage"; exit ;;
+    -- )     # Stop option processing
+       shift; break ;;
+    - )      # Use stdin as input.
+       break ;;
+    -* )
+       # Diagnostics go to stderr, like the argument-count errors below,
+       # so that stdout only ever carries the canonical name.
+       echo "$me: invalid option $1$help" >&2
+       exit 1 ;;
+
+    *local*)
+       # First pass through any local machine types.
+       echo $1
+       exit ;;
+
+    * )
+       break ;;
+  esac
+done
+
+# Exactly one configuration name must remain after option parsing.
+case $# in
+ 0) echo "$me: missing argument$help" >&2
+    exit 1;;
+ 1) ;;
+ *) echo "$me: too many arguments$help" >&2
+    exit 1;;
+esac
+
+# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
+# Here we must recognize all the valid KERNEL-OS combinations.
+maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` +case $maybe_os in + nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \ + uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \ + storm-chaos* | os2-emx* | rtmk-nova*) + os=-$maybe_os + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` + ;; + *) + basic_machine=`echo $1 | sed 's/-[^-]*$//'` + if [ $basic_machine != $1 ] + then os=`echo $1 | sed 's/.*-/-/'` + else os=; fi + ;; +esac + +### Let's recognize common machines as not being operating systems so +### that things like config.sub decstation-3100 work. We also +### recognize some manufacturers as not being operating systems, so we +### can provide default operating systems below. +case $os in + -sun*os*) + # Prevent following clause from handling this invalid input. + ;; + -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ + -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ + -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ + -apple | -axis | -knuth | -cray) + os= + basic_machine=$1 + ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 + ;; + -scout) + ;; + -wrs) + os=-vxworks + basic_machine=$1 + ;; + -chorusos*) + os=-chorusos + basic_machine=$1 + ;; + -chorusrdb) + os=-chorusrdb + basic_machine=$1 + ;; + -hiux*) + os=-hiuxwe2 + ;; + -sco6) + os=-sco5v6 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco5) + os=-sco3.2v5 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco4) + os=-sco3.2v4 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2v[4-9]*) + # 
Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco5v6*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco*) + os=-sco3.2v2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -udk*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -isc) + os=-isc2.2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -clix*) + basic_machine=clipper-intergraph + ;; + -isc*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -lynx*) + os=-lynxos + ;; + -ptx*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` + ;; + -windowsnt*) + os=`echo $os | sed -e 's/windowsnt/winnt/'` + ;; + -psos*) + os=-psos + ;; + -mint | -mint[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; +esac + +# Decode aliases for certain CPU-COMPANY combinations. +case $basic_machine in + # Recognize the basic CPU types without company name. + # Some are omitted here because they have special meanings below. 
+ 1750a | 580 \ + | a29k \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ + | am33_2.0 \ + | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ + | bfin \ + | c4x | clipper \ + | d10v | d30v | dlx | dsp16xx \ + | fido | fr30 | frv \ + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | i370 | i860 | i960 | ia64 \ + | ip2k | iq2000 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ + | maxq | mb | microblaze | mcore | mep \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64el \ + | mips64vr | mips64vrel \ + | mips64orion | mips64orionel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 | mips64vr5900el \ + | mipsisa32 | mipsisa32el \ + | mipsisa32r2 | mipsisa32r2el \ + | mipsisa64 | mipsisa64el \ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | mipsisa64sr71k | mipsisa64sr71kel \ + | mipstx39 | mipstx39el \ + | mn10200 | mn10300 \ + | mt \ + | msp430 \ + | nios | nios2 \ + | ns16k | ns32k \ + | or32 \ + | pdp10 | pdp11 | pj | pjl \ + | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ + | pyramid \ + | score \ + | sh | sh[1234] | sh[24]a | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh64 | sh64le \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ + | spu | strongarm \ + | tahoe | thumb | tic4x | tic80 | tron \ + | v850 | v850e \ + | we32k \ + | x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \ + | z8k) + basic_machine=$basic_machine-unknown + ;; + m6811 | m68hc11 | m6812 | m68hc12) + # Motorola 68HC11/12. 
+ basic_machine=$basic_machine-unknown + os=-none + ;; + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) + ;; + ms1) + basic_machine=mt-unknown + ;; + + # We use `pc' rather than `unknown' + # because (1) that's what they normally are, and + # (2) the word "unknown" tends to confuse beginning users. + i*86 | x86_64) + basic_machine=$basic_machine-pc + ;; + # Object if more than one company name word. + *-*-*) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; + # Recognize the basic CPU types with company name. + 580-* \ + | a29k-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ + | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ + | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ + | avr-* | avr32-* \ + | bfin-* | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \ + | clipper-* | craynv-* | cydra-* \ + | d10v-* | d30v-* | dlx-* \ + | elxsi-* \ + | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ + | h8300-* | h8500-* \ + | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ + | i*86-* | i860-* | i960-* | ia64-* \ + | ip2k-* | iq2000-* \ + | m32c-* | m32r-* | m32rle-* \ + | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ + | m88110-* | m88k-* | maxq-* | mcore-* \ + | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ + | mips16-* \ + | mips64-* | mips64el-* \ + | mips64vr-* | mips64vrel-* \ + | mips64orion-* | mips64orionel-* \ + | mips64vr4100-* | mips64vr4100el-* \ + | mips64vr4300-* | mips64vr4300el-* \ + | mips64vr5000-* | mips64vr5000el-* \ + | mips64vr5900-* | mips64vr5900el-* \ + | mipsisa32-* | mipsisa32el-* \ + | mipsisa32r2-* | mipsisa32r2el-* \ + | mipsisa64-* | mipsisa64el-* \ + | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64sb1-* | mipsisa64sb1el-* \ + | mipsisa64sr71k-* | mipsisa64sr71kel-* \ + | mipstx39-* | mipstx39el-* \ + | mmix-* \ + | 
mt-* \ + | msp430-* \ + | nios-* | nios2-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ + | orion-* \ + | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ + | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ + | pyramid-* \ + | romp-* | rs6000-* \ + | sh-* | sh[1234]-* | sh[24]a-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ + | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ + | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ + | sparclite-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \ + | tahoe-* | thumb-* \ + | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ + | tron-* \ + | v850-* | v850e-* | vax-* \ + | we32k-* \ + | x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \ + | xstormy16-* | xtensa*-* \ + | ymp-* \ + | z8k-*) + ;; + # Recognize the basic CPU types without company name, with glob match. + xtensa*) + basic_machine=$basic_machine-unknown + ;; + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. 
+ 386bsd) + basic_machine=i386-unknown + os=-bsd + ;; + 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + basic_machine=m68000-att + ;; + 3b*) + basic_machine=we32k-att + ;; + a29khif) + basic_machine=a29k-amd + os=-udi + ;; + abacus) + basic_machine=abacus-unknown + ;; + adobe68k) + basic_machine=m68010-adobe + os=-scout + ;; + alliant | fx80) + basic_machine=fx80-alliant + ;; + altos | altos3068) + basic_machine=m68k-altos + ;; + am29k) + basic_machine=a29k-none + os=-bsd + ;; + amd64) + basic_machine=x86_64-pc + ;; + amd64-*) + basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + amdahl) + basic_machine=580-amdahl + os=-sysv + ;; + amiga | amiga-*) + basic_machine=m68k-unknown + ;; + amigaos | amigados) + basic_machine=m68k-unknown + os=-amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + os=-sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=-sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=-bsd + ;; + aux) + basic_machine=m68k-apple + os=-aux + ;; + balance) + basic_machine=ns32k-sequent + os=-dynix + ;; + blackfin) + basic_machine=bfin-unknown + os=-linux + ;; + blackfin-*) + basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + c90) + basic_machine=c90-cray + os=-unicos + ;; + convex-c1) + basic_machine=c1-convex + os=-bsd + ;; + convex-c2) + basic_machine=c2-convex + os=-bsd + ;; + convex-c32) + basic_machine=c32-convex + os=-bsd + ;; + convex-c34) + basic_machine=c34-convex + os=-bsd + ;; + convex-c38) + basic_machine=c38-convex + os=-bsd + ;; + cray | j90) + basic_machine=j90-cray + os=-unicos + ;; + craynv) + basic_machine=craynv-cray + os=-unicosmp + ;; + cr16) + basic_machine=cr16-unknown + os=-elf + ;; + crds | unos) + basic_machine=m68k-crds + ;; + crisv32 | crisv32-* | etraxfs*) + basic_machine=crisv32-axis + ;; + cris | cris-* | etrax*) + basic_machine=cris-axis + ;; + crx) + basic_machine=crx-unknown + os=-elf + ;; + da30 | da30-*) + basic_machine=m68k-da30 + ;; + 
decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) + basic_machine=mips-dec + ;; + decsystem10* | dec10*) + basic_machine=pdp10-dec + os=-tops10 + ;; + decsystem20* | dec20*) + basic_machine=pdp10-dec + os=-tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + basic_machine=m68k-motorola + ;; + delta88) + basic_machine=m88k-motorola + os=-sysv3 + ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp + ;; + dpx20 | dpx20-*) + basic_machine=rs6000-bull + os=-bosx + ;; + dpx2* | dpx2*-bull) + basic_machine=m68k-bull + os=-sysv3 + ;; + ebmon29k) + basic_machine=a29k-amd + os=-ebmon + ;; + elxsi) + basic_machine=elxsi-elxsi + os=-bsd + ;; + encore | umax | mmax) + basic_machine=ns32k-encore + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + os=-ose + ;; + fx2800) + basic_machine=i860-alliant + ;; + genix) + basic_machine=ns32k-ns + ;; + gmicro) + basic_machine=tron-gmicro + os=-sysv + ;; + go32) + basic_machine=i386-pc + os=-go32 + ;; + h3050r* | hiux*) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + h8300hms) + basic_machine=h8300-hitachi + os=-hms + ;; + h8300xray) + basic_machine=h8300-hitachi + os=-xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=-hms + ;; + harris) + basic_machine=m88k-harris + os=-sysv3 + ;; + hp300-*) + basic_machine=m68k-hp + ;; + hp300bsd) + basic_machine=m68k-hp + os=-bsd + ;; + hp300hpux) + basic_machine=m68k-hp + os=-hpux + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + basic_machine=m68000-hp + ;; + hp9k3[2-9][0-9]) + basic_machine=m68k-hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + basic_machine=hppa1.1-hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really 
hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hppa-next) + os=-nextstep3 + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=-osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=-proelf + ;; + i370-ibm* | ibm*) + basic_machine=i370-ibm + ;; +# I'm not sure what "Sysv32" means. Should this be sysv3.2? + i*86v32) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv32 + ;; + i*86v4*) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv4 + ;; + i*86v) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv + ;; + i*86sol2) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-solaris2 + ;; + i386mach) + basic_machine=i386-mach + os=-mach + ;; + i386-vsta | vsta) + basic_machine=i386-unknown + os=-vsta + ;; + iris | iris4d) + basic_machine=mips-sgi + case $os in + -irix*) + ;; + *) + os=-irix4 + ;; + esac + ;; + isi68 | isi) + basic_machine=m68k-isi + os=-sysv + ;; + m68knommu) + basic_machine=m68k-unknown + os=-linux + ;; + m68knommu-*) + basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + m88k-omron*) + basic_machine=m88k-omron + ;; + magnum | m3230) + basic_machine=mips-mips + os=-sysv + ;; + merlin) + basic_machine=ns32k-utek + os=-sysv + ;; + mingw32) + basic_machine=i386-pc + os=-mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + os=-mingw32ce + ;; + miniframe) + basic_machine=m68000-convergent + ;; + *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; + mips3*-*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` + ;; + mips3*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown + ;; + monitor) + basic_machine=m68k-rom68k + os=-coff + ;; + morphos) + basic_machine=powerpc-unknown + os=-morphos + ;; + msdos) + basic_machine=i386-pc + os=-msdos + ;; + ms1-*) + basic_machine=`echo $basic_machine | sed -e 
's/ms1-/mt-/'` + ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; + ncr3000) + basic_machine=i486-ncr + os=-sysv4 + ;; + netbsd386) + basic_machine=i386-unknown + os=-netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=-linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=-newsos + ;; + news1000) + basic_machine=m68030-sony + os=-newsos + ;; + news-3600 | risc-news) + basic_machine=mips-sony + os=-newsos + ;; + necv70) + basic_machine=v70-nec + os=-sysv + ;; + next | m*-next ) + basic_machine=m68k-next + case $os in + -nextstep* ) + ;; + -ns2*) + os=-nextstep2 + ;; + *) + os=-nextstep3 + ;; + esac + ;; + nh3000) + basic_machine=m68k-harris + os=-cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=-cxux + ;; + nindy960) + basic_machine=i960-intel + os=-nindy + ;; + mon960) + basic_machine=i960-intel + os=-mon960 + ;; + nonstopux) + basic_machine=mips-compaq + os=-nonstopux + ;; + np1) + basic_machine=np1-gould + ;; + nsr-tandem) + basic_machine=nsr-tandem + ;; + op50n-* | op60c-*) + basic_machine=hppa1.1-oki + os=-proelf + ;; + openrisc | openrisc-*) + basic_machine=or32-unknown + ;; + os400) + basic_machine=powerpc-ibm + os=-os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=-ose + ;; + os68k) + basic_machine=m68k-none + os=-os68k + ;; + pa-hitachi) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + paragon) + basic_machine=i860-intel + os=-osf + ;; + parisc) + basic_machine=hppa-unknown + os=-linux + ;; + parisc-*) + basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + pbd) + basic_machine=sparc-tti + ;; + pbb) + basic_machine=m68k-tti + ;; + pc532 | pc532-*) + basic_machine=ns32k-pc532 + ;; + pc98) + basic_machine=i386-pc + ;; + pc98-*) + basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium | p5 | k5 | k6 | nexgen | viac3) + basic_machine=i586-pc + ;; + pentiumpro | p6 | 6x86 | athlon | athlon_*) + basic_machine=i686-pc + ;; + pentiumii | pentium2 | 
pentiumiii | pentium3) + basic_machine=i686-pc + ;; + pentium4) + basic_machine=i786-pc + ;; + pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) + basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumpro-* | p6-* | 6x86-* | athlon-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium4-*) + basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pn) + basic_machine=pn-gould + ;; + power) basic_machine=power-ibm + ;; + ppc) basic_machine=powerpc-unknown + ;; + ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppcle | powerpclittle | ppc-le | powerpc-little) + basic_machine=powerpcle-unknown + ;; + ppcle-* | powerpclittle-*) + basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64) basic_machine=powerpc64-unknown + ;; + ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64le | powerpc64little | ppc64-le | powerpc64-little) + basic_machine=powerpc64le-unknown + ;; + ppc64le-* | powerpc64little-*) + basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ps2) + basic_machine=i386-ibm + ;; + pw32) + basic_machine=i586-unknown + os=-pw32 + ;; + rdos) + basic_machine=i386-pc + os=-rdos + ;; + rom68k) + basic_machine=m68k-rom68k + os=-coff + ;; + rm[46]00) + basic_machine=mips-siemens + ;; + rtpc | rtpc-*) + basic_machine=romp-ibm + ;; + s390 | s390-*) + basic_machine=s390-ibm + ;; + s390x | s390x-*) + basic_machine=s390x-ibm + ;; + sa29200) + basic_machine=a29k-amd + os=-udi + ;; + sb1) + basic_machine=mipsisa64sb1-unknown + ;; + sb1el) + basic_machine=mipsisa64sb1el-unknown + ;; + sde) + basic_machine=mipsisa32-sde + os=-elf + ;; + sei) + basic_machine=mips-sei + os=-seiux + ;; + sequent) + basic_machine=i386-sequent + ;; + sh) + basic_machine=sh-hitachi + os=-hms + ;; + sh5el) + 
basic_machine=sh5le-unknown + ;; + sh64) + basic_machine=sh64-unknown + ;; + sparclite-wrs | simso-wrs) + basic_machine=sparclite-wrs + os=-vxworks + ;; + sps7) + basic_machine=m68k-bull + os=-sysv2 + ;; + spur) + basic_machine=spur-unknown + ;; + st2000) + basic_machine=m68k-tandem + ;; + stratus) + basic_machine=i860-stratus + os=-sysv4 + ;; + sun2) + basic_machine=m68000-sun + ;; + sun2os3) + basic_machine=m68000-sun + os=-sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=-sunos4 + ;; + sun3os3) + basic_machine=m68k-sun + os=-sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=-sunos4 + ;; + sun4os3) + basic_machine=sparc-sun + os=-sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=-sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + os=-solaris2 + ;; + sun3 | sun3-*) + basic_machine=m68k-sun + ;; + sun4) + basic_machine=sparc-sun + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + ;; + sv1) + basic_machine=sv1-cray + os=-unicos + ;; + symmetry) + basic_machine=i386-sequent + os=-dynix + ;; + t3e) + basic_machine=alphaev5-cray + os=-unicos + ;; + t90) + basic_machine=t90-cray + os=-unicos + ;; + tic54x | c54x*) + basic_machine=tic54x-unknown + os=-coff + ;; + tic55x | c55x*) + basic_machine=tic55x-unknown + os=-coff + ;; + tic6x | c6x*) + basic_machine=tic6x-unknown + os=-coff + ;; + tile*) + basic_machine=tile-unknown + os=-linux-gnu + ;; + tx39) + basic_machine=mipstx39-unknown + ;; + tx39el) + basic_machine=mipstx39el-unknown + ;; + toad1) + basic_machine=pdp10-xkl + os=-tops20 + ;; + tower | tower-32) + basic_machine=m68k-ncr + ;; + tpf) + basic_machine=s390x-ibm + os=-tpf + ;; + udi29k) + basic_machine=a29k-amd + os=-udi + ;; + ultra3) + basic_machine=a29k-nyu + os=-sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=-none + ;; + vaxv) + basic_machine=vax-dec + os=-sysv + ;; + vms) + basic_machine=vax-dec + os=-vms + ;; + vpp*|vx|vx-*) + basic_machine=f301-fujitsu + ;; + vxworks960) + basic_machine=i960-wrs + os=-vxworks + ;; + 
vxworks68) + basic_machine=m68k-wrs + os=-vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + os=-vxworks + ;; + w65*) + basic_machine=w65-wdc + os=-none + ;; + w89k-*) + basic_machine=hppa1.1-winbond + os=-proelf + ;; + xbox) + basic_machine=i686-pc + os=-mingw32 + ;; + xps | xps100) + basic_machine=xps100-honeywell + ;; + ymp) + basic_machine=ymp-cray + os=-unicos + ;; + z8k-*-coff) + basic_machine=z8k-unknown + os=-sim + ;; + none) + basic_machine=none-none + os=-none + ;; + +# Here we handle the default manufacturer of certain CPU types. It is in +# some cases the only manufacturer, in others, it is the most popular. + w89k) + basic_machine=hppa1.1-winbond + ;; + op50n) + basic_machine=hppa1.1-oki + ;; + op60c) + basic_machine=hppa1.1-oki + ;; + romp) + basic_machine=romp-ibm + ;; + mmix) + basic_machine=mmix-knuth + ;; + rs6000) + basic_machine=rs6000-ibm + ;; + vax) + basic_machine=vax-dec + ;; + pdp10) + # there are many clones, so DEC is not a safe bet + basic_machine=pdp10-unknown + ;; + pdp11) + basic_machine=pdp11-dec + ;; + we32k) + basic_machine=we32k-att + ;; + sh[1234] | sh[24]a | sh[34]eb | sh[1234]le | sh[23]ele) + basic_machine=sh-unknown + ;; + sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) + basic_machine=sparc-sun + ;; + cydra) + basic_machine=cydra-cydrome + ;; + orion) + basic_machine=orion-highlevel + ;; + orion105) + basic_machine=clipper-highlevel + ;; + mac | mpw | mac-mpw) + basic_machine=m68k-apple + ;; + pmac | pmac-mpw) + basic_machine=powerpc-apple + ;; + *-unknown) + # Make sure to match an already-canonicalized machine name. + ;; + *) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. 
+case $basic_machine in + *-digital*) + basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` + ;; + *-commodore*) + basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x"$os" != x"" ] +then +case $os in + # First match some system type aliases + # that might get confused with valid system types. + # -solaris* is a basic system type, with this one exception. + -solaris1 | -solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + -solaris) + os=-solaris2 + ;; + -svr4*) + os=-sysv4 + ;; + -unixware*) + os=-sysv4.2uw + ;; + -gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # First accept the basic system types. + # The portable systems comes first. + # Each alternative MUST END IN A *, to match a version number. + # -sysv* is not here because it comes later, after sysvr4. + -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ + | -aos* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ + | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ + | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ + | -openbsd* | -solidbsd* \ + | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ + | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ + | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ + | -chorusos* | -chorusrdb* \ + | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \ + | -uxpv* | -beos* | -mpeix* | -udk* \ + | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ + | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ + | 
-storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ + | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ + | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ + | -skyos* | -haiku* | -rdos* | -toppers* | -drops*) + # Remember, each alternative MUST END IN *, to match a version number. + ;; + -qnx*) + case $basic_machine in + x86-* | i*86-*) + ;; + *) + os=-nto$os + ;; + esac + ;; + -nto-qnx*) + ;; + -nto*) + os=`echo $os | sed -e 's|nto|nto-qnx|'` + ;; + -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ + | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) + ;; + -mac*) + os=`echo $os | sed -e 's|mac|macos|'` + ;; + -linux-dietlibc) + os=-linux-dietlibc + ;; + -linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + -sunos5*) + os=`echo $os | sed -e 's|sunos5|solaris2|'` + ;; + -sunos6*) + os=`echo $os | sed -e 's|sunos6|solaris3|'` + ;; + -opened*) + os=-openedition + ;; + -os400*) + os=-os400 + ;; + -wince*) + os=-wince + ;; + -osfrose*) + os=-osfrose + ;; + -osf*) + os=-osf + ;; + -utek*) + os=-bsd + ;; + -dynix*) + os=-bsd + ;; + -acis*) + os=-aos + ;; + -atheos*) + os=-atheos + ;; + -syllable*) + os=-syllable + ;; + -386bsd) + os=-bsd + ;; + -ctix* | -uts*) + os=-sysv + ;; + -nova*) + os=-rtmk-nova + ;; + -ns2 ) + os=-nextstep2 + ;; + -nsk*) + os=-nsk + ;; + # Preserve the version number of sinix5. + -sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + -sinix*) + os=-sysv4 + ;; + -tpf*) + os=-tpf + ;; + -triton*) + os=-sysv3 + ;; + -oss*) + os=-sysv3 + ;; + -svr4) + os=-sysv4 + ;; + -svr3) + os=-sysv3 + ;; + -sysvr4) + os=-sysv4 + ;; + # This must come after -sysvr4. 
+ -sysv*) + ;; + -ose*) + os=-ose + ;; + -es1800*) + os=-ose + ;; + -xenix) + os=-xenix + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + os=-mint + ;; + -aros*) + os=-aros + ;; + -kaos*) + os=-kaos + ;; + -zvmoe) + os=-zvmoe + ;; + -none) + ;; + *) + # Get rid of the `-' at the beginning of $os. + os=`echo $os | sed 's/[^-]*-//'` + echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. +# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. + +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $basic_machine in + score-*) + os=-elf + ;; + spu-*) + os=-elf + ;; + *-acorn) + os=-riscix1.2 + ;; + arm*-rebel) + os=-linux + ;; + arm*-semi) + os=-aout + ;; + c4x-* | tic4x-*) + os=-coff + ;; + # This must come before the *-dec entry. + pdp10-*) + os=-tops20 + ;; + pdp11-*) + os=-none + ;; + *-dec | vax-*) + os=-ultrix4.2 + ;; + m68*-apollo) + os=-domain + ;; + i386-sun) + os=-sunos4.0.2 + ;; + m68000-sun) + os=-sunos3 + # This also exists in the configure program, but was not the + # default. + # os=-sunos4 + ;; + m68*-cisco) + os=-aout + ;; + mep-*) + os=-elf + ;; + mips*-cisco) + os=-elf + ;; + mips*-*) + os=-elf + ;; + or32-*) + os=-coff + ;; + *-tti) # must be before sparc entry or we get the wrong os. 
+ os=-sysv3 + ;; + sparc-* | *-sun) + os=-sunos4.1.1 + ;; + *-be) + os=-beos + ;; + *-haiku) + os=-haiku + ;; + *-ibm) + os=-aix + ;; + *-knuth) + os=-mmixware + ;; + *-wec) + os=-proelf + ;; + *-winbond) + os=-proelf + ;; + *-oki) + os=-proelf + ;; + *-hp) + os=-hpux + ;; + *-hitachi) + os=-hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=-sysv + ;; + *-cbm) + os=-amigaos + ;; + *-dg) + os=-dgux + ;; + *-dolphin) + os=-sysv3 + ;; + m68k-ccur) + os=-rtu + ;; + m88k-omron*) + os=-luna + ;; + *-next ) + os=-nextstep + ;; + *-sequent) + os=-ptx + ;; + *-crds) + os=-unos + ;; + *-ns) + os=-genix + ;; + i370-*) + os=-mvs + ;; + *-next) + os=-nextstep3 + ;; + *-gould) + os=-sysv + ;; + *-highlevel) + os=-bsd + ;; + *-encore) + os=-bsd + ;; + *-sgi) + os=-irix + ;; + *-siemens) + os=-sysv4 + ;; + *-masscomp) + os=-rtu + ;; + f30[01]-fujitsu | f700-fujitsu) + os=-uxpv + ;; + *-rom68k) + os=-coff + ;; + *-*bug) + os=-coff + ;; + *-apple) + os=-macos + ;; + *-atari*) + os=-mint + ;; + *) + os=-none + ;; +esac +fi + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. 
+vendor=unknown +case $basic_machine in + *-unknown) + case $os in + -riscix*) + vendor=acorn + ;; + -sunos*) + vendor=sun + ;; + -aix*) + vendor=ibm + ;; + -beos*) + vendor=be + ;; + -hpux*) + vendor=hp + ;; + -mpeix*) + vendor=hp + ;; + -hiux*) + vendor=hitachi + ;; + -unos*) + vendor=crds + ;; + -dgux*) + vendor=dg + ;; + -luna*) + vendor=omron + ;; + -genix*) + vendor=ns + ;; + -mvs* | -opened*) + vendor=ibm + ;; + -os400*) + vendor=ibm + ;; + -ptx*) + vendor=sequent + ;; + -tpf*) + vendor=ibm + ;; + -vxsim* | -vxworks* | -windiss*) + vendor=wrs + ;; + -aux*) + vendor=apple + ;; + -hms*) + vendor=hitachi + ;; + -mpw* | -macos*) + vendor=apple + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + vendor=atari + ;; + -vos*) + vendor=stratus + ;; + esac + basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` + ;; +esac + +echo $basic_machine$os +exit + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/configure.in b/configure.in new file mode 100644 index 0000000..f5b1858 --- /dev/null +++ b/configure.in @@ -0,0 +1,2193 @@ +dnl Process this file with autoconf to produce a configure script. +AC_INIT() + +AC_PREREQ(2.60) + +AC_ARG_WITH(baseruby, + [ --with-baseruby=RUBY use RUBY as baseruby; RUBY is the pathname of ruby], + [ + case "$withval" in + *ruby*) + BASERUBY=$withval + ;; + *) + AC_MSG_ERROR(need ruby) + ;; + esac + ], + [ + BASERUBY="ruby" + ]) +test "`RUBYOPT=- $BASERUBY -e 'p 42' 2>/dev/null`" = 42 || + BASERUBY="echo executable host ruby is required. 
use --with-baseruby option.; false" +AC_SUBST(BASERUBY) + +AC_DEFUN([RUBY_MINGW32], +[case "$host_os" in +cygwin*) +AC_CACHE_CHECK(for mingw32 environment, rb_cv_mingw32, +[AC_TRY_CPP([ +#ifndef __MINGW32__ +# error +#endif +], rb_cv_mingw32=yes,rb_cv_mingw32=no) +rm -f conftest*]) +test "$rb_cv_mingw32" = yes && target_os="mingw32" + ;; +esac]) + +AC_DEFUN([RUBY_CPPOUTFILE], +[AC_CACHE_CHECK(whether ${CPP} accepts -o, rb_cv_cppoutfile, +[cppflags=$CPPFLAGS +CPPFLAGS='-o conftest.i' +AC_TRY_CPP([], rb_cv_cppoutfile=yes, rb_cv_cppoutfile=no) +CPPFLAGS=$cppflags +rm -f conftest*]) +if test "$rb_cv_cppoutfile" = yes; then + CPPOUTFILE='-o conftest.i' +elif test "$rb_cv_cppoutfile" = no; then + CPPOUTFILE='> conftest.i' +elif test -n "$rb_cv_cppoutfile"; then + CPPOUTFILE="$rb_cv_cppoutfile" +fi +AC_SUBST(CPPOUTFILE)]) + +AC_DEFUN([RUBY_PROG_GNU_LD], +[AC_CACHE_CHECK(whether the linker is GNU ld, rb_cv_prog_gnu_ld, +[if `$CC $CFLAGS $CPPFLAGS $LDFLAGS --print-prog-name=ld 2>&1` -v 2>&1 | grep "GNU ld" > /dev/null; then + rb_cv_prog_gnu_ld=yes +else + rb_cv_prog_gnu_ld=no +fi +]) +GNU_LD=$rb_cv_prog_gnu_ld +AC_SUBST(GNU_LD)]) + +unset GREP_OPTIONS +rb_version=`grep '^#define RUBY_VERSION ' $srcdir/version.h` +MAJOR=`expr "$rb_version" : '#define RUBY_VERSION "\([0-9][0-9]*\)\.[0-9][0-9]*\.[0-9][0-9]*"'` +MINOR=`expr "$rb_version" : '#define RUBY_VERSION "[0-9][0-9]*\.\([0-9][0-9]*\)\.[0-9][0-9]*"'` +TEENY=`expr "$rb_version" : '#define RUBY_VERSION "[0-9][0-9]*\.[0-9][0-9]*\.\([0-9][0-9]*\)"'` +if test "$MAJOR" = ""; then + AC_MSG_ERROR(could not determine MAJOR number from version.h) +fi +if test "$MINOR" = ""; then + AC_MSG_ERROR(could not determine MINOR number from version.h) +fi +if test "$TEENY" = ""; then + AC_MSG_ERROR(could not determine TEENY number from version.h) +fi +AC_SUBST(MAJOR) +AC_SUBST(MINOR) +AC_SUBST(TEENY) +if test "$MAJOR" = "1"; then + AC_DEFINE(CANONICALIZATION_FOR_MATHN) +fi +dnl checks for alternative programs +AC_ARG_WITH(gcc, [ 
--without-gcc never use gcc], [ + case $withval in + no) : ${CC=cc} + ;; + yes) : ${CC=gcc} + ;; + *) CC=$withval + ;; + esac]) +dnl If the user switches compilers, we can't believe the cache +if test ! -z "$ac_cv_prog_CC" -a ! -z "$CC" -a "$CC" != "$ac_cv_prog_CC" +then + AC_MSG_ERROR(cached CC is different -- throw away $cache_file +(it is also a good idea to do 'make clean' before compiling)) +fi + +if test "$program_prefix" = NONE; then + program_prefix= +fi + +AC_CANONICAL_TARGET +target_os=`echo $target_os | sed 's/linux-gnu$/linux/;s/linux-gnu/linux-/'` +ac_install_sh='' # unusable for extension libraries. + +ifelse(currently,disabled, [ +dnl checks for fat-binary +AC_ARG_ENABLE(fat-binary, + [ --enable-fat-binary=ARCHS + build an Apple/NeXT Multi Architecture Binary (MAB); + ARCHS is a comma-delimited list of architectures for + which to build; if ARCHS is omitted, then the package + will be built for all architectures supported by the + platform ("ppc" for MacOS/X and Darwin; "ppc,i386" + for Rhapsody; "m68k,i386,sparc" for OpenStep; + "m68k,i386,sparc,hppa" for NextStep); if this option + is disabled or omitted entirely, then the package + will be built only for the target platform], + [fat_binary=$enableval], [fat_binary=no]) +if test "$fat_binary" != no; then + + AC_MSG_CHECKING([target architectures]) + + # Respect TARGET_ARCHS setting from environment if available. + if test -z "$TARGET_ARCHS"; then + # Respect ARCH given to --enable-fat-binary if present. + if test "$fat_binary" != yes; then + TARGET_ARCHS=`echo "$fat_binary" | tr ',' ' '` + else + # Choose a default set of architectures based upon platform. 
+ case "$target_os" in + darwin*) + TARGET_ARCHS="ppc" + ;; + rhapsody*) + TARGET_ARCHS="ppc i386" + ;; + openstep*) + TARGET_ARCHS="m68k i386 sparc" + ;; + nextstep*) + TARGET_ARCHS="m68k i386 sparc hppa" + ;; + *) + TARGET_ARCHS=`arch` + esac + fi + fi + + AC_MSG_RESULT([$TARGET_ARCHS]) + + # /usr/lib/arch_tool -archify_list $TARGET_ARCHS + ARCH_FLAG= + for archs in $TARGET_ARCHS + do + ARCH_FLAG="$ARCH_FLAG -arch $archs" + done + AC_DEFINE(NEXT_FAT_BINARY) +fi +], [fat_binary=no]) + +case $target_cpu in + i?86) frame_address=yes;; + *) frame_address=no;; +esac +AC_ARG_ENABLE(frame-address, + [ --enable-frame-address use GCC __builtin_frame_address(). ], + [frame_address=$enableval]) +if test $frame_address = yes; then + AC_DEFINE(USE_BUILTIN_FRAME_ADDRESS) +fi + +AC_ARG_PROGRAM + +dnl Checks for programs. + +: ${CFLAGS=} ${cflags='${optflags} ${debugflags} ${warnflags}'} +: ${CXXFLAGS=} ${cxxflags='${optflags} ${debugflags} ${warnflags}'} +if test x"${build}" != x"${host}"; then + AC_CHECK_TOOL(CC, gcc) +fi +AC_PROG_CC +AC_PROG_CXX +AC_PROG_GCC_TRADITIONAL +test $ac_cv_prog_cc_g = yes && : ${debugflags=-g} +if test "$GCC" = yes; then + linker_flag=-Wl, + : ${optflags=-O2} ${warnflags="-Wall -Wno-parentheses"} +else + linker_flag= +fi +CFLAGS="${CFLAGS} `eval echo $cflags`" +CXXFLAGS="${CXXFLAGS} `eval echo $cxxflags`" + +RUBY_PROG_GNU_LD +RUBY_CPPOUTFILE + +: ${OUTFLAG='-o '} +: ${COUTFLAG=${OUTFLAG}} +AC_SUBST(OUTFLAG) +AC_SUBST(COUTFLAG) + +RUBY_MINGW32 + +AC_CHECK_TOOL(RANLIB, ranlib, :) +AC_CHECK_TOOL(AR, ar) +if test -z "$AR"; then + AC_CHECK_PROGS(AR, aal, ar) +fi + +AC_CHECK_TOOL(AS, as) +ASFLAGS=$ASFLAGS +AC_SUBST(ASFLAGS) + +case "$target_os" in +cygwin*|mingw*) + ac_cv_prog_OBJCOPY=":";; +esac + +# BSD's ports and MacPorts prefix GNU binutils with 'g' +AC_CHECK_TOOLS(OBJDUMP, [objdump gobjdump]) +AC_CHECK_TOOLS(OBJCOPY, [objcopy gobjcopy]) + +case "$target_os" in +cygwin*|mingw*) + AC_CHECK_TOOL(NM, nm) + AC_CHECK_TOOL(WINDRES, windres) + 
AC_CHECK_TOOL(DLLWRAP, dllwrap) + target_cpu=`echo $target_cpu | sed s/i.86/i386/` + case "$target_os" in + mingw*) + test "$rb_cv_msvcrt" = "" && unset rb_cv_msvcrt + AC_CACHE_CHECK(for mingw32 runtime DLL, rb_cv_msvcrt, [ + AC_TRY_LINK([#include <stdio.h>], + [FILE* volatile f = stdin; return 0;], + [rb_cv_msvcrt=`$OBJDUMP -p conftest$ac_exeext | + tr A-Z a-z | + sed -n '/^[[ ]]*dll name: \(msvc.*\)\.dll$/{s//\1/p;q;}'`], + [rb_cv_msvcrt=msvcrt]) + test "$rb_cv_msvcrt" = "" && rb_cv_msvcrt=msvcrt]) + esac + : ${enable_shared=yes} + ;; +aix*) + AC_CHECK_TOOL(NM, nm, /usr/ccs/bin/nm, /usr/ccs/bin:$PATH) + ;; +hiuxmpp*) + # by TOYODA Eizi + AC_DEFINE(__HIUX_MPP__) + ;; +esac + +AC_PROG_LN_S +AC_PROG_MAKE_SET +AC_PROG_INSTALL + +# checks for UNIX variants that set C preprocessor variables +AC_USE_SYSTEM_EXTENSIONS + +AC_SUBST(RM, ['rm -f']) +AC_SUBST(CP, ['cp']) +if $as_mkdir_p; then + MAKEDIRS='mkdir -p' +else + MAKEDIRS='install -d' +fi +AC_SUBST(MAKEDIRS) +AC_SUBST(RMDIRS, ['$(top_srcdir)/tool/rmdirs']) +AC_SUBST(RMALL, ['rm -fr']) + +dnl check for large file stuff +mv confdefs.h confdefs1.h +: > confdefs.h +AC_SYS_LARGEFILE +mv confdefs.h largefile.h +mv confdefs1.h confdefs.h +cat largefile.h >> confdefs.h + +case "$target_os" in +mingw*) + ac_cv_type_off_t=yes + ac_cv_sizeof_off_t=8 + ;; +esac + +AC_CHECK_TYPES([long long, off_t]) + +AC_CHECK_SIZEOF(int, 4) +AC_CHECK_SIZEOF(short, 2) +AC_CHECK_SIZEOF(long, 4) +AC_CHECK_SIZEOF(long long, 0) +AC_CHECK_SIZEOF(__int64, 0) +AC_CHECK_SIZEOF(off_t, 0) +AC_CHECK_SIZEOF(void*, 4) +AC_CHECK_SIZEOF(float, 4) +AC_CHECK_SIZEOF(double, 8) +AC_CHECK_SIZEOF(time_t, 0) + +dnl RUBY_REPLACE_TYPE [typename] [default type] [macro type] [included] +AC_DEFUN([RUBY_REPLACE_TYPE], [dnl + AC_CHECK_TYPE([$1], + [AC_DEFINE_UNQUOTED(rb_[$1], [$1])], + [AC_DEFINE_UNQUOTED(rb_[$1], [$2])], + [$4]) + AC_CACHE_CHECK([for convertible type of [$1]], rb_cv_[$1]_convertible, [ + u= t= + AC_COMPILE_IFELSE( + [AC_LANG_BOOL_COMPILE_TRY([AC_INCLUDES_DEFAULT] +
[$4], [(rb_[$1])-1 > 0])], + [u=U]) + if test x"$t" = x; then + AC_COMPILE_IFELSE( + [AC_LANG_BOOL_COMPILE_TRY([AC_INCLUDES_DEFAULT] + [$4], [sizeof(rb_[$1]) > sizeof(long)])], + [t=LL]) + fi + if test x"$t" = x; then + AC_COMPILE_IFELSE( + [AC_LANG_BOOL_COMPILE_TRY([AC_INCLUDES_DEFAULT] + [$4], [sizeof(rb_[$1]) == sizeof(long)])], + [t=LONG]) + fi + if test x"$t" = x; then + t=INT + fi + rb_cv_[$1]_convertible=${u}${t}]) + AC_DEFINE_UNQUOTED([$3]2NUM[(v)], [${rb_cv_[$1]_convertible}2NUM(v)]) + AC_DEFINE_UNQUOTED(NUM2[$3][(v)], [NUM2${rb_cv_[$1]_convertible}(v)]) +]) +RUBY_REPLACE_TYPE(pid_t, int, PIDT) +RUBY_REPLACE_TYPE(uid_t, int, UIDT) +RUBY_REPLACE_TYPE(gid_t, int, GIDT) + +AC_CACHE_CHECK(for prototypes, rb_cv_have_prototypes, + [AC_TRY_COMPILE([int foo(int x) { return 0; }], [return foo(10);], + rb_cv_have_prototypes=yes, + rb_cv_have_prototypes=no)]) +if test "$rb_cv_have_prototypes" = yes; then + AC_DEFINE(HAVE_PROTOTYPES) +fi + +AC_CACHE_CHECK(token paste string, rb_cv_tokenpaste, + [AC_TRY_COMPILE([#define paste(a,b) a##b], + [int xy = 1; return paste(x,y);], + rb_cv_tokenpaste=ansi, + rb_cv_tokenpaste=knr)]) +if test "$rb_cv_tokenpaste" = ansi; then + AC_DEFINE(TOKEN_PASTE(x,y),[x##y]) +else + AC_DEFINE(TOKEN_PASTE(x,y),[x/**/y]) +fi + +AC_CACHE_CHECK(stringization, rb_cv_stringization, [ + rb_cv_stringization=no + for string in "#expr" '"expr"'; do + AC_COMPILE_IFELSE([ + AC_LANG_BOOL_COMPILE_TRY([ +#define STRINGIZE0(expr) $string +#define STRINGIZE(expr) STRINGIZE0(expr) +#undef real_test_for_stringization +#define test_for_stringization -.real_test_for_stringization.- +const char stringized[[]] = STRINGIZE(test_for_stringization); +], [sizeof(stringized) == 32])], + [rb_cv_stringization="$string"; break], + [rb_cv_stringization=no]) + done] +) +AC_DEFINE(STRINGIZE(expr),STRINGIZE0(expr)) +if test x"$rb_cv_stringization" != xno -a "$rb_cv_stringization" != "#expr"; then + AC_DEFINE_UNQUOTED(STRINGIZE0(expr),$rb_cv_stringization) + 
AC_DEFINE(OLD_FASHIONED_STRINGIZATION,1) +fi + +AC_CACHE_CHECK([string literal concatenation], + rb_cv_string_literal_concatenation, [ + AC_COMPILE_IFELSE([ + AC_LANG_BOOL_COMPILE_TRY([ +const char concatenated_literal[[]] = "literals" "to" + "be" "concatenated."; +], [sizeof(concatenated_literal) == 26])], + [rb_cv_string_literal_concatenation=yes], + [rb_cv_string_literal_concatenation=no])] +) +if test "$rb_cv_string_literal_concatenation" = no; then + AC_DEFINE(NO_STRING_LITERAL_CONCATENATION,1) +fi + +AC_CACHE_CHECK(for variable length prototypes and stdarg.h, rb_cv_stdarg, + [AC_TRY_COMPILE([ +#include +int foo(int x, ...) { + va_list va; + va_start(va, x); + va_arg(va, int); + va_arg(va, char *); + va_arg(va, double); + return 0; +} +], [return foo(10, "", 3.14);], + rb_cv_stdarg=yes, + rb_cv_stdarg=no)]) +if test "$rb_cv_stdarg" = yes; then + AC_DEFINE(HAVE_STDARG_PROTOTYPES) +fi + +AC_CACHE_CHECK(for variable length macro, rb_cv_va_args_macro, + [AC_TRY_COMPILE([@%:@define FOO(a, ...) 
foo(a, @%:@@%:@__VA_ARGS__)], + [FOO(1);FOO(1,2);FOO(1,2,3);], + rb_cv_va_args_macro=yes, + rb_cv_va_args_macro=no)]) +if test "$rb_cv_va_args_macro" = yes; then + AC_DEFINE(HAVE_VA_ARGS_MACRO) +fi + +AC_DEFUN([RUBY_FUNC_ATTRIBUTE], [dnl +m4_ifval([$2], dnl + [AS_VAR_PUSHDEF([attrib],[$2])], dnl + [AS_VAR_PUSHDEF([attrib],[FUNC_]AS_TR_CPP($1))] dnl +)dnl +m4_ifval([$3], dnl + [AS_VAR_PUSHDEF([rbcv],[$3])], dnl + [AS_VAR_PUSHDEF([rbcv],[rb_cv_func_][$1])]dnl +)dnl +AC_CACHE_CHECK(for [$1] function attribute, rbcv, +[rbcv=x +if test "${ac_c_werror_flag+set}"; then + rb_c_werror_flag="$ac_c_werror_flag" +else + unset rb_c_werror_flag +fi +ac_c_werror_flag=yes +for mac in "__attribute__ (($1)) x" "x __attribute__ (($1))" "__declspec($1) x" x; do + AC_TRY_COMPILE( + [#define ]attrib[(x) $mac + ]attrib[(void conftest_attribute_check(void));], [], + [rbcv="$mac"; break]) +done +if test "${rb_c_werror_flag+set}"; then + ac_c_werror_flag="$rb_c_werror_flag" +else + unset ac_c_werror_flag +fi +]) +AC_DEFINE_UNQUOTED(attrib[(x)], $rbcv) +AS_VAR_POPDEF([attrib]) +AS_VAR_POPDEF([rbcv]) +]) + +RUBY_FUNC_ATTRIBUTE(noreturn, NORETURN) +RUBY_FUNC_ATTRIBUTE(deprecated, DEPRECATED) +RUBY_FUNC_ATTRIBUTE(noinline, NOINLINE) +RUBY_FUNC_ATTRIBUTE(stdcall) +RUBY_FUNC_ATTRIBUTE(cdecl) +RUBY_FUNC_ATTRIBUTE(fastcall) + +if test "$GCC" = yes; then + AC_CACHE_CHECK([for function alias], [rb_cv_gcc_function_alias], + [rb_cv_gcc_function_alias=no + for a in alias weak,alias; do + AC_TRY_LINK([void foo(void) {} + void bar(void) __attribute__(($a("foo")));], [bar()], + [rb_cv_gcc_function_alias=$a; break]) + done]) + if test "$rb_cv_gcc_function_alias" = no; then + AC_DEFINE([RUBY_ALIAS_FUNCTION(old_prot, new_name, args)], + [VALUE old_prot {return new_name args;}]) + else + AC_DEFINE_UNQUOTED([RUBY_ALIAS_FUNCTION(old_prot, new_name, args)], + [VALUE old_prot __attribute__(($rb_cv_gcc_function_alias(@%:@new_name)));]) + fi +fi + +AC_CACHE_CHECK([for RUBY_EXTERN], rb_cv_ruby_extern, 
+[rb_cv_ruby_extern=no +for mac in "__attribute__((dllimport))" "__declspec(dllimport)"; do + AC_TRY_COMPILE( + [extern $mac void conftest(void);], + [rb_cv_ruby_extern="extern $mac"; break]) +done]) +test "x$rb_cv_ruby_extern" = xno || AC_DEFINE_UNQUOTED(RUBY_EXTERN, $rb_cv_ruby_extern) + +XCFLAGS="$XCFLAGS -DRUBY_EXPORT" + +dnl Check whether we need to define sys_nerr locally +AC_CHECK_DECLS([sys_nerr], [], [], [$ac_includes_default +#include ]) + +case "$target_os" in +freebsd*) + AC_CACHE_CHECK([whether pthread should be enabled by default], + rb_cv_enable_pthread_default, + [AC_TRY_CPP([ +#include +#if __FreeBSD_version < 502102 +#error pthread should be disabled on this platform +#endif + ], + rb_cv_enable_pthread_default=yes, + rb_cv_enable_pthread_default=no)]) + enable_pthread_default=$rb_cv_enable_pthread_default + ;; +mingw*) + enable_pthread_default=no + ;; +*) + enable_pthread_default=yes + ;; +esac + +AC_ARG_ENABLE(pthread, + [ --enable-pthread use pthread library.], + [enable_pthread=$enableval], [enable_pthread=$enable_pthread_default]) + +dnl Checks for libraries. 
+dnl Preset ac_cv_func_daemon=no on every target except *bsd* and dragonfly.
+case "$target_os" in
+*bsd*|dragonfly*)
+    ;;
+*)
+    ac_cv_func_daemon=no
+    ;;
+esac
+
+dnl Per-target library list and preset cache values.  Each arm prepends the
+dnl libraries the target needs to LIBS and seeds ac_cv_* and rb_cv_* cache
+dnl variables so that the generic checks further down reuse these answers.
+case "$target_os" in
+solaris*)
+    AC_DEFINE(SIZEOF_STRUCT_DIRENT_TOO_SMALL, 1)
+    LIBS="-lm $LIBS"
+    ;;
+nextstep*) ;;
+openstep*) ;;
+rhapsody*) ;;
+darwin*) LIBS="-lobjc $LIBS"
+    CPPFLAGS="$CPPFLAGS -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE"
+dnl The preprocessor probe fails through the error directive when
+dnl MAC_OS_X_VERSION_MAX_ALLOWED is 1040 or lower; in that case setreuid and
+dnl setregid are flagged as broken, otherwise ucontext.h is marked absent.
+    AC_TRY_CPP([#include <AvailabilityMacros.h>
+        #if MAC_OS_X_VERSION_MAX_ALLOWED <= 1040
+        #error pre OS X 10.4
+        [!<===== pre OS X 10.4 =====>]
+        #endif
+        ],
+        [
+        ac_cv_header_ucontext_h=no
+        ],
+        [
+        AC_DEFINE(BROKEN_SETREUID, 1)
+        AC_DEFINE(BROKEN_SETREGID, 1)
+        ])
+    ac_cv_lib_crypt_crypt=no
+dnl Run-time probe: two crypt calls with the same 8bit salt must return the
+dnl same string.  A failing run or a cross build counts as broken.
+    AC_CACHE_CHECK(for broken crypt with 8bit chars, rb_cv_broken_crypt,
+        [AC_TRY_RUN([
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+int
+main()
+{
+    char buf[256];
+    strcpy(buf, crypt("", "\xE0\xA0"));
+    return strcmp(buf, crypt("", "\xE0\xA0"));
+}
+],
+        rb_cv_broken_crypt=no,
+        rb_cv_broken_crypt=yes,
+        rb_cv_broken_crypt=yes)])
+    if test "$rb_cv_broken_crypt" = yes; then
+        AC_DEFINE(BROKEN_CRYPT, 1)
+    fi
+    ;;
+hpux*) LIBS="-lm $LIBS"
+    ac_cv_c_inline=no;;
+human*) ac_cv_func_getpgrp_void=yes
+    ac_cv_func_setitimer=no
+    ;;
+beos*|haiku*) ac_cv_func_link=no
+    ac_cv_func_sched_yield=no
+    ac_cv_func_pthread_attr_setinheritsched=no
+    case "$target_os" in
+    beos*) ac_cv_header_net_socket_h=yes;;
+    haiku*) ac_cv_func_shutdown=no;;
+    esac
+    LIBS="$LIBS" # m lib is included in root under BeOS/Haiku
+    ;;
+cygwin*) ac_cv_header_langinfo_h=yes
+    AC_LIBOBJ([langinfo])
+    ;;
+mingw*) LIBS="-lshell32 -lws2_32 $LIBS"
+    ac_cv_header_a_out_h=no
+    ac_cv_header_pwd_h=no
+    ac_cv_header_utime_h=no
+    ac_cv_header_sys_ioctl_h=no
+    ac_cv_header_sys_param_h=no
+    ac_cv_header_sys_resource_h=no
+    ac_cv_header_sys_select_h=no
+    ac_cv_header_sys_time_h=no
+    ac_cv_header_sys_times_h=no
+    ac_cv_header_sys_socket_h=no
+    ac_cv_func_times=yes
+    ac_cv_func_waitpid=yes
+    ac_cv_func_fsync=yes
+    ac_cv_func_seekdir=yes
+    ac_cv_func_telldir=yes
+    ac_cv_func_isinf=yes
+    ac_cv_func_isnan=yes
+    ac_cv_func_finite=yes
+    ac_cv_func_link=yes
+    ac_cv_func_truncate=yes
+    ac_cv_func_fseeko=yes
+    ac_cv_func_ftello=yes
+    ac_cv_lib_crypt_crypt=no
+    ac_cv_func_getpgrp_void=no
+    ac_cv_func_setpgrp_void=yes
+    ac_cv_func_memcmp_working=yes
+    ac_cv_lib_dl_dlopen=no
+    rb_cv_binary_elf=no
+    rb_cv_negative_time_t=no
+    ac_cv_func_fcntl=yes
+    AC_LIBOBJ([langinfo])
+    ;;
+os2-emx*) LIBS="-lm $LIBS"
+    ac_cv_lib_dir_opendir=no;;
+msdosdjgpp*) LIBS="-lm $LIBS"
+    ac_cv_func_getpgrp_void=yes
+    ac_cv_func_setitimer=no
+    ac_cv_sizeof_rlim_t=4
+    ac_cv_func_fork=no
+    ac_cv_func_setrlimit=no
+    ac_cv_header_sys_socket_h=no
+    ;;
+bsdi*) LIBS="-lm $LIBS"
+    AC_DEFINE(BROKEN_SETREUID, 1)
+    AC_DEFINE(BROKEN_SETREGID, 1)
+    ac_cv_sizeof_rlim_t=8;;
+freebsd*) LIBS="-lm $LIBS"
+    ;;
+dragonfly*) LIBS="-lm $LIBS"
+    ;;
+bow) ac_cv_func_setitimer=no
+    ;;
+superux*) ac_cv_func_setitimer=no
+    ;;
+*) LIBS="-lm $LIBS";;
+esac
+AC_CHECK_LIB(crypt, crypt)
+AC_CHECK_LIB(dl, dlopen) # Dynamic linking for SunOS/Solaris and SYSV
+AC_CHECK_LIB(dld, shl_load) # Dynamic linking for HP-UX
+AC_CHECK_LIB(socket, socketpair) # SunOS/Solaris
+AC_CHECK_LIB(rt, clock_gettime) # GNU/Linux
+
+dnl Alpha targets get an IEEE flag: -mieee when GCC is yes, -ieee on osf otherwise.
+case "$target_cpu" in
+alpha*) case "$target_os"::"$GCC" in
+    *::yes) CFLAGS="-mieee $CFLAGS" ;; # gcc
+    osf*) CFLAGS="-ieee $CFLAGS" ;; # ccc
+    esac ;;
+esac
+
+dnl Default the socket header cache values when no arm above set them:
+dnl net/socket.h defaults to no, and sys/socket.h defaults to the opposite
+dnl of net/socket.h.
+ac_cv_header_net_socket_h=${ac_cv_header_net_socket_h=no}
+if test "$ac_cv_header_net_socket_h" = yes; then
+    ac_cv_header_sys_socket_h=${ac_cv_header_sys_socket_h=no}
+else
+    ac_cv_header_sys_socket_h=${ac_cv_header_sys_socket_h=yes}
+fi
+
+dnl Checks for header files.
+AC_HEADER_DIRENT
+dnl AC_HEADER_STDC has been checked in AC_USE_SYSTEM_EXTENSIONS
+AC_HEADER_SYS_WAIT
+AC_CHECK_HEADERS(limits.h sys/file.h sys/ioctl.h sys/syscall.h\
+    fcntl.h sys/fcntl.h sys/select.h sys/time.h sys/times.h sys/param.h\
+    syscall.h pwd.h grp.h a.out.h utime.h direct.h sys/resource.h \
+    sys/mkdev.h sys/utime.h xti.h netinet/in_systm.h float.h ieeefp.h pthread.h \
+    ucontext.h intrinsics.h langinfo.h locale.h sys/sendfile.h time.h \
+    net/socket.h sys/socket.h)
+
+dnl Check additional types.
+dnl The size of rlim_t is probed with each optional header guarded by the
+dnl HAVE_ macro that the header check above defines.
+AC_CHECK_SIZEOF(rlim_t, 0, [
+  #ifdef HAVE_SYS_TYPES_H
+  # include <sys/types.h>
+  #endif
+  #ifdef HAVE_SYS_TIME_H
+  # include <sys/time.h>
+  #endif
+  #ifdef HAVE_SYS_RESOURCE_H
+  # include <sys/resource.h>
+  #endif
+  #ifdef HAVE_UNISTD_H
+  # include <unistd.h>
+  #endif
+  #include <stdio.h>
+])
+
+dnl Checks for typedefs, structures, and compiler characteristics.
+AC_TYPE_SIZE_T
+AC_CHECK_SIZEOF(size_t, 0)
+AC_CHECK_SIZEOF(ptrdiff_t, $ac_cv_sizeof_size_t)
+AC_STRUCT_ST_BLKSIZE
+AC_STRUCT_ST_BLOCKS
+AC_STRUCT_ST_RDEV
+AC_CHECK_MEMBERS([struct stat.st_atim])
+AC_CHECK_MEMBERS([struct stat.st_atimespec])
+AC_CHECK_MEMBERS([struct stat.st_atimensec])
+AC_CHECK_MEMBERS([struct stat.st_mtim])
+AC_CHECK_MEMBERS([struct stat.st_mtimespec])
+AC_CHECK_MEMBERS([struct stat.st_mtimensec])
+AC_CHECK_MEMBERS([struct stat.st_ctim])
+AC_CHECK_MEMBERS([struct stat.st_ctimespec])
+AC_CHECK_MEMBERS([struct stat.st_ctimensec])
+
+AC_CHECK_TYPES([struct timespec], [], [], [@%:@ifdef HAVE_TIME_H
+@%:@include <time.h>
+@%:@endif])
+
+AC_CHECK_TYPE(fd_mask, [AC_DEFINE(HAVE_RB_FD_INIT, 1)])
+
+dnl RUBY_DEFINT TYPENAME, SIZE, [SIGNED-OR-UNSIGNED], [INCLUDES = DEFAULT-INCLUDES]
+dnl
+dnl First tries to compile a typedef of TYPENAME; when that works the type
+dnl already exists and the cache value is yes.  Otherwise SIZE is compared
+dnl with the cached sizes of char, short, int, long, long long and __int64,
+dnl and the first match becomes the replacement type.  SIZE is either a
+dnl literal byte count or a type name whose cached ac_cv_sizeof_ value is
+dnl used.  HAVE_TYPENAME is defined unless no candidate matched, and
+dnl TYPENAME itself is defined only when a replacement had to be chosen.
+AC_DEFUN([RUBY_DEFINT], [dnl
+AC_CACHE_CHECK([for $1], [rb_cv_type_$1],
+[AC_COMPILE_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT([$4])
+typedef $1 t; int s = sizeof(t) == 42;])],
+   [rb_cv_type_$1=yes],
+   [case m4_bmatch([$2], [^[1-9][0-9]*$], $2, [$ac_cv_sizeof_]AS_TR_SH($2)) in
+    "1") rb_cv_type_$1="m4_if([$3], [], [signed ], [$3 ])char";;
+    "$ac_cv_sizeof_short") rb_cv_type_$1="m4_if([$3], [], [], [$3 ])short";;
+    "$ac_cv_sizeof_int") rb_cv_type_$1="m4_if([$3], [], [], [$3 ])int";;
+    "$ac_cv_sizeof_long") rb_cv_type_$1="m4_if([$3], [], [], [$3 ])long";;
+    "$ac_cv_sizeof_long_long") rb_cv_type_$1="m4_if([$3], [], [], [$3 ])long long";;
+    "$ac_cv_sizeof___int64") rb_cv_type_$1="m4_if([$3], [], [], [$3 ])__int64";;
+    *) rb_cv_type_$1=no;;
+    esac])])
+if test "${rb_cv_type_$1}" != no; then
+    AC_DEFINE([HAVE_]AS_TR_CPP($1), 1)
+    test "${rb_cv_type_$1}" = yes || AC_DEFINE_UNQUOTED($1, [$rb_cv_type_$1])
+fi
+])
+
+RUBY_DEFINT(int8_t, 1)
+RUBY_DEFINT(uint8_t, 1, unsigned)
+RUBY_DEFINT(int16_t, 2)
+RUBY_DEFINT(uint16_t, 2, unsigned)
+RUBY_DEFINT(int32_t, 4)
+RUBY_DEFINT(uint32_t, 4, unsigned)
+RUBY_DEFINT(int64_t, 8)
+RUBY_DEFINT(uint64_t, 8, unsigned)
+RUBY_DEFINT(int128_t, 16)
+RUBY_DEFINT(uint128_t, 16, unsigned)
+RUBY_DEFINT(intptr_t, void*)
+RUBY_DEFINT(uintptr_t, void*, unsigned)
+RUBY_DEFINT(ssize_t, size_t) dnl may differ from int, so not use AC_TYPE_SSIZE_T.
+
+dnl Look for a linkable symbol that marks the stack end.  The first of
+dnl __libc_stack_end and _SEND that links is published as STACK_END_ADDRESS.
+AC_CACHE_CHECK(for stack end address, rb_cv_stack_end_address,
+[rb_cv_stack_end_address=no
+for addr in __libc_stack_end _SEND; do
+  AC_TRY_LINK(
+    [extern void *$addr;],
+    [if (!$addr) return 1;],
+    [rb_cv_stack_end_address="$addr"; break])
+done])
+if test $rb_cv_stack_end_address != no; then
+  AC_DEFINE_UNQUOTED(STACK_END_ADDRESS, $rb_cv_stack_end_address)
+fi
+
+dnl Checks for library functions.
+AC_TYPE_GETGROUPS
+AC_TYPE_SIGNAL
+dnl powerpc-darwin skips the alloca probe and forces the alloca.c
+dnl replacement with C_ALLOCA defined; every other target runs the probe.
+case "${target_cpu}-${target_os}" in
+powerpc-darwin*)
+  AC_LIBSOURCES(alloca.c)
+  AC_SUBST([ALLOCA], [\${LIBOBJDIR}alloca.${ac_objext}])
+  AC_DEFINE(C_ALLOCA)
+  AC_DEFINE_UNQUOTED(alloca, alloca)
+  ;;
+*)
+  AC_FUNC_ALLOCA
+  ;;
+esac
+AC_FUNC_MEMCMP
+
+# http://sources.redhat.com/ml/libc-hacker/2005-08/msg00008.html
+# Debian GNU/Linux Etch's libc6.1 2.3.6.ds1-13etch5 has this problem.
+# Debian GNU/Linux Lenny's libc6.1 2.7-10 has no problem.
+dnl Run-time probe for erfc.  When the probe program fails, erf is marked
+dnl as missing so that the replacement list below substitutes it.  Cross
+dnl builds assume erfc is not broken.
+AC_CACHE_CHECK(for broken erfc of glibc-2.3.6 on IA64, rb_cv_broken_glibc_ia64_erfc,
+  [AC_TRY_RUN([
+#include <math.h>
+int
+main()
+{
+   erfc(10000.0);
+   return 0;
+}
+],
+  rb_cv_broken_glibc_ia64_erfc=no,
+  rb_cv_broken_glibc_ia64_erfc=yes,
+  rb_cv_broken_glibc_ia64_erfc=no)])
+case $rb_cv_broken_glibc_ia64_erfc in
+  yes) ac_cv_func_erf=no;;
+esac
+
+AC_REPLACE_FUNCS(dup2 memmove strerror\
+    strchr strstr crypt flock vsnprintf\
+    isnan finite isinf hypot acosh erf tgamma lgamma_r cbrt \
+    strlcpy strlcat)
+AC_CHECK_FUNCS(fmod killpg wait4 waitpid fork spawnv syscall chroot fsync getcwd eaccess\
+    truncate ftruncate chsize times utimes utimensat fcntl lockf lstat\
+    link symlink readlink\
+    setitimer setruid seteuid setreuid setresuid setproctitle socketpair\
+    setrgid setegid setregid setresgid issetugid pause lchown lchmod\
+    getpgrp setpgrp getpgid setpgid initgroups getgroups setgroups\
+    getpriority getrlimit setrlimit sysconf group_member\
+    dlopen sigprocmask sigaction sigsetjmp _setjmp _longjmp snprintf\
+    setsid telldir seekdir fchmod cosh sinh tanh log2 round signbit\
+    setuid setgid daemon select_large_fdset setenv unsetenv\
+    mktime timegm gmtime_r clock_gettime gettimeofday\
+    pread sendfile shutdown sigaltstack)
+
+AC_CACHE_CHECK(for __builtin_setjmp, ac_cv_func___builtin_setjmp,
+[AC_TRY_LINK([@%:@include <setjmp.h>
+    jmp_buf jb; void t(v) int v; {__builtin_longjmp(jb, v);}],
+    [__builtin_setjmp(jb);],
+    [ac_cv_func___builtin_setjmp=yes],
+    [ac_cv_func___builtin_setjmp=no])
+])
+
+dnl _setjmp is only considered usable when _longjmp was found as well.
+test x$ac_cv_func__longjmp = xno && ac_cv_func__setjmp=no
+
+dnl Select the setjmp flavour.  An explicit --with-setjmp-type value wins
+dnl and is rejected when that function was not found.  Without it the
+dnl order of preference is __builtin_setjmp, _setjmp, sigsetjmp and finally
+dnl plain setjmp; solaris and cygwin use plain setjmp instead of sigsetjmp.
+dnl Only the sig flavour passes the extra 0 argument and uses sigjmp_buf.
+AC_MSG_CHECKING(for setjmp type)
+AC_ARG_WITH(setjmp-type,
+    [  --with-setjmp-type      select setjmp type], [
+    case $withval in
+    __builtin_setjmp) setjmp_prefix=__builtin_;;
+    _setjmp) setjmp_prefix=_;;
+    sigsetjmp) setjmp_prefix=sig;;
+    setjmp) setjmp_prefix=;;
+    '') unset setjmp_prefix;;
+    *) AC_MSG_ERROR(invalid setjmp type: $withval);;
+    esac], [unset setjmp_prefix])
+if test ${setjmp_prefix+set}; then
+    if test "${setjmp_prefix}" && eval test '$ac_cv_func_'${setjmp_prefix}setjmp = no; then
+        AC_MSG_ERROR(${setjmp_prefix}setjmp is not available)
+    fi
+elif test "$ac_cv_func___builtin_setjmp" = yes; then
+    setjmp_prefix=__builtin_
+elif test "$ac_cv_func__setjmp" = yes; then
+    setjmp_prefix=_
+elif test "$ac_cv_func_sigsetjmp" = yes; then
+    case $target_os in
+    solaris*|cygwin*)
+        setjmp_prefix=;;
+    *)
+        setjmp_prefix=sig;;
+    esac
+else
+    setjmp_prefix=
+fi
+if test x$setjmp_prefix = xsig; then
+    setjmp_sigmask=yes
+else
+    unset setjmp_sigmask
+fi
+AC_MSG_RESULT(${setjmp_prefix}setjmp)
+AC_DEFINE_UNQUOTED([RUBY_SETJMP(env)], [${setjmp_prefix}setjmp(env${setjmp_sigmask+,0})])
+AC_DEFINE_UNQUOTED([RUBY_LONGJMP(env,val)], [${setjmp_prefix}longjmp(env,val)])
+AC_DEFINE_UNQUOTED(RUBY_JMP_BUF, ${setjmp_sigmask+${setjmp_prefix}}jmp_buf)
+
+AC_ARG_ENABLE(setreuid,
+    [  --enable-setreuid       use setreuid()/setregid() according to need even if obsolete.],
+    [use_setreuid=$enableval])
+if test "$use_setreuid" = yes; then
+    AC_DEFINE(USE_SETREUID)
+    AC_DEFINE(USE_SETREGID)
+fi
+AC_STRUCT_TIMEZONE
+AC_CACHE_CHECK(for struct tm.tm_gmtoff, rb_cv_member_struct_tm_tm_gmtoff,
+  [AC_TRY_COMPILE([#include <time.h>],
+    [struct tm t; t.tm_gmtoff = 3600;],
+    [rb_cv_member_struct_tm_tm_gmtoff=yes],
+    [rb_cv_member_struct_tm_tm_gmtoff=no])])
+if test "$rb_cv_member_struct_tm_tm_gmtoff" = yes; then
+  AC_DEFINE(HAVE_STRUCT_TM_TM_GMTOFF)
+fi
+AC_CACHE_CHECK(for external int daylight, rb_cv_have_daylight,
+  [AC_TRY_LINK([#include <time.h>
+  int i;],
+    [i = daylight;],
+    rb_cv_have_daylight=yes,
+    rb_cv_have_daylight=no)])
+if test "$rb_cv_have_daylight" = yes; then
+  AC_DEFINE(HAVE_DAYLIGHT)
+fi
+dnl RUBY_CHECK_VARTYPE NAME, INCLUDES, CANDIDATE-TYPES
+dnl
+dnl Checks that NAME can have its address taken after INCLUDES, then tries
+dnl each candidate type in an extern redeclaration of NAME and caches the
+dnl first one the compiler accepts.  On success HAVE_VAR_NAME and
+dnl TYPEOF_VAR_NAME are defined, with NAME in upper case.
+AC_DEFUN([RUBY_CHECK_VARTYPE], [dnl
+AC_CACHE_CHECK([for external $1], rb_cv_var_$1,
+  [rb_cv_var_$1=no
+  AC_TRY_COMPILE([
+#ifndef _XOPEN_SOURCE
+#define _XOPEN_SOURCE 1
+#endif
+$2
+;
+const volatile void *volatile t;],
+    [t = &(&$1)[0];],
+    [for t in $3; do
+      AC_TRY_COMPILE([
+#ifndef _XOPEN_SOURCE
+#define _XOPEN_SOURCE 1
+#endif
+$2
+;
+extern $t $1;
+const volatile void *volatile t;],
+        [t = &(&$1)[0];],
+        [rb_cv_var_$1=$t; break])
+    done])])
+if test "[$rb_cv_var_]$1" != no; then
+  AC_DEFINE([HAVE_VAR_]m4_toupper($1))
+  AC_DEFINE_UNQUOTED([TYPEOF_VAR_]m4_toupper($1), $rb_cv_var_$1)
+fi])
+RUBY_CHECK_VARTYPE(timezone, [@%:@include <time.h>], [long int])
+RUBY_CHECK_VARTYPE(altzone, [@%:@include <time.h>], [long int])
+AC_CHECK_FUNCS(timezone)
+dnl When timezone is a function, a call with two arguments that fails to
+dnl compile means the function takes no arguments.
+if test "$ac_cv_func_timezone" = yes; then
+  AC_CACHE_CHECK([whether timezone requires zero arguments], rb_cv_func_timezone_void,
+    [AC_TRY_COMPILE([@%:@include <time.h>],
+      [(void)timezone(0, 0);],
+      [rb_cv_func_timezone_void=no],
+      [rb_cv_func_timezone_void=yes])]
+  )
+  if test $rb_cv_func_timezone_void = yes; then
+    AC_DEFINE(TIMEZONE_VOID)
+  fi
+fi
+
+dnl Run-time probe: gmtime must return the expected broken-down time for
+dnl two negative time_t values.  Cross builds assume it does.
+AC_CACHE_CHECK(for negative time_t for gmtime(3), rb_cv_negative_time_t,
+  [AC_TRY_RUN([
+#include <time.h>
+
+void
+check(tm, y, m, d, h, s)
+    struct tm *tm;
+    int y, m, d, h, s;
+{
+    if (!tm ||
+        tm->tm_year != y ||
+        tm->tm_mon != m-1 ||
+        tm->tm_mday != d ||
+        tm->tm_hour != h ||
+        tm->tm_sec != s) {
+        exit(1);
+    }
+}
+
+int
+main()
+{
+    time_t t = -1;
+    struct tm *tm;
+
+    check(gmtime(&t), 69, 12, 31, 23, 59);
+    t = ~(time_t)0 << 31;
+    check(gmtime(&t), 1, 12, 13, 20, 52);
+    return 0;
+}
+],
+    rb_cv_negative_time_t=yes,
+    rb_cv_negative_time_t=no,
+    rb_cv_negative_time_t=yes)])
+if test "$rb_cv_negative_time_t" = yes; then
+  AC_DEFINE(NEGATIVE_TIME_T)
+fi
+
+dnl POSIX_SIGNAL is defined when both sigprocmask and sigaction exist.
+dnl Otherwise a run-time probe sends SIGINT to itself twice through a
+dnl handler installed with signal; the program only exits with 0 when the
+dnl second signal is survived too.  Cross builds fall back to whether
+dnl sigsetmask was found.
+if test "$ac_cv_func_sigprocmask" = yes && test "$ac_cv_func_sigaction" = yes; then
+  AC_DEFINE(POSIX_SIGNAL)
+else
+  AC_CHECK_FUNCS(sigsetmask)
+  AC_CACHE_CHECK(for BSD signal semantics, rb_cv_bsd_signal,
+    [AC_TRY_RUN([
+#include <stdio.h>
+#include <signal.h>
+
+void
+sig_handler(dummy)
+    int dummy;
+{
+}
+
+int
+main()
+{
+    signal(SIGINT, sig_handler);
+    kill(getpid(), SIGINT);
+    kill(getpid(), SIGINT);
+    return 0;
+}
+],
+    rb_cv_bsd_signal=yes,
+    rb_cv_bsd_signal=no,
+    rb_cv_bsd_signal=$ac_cv_func_sigsetmask)])
+  if test "$rb_cv_bsd_signal" = yes; then
+    AC_DEFINE(BSD_SIGNAL)
+  fi
+fi
+
+AC_FUNC_GETPGRP +AC_FUNC_SETPGRP + +AC_C_BIGENDIAN +AC_C_CONST +AC_C_CHAR_UNSIGNED +AC_C_INLINE +AC_C_VOLATILE + +if test x"$target_cpu" = xia64; then + AC_LIBOBJ([ia64]) + AC_CACHE_CHECK(for __libc_ia64_register_backing_store_base, + rb_cv___libc_ia64_register_backing_store_base, + [rb_cv___libc_ia64_register_backing_store_base=no + AC_TRY_LINK( + [extern unsigned long __libc_ia64_register_backing_store_base;], + [unsigned long p = __libc_ia64_register_backing_store_base; + printf("%ld\n", p);], + [rb_cv___libc_ia64_register_backing_store_base=yes])]) + if test $rb_cv___libc_ia64_register_backing_store_base = yes; then + AC_DEFINE(HAVE___LIBC_IA64_REGISTER_BACKING_STORE_BASE) + fi +fi + +AC_CACHE_CHECK(whether right shift preserve sign bit, rb_cv_rshift_sign, + [AC_COMPILE_IFELSE(AC_LANG_BOOL_COMPILE_TRY([], [(-1==(-1>>1))]), + rb_cv_rshift_sign=yes, + rb_cv_rshift_sign=no)]) +if test "$rb_cv_rshift_sign" = yes; then + AC_DEFINE(RSHIFT(x,y), ((x)>>(int)y)) +else + AC_DEFINE(RSHIFT(x,y), (((x)<0) ? 
~((~(x))>>y) : (x)>>y)) +fi + +AC_MSG_CHECKING(read count field in FILE structures) +AC_CACHE_VAL(rb_cv_fcnt, +[for fcnt in dnl + _cnt dnl + __cnt dnl + _r dnl + readCount dnl + _rcount dnl for emx0.9c +; do + AC_TRY_COMPILE([#include +], + [FILE *f = stdin; f->$fcnt = 0;], + rb_cv_fcnt="$fcnt"; break, + rb_cv_fcnt="not found") +done]) +if test "$rb_cv_fcnt" = "not found"; then + AC_MSG_RESULT([not found(OK if using GNU libc)]) +else + AC_MSG_RESULT($rb_cv_fcnt) + AC_DEFINE_UNQUOTED(FILE_COUNT, $rb_cv_fcnt) +fi + +AC_MSG_CHECKING(read buffer ptr field in FILE structures) +AC_CACHE_VAL(rb_cv_frptr, +[for frptr in dnl + _IO_read_ptr dnl + _ptr dnl + __ptr dnl + bufpos dnl + _p dnl + __bufpos dnl +; do + AC_TRY_COMPILE([#include +], + [FILE *f = stdin; char buf[256]; f->$frptr = buf;], + rb_cv_frptr="$frptr"; break, + rb_cv_frptr="not found") +done]) +if test "$rb_cv_frptr" = "not found"; then + AC_MSG_RESULT([not found]) +else + AC_MSG_RESULT($rb_cv_frptr) + AC_DEFINE_UNQUOTED(FILE_READPTR, $rb_cv_frptr) + + if test "$rb_cv_fcnt" = "not found"; then + AC_MSG_CHECKING(read buffer end field in FILE structures) + AC_CACHE_VAL(rb_cv_frend, + [for frend in dnl + _IO_read_end dnl + bufread dnl + __bufread dnl + ; do + AC_TRY_COMPILE([#include + ], + [FILE *f = stdin; char buf[256]; f->$frend = buf;], + rb_cv_frend="$frend"; break, + rb_cv_frend="not found") + done]) + if test "$rb_cv_frend" = "not found"; then + AC_MSG_RESULT([not found]) + else + AC_MSG_RESULT($rb_cv_frend) + AC_DEFINE_UNQUOTED(FILE_READEND, $rb_cv_frend) + fi + fi +fi + +AC_CACHE_CHECK([whether st_ino is huge], rb_cv_huge_st_ino, +[AC_COMPILE_IFELSE([AC_LANG_BOOL_COMPILE_TRY([ +#include +struct stat test_stat; +], [sizeof(test_stat.st_ino)>sizeof(long)])], +rb_cv_huge_st_ino=yes, +rb_cv_huge_st_ino=no) +]) +if test $rb_cv_huge_st_ino = yes; then + AC_DEFINE(HUGE_ST_INO) +fi + +if test "$ac_cv_func_sysconf" = yes; then + AC_DEFUN([RUBY_CHECK_SYSCONF], [dnl + AC_CACHE_CHECK([whether _SC_$1 is supported], 
rb_cv_have_sc_[]m4_tolower($1), + [AC_TRY_COMPILE([#include + ], + [_SC_$1 >= 0], + rb_cv_have_sc_[]m4_tolower($1)=yes, + rb_cv_have_sc_[]m4_tolower($1)=no) + ]) + if test "$rb_cv_have_sc_[]m4_tolower($1)" = yes; then + AC_DEFINE(HAVE__SC_$1) + fi + ]) + RUBY_CHECK_SYSCONF(CLK_TCK) +fi + +case "$target_cpu" in +m68*|i?86|ia64|sparc*|alpha*) rb_cv_stack_grow_dir=-1;; +hppa*) rb_cv_stack_grow_dir=+1;; +esac +AC_CACHE_CHECK(stack growing direction, rb_cv_stack_grow_dir, + [AC_TRY_RUN([ +/* recurse to get rid of inlining */ +static int +stack_growup_p(addr, n) + volatile int *addr, n; +{ + volatile int end; + if (n > 0) + return *addr = stack_growup_p(addr, n - 1); + else + return (&end > addr); +} +int main() +{ + int x; + return stack_growup_p(&x, 10); +} +], rb_cv_stack_grow_dir=-1, rb_cv_stack_grow_dir=+1, rb_cv_stack_grow_dir=0)]) +AC_DEFINE_UNQUOTED(STACK_GROW_DIRECTION, $rb_cv_stack_grow_dir) + +if test x"$enable_pthread" = xyes; then + for pthread_lib in thr pthread pthreads c c_r root; do + AC_CHECK_LIB($pthread_lib, pthread_kill, + rb_with_pthread=yes, rb_with_pthread=no) + if test "$rb_with_pthread" = "yes"; then break; fi + done + if test x"$rb_with_pthread" = xyes; then + AC_DEFINE(_REENTRANT) + AC_DEFINE(_THREAD_SAFE) + AC_DEFINE(HAVE_LIBPTHREAD) + case $pthread_lib in + c) + ;; + root) + ;; + c_r) + MAINLIBS="-pthread $MAINLIBS" + ;; + *) + LIBS="-l$pthread_lib $LIBS" + ;; + esac + else + AC_MSG_WARN("Don't know how to find pthread library on your system -- thread support disabled") + fi + AC_CHECK_FUNCS(nanosleep sched_yield pthread_attr_setinheritsched) + if test x"$ac_cv_func_nanosleep" = xno; then + AC_CHECK_LIB(rt, nanosleep) + if test x"$ac_cv_lib_rt_nanosleep" = xyes; then + AC_DEFINE(HAVE_NANOSLEEP) + fi + fi +fi +if test x"$ac_cv_header_ucontext_h" = xyes; then + if test x"$rb_with_pthread" = xyes; then + AC_CHECK_FUNCS(getcontext setcontext) + fi +fi + +if test "$ac_cv_func_fork" = "yes" -a "$rb_with_pthread" = "yes"; then + AC_CACHE_CHECK([if 
fork works with pthread], rb_cv_fork_with_pthread, + [AC_TRY_RUN([ +#include +#include +#include +#include +#include +#include +#include +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#endif +#ifndef EXIT_FAILURE +#define EXIT_FAILURE 1 +#endif + +void * +thread_func(void *dmy) +{ + return dmy; +} + +int +use_threads(void) +{ + pthread_t tid; + if (pthread_create(&tid, 0, thread_func, 0) != 0) { + return -1; + } + if (pthread_join(tid, 0) != 0) { + return -1; + } + return 0; +} + +int +main(int argc, char *argv[]) +{ + pid_t pid; + if (use_threads()) return EXIT_FAILURE; + pid = fork(); + + if (pid) { + int loc; + sleep(1); + if (waitpid(pid, &loc, WNOHANG) == 0) { + kill(pid, SIGKILL); + return EXIT_FAILURE; + } + if (!WIFEXITED(loc) || WEXITSTATUS(loc) != EXIT_SUCCESS) + return EXIT_FAILURE; + } + else { + if (use_threads()) return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +}], + rb_cv_fork_with_pthread=yes, + rb_cv_fork_with_pthread=no, + rb_cv_fork_with_pthread=yes)]) + test x$rb_cv_fork_with_pthread = xyes || AC_DEFINE(CANNOT_FORK_WITH_PTHREAD) +fi + +AC_CHECK_FUNCS(backtrace) + +AC_ARG_WITH(valgrind, + [ --with-valgrind enable valgrind memcheck support.], + [AC_CHECK_HEADERS(valgrind/memcheck.h)]) + +dnl wheather use dln_a_out or not +AC_ARG_WITH(dln-a-out, + [ --with-dln-a-out use dln_a_out if possible], [ + case $withval in + dnl ( + yes) + if test "$enable_shared" = yes; then + AC_MSG_ERROR(dln_a_out can not make shared library) + fi + with_dln_a_out=yes;; + dnl ( + *) with_dln_a_out=no;; + esac], [with_dln_a_out=no]) + +AC_CACHE_CHECK(whether ELF binaries are produced, rb_cv_binary_elf, +[AC_TRY_LINK([],[], [ +case "`head -1 conftest$EXEEXT | cat -e`" in +dnl ( +'^?ELF'*) + rb_cv_binary_elf=yes;; +dnl ( +*) + rb_cv_binary_elf=no;; +esac], +rb_cv_binary_elf=no)]) + +if test "$rb_cv_binary_elf" = yes; then + AC_DEFINE(USE_ELF) + if test "$with_dln_a_out" = yes; then + AC_MSG_ERROR(dln_a_out does not work with ELF) + fi +fi + +case "$target_os" in +linux* | 
gnu* | k*bsd*-gnu | bsdi*) + if test "$rb_cv_binary_elf" = no; then + with_dln_a_out=yes + else + LDFLAGS="$LDFLAGS -rdynamic" + fi;; +esac +LIBEXT=a + +AC_SUBST(DLDFLAGS)dnl +AC_SUBST(ARCH_FLAG)dnl + +AC_SUBST(STATIC)dnl +AC_SUBST(CCDLFLAGS)dnl +AC_SUBST(LDSHARED)dnl +AC_SUBST(LDSHAREDXX)dnl +AC_SUBST(DLEXT)dnl +AC_SUBST(DLEXT2)dnl +AC_SUBST(LIBEXT)dnl + +STATIC= +: ${PATHFLAG=''} + +if test "$with_dln_a_out" != yes; then + rb_cv_dlopen=unknown + AC_MSG_CHECKING(whether OS depend dynamic link works) + if test "$GCC" = yes; then + case "$target_os" in + nextstep*) CCDLFLAGS="$CCDLFLAGS -fno-common";; + openstep*) CCDLFLAGS="$CCDLFLAGS -fno-common";; + rhapsody*) CCDLFLAGS="$CCDLFLAGS -fno-common";; + darwin*) CCDLFLAGS="$CCDLFLAGS -fno-common";; + human*|bsdi*|beos*|haiku*|cygwin*|mingw*|aix*|interix*) ;; + *) CCDLFLAGS="$CCDLFLAGS -fPIC";; + esac + else + case "$target_os" in + hpux*) CCDLFLAGS="$CCDLFLAGS +Z";; + solaris*|irix*) CCDLFLAGS="$CCDLFLAGS -KPIC" ;; + sunos*) CCDLFLAGS="$CCDLFLAGS -PIC" ;; + esix*|uxpds*) CCDLFLAGS="$CCDLFLAGS -KPIC" ;; + *) : ${CCDLFLAGS=""} ;; + esac + fi + + + AC_ARG_ENABLE(rpath, + [ --disable-rpath embed run path into extension libraries.], + [enable_rpath=$enableval], [enable_rpath="$rb_cv_binary_elf"]) + if test "$enable_rpath" = yes; then + RPATHFLAG=" ${linker_flag}-R%1\$-s" + fi + + case "$target_os" in + hpux*) DLDFLAGS="$DLDFLAGS -E" + : ${LDSHARED='ld -b'} + XLDFLAGS="$XLDFLAGS -Wl,-E" + : ${LIBPATHENV=SHLIB_PATH} + if test "$rb_cv_prog_gnu_ld" = no; then + RPATHFLAG=' +b %1$-s' + fi + rb_cv_dlopen=yes;; + solaris*) if test "$GCC" = yes; then + : ${LDSHARED='$(CC) -shared'} + if test "$rb_cv_prog_gnu_ld" = yes; then + LDFLAGS="$LDFLAGS -Wl,-E" + fi + else + : ${LDSHARED='ld -G'} + fi + rb_cv_dlopen=yes;; + sunos*) : ${LDSHARED='ld -assert nodefinitions'} + rb_cv_dlopen=yes;; + irix*) : ${LDSHARED='ld -shared'} + rb_cv_dlopen=yes;; + sysv4*) : ${LDSHARED='ld -G'} + rb_cv_dlopen=yes;; + nto-qnx*) : ${LDSHARED="qcc -shared"} 
+ rb_cv_dlopen=yes ;; + esix*|uxpds*) : ${LDSHARED="ld -G"} + rb_cv_dlopen=yes ;; + osf*) : ${LDSHARED="ld -shared -expect_unresolved \"*\""} + rb_cv_dlopen=yes ;; + bsdi3*) case "$CC" in + *shlicc*) : ${LDSHARED="$CC -r"} + rb_cv_dlopen=yes ;; + esac ;; + linux* | gnu* | k*bsd*-gnu | netbsd* | bsdi*) + : ${LDSHARED='${CC} -shared'} + if test "$rb_cv_binary_elf" = yes; then + LDFLAGS="$LDFLAGS -Wl,-export-dynamic" + fi + rb_cv_dlopen=yes ;; + interix*) : ${LDSHARED="$CC -shared"} + XLDFLAGS="$XLDFLAGS -Wl,-E" + LIBPATHFLAG=" -L%1\$-s" + rb_cv_dlopen=yes ;; + freebsd*|dragonfly*) : ${LDSHARED="$CC -shared"} + if test "$rb_cv_binary_elf" = yes; then + LDFLAGS="$LDFLAGS -rdynamic" + DLDFLAGS="$DLDFLAGS "'-Wl,-soname,$(.TARGET)' + else + test "$GCC" = yes && test "$rb_cv_prog_gnu_ld" = yes || LDSHARED="ld -Bshareable" + fi + rb_cv_dlopen=yes ;; + openbsd*) : ${LDSHARED="\$(CC) -shared ${CCDLFLAGS}"} + if test "$rb_cv_binary_elf" = yes; then + LDFLAGS="$LDFLAGS -Wl,-E" + fi + rb_cv_dlopen=yes ;; + nextstep*) : ${LDSHARED='cc -r -nostdlib'} + LDFLAGS="$LDFLAGS -u libsys_s" + rb_cv_dlopen=yes ;; + openstep*) : ${LDSHARED='cc -dynamic -bundle -undefined suppress'} + : ${LDFLAGS=""} + rb_cv_dlopen=yes ;; + rhapsody*) : ${LDSHARED='cc -dynamic -bundle -undefined suppress'} + : ${LDFLAGS=""} + rb_cv_dlopen=yes ;; + darwin*) : ${LDSHARED='cc -dynamic -bundle -undefined suppress -flat_namespace'} + : ${LDFLAGS=""} + : ${LIBPATHENV=DYLD_LIBRARY_PATH} + # /usr/local/include is always searched for + # some reason, but /usr/local/lib is not. 
+ hdr=`find /usr/local/include -name \*.h -type f | sed 's:^/usr/local/include/::;q'` + if test -n "$hdr" && $CC -E -include "$hdr" -xc /dev/null | fgrep -q "$hdr"; then + $CC -print-search-dirs | grep -q '^libraries:.*:/usr/local/lib/*' || + echo " $LDFLAGS " | grep -q ' -L */usr/local/lib/* ' || + LDFLAGS="${LDFLAGS:+$LDFLAGS }-L/usr/local/lib" + fi + rb_cv_dlopen=yes ;; + aix*) if test "$GCC" = yes; then + : ${LDSHARED='$(CC) -shared'} + else + : ${LDSHARED='$(CC)'} + fi + LDSHARED="$LDSHARED ${linker_flag}-G" + DLDFLAGS='-eInit_$(TARGET)' + XLDFLAGS="${linker_flag}-bE:ruby.imp ${linker_flag}-brtl" + XLDFLAGS="$XLDFLAGS ${linker_flag}-blibpath:${prefix}/lib:${LIBPATH:-/usr/lib:/lib}" + : ${ARCHFILE="ruby.imp"} + TRY_LINK='$(CC) $(LDFLAGS) -oconftest $(INCFLAGS) -I$(hdrdir) $(CPPFLAGS)' + TRY_LINK="$TRY_LINK"' $(CFLAGS) $(src) $(LIBPATH) $(LOCAL_LIBS) $(LIBS)' + : ${LIBPATHENV=SHLIB_PATH} + RPATHFLAG=" ${linker_flag}-blibpath:%1\$-s" + rb_cv_dlopen=yes ;; + human*) : ${DLDFLAGS=''} + : ${LDSHARED=''} + : ${LDFLAGS=''} + : ${LINK_SO='ar cru $@ $(OBJS)'} + rb_cv_dlopen=yes ;; + beos*) case "$target_cpu" in + powerpc*) + : ${LDSHARED="ld -xms"} + DLDFLAGS="$DLDFLAGS "'-export Init_$(TARGET) -lbe -lroot glue-noinit.a init_term_dyn.o start_dyn.o' + LDFLAGS="$LDFLAGS -L/boot/home/config/lib -lbe -lroot" + ;; + i586*) + : ${LDSHARED="ld -shared"} + DLDFLAGS="$DLDFLAGS -L/boot/develop/lib/x86 -L/boot/home/config/lib \$(topdir)/_APP_ -lbe -lroot" + LDFLAGS="$LDFLAGS -L/boot/develop/lib/x86 -L/boot/home/config/lib -lbe -lroot" + ;; + esac + : ${LIBPATHENV=LIBRARY_PATH} + rb_cv_dlopen=yes ;; + haiku*) case "$target_cpu" in + powerpc*) + : ${LDSHARED="ld -xms"} + DLDFLAGS="$DLDFLAGS "'-export Init_$(TARGET) -lbe -lroot glue-noinit.a init_term_dyn.o start_dyn.o' + ;; + i586*) + : ${LDSHARED="ld -shared"} + DLDFLAGS="$DLDFLAGS -L/boot/develop/lib/x86 -lbe -lroot" + ;; + esac + : ${LIBPATHENV=LIBRARY_PATH} + rb_cv_dlopen=yes ;; + nto-qnx*) DLDFLAGS="$DLDFLAGS -L/lib -L/usr/lib 
-L/usr/local/lib" + : ${LDSHARED='ld -Bshareable -x'} + LDFLAGS="$LDFLAGS -L/lib -L/usr/lib -L/usr/local/lib" + rb_cv_dlopen=yes;; + cygwin*|mingw*) : ${LDSHARED="${CC} -shared "'$(if $(filter-out -g -g0,$(debugflags)),,-s)'} + XLDFLAGS="$XLDFLAGS -Wl,--stack,0x00200000,--enable-auto-import" + DLDFLAGS="${DLDFLAGS} -Wl,--enable-auto-image-base,--enable-auto-import" + : ${LIBPATHENV=""} + rb_cv_dlopen=yes ;; + hiuxmpp) : ${LDSHARED='ld -r'} ;; + atheos*) : ${LDSHARED="$CC -shared"} + rb_cv_dlopen=yes ;; + os2-emx*) LDFLAGS="$LDFLAGS -Zomf" + ;; + *) : ${LDSHARED='ld'} ;; + esac + AC_MSG_RESULT($rb_cv_dlopen) +fi +if test "${LDSHAREDXX}" = ""; then + case "${LDSHARED}" in + *'$(CC)'*) + LDSHAREDXX=`echo "${LDSHARED}" | sed 's/\$(CC)/$(CXX)/'` + ;; + *'${CC}'*) + LDSHAREDXX=`echo "${LDSHARED}" | sed 's/\${CC}/${CXX}/'` + ;; + *$CC*) + LDSHAREDXX=`echo "${LDSHARED}" | sed "s|$CC|$CXX|"` + ;; + ld" "*) + ;; + esac +fi +case ${RPATHFLAG} in +*'%1$'*) + : ${LIBPATHFLAG=' -L%1$-s'};; +*) + : ${LIBPATHFLAG=' -L%s'};; +esac + +AC_SUBST(LINK_SO) +AC_SUBST(LIBPATHFLAG) +AC_SUBST(RPATHFLAG) +AC_SUBST(LIBPATHENV, "${LIBPATHENV-LD_LIBRARY_PATH}") +AC_SUBST(TRY_LINK) + +dln_a_out_works=no +if test "$ac_cv_header_a_out_h" = yes; then + if test "$with_dln_a_out" = yes || test "$rb_cv_dlopen" = unknown; then + cat confdefs.h > config.h + AC_CACHE_CHECK(whether matz's dln works, rb_cv_dln_a_out, + [AC_TRY_COMPILE([ +#define USE_DLN_A_OUT +#include "dln.c" +], + [], + rb_cv_dln_a_out=yes, + rb_cv_dln_a_out=no)]) + if test "$rb_cv_dln_a_out" = yes; then + dln_a_out_works=yes + AC_DEFINE(USE_DLN_A_OUT) + fi + fi +fi + +if test "$dln_a_out_works" = yes; then + if test "$GCC" = yes; then + STATIC=-static + else + STATIC=-Bstatic + fi + DLEXT=so + CCDLFLAGS= +else + case "$target_os" in + hpux*) DLEXT=sl;; + nextstep*) DLEXT=bundle;; + openstep*) DLEXT=bundle;; + rhapsody*) DLEXT=bundle;; + darwin*) DLEXT=bundle;; + os2-emx*) DLEXT=dll;; + cygwin*|mingw*) DLEXT=so;; + *) DLEXT=so;; + esac 
+fi +len=2 # .rb +n=`expr "$DLEXT" : '.*'`; test "$n" -gt "$len" && len=$n +n=`expr "$DLEXT2" : '.*'`; test "$n" -gt "$len" && len=$n +AC_DEFINE_UNQUOTED(DLEXT_MAXLEN, `expr $len + 1`) +test ".$DLEXT" = "." || AC_DEFINE_UNQUOTED(DLEXT, ".$DLEXT") +test ".$DLEXT2" = "." || AC_DEFINE_UNQUOTED(DLEXT2, ".$DLEXT2") +AC_SUBST(DLEXT) + +AC_SUBST(STRIP)dnl +if test "$with_dln_a_out" = yes; then + STRIP=true +else + STRIP=strip +fi + +case "$target_os" in + linux* | gnu* | k*bsd*-gnu) + STRIP='strip -S -x';; + nextstep*) + STRIP='strip -A -n';; + openstep*) + STRIP='strip -A -n';; + rhapsody*) + STRIP='strip -A -n';; + darwin*) + STRIP='strip -A -n';; +esac + +EXTSTATIC= +AC_SUBST(EXTSTATIC)dnl +AC_ARG_WITH(static-linked-ext, + [ --with-static-linked-ext link external modules statically], + [case $withval in + yes) STATIC= + EXTSTATIC=static;; + *) ;; + esac]) + +case "$target_os" in + human*) + AC_CHECK_LIB(signal, _harderr) + AC_CHECK_LIB(hmem, hmemset) + AC_CHECK_FUNCS(select) + AC_CACHE_CHECK(whether PD libc _dtos18 fail to convert big number, + rb_cv_missing__dtos18, + [AC_TRY_RUN( +changequote(<<, >>)dnl +<< +#include +int +main() +{ + char buf[256]; + sprintf(buf, "%g", 1e+300); + return (strcmp (buf, "1e+300") ? 0 : 1); +} +>>, +changequote([, ])dnl +rb_cv_missing__dtos18=yes, rb_cv_missing__dtos18=no, rb_cv_missing__dtos18=no)]) + if test "$rb_cv_missing__dtos18" = yes; then + AC_DEFINE(MISSING__DTOS18) + fi + AC_CACHE_CHECK(whether PD libc fconvert fail to round, + rb_cv_missing_fconvert, + [AC_TRY_RUN( +changequote(<<, >>)dnl +<< +#include +#include +int +main() +{ + char buf[256]; + sprintf(buf, "%f", log(exp(1.0))); + return (strcmp (buf, "1.000000") ? 
0 : 1); +} +>>, +changequote([, ])dnl +rb_cv_missing_fconvert=yes, rb_cv_missing_fconvert=no, rb_cv_missing_fconvert=no)]) + if test "$rb_cv_missing_fconvert" = yes; then + AC_DEFINE(MISSING_FCONVERT) + fi + AC_LIBOBJ([x68.o]) + CFLAGS="$CFLAGS -fansi-only" + XCFLAGS="$XCFLAGS -cc1-stack=262144 -cpp-stack=2694144" + EXEEXT=.x + OBJEXT=o + setup=Setup.x68 + ;; + dnl OS/2 environment w/ Autoconf 2.1x for EMX + os2-emx) + AC_LIBOBJ([os2]) + setup=Setup.emx + ;; + *djgpp*) + setup=Setup.dj + ;; + *) + setup=Setup + ;; +esac + +AC_SUBST(setup) + +if test "$prefix" = NONE; then + prefix=$ac_default_prefix +fi + +#if test "$fat_binary" != no ; then +# CFLAGS="$CFLAGS $ARCH_FLAG" +#fi + +if test x"$cross_compiling" = xyes; then + test x"$MINIRUBY" = x && MINIRUBY="${RUBY-ruby} -I`pwd` "-r'$(arch)-fake' + PREP='$(arch)-fake.rb' + RUNRUBY='$(MINIRUBY) -I`cd $(srcdir)/lib; pwd`' +else + MINIRUBY='./miniruby$(EXEEXT) -I$(srcdir)/lib' + MINIRUBY="$MINIRUBY"' -I$(EXTOUT)/common -I./- -r$(srcdir)/ext/purelib.rb' + PREP='miniruby$(EXEEXT)' + RUNRUBY='$(MINIRUBY) $(srcdir)/runruby.rb --extout=$(EXTOUT)' +fi +AC_SUBST(MINIRUBY) +AC_SUBST(PREP) +AC_SUBST(RUNRUBY) +AC_SUBST(EXTOUT, [${EXTOUT=.ext}]) + +FIRSTMAKEFILE="" +LIBRUBY_A='lib$(RUBY_SO_NAME)-static.a' +LIBRUBY='$(LIBRUBY_A)' +LIBRUBYARG_STATIC='-l$(RUBY_SO_NAME)-static' +LIBRUBYARG='$(LIBRUBYARG_STATIC)' +SOLIBS= + +case "$target_os" in + cygwin*|mingw*|beos*|haiku*|openstep*|nextstep*|rhapsody*|darwin*|os2-emx*) + : ${DLDLIBS=""} + ;; + *) + DLDLIBS="$DLDLIBS -lc" + ;; +esac + +RUBY_SO_NAME='$(RUBY_INSTALL_NAME)' +LIBRUBY_LDSHARED=$LDSHARED +LIBRUBY_DLDFLAGS=$DLDFLAGS +LIBRUBY_SO='lib$(RUBY_SO_NAME).so.$(MAJOR).$(MINOR).$(TEENY)' +LIBRUBY_ALIASES='lib$(RUBY_SO_NAME).so' +ENABLE_SHARED=no + +AC_ARG_ENABLE(shared, + [ --enable-shared build a shared library for Ruby. 
], + [enable_shared=$enableval]) +LIBRUBYARG_SHARED='-l$(RUBY_SO_NAME)' +if test "$enable_shared" = 'yes'; then + LIBRUBY='$(LIBRUBY_SO)' + LIBRUBYARG='$(LIBRUBYARG_SHARED)' + CFLAGS="$CFLAGS $CCDLFLAGS" + ENABLE_SHARED=yes + if test "$rb_cv_binary_elf" = yes; then + SOLIBS='$(LIBS)' + fi + case "$target_os" in + sunos4*) + LIBRUBY_ALIASES='lib$(RUBY_SO_NAME).so.$(MAJOR).$(MINOR) lib$(RUBY_SO_NAME).so' + ;; + linux* | gnu* | k*bsd*-gnu | atheos*) + LIBRUBY_DLDFLAGS='-Wl,-soname,lib$(RUBY_SO_NAME).so.$(MAJOR).$(MINOR)' + LIBRUBY_ALIASES='lib$(RUBY_SO_NAME).so.$(MAJOR).$(MINOR) lib$(RUBY_SO_NAME).so' + ;; + freebsd*|dragonfly*) + SOLIBS='$(LIBS)' + LIBRUBY_SO='lib$(RUBY_SO_NAME).so.$(MAJOR)$(MINOR)' + if test "$rb_cv_binary_elf" != "yes" ; then + LIBRUBY_SO="$LIBRUBY_SO.\$(TEENY)" + LIBRUBY_ALIASES='' + fi + ;; + netbsd*) + SOLIBS='$(LIBS)' + LIBRUBY_SO='lib$(RUBY_SO_NAME).so.$(MAJOR)$(MINOR).$(TEENY)' + LIBRUBY_DLDFLAGS='-Wl,-soname,lib$(RUBY_SO_NAME).so.$(MAJOR)$(MINOR)' + if test "$rb_cv_binary_elf" = yes; then # ELF platforms + LIBRUBY_ALIASES='lib$(RUBY_SO_NAME).so.$(MAJOR)$(MINOR) lib$(RUBY_SO_NAME).so' + else # a.out platforms + LIBRUBY_ALIASES="" + fi + ;; + openbsd*) + SOLIBS='$(LIBS)' + LIBRUBY_SO='lib$(RUBY_INSTALL_NAME).so.$(MAJOR).'`expr ${MINOR} \* 10 + ${TEENY}` + ;; + solaris*) + SOLIBS='$(LIBS)' + LIBRUBY_SO='lib$(RUBY_SO_NAME).so.$(MAJOR)' + LIBRUBY_ALIASES='lib$(RUBY_SO_NAME).so.$(MAJOR).$(MINOR).$(TEENY) lib$(RUBY_SO_NAME).so' + if test "$GCC" = yes; then + LIBRUBY_DLDFLAGS="$DLDFLAGS "'-Wl,-h,$(@F)' + fi + XLDFLAGS="$XLDFLAGS "'-R${libdir}' + ;; + hpux*) + XLDFLAGS="$XLDFLAGS "'-Wl,+s,+b,$(libdir)' + LIBRUBY_SO='lib$(RUBY_SO_NAME).sl.$(MAJOR).$(MINOR).$(TEENY)' + LIBRUBY_ALIASES='lib$(RUBY_SO_NAME).sl.$(MAJOR).$(MINOR) lib$(RUBY_SO_NAME).sl' + ;; + aix*) + LIBRUBY_DLDFLAGS="${linker_flag}-bnoentry $XLDFLAGS" + LIBRUBYARG_SHARED='-L${libdir} -l${RUBY_SO_NAME}' + SOLIBS='-lm -lc' + ;; + beos*) + case "$target_cpu" in + powerpc*) + 
LIBRUBY_DLDFLAGS='-f ruby.exp -lnet -lbe -lroot glue-noinit.a init_term_dyn.o start_dyn.o' + ;; + esac + ;; + darwin*) + LIBRUBY_SO='lib$(RUBY_SO_NAME).$(MAJOR).$(MINOR).$(TEENY).dylib' + LIBRUBY_LDSHARED='cc -dynamiclib -undefined suppress -flat_namespace' + LIBRUBY_DLDFLAGS='-install_name $(libdir)/lib$(RUBY_SO_NAME).dylib -current_version $(MAJOR).$(MINOR).$(TEENY) -compatibility_version $(ruby_version)' + LIBRUBY_ALIASES='lib$(RUBY_SO_NAME).$(MAJOR).$(MINOR).dylib lib$(RUBY_SO_NAME).dylib' + ;; + interix*) + LIBRUBYARG_SHARED='-L. -L${libdir} -l$(RUBY_SO_NAME)' + ;; + *) + ;; + esac +fi +if test "$enable_rpath" = yes; then + LIBRUBY_RPATHFLAGS="${linker_flag}-R ${linker_flag}\$(libdir) -L\$(libdir)" + LIBRUBYARG_SHARED="$LIBRUBY_RPATHFLAGS $LIBRUBYARG_SHARED" + LIBRUBYARG_STATIC="$LIBRUBY_RPATHFLAGS $LIBRUBYARG_STATIC" +fi + +LDFLAGS="-L. $LDFLAGS" +AC_SUBST(ARCHFILE) + +dnl build rdoc index if requested +RDOCTARGET="" +AC_ARG_ENABLE(install-doc, + [ --disable-install-doc do not install rdoc indexes during install ], + [install_doc=$enableval], [install_doc=yes]) +if test "$install_doc" != no; then + RDOCTARGET="install-doc" +fi +AC_SUBST(RDOCTARGET) + +if test "$rb_with_pthread" = "yes"; then + THREAD_MODEL=pthread +fi +MINIDLNOBJ=dln.o +case "$target_os" in + linux*) + ;; + netbsd*) + CFLAGS="$CFLAGS -pipe" + ;; + nextstep*|openstep*) + # The -fno-common is needed if we wish to embed the Ruby interpreter + # into a plugin module of some project (as opposed to embedding it + # within the project's application). The -I/usr/local/include is + # needed because CPP as discovered by configure (cc -E -traditional) + # fails to consult /usr/local/include by default. This causes + # mkmf.rb's have_header() to fail if the desired resource happens to be + # installed in the /usr/local tree. 
+ CFLAGS="$CFLAGS -pipe -fno-common" + CPPFLAGS="$CPPFLAGS -I/usr/local/include" + ;; + rhapsody*) + CFLAGS="$CFLAGS -pipe -no-precomp -fno-common" + ;; + darwin*) + CFLAGS="$CFLAGS -pipe -fno-common" + MINIDLNOBJ=dmydln.o + ;; + os2-emx) + CFLAGS="$CFLAGS -DOS2" + LIBRUBY_A=`echo $LIBRUBY_A | sed 's/^lib//'` + LIBRUBY_SO=`echo $LIBRUBY_SO | sed 's/^lib//'` + LIBRUBY_ALIASES=`for i in $LIBRUBY_ALIASES; do echo "$i"; done | sed 's/^lib//'` + ;; + osf*) + if test "$GCC" != "yes" ; then + # compile something small: taint.c is fine for this. + # the main point is the '-v' flag of 'cc'. + case "`cc -v -I. -c main.c -o /tmp/main.o 2>&1`" in + */gemc_cc*) # we have the new DEC GEM CC + CFLAGS="$CFLAGS -oldc" + ;; + *) # we have the old MIPS CC + ;; + esac + # cleanup + rm -f /tmp/main.o + CFLAGS="$CFLAGS -std" + fi + ;; + beos*) + case "$target_cpu" in + powerpc*) + CFLAGS="$CFLAGS -relax_pointers" + ;; + esac + CPPFLAGS="$CPPFLAGS -I/boot/home/config/include" + ;; + cygwin*|mingw*) + LIBRUBY_DLDFLAGS="${DLDFLAGS}"' -Wl,--out-implib=$(LIBRUBY)' + case "$target_os" in + cygwin*) + if test x"$enable_shared" = xyes; then + LIBRUBY_SO='cyg$(RUBY_SO_NAME)'${MAJOR}${MINOR}${TEENY}.dll + LIBRUBY_DLDFLAGS="${LIBRUBY_DLDFLAGS}"' $(RUBYDEF)' + fi + ;; + mingw*) + RUBY_SO_NAME=${rb_cv_msvcrt}-'$(RUBY_INSTALL_NAME)'${MAJOR}${MINOR}${TEENY} + if test x"$enable_shared" = xyes; then + LIBRUBY_SO='$(RUBY_SO_NAME)'.dll + LIBRUBY_DLDFLAGS="${LIBRUBY_DLDFLAGS}"' $(RUBYDEF)' + fi + AC_LIBOBJ([win32]) + COMMON_LIBS=m +# COMMON_MACROS="WIN32_LEAN_AND_MEAN=" + COMMON_HEADERS="winsock2.h windows.h" + THREAD_MODEL=win32 + ;; + esac + LIBRUBY_ALIASES='' + FIRSTMAKEFILE=GNUmakefile:cygwin/GNUmakefile.in + SOLIBS='$(LIBS)' + if test x"$enable_shared" = xyes; then + LIBRUBY='lib$(RUBY_SO_NAME).dll.a' + else + LIBRUBY_SO=dummy + LIBRUBY='lib$(RUBY_SO_NAME).a' + LIBRUBYARG='-l$(RUBY_SO_NAME)' + fi + MINIDLNOBJ=dmydln.o + ;; + hpux*) + case "$YACC" in + *yacc*) + XCFLAGS="$XCFLAGS -DYYMAXDEPTH=300" + 
YACC="$YACC -Nl40000 -Nm40000" + ;; + esac + MINIDLNOBJ=dmydln.o + ;; + *msdosdjgpp*) + FIRSTMAKEFILE=GNUmakefile:djgpp/GNUmakefile.in + ;; + *) + ;; +esac +MINIOBJS="$MINIDLNOBJ" + +case "$FIRSTMAKEFILE" in +dnl ( +*GNUmakefile:*) + gnumake=yes + ;; +dnl ( +*) + AC_MSG_CHECKING([for if ${MAKE-make} is GNU make]) + mkdir conftest.dir + echo "all:; @echo yes" > conftest.dir/GNUmakefile + echo "all:; @echo no" > conftest.dir/Makefile + gnumake=`(cd conftest.dir; ${MAKE-make})` + rm -fr conftest.dir + case "$gnumake" in + dnl ( + *yes*) + echo "include Makefile" > GNUmakefile + echo "-include uncommon.mk" >> GNUmakefile + gnumake=yes;; + dnl ( + *) + gnumake=no;; + esac + AC_MSG_RESULT($gnumake) + ;; +esac + +CPPFLAGS="$CPPFLAGS "'$(DEFS)' +test -z "$CPPFLAGS" || CPPFLAGS="$CPPFLAGS "; CPPFLAGS="$CPPFLAGS"'${cppflags}' +cflagspat=`eval echo '"'"${cflags}"'"' | sed 's/[][|.*]/\\&/g'` +CFLAGS=`echo "$CFLAGS" | sed "s|$cflagspat"'|${cflags}|'` +cxxflagspat=`eval echo '"'"${cxxflags}"'"' | sed 's/[][|.*]/\\&/g'` +CXXFLAGS=`echo "$CXXFLAGS" | sed "s|$cxxflagspat"'|${cxxflags}|'` +AC_SUBST(cppflags, [])dnl +AC_SUBST(cflags, ['${optflags} ${debugflags} ${warnflags}'])dnl +AC_SUBST(optflags)dnl +AC_SUBST(debugflags)dnl +AC_SUBST(warnflags)dnl +AC_SUBST(XCFLAGS)dnl +AC_SUBST(XLDFLAGS)dnl +AC_SUBST(LIBRUBY_LDSHARED) +AC_SUBST(LIBRUBY_DLDFLAGS) +AC_SUBST(RUBY_INSTALL_NAME) +AC_SUBST(rubyw_install_name) +AC_SUBST(RUBYW_INSTALL_NAME) +AC_SUBST(RUBY_SO_NAME) +AC_SUBST(LIBRUBY_A) +AC_SUBST(LIBRUBY_SO) +AC_SUBST(LIBRUBY_ALIASES) +AC_SUBST(LIBRUBY) +AC_SUBST(LIBRUBYARG) +AC_SUBST(LIBRUBYARG_STATIC) +AC_SUBST(LIBRUBYARG_SHARED) +AC_SUBST(SOLIBS) +AC_SUBST(DLDLIBS) +AC_SUBST(ENABLE_SHARED) +AC_SUBST(MAINLIBS) +AC_SUBST(COMMON_LIBS) +AC_SUBST(COMMON_MACROS) +AC_SUBST(COMMON_HEADERS) +AC_SUBST(EXPORT_PREFIX) +AC_SUBST(MINIOBJS) +AC_SUBST(THREAD_MODEL) + +MAKEFILES="Makefile `echo $FIRSTMAKEFILE | sed 's/:.*//'`" +MAKEFILES="`echo $MAKEFILES`" +AC_SUBST(MAKEFILES) + +ri_prefix= +test 
"$program_prefix" != NONE && + ri_prefix=$program_prefix + +ri_suffix= +test "$program_suffix" != NONE && + ri_suffix=$program_suffix + +RUBY_INSTALL_NAME="${ri_prefix}ruby${ri_suffix}" +case "$target_os" in + cygwin*|mingw*) + RUBYW_INSTALL_NAME="${ri_prefix}rubyw${ri_suffix}" + rubyw_install_name="$RUBYW_INSTALL_NAME" + ;; +esac +RUBY_LIB_PREFIX=`eval echo \\"${libdir}/${RUBY_INSTALL_NAME}\\"` + +AC_ARG_WITH(ruby-version, + [ --with-ruby-version=STR ruby version string for version specific directories [[full]] (full|minor|STR)], + [ruby_version=$withval], + [ruby_version=full]) +case "$ruby_version" in + full) + ruby_version="${MAJOR}.${MINOR}.${TEENY}" + ;; + minor) + ruby_version="${MAJOR}.${MINOR}" + ;; +esac + +AC_ARG_WITH(sitedir, + [ --with-sitedir=DIR site libraries in DIR [[LIBDIR/ruby/site_ruby]]], + [sitedir=$withval], + [sitedir='${libdir}/${RUBY_INSTALL_NAME}/site_ruby']) +SITE_DIR=`eval echo \\"${sitedir}\\"` + +AC_ARG_WITH(vendordir, + [ --with-vendordir=DIR vendor libraries in DIR [[LIBDIR/ruby/vendor_ruby]]], + [vendordir=$withval], + [vendordir='${libdir}/${RUBY_INSTALL_NAME}/vendor_ruby']) +VENDOR_DIR=`eval echo \\"${vendordir}\\"` + +case "$target_os" in + cygwin*|mingw*|*djgpp*|os2-emx*) + RUBY_LIB_PREFIX="`eval echo "$RUBY_LIB_PREFIX" | sed 's|^NONE/|/|;s|^'"$prefix"'/|/|'`" + RUBY_SITE_LIB_PATH="`eval echo "$SITE_DIR" | sed 's|^NONE/|/|;s|^'"$prefix"'/|/|'`" + RUBY_VENDOR_LIB_PATH="`eval echo "$VENDOR_DIR" | sed 's|^NONE/|/|;s|^'"$prefix"'/|/|'`" + ;; + *) + RUBY_LIB_PREFIX="`eval echo \\"$RUBY_LIB_PREFIX\\" | sed 's|^NONE/|'"$prefix"'/|'`" + RUBY_SITE_LIB_PATH="`eval echo \\"$SITE_DIR\\" | sed 's|^NONE/|'"$prefix"'/|'`" + RUBY_VENDOR_LIB_PATH="`eval echo \\"$VENDOR_DIR\\" | sed 's|^NONE/|'"$prefix"'/|'`" + ;; +esac +RUBY_LIB_PATH="${RUBY_LIB_PREFIX}/${ruby_version}" +RUBY_SITE_LIB_PATH2="${RUBY_SITE_LIB_PATH}/${ruby_version}" +RUBY_VENDOR_LIB_PATH2="${RUBY_VENDOR_LIB_PATH}/${ruby_version}" + +AC_DEFINE_UNQUOTED(RUBY_LIB, "${RUBY_LIB_PATH}") 
+AC_DEFINE_UNQUOTED(RUBY_SITE_LIB, "${RUBY_SITE_LIB_PATH}") +AC_DEFINE_UNQUOTED(RUBY_SITE_LIB2, "${RUBY_SITE_LIB_PATH2}") +AC_DEFINE_UNQUOTED(RUBY_VENDOR_LIB, "${RUBY_VENDOR_LIB_PATH}") +AC_DEFINE_UNQUOTED(RUBY_VENDOR_LIB2, "${RUBY_VENDOR_LIB_PATH2}") + +AC_SUBST(arch)dnl +AC_SUBST(sitearch)dnl +AC_SUBST(ruby_version)dnl +AC_SUBST(sitedir)dnl +AC_SUBST(vendordir)dnl + +configure_args=$ac_configure_args +AC_SUBST(configure_args)dnl + +if test "$fat_binary" != no ; then + arch="fat-${target_os}" + + AC_DEFINE_UNQUOTED(RUBY_THIN_ARCHLIB, + "${RUBY_LIB_PATH}/" __ARCHITECTURE__ "-${target_os}") + + AC_DEFINE_UNQUOTED(RUBY_SITE_THIN_ARCHLIB, + "${RUBY_SITE_LIB_PATH}/" __ARCHITECTURE__ "-${target_os}") + AC_DEFINE_UNQUOTED(RUBY_VENDOR_THIN_ARCHLIB, + "${RUBY_VENDOR_LIB_PATH}/" __ARCHITECTURE__ "-${target_os}") + AC_DEFINE_UNQUOTED(RUBY_PLATFORM, __ARCHITECTURE__ "-${target_os}") +else + arch="${target_cpu}-${target_os}" + AC_DEFINE_UNQUOTED(RUBY_PLATFORM, "${arch}") +fi + +case "$target_os" in + mingw*) sitearch="$target_cpu-$rb_cv_msvcrt" ;; + *) sitearch="${arch}" ;; +esac + +AC_DEFINE_UNQUOTED(RUBY_ARCHLIB, "${RUBY_LIB_PATH}/${arch}") +AC_DEFINE_UNQUOTED(RUBY_SITE_ARCHLIB, "${RUBY_SITE_LIB_PATH2}/${sitearch}") +AC_DEFINE_UNQUOTED(RUBY_VENDOR_ARCHLIB, "${RUBY_VENDOR_LIB_PATH2}/${sitearch}") + +AC_ARG_WITH(search-path, + [ --with-search-path=DIR specify the additional search path], + [search_path=$withval]) +if test "$search_path" != ""; then + AC_DEFINE_UNQUOTED(RUBY_SEARCH_PATH,"$search_path") +fi + +AC_ARG_WITH(rubyhdrdir, + AS_HELP_STRING([--with-rubyhdrdir=DIR], [core headers in DIR [[INCLUDEDIR/RUBY_BASE_NAME-RUBY_VERSION]]]), + [rubyhdrdir=$withval], + [rubyhdrdir='${includedir}/${RUBY_INSTALL_NAME}-${ruby_version}']) + +AC_ARG_WITH(sitehdrdir, + [ --with-site-hdrdir=DIR core headers in DIR [INCLUDEDIR]], + [sitehdrdir=$withval], + [sitehdrdir='${rubyhdrdir}/site_ruby']) + +AC_ARG_WITH(vendorhdrdir, + [ --with-vendor-hdrdir=DIR core headers in DIR [INCLUDEDIR]], + 
[vendorhdrdir=$withval], + [vendorhdrdir='${rubyhdrdir}/vendor_ruby']) + +AC_SUBST(rubyhdrdir)dnl +AC_SUBST(sitehdrdir)dnl +AC_SUBST(vendorhdrdir)dnl + +AC_ARG_WITH(mantype, + [ --with-mantype=TYPE specify man page type; TYPE is one of man and doc], + [ + case "$withval" in + man|doc) + MANTYPE=$withval + ;; + *) + AC_MSG_ERROR(invalid man type: $withval) + ;; + esac + ]) +if test -z "$MANTYPE"; then + AC_PATH_PROGS(NROFF, nroff awf, /bin/false, "/usr/bin:/usr/ucb") + if ${NROFF} -mdoc ${srcdir}/man/ruby.1 >/dev/null 2>&1; then + MANTYPE=doc + else + MANTYPE=man + fi +fi +AC_SUBST(MANTYPE) + +arch_hdrdir="${EXTOUT}/include/${arch}/ruby" +$MAKEDIRS "${arch_hdrdir}" +config_h="${arch_hdrdir}/config.h" +if test -f "${config_h}" && tr -d '\015' < confdefs.h | cmp -s "${config_h}" -; then + echo "config.h unchanged" +else + echo "creating config.h" + tr -d '\015' < confdefs.h > "${config_h}" +fi +tr -d '\015' < largefile.h > confdefs.h +rm largefile.h + +BUILTIN_ENCS=["`sed -n -e '/^BUILTIN_ENCS[ ]*=/{' \ + -e s/// -e :l -e '/\\\\$/N' -e 's/\\\\\\n/ /' -e 't l' -e p \ + -e '}' "${srcdir}/enc/Makefile.in"`"] +BUILTIN_ENCOBJS= +for e in $BUILTIN_ENCS; do BUILTIN_ENCOBJS="$BUILTIN_ENCOBJS `basename $e .c`"'.$(OBJEXT)'; done +AC_SUBST(BUILTIN_ENCOBJS) + +BUILTIN_TRANSES=["`sed -n -e '/^BUILTIN_TRANSES[ ]*=/{' \ + -e s/// -e :l -e '/\\\\$/N' -e 's/\\\\\\n/ /' -e 't l' -e p \ + -e '}' "${srcdir}/enc/Makefile.in"`"] +BUILTIN_TRANSSRCS= +BUILTIN_TRANSOBJS= +for e in $BUILTIN_TRANSES; do + BUILTIN_TRANSSRCS="$BUILTIN_TRANSSRCS `basename $e .trans`"'.c'; + BUILTIN_TRANSOBJS="$BUILTIN_TRANSOBJS `basename $e .trans`"'.$(OBJEXT)'; +done +AC_SUBST(BUILTIN_TRANSSRCS) +AC_SUBST(BUILTIN_TRANSOBJS) + +AC_CONFIG_FILES($FIRSTMAKEFILE) +AC_CONFIG_FILES(Makefile, [{ + if test -d "$srcdir/.svn"; then + VCS='svn' + VCSUP='$(VCS) up $(SVNUPOPTIONS)' + elif test -d "$srcdir/.git/svn"; then + VCS='git svn' + VCSUP='$(VCS) rebase $(GITSVNREBASEOPTIONS)' + elif test -d "$srcdir/.git"; then + 
VCS='git' + VCSUP='$(VCS) pull $(GITPULLOPTIONS)' + else + VCS='echo cannot' + VCSUP='$(VCS)' + fi + sed '/^MISSING/s/\$U\././g;/^VCS *=/s#@VCS@#'"$VCS"'#;/^VCSUP *=/s#@VCSUP@#'"$VCSUP"'#' Makefile + echo; test x"$EXEEXT" = x || echo 'miniruby: miniruby$(EXEEXT)' + test "$RUBY_INSTALL_NAME$EXEEXT" = ruby || echo 'ruby: $(PROGRAM);' + if test "$gnumake" != yes; then + echo ['$(MKFILES): $(srcdir)/common.mk'] + sed ['s/{\$([^(){}]*)[^{}]*}//g'] ${srcdir}/common.mk + else + echo 'distclean-local::; @$(RM) GNUmakefile uncommon.mk' + fi + } >> confmk$$.tmp && mv -f confmk$$.tmp Makefile], +[RUBY_INSTALL_NAME=$RUBY_INSTALL_NAME EXEEXT=$EXEEXT gnumake=$gnumake]) +AC_OUTPUT diff --git a/cont.c b/cont.c new file mode 100644 index 0000000..966600f --- /dev/null +++ b/cont.c @@ -0,0 +1,1032 @@ +/********************************************************************** + + cont.c - + + $Author: yugui $ + created at: Thu May 23 09:03:43 2007 + + Copyright (C) 2007 Koichi Sasada + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "vm_core.h" +#include "gc.h" +#include "eval_intern.h" + +#define CAPTURE_JUST_VALID_VM_STACK 1 + +enum context_type { + CONTINUATION_CONTEXT = 0, + FIBER_CONTEXT = 1, + ROOT_FIBER_CONTEXT = 2 +}; + +typedef struct rb_context_struct { + enum context_type type; + VALUE self; + int argc; + VALUE value; + VALUE *vm_stack; +#ifdef CAPTURE_JUST_VALID_VM_STACK + int vm_stack_slen; /* length of stack (head of th->stack) */ + int vm_stack_clen; /* length of control frames (tail of th->stack) */ +#endif + VALUE *machine_stack; + VALUE *machine_stack_src; +#ifdef __ia64 + VALUE *machine_register_stack; + VALUE *machine_register_stack_src; + int machine_register_stack_size; +#endif + rb_thread_t saved_thread; + rb_jmpbuf_t jmpbuf; + int machine_stack_size; +} rb_context_t; + +enum fiber_status { + CREATED, + RUNNING, + TERMINATED +}; + +typedef struct rb_fiber_struct { + rb_context_t cont; + VALUE prev; + 
enum fiber_status status; + struct rb_fiber_struct *prev_fiber; + struct rb_fiber_struct *next_fiber; +} rb_fiber_t; + +static VALUE rb_cContinuation; +static VALUE rb_cFiber; +static VALUE rb_eFiberError; + +#define GetContPtr(obj, ptr) \ + Data_Get_Struct(obj, rb_context_t, ptr) + +#define GetFiberPtr(obj, ptr) do {\ + ptr = (rb_fiber_t*)DATA_PTR(obj);\ + if (!ptr) rb_raise(rb_eFiberError, "uninitialized fiber");\ +} while(0) + +NOINLINE(static VALUE cont_capture(volatile int *stat)); + +void rb_thread_mark(rb_thread_t *th); + +static void +cont_mark(void *ptr) +{ + RUBY_MARK_ENTER("cont"); + if (ptr) { + rb_context_t *cont = ptr; + rb_gc_mark(cont->value); + rb_thread_mark(&cont->saved_thread); + + if (cont->vm_stack) { +#ifdef CAPTURE_JUST_VALID_VM_STACK + rb_gc_mark_locations(cont->vm_stack, + cont->vm_stack + cont->vm_stack_slen + cont->vm_stack_clen); +#else + rb_gc_mark_localtion(cont->vm_stack, + cont->vm_stack, cont->saved_thread.stack_size); +#endif + } + + if (cont->machine_stack) { + rb_gc_mark_locations(cont->machine_stack, + cont->machine_stack + cont->machine_stack_size); + } +#ifdef __ia64 + if (cont->machine_register_stack) { + rb_gc_mark_locations(cont->machine_register_stack, + cont->machine_register_stack + cont->machine_register_stack_size); + } +#endif + } + RUBY_MARK_LEAVE("cont"); +} + +static void +cont_free(void *ptr) +{ + RUBY_FREE_ENTER("cont"); + if (ptr) { + rb_context_t *cont = ptr; + RUBY_FREE_UNLESS_NULL(cont->saved_thread.stack); fflush(stdout); + RUBY_FREE_UNLESS_NULL(cont->machine_stack); +#ifdef __ia64 + RUBY_FREE_UNLESS_NULL(cont->machine_register_stack); +#endif + RUBY_FREE_UNLESS_NULL(cont->vm_stack); + + /* free rb_cont_t or rb_fiber_t */ + ruby_xfree(ptr); + } + RUBY_FREE_LEAVE("cont"); +} + +static void +fiber_mark(void *ptr) +{ + RUBY_MARK_ENTER("cont"); + if (ptr) { + rb_fiber_t *fib = ptr; + rb_gc_mark(fib->prev); + cont_mark(&fib->cont); + } + RUBY_MARK_LEAVE("cont"); +} + +static void +fiber_link_join(rb_fiber_t 
*fib) +{ + VALUE current_fibval = rb_fiber_current(); + rb_fiber_t *current_fib; + GetFiberPtr(current_fibval, current_fib); + + /* join fiber link */ + fib->next_fiber = current_fib->next_fiber; + fib->prev_fiber = current_fib; + current_fib->next_fiber->prev_fiber = fib; + current_fib->next_fiber = fib; +} + +static void +fiber_link_remove(rb_fiber_t *fib) +{ + fib->prev_fiber->next_fiber = fib->next_fiber; + fib->next_fiber->prev_fiber = fib->prev_fiber; +} + +static void +fiber_free(void *ptr) +{ + RUBY_FREE_ENTER("fiber"); + if (ptr) { + rb_fiber_t *fib = ptr; + + if (fib->cont.type != ROOT_FIBER_CONTEXT) { + st_free_table(fib->cont.saved_thread.local_storage); + } + fiber_link_remove(fib); + + cont_free(&fib->cont); + } + RUBY_FREE_LEAVE("fiber"); +} + +static void +cont_save_machine_stack(rb_thread_t *th, rb_context_t *cont) +{ + int size; + rb_thread_t *sth = &cont->saved_thread; + + SET_MACHINE_STACK_END(&th->machine_stack_end); +#ifdef __ia64 + th->machine_register_stack_end = rb_ia64_bsp(); +#endif + + if (th->machine_stack_start > th->machine_stack_end) { + size = cont->machine_stack_size = th->machine_stack_start - th->machine_stack_end; + cont->machine_stack_src = th->machine_stack_end; + } + else { + size = cont->machine_stack_size = th->machine_stack_end - th->machine_stack_start; + cont->machine_stack_src = th->machine_stack_start; + } + + if (cont->machine_stack) { + REALLOC_N(cont->machine_stack, VALUE, size); + } + else { + cont->machine_stack = ALLOC_N(VALUE, size); + } + + FLUSH_REGISTER_WINDOWS; + MEMCPY(cont->machine_stack, cont->machine_stack_src, VALUE, size); + +#ifdef __ia64 + rb_ia64_flushrs(); + size = cont->machine_register_stack_size = th->machine_register_stack_end - th->machine_register_stack_start; + cont->machine_register_stack_src = th->machine_register_stack_start; + if (cont->machine_register_stack) { + REALLOC_N(cont->machine_register_stack, VALUE, size); + } + else { + cont->machine_register_stack = ALLOC_N(VALUE, size); + } 
+ + MEMCPY(cont->machine_register_stack, cont->machine_register_stack_src, VALUE, size); +#endif + + sth->machine_stack_start = sth->machine_stack_end = 0; +#ifdef __ia64 + sth->machine_register_stack_start = sth->machine_register_stack_end = 0; +#endif +} + +static void +cont_init(rb_context_t *cont) +{ + rb_thread_t *th = GET_THREAD(); + + /* save thread context */ + cont->saved_thread = *th; +} + +static rb_context_t * +cont_new(VALUE klass) +{ + rb_context_t *cont; + volatile VALUE contval; + + contval = Data_Make_Struct(klass, rb_context_t, cont_mark, cont_free, cont); + cont->self = contval; + cont_init(cont); + return cont; +} + +void rb_vm_stack_to_heap(rb_thread_t *th); + +static VALUE +cont_capture(volatile int *stat) +{ + rb_context_t *cont; + rb_thread_t *th = GET_THREAD(), *sth; + volatile VALUE contval; + + rb_vm_stack_to_heap(th); + cont = cont_new(rb_cContinuation); + contval = cont->self; + sth = &cont->saved_thread; + +#ifdef CAPTURE_JUST_VALID_VM_STACK + cont->vm_stack_slen = th->cfp->sp + th->mark_stack_len - th->stack; + cont->vm_stack_clen = th->stack + th->stack_size - (VALUE*)th->cfp; + cont->vm_stack = ALLOC_N(VALUE, cont->vm_stack_slen + cont->vm_stack_clen); + MEMCPY(cont->vm_stack, th->stack, VALUE, cont->vm_stack_slen); + MEMCPY(cont->vm_stack + cont->vm_stack_slen, (VALUE*)th->cfp, VALUE, cont->vm_stack_clen); +#else + cont->vm_stack = ALLOC_N(VALUE, th->stack_size); + MEMCPY(cont->vm_stack, th->stack, VALUE, th->stack_size); +#endif + sth->stack = 0; + + cont_save_machine_stack(th, cont); + + if (ruby_setjmp(cont->jmpbuf)) { + VALUE value; + + value = cont->value; + if (cont->argc == -1) rb_exc_raise(value); + cont->value = Qnil; + *stat = 1; + return value; + } + else { + *stat = 0; + return cont->self; + } +} + +NOINLINE(NORETURN(static void cont_restore_1(rb_context_t *))); + +static void +cont_restore_1(rb_context_t *cont) +{ + rb_thread_t *th = GET_THREAD(), *sth = &cont->saved_thread; + + /* restore thread context */ + if 
(cont->type == CONTINUATION_CONTEXT) { + /* continuation */ + VALUE fib; + + th->fiber = sth->fiber; + fib = th->fiber ? th->fiber : th->root_fiber; + + if (fib) { + rb_context_t *fcont; + GetContPtr(fib, fcont); + th->stack_size = fcont->saved_thread.stack_size; + th->stack = fcont->saved_thread.stack; + } +#ifdef CAPTURE_JUST_VALID_VM_STACK + MEMCPY(th->stack, cont->vm_stack, VALUE, cont->vm_stack_slen); + MEMCPY(th->stack + sth->stack_size - cont->vm_stack_clen, + cont->vm_stack + cont->vm_stack_slen, VALUE, cont->vm_stack_clen); +#else + MEMCPY(th->stack, cont->vm_stack, VALUE, sth->stack_size); +#endif + } + else { + /* fiber */ + th->stack = sth->stack; + th->stack_size = sth->stack_size; + th->local_storage = sth->local_storage; + th->fiber = cont->self; + } + + th->cfp = sth->cfp; + th->safe_level = sth->safe_level; + th->raised_flag = sth->raised_flag; + th->state = sth->state; + th->status = sth->status; + th->tag = sth->tag; + th->trap_tag = sth->trap_tag; + th->errinfo = sth->errinfo; + th->first_proc = sth->first_proc; + + /* restore machine stack */ +#ifdef _M_AMD64 + { + /* workaround for x64 SEH */ + jmp_buf buf; + setjmp(buf); + ((_JUMP_BUFFER*)(&cont->jmpbuf))->Frame = + ((_JUMP_BUFFER*)(&buf))->Frame; + } +#endif + if (cont->machine_stack_src) { + FLUSH_REGISTER_WINDOWS; + MEMCPY(cont->machine_stack_src, cont->machine_stack, + VALUE, cont->machine_stack_size); + } + +#ifdef __ia64 + if (cont->machine_register_stack_src) { + MEMCPY(cont->machine_register_stack_src, cont->machine_register_stack, + VALUE, cont->machine_register_stack_size); + } +#endif + + ruby_longjmp(cont->jmpbuf, 1); +} + +NORETURN(NOINLINE(static void cont_restore_0(rb_context_t *, VALUE *))); + +#ifdef __ia64 +#define C(a) rse_##a##0, rse_##a##1, rse_##a##2, rse_##a##3, rse_##a##4 +#define E(a) rse_##a##0= rse_##a##1= rse_##a##2= rse_##a##3= rse_##a##4 +static volatile int C(a), C(b), C(c), C(d), C(e); +static volatile int C(f), C(g), C(h), C(i), C(j); +static volatile int 
C(k), C(l), C(m), C(n), C(o); +static volatile int C(p), C(q), C(r), C(s), C(t); +int rb_dummy_false = 0; +NORETURN(NOINLINE(static void register_stack_extend(rb_context_t *, VALUE *))); +static void +register_stack_extend(rb_context_t *cont, VALUE *curr_bsp) +{ + if (rb_dummy_false) { + /* use registers as much as possible */ + E(a) = E(b) = E(c) = E(d) = E(e) = + E(f) = E(g) = E(h) = E(i) = E(j) = + E(k) = E(l) = E(m) = E(n) = E(o) = + E(p) = E(q) = E(r) = E(s) = E(t) = 0; + E(a) = E(b) = E(c) = E(d) = E(e) = + E(f) = E(g) = E(h) = E(i) = E(j) = + E(k) = E(l) = E(m) = E(n) = E(o) = + E(p) = E(q) = E(r) = E(s) = E(t) = 0; + } + if (curr_bsp < cont->machine_register_stack_src+cont->machine_register_stack_size) { + register_stack_extend(cont, (VALUE*)rb_ia64_bsp()); + } + cont_restore_1(cont); +} +#undef C +#undef E +#endif + +static void +cont_restore_0(rb_context_t *cont, VALUE *addr_in_prev_frame) +{ + if (cont->machine_stack_src) { +#define STACK_PAD_SIZE 1024 + VALUE space[STACK_PAD_SIZE]; + +#if STACK_GROW_DIRECTION < 0 /* downward */ + if (addr_in_prev_frame > cont->machine_stack_src) { + cont_restore_0(cont, &space[0]); + } +#elif STACK_GROW_DIRECTION > 0 /* upward */ + if (addr_in_prev_frame < cont->machine_stack_src + cont->machine_stack_size) { + cont_restore_0(cont, &space[STACK_PAD_SIZE-1]); + } +#else + if (addr_in_prev_frame > &space[0]) { + /* Stack grows downward */ + if (addr_in_prev_frame > cont->machine_stack_src) { + cont_restore_0(cont, &space[0]); + } + } + else { + /* Stack grows upward */ + if (addr_in_prev_frame < cont->machine_stack_src + cont->machine_stack_size) { + cont_restore_0(cont, &space[STACK_PAD_SIZE-1]); + } + } +#endif + } +#ifdef __ia64 + register_stack_extend(cont, (VALUE*)rb_ia64_bsp()); +#else + cont_restore_1(cont); +#endif +} + +/* + * Document-class: Continuation + * + * Continuation objects are generated by + * Kernel#callcc. 
They hold a return address and execution + * context, allowing a nonlocal return to the end of the + * callcc block from anywhere within a program. + * Continuations are somewhat analogous to a structured version of C's + * setjmp/longjmp (although they contain more state, so + * you might consider them closer to threads). + * + * For instance: + * + * arr = [ "Freddie", "Herbie", "Ron", "Max", "Ringo" ] + * callcc{|$cc|} + * puts(message = arr.shift) + * $cc.call unless message =~ /Max/ + * + * produces: + * + * Freddie + * Herbie + * Ron + * Max + * + * This (somewhat contrived) example allows the inner loop to abandon + * processing early: + * + * callcc {|cont| + * for i in 0..4 + * print "\n#{i}: " + * for j in i*5...(i+1)*5 + * cont.call() if j == 17 + * printf "%3d", j + * end + * end + * } + * print "\n" + * + * produces: + * + * 0: 0 1 2 3 4 + * 1: 5 6 7 8 9 + * 2: 10 11 12 13 14 + * 3: 15 16 + */ + +/* + * call-seq: + * callcc {|cont| block } => obj + * + * Generates a Continuation object, which it passes to the + * associated block. Performing a cont.call will + * cause the callcc to return (as will falling through the + * end of the block). The value returned by the callcc is + * the value of the block, or the value passed to + * cont.call. See class Continuation + * for more details. Also see Kernel::throw for + * an alternative mechanism for unwinding a call stack. + */ + +static VALUE +rb_callcc(VALUE self) +{ + volatile int called; + volatile VALUE val = cont_capture(&called); + + if (called) { + return val; + } + else { + return rb_yield(val); + } +} + +static VALUE +make_passing_arg(int argc, VALUE *argv) +{ + switch(argc) { + case 0: + return Qnil; + case 1: + return argv[0]; + default: + return rb_ary_new4(argc, argv); + } +} + +/* + * call-seq: + * cont.call(args, ...) + * cont[args, ...] + * + * Invokes the continuation. The program continues from the end of the + * callcc block. If no arguments are given, the original + * callcc returns nil. 
If one argument is
+ *  given, callcc returns it. Otherwise, an array
+ *  containing args is returned.
+ *
+ *     callcc {|cont|  cont.call }           #=> nil
+ *     callcc {|cont|  cont.call 1 }         #=> 1
+ *     callcc {|cont|  cont.call 1, 2, 3 }   #=> [1, 2, 3]
+ */
+
+static VALUE
+rb_cont_call(int argc, VALUE *argv, VALUE contval)
+{
+    rb_context_t *cont;
+    rb_thread_t *th = GET_THREAD();
+    GetContPtr(contval, cont);
+
+    /* a continuation may only be invoked from the thread, trap context and
+     * fiber it was captured in; each mismatch raises RuntimeError */
+    if (cont->saved_thread.self != th->self) {
+        rb_raise(rb_eRuntimeError, "continuation called across threads");
+    }
+    if (cont->saved_thread.trap_tag != th->trap_tag) {
+        rb_raise(rb_eRuntimeError, "continuation called across trap");
+    }
+    if (cont->saved_thread.fiber) {
+        /* NOTE(review): fcont is never read afterwards; the lookup is
+         * presumably kept for the checking done by Data_Get_Struct --
+         * confirm before removing. */
+        rb_context_t *fcont;
+        GetContPtr(cont->saved_thread.fiber, fcont);
+
+        if (th->fiber != cont->saved_thread.fiber) {
+            rb_raise(rb_eRuntimeError, "continuation called across fiber");
+        }
+    }
+
+    /* stash the arguments where cont_capture() picks them up after the jump */
+    cont->argc = argc;
+    cont->value = make_passing_arg(argc, argv);
+
+    /* &contval supplies an address inside the current frame; cont_restore_0
+     * is declared NORETURN */
+    cont_restore_0(cont, &contval);
+    return Qnil; /* unreachable */
+}
+
+/*********/
+/* fiber */
+/*********/
+
+/*
+ *  Document-class: Fiber
+ *
+ *  Fibers are primitives for implementing light weight cooperative
+ *  concurrency in Ruby. Basically they are a means of creating code blocks
+ *  that can be paused and resumed, much like threads. The main difference
+ *  is that they are never preempted and that the scheduling must be done by
+ *  the programmer and not the VM.
+ *
+ *  As opposed to other stackless light weight concurrency models, each fiber
+ *  comes with a small 4KB stack. This enables the fiber to be paused from deeply
+ *  nested function calls within the fiber block.
+ *
+ *  When a fiber is created it will not run automatically. Rather it must
+ *  be explicitly asked to run using the Fiber#resume method.
+ *  The code running inside the fiber can give up control by calling
+ *  Fiber.yield in which case it yields control back to caller
+ *  (the caller of the Fiber#resume).
+ * + * Upon yielding or termination the Fiber returns the value of the last + * executed expression + * + * For instance: + * + * fiber = Fiber.new do + * Fiber.yield 1 + * 2 + * end + * + * puts fiber.resume + * puts fiber.resume + * puts fiber.resume + * + * produces + * + * 1 + * 2 + * FiberError: dead fiber called + * + * The Fiber#resume method accepts an arbitrary number of + * parameters; if it is the first call to resume then they + * will be passed as block arguments. Otherwise they will be the return + * value of the call to Fiber.yield + * + * Example: + * + * fiber = Fiber.new do |first| + * second = Fiber.yield first + 2 + * end + * + * puts fiber.resume 10 + * puts fiber.resume 14 + * puts fiber.resume 18 + * + * produces + * + * 12 + * 14 + * FiberError: dead fiber called + * + */ + +#define FIBER_VM_STACK_SIZE (4 * 1024) + +static VALUE +fiber_alloc(VALUE klass) +{ + return Data_Wrap_Struct(klass, fiber_mark, fiber_free, 0); +} + +static rb_fiber_t* +fiber_t_alloc(VALUE fibval) +{ + rb_fiber_t *fib = ALLOC(rb_fiber_t); + + memset(fib, 0, sizeof(rb_fiber_t)); + fib->cont.self = fibval; + fib->cont.type = FIBER_CONTEXT; + cont_init(&fib->cont); + fib->prev = Qnil; + fib->status = CREATED; + + DATA_PTR(fibval) = fib; + + return fib; +} + +static VALUE +fiber_init(VALUE fibval, VALUE proc) +{ + rb_fiber_t *fib = fiber_t_alloc(fibval); + rb_context_t *cont = &fib->cont; + rb_thread_t *th = &cont->saved_thread; + + fiber_link_join(fib); + + /* initialize cont */ + cont->vm_stack = 0; + + th->stack = 0; + th->stack_size = FIBER_VM_STACK_SIZE; + th->stack = ALLOC_N(VALUE, th->stack_size); + + th->cfp = (void *)(th->stack + th->stack_size); + th->cfp--; + th->cfp->pc = 0; + th->cfp->sp = th->stack + 1; + th->cfp->bp = 0; + th->cfp->lfp = th->stack; + *th->cfp->lfp = 0; + th->cfp->dfp = th->stack; + th->cfp->self = Qnil; + th->cfp->flag = 0; + th->cfp->iseq = 0; + th->cfp->proc = 0; + th->cfp->block_iseq = 0; + th->tag = 0; + th->local_storage =
st_init_numtable(); + + th->first_proc = proc; + + MEMCPY(&cont->jmpbuf, &th->root_jmpbuf, rb_jmpbuf_t, 1); + + return fibval; +} + +static VALUE +rb_fiber_init(VALUE fibval) +{ + return fiber_init(fibval, rb_block_proc()); +} + +VALUE +rb_fiber_new(VALUE (*func)(ANYARGS), VALUE obj) +{ + return fiber_init(fiber_alloc(rb_cFiber), rb_proc_new(func, obj)); +} + +static VALUE +return_fiber(void) +{ + rb_fiber_t *fib; + VALUE curr = rb_fiber_current(); + GetFiberPtr(curr, fib); + + if (fib->prev == Qnil) { + rb_thread_t *th = GET_THREAD(); + + if (th->root_fiber != curr) { + return th->root_fiber; + } + else { + rb_raise(rb_eFiberError, "can't yield from root fiber"); + } + } + else { + VALUE prev = fib->prev; + fib->prev = Qnil; + return prev; + } +} + +VALUE rb_fiber_transfer(VALUE fib, int argc, VALUE *argv); + +static void +rb_fiber_terminate(rb_fiber_t *fib) +{ + VALUE value = fib->cont.value; + fib->status = TERMINATED; + rb_fiber_transfer(return_fiber(), 1, &value); +} + +void +rb_fiber_start(void) +{ + rb_thread_t *th = GET_THREAD(); + rb_fiber_t *fib; + rb_context_t *cont; + rb_proc_t *proc; + int state; + + GetFiberPtr(th->fiber, fib); + cont = &fib->cont; + + TH_PUSH_TAG(th); + if ((state = EXEC_TAG()) == 0) { + int argc; + VALUE *argv, args; + GetProcPtr(cont->saved_thread.first_proc, proc); + args = cont->value; + argv = (argc = cont->argc) > 1 ? 
RARRAY_PTR(args) : &args; + cont->value = Qnil; + th->errinfo = Qnil; + th->local_lfp = proc->block.lfp; + th->local_svar = Qnil; + + fib->status = RUNNING; + cont->value = rb_vm_invoke_proc(th, proc, proc->block.self, argc, argv, 0); + } + TH_POP_TAG(); + + if (state) { + if (state == TAG_RAISE) { /* test the caught tag; a bare TAG_RAISE constant is presumably always true, which left the else branch dead */ + th->thrown_errinfo = th->errinfo; + } + else { + th->thrown_errinfo = + rb_vm_make_jump_tag_but_local_jump(state, th->errinfo); + } + RUBY_VM_SET_INTERRUPT(th); + } + + rb_fiber_terminate(fib); + rb_bug("rb_fiber_start: unreachable"); +} + +static rb_fiber_t * +root_fiber_alloc(rb_thread_t *th) +{ + rb_fiber_t *fib; + + /* no need to allocate vm stack */ + fib = fiber_t_alloc(fiber_alloc(rb_cFiber)); + fib->cont.type = ROOT_FIBER_CONTEXT; + fib->prev_fiber = fib->next_fiber = fib; + + return fib; +} + +VALUE +rb_fiber_current() +{ + rb_thread_t *th = GET_THREAD(); + if (th->fiber == 0) { + /* save root */ + rb_fiber_t *fib = root_fiber_alloc(th); + th->root_fiber = th->fiber = fib->cont.self; + } + return th->fiber; +} + +static VALUE +fiber_store(rb_fiber_t *next_fib) +{ + rb_thread_t *th = GET_THREAD(); + rb_fiber_t *fib; + + if (th->fiber) { + GetFiberPtr(th->fiber, fib); + fib->cont.saved_thread = *th; + } + else { + /* create current fiber */ + fib = root_fiber_alloc(th); + th->root_fiber = th->fiber = fib->cont.self; + } + + cont_save_machine_stack(th, &fib->cont); + + if (ruby_setjmp(fib->cont.jmpbuf)) { + /* restored */ + GetFiberPtr(th->fiber, fib); + if (fib->cont.argc == -1) rb_exc_raise(fib->cont.value); + return fib->cont.value; + } + else { + return Qundef; + } +} + +static inline VALUE +fiber_switch(VALUE fibval, int argc, VALUE *argv, int is_resume) +{ + VALUE value; + rb_fiber_t *fib; + rb_context_t *cont; + rb_thread_t *th = GET_THREAD(); + + GetFiberPtr(fibval, fib); + cont = &fib->cont; + + if (cont->saved_thread.self != th->self) { + rb_raise(rb_eFiberError, "fiber called across threads"); + } + else if (cont->saved_thread.trap_tag != th->trap_tag) { +
rb_raise(rb_eFiberError, "fiber called across trap"); + } + else if (fib->status == TERMINATED) { + value = rb_exc_new2(rb_eFiberError, "dead fiber called"); + if (th->fiber != fibval) rb_exc_raise(value); + fibval = fib->prev; + if (NIL_P(fibval)) fibval = th->root_fiber; + GetFiberPtr(fibval, fib); + cont = &fib->cont; + cont->argc = -1; + cont->value = value; + cont_restore_0(cont, &value); + } + + if (is_resume) { + fib->prev = rb_fiber_current(); + } + + cont->argc = argc; + cont->value = make_passing_arg(argc, argv); + + if ((value = fiber_store(fib)) == Qundef) { + cont_restore_0(cont, &value); + rb_bug("rb_fiber_resume: unreachable"); + } + + RUBY_VM_CHECK_INTS(); + + return value; +} + +VALUE +rb_fiber_transfer(VALUE fib, int argc, VALUE *argv) +{ + return fiber_switch(fib, argc, argv, 0); +} + +VALUE +rb_fiber_resume(VALUE fibval, int argc, VALUE *argv) +{ + rb_fiber_t *fib; + GetFiberPtr(fibval, fib); + + if (fib->prev != Qnil) { + rb_raise(rb_eFiberError, "double resume"); + } + + return fiber_switch(fibval, argc, argv, 1); +} + +VALUE +rb_fiber_yield(int argc, VALUE *argv) +{ + return rb_fiber_transfer(return_fiber(), argc, argv); +} + +/* + * call-seq: + * fiber.alive? -> true or false + * + * Returns true if the fiber can still be resumed (or transferred to). + * After finishing execution of the fiber block this method will always + * return false. + */ +VALUE +rb_fiber_alive_p(VALUE fibval) +{ + rb_fiber_t *fib; + GetFiberPtr(fibval, fib); + return fib->status != TERMINATED ? Qtrue : Qfalse; /* hand back a Ruby boolean VALUE, as documented, rather than a raw C int */ +} + +/* + * call-seq: + * fiber.resume(args, ...) -> obj + * + * Resumes the fiber from the point at which the last Fiber.yield + * was called, or starts running it if it is the first call to + * resume. Arguments passed to resume will be the value of + * the Fiber.yield expression or will be passed as block + * parameters to the fiber's block if this is the first resume.
+ * + * Alternatively, when resume is called it evaluates to the arguments passed + * to the next Fiber.yield statement inside the fiber's block + * or to the block value if it runs to completion without any + * Fiber.yield + */ +static VALUE +rb_fiber_m_resume(int argc, VALUE *argv, VALUE fib) +{ + return rb_fiber_resume(fib, argc, argv); +} + +/* + * call-seq: + * fiber.transfer(args, ...) -> obj + * + * Transfer control to another fiber, resuming it from where it last + * stopped or starting it if it was not resumed before. The calling + * fiber will be suspended much like in a call to Fiber.yield. + * + * The fiber which receives the transfer call treats it much like + * a resume call. Arguments passed to transfer are treated like those + * passed to resume. + * + * You cannot resume a fiber that transferred control to another one. + * This will cause a double resume error. You need to transfer control + * back to this fiber before it can yield and resume. + */ +static VALUE +rb_fiber_m_transfer(int argc, VALUE *argv, VALUE fib) +{ + return rb_fiber_transfer(fib, argc, argv); +} + +/* + * call-seq: + * Fiber.yield(args, ...) -> obj + * + * Yields control back to the context that resumed the fiber, passing + * along any arguments that were passed to it. The fiber will resume + * processing at this point when resume is called next. + * Any arguments passed to the next resume will be the + * value that this Fiber.yield expression evaluates to. + */ +static VALUE +rb_fiber_s_yield(int argc, VALUE *argv, VALUE klass) +{ + return rb_fiber_yield(argc, argv); +} + +/* + * call-seq: + * Fiber.current() -> fiber + * + * Returns the current fiber. You need to require 'fiber' + * before using this method. If you are not running in the context of + * a fiber this method will return the root fiber.
+ */ +static VALUE +rb_fiber_s_current(VALUE klass) +{ + return rb_fiber_current(); +} + +void +Init_Cont(void) +{ + rb_cFiber = rb_define_class("Fiber", rb_cObject); + rb_define_alloc_func(rb_cFiber, fiber_alloc); + rb_eFiberError = rb_define_class("FiberError", rb_eStandardError); + rb_define_singleton_method(rb_cFiber, "yield", rb_fiber_s_yield, -1); + rb_define_method(rb_cFiber, "initialize", rb_fiber_init, 0); + rb_define_method(rb_cFiber, "resume", rb_fiber_m_resume, -1); +} + +void +ruby_Init_Continuation_body(void) +{ + rb_cContinuation = rb_define_class("Continuation", rb_cObject); + rb_undef_alloc_func(rb_cContinuation); + rb_undef_method(CLASS_OF(rb_cContinuation), "new"); + rb_define_method(rb_cContinuation, "call", rb_cont_call, -1); + rb_define_method(rb_cContinuation, "[]", rb_cont_call, -1); + rb_define_global_function("callcc", rb_callcc, 0); +} + +void +ruby_Init_Fiber_as_Coroutine(void) +{ + rb_define_method(rb_cFiber, "transfer", rb_fiber_m_transfer, -1); + rb_define_method(rb_cFiber, "alive?", rb_fiber_alive_p, 0); + rb_define_singleton_method(rb_cFiber, "current", rb_fiber_s_current, 0); +} diff --git a/debug.c b/debug.c new file mode 100644 index 0000000..5f8fe9c --- /dev/null +++ b/debug.c @@ -0,0 +1,159 @@ +/********************************************************************** + + debug.c - + + $Author: yugui $ + created at: 04/08/25 02:31:54 JST + + Copyright (C) 2004-2007 Koichi Sasada + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/encoding.h" +#include "ruby/util.h" +#include "debug.h" +#include "eval_intern.h" +#include "vm_core.h" + +/* for gdb */ +const union { + enum ruby_special_consts special_consts; + enum ruby_value_type value_type; + enum ruby_tag_type tag_type; + enum node_type node_type; + enum { + RUBY_ENCODING_INLINE_MAX = ENCODING_INLINE_MAX, + RUBY_ENCODING_SHIFT = ENCODING_SHIFT, + RUBY_ENCODING_MASK = ENCODING_MASK, + RUBY_ENC_CODERANGE_MASK = 
ENC_CODERANGE_MASK, + RUBY_ENC_CODERANGE_UNKNOWN = ENC_CODERANGE_UNKNOWN, + RUBY_ENC_CODERANGE_7BIT = ENC_CODERANGE_7BIT, + RUBY_ENC_CODERANGE_VALID = ENC_CODERANGE_VALID, + RUBY_ENC_CODERANGE_BROKEN = ENC_CODERANGE_BROKEN, + RUBY_FL_MARK = FL_MARK, + RUBY_FL_RESERVED = FL_RESERVED, + RUBY_FL_FINALIZE = FL_FINALIZE, + RUBY_FL_TAINT = FL_TAINT, + RUBY_FL_UNTRUSTED = FL_UNTRUSTED, + RUBY_FL_EXIVAR = FL_EXIVAR, + RUBY_FL_FREEZE = FL_FREEZE, + RUBY_FL_SINGLETON = FL_SINGLETON, + RUBY_FL_USER0 = FL_USER0, + RUBY_FL_USER1 = FL_USER1, + RUBY_FL_USER2 = FL_USER2, + RUBY_FL_USER3 = FL_USER3, + RUBY_FL_USER4 = FL_USER4, + RUBY_FL_USER5 = FL_USER5, + RUBY_FL_USER6 = FL_USER6, + RUBY_FL_USER7 = FL_USER7, + RUBY_FL_USER8 = FL_USER8, + RUBY_FL_USER9 = FL_USER9, + RUBY_FL_USER10 = FL_USER10, + RUBY_FL_USER11 = FL_USER11, + RUBY_FL_USER12 = FL_USER12, + RUBY_FL_USER13 = FL_USER13, + RUBY_FL_USER14 = FL_USER14, + RUBY_FL_USER15 = FL_USER15, + RUBY_FL_USER16 = FL_USER16, + RUBY_FL_USER17 = FL_USER17, + RUBY_FL_USER18 = FL_USER18, + RUBY_FL_USHIFT = FL_USHIFT, + RUBY_NODE_TYPESHIFT = NODE_TYPESHIFT, + RUBY_NODE_TYPEMASK = NODE_TYPEMASK, + RUBY_NODE_LSHIFT = NODE_LSHIFT, + RUBY_NODE_LMASK = NODE_LMASK, + RUBY_NODE_FL_NEWLINE = NODE_FL_NEWLINE + } various; +} ruby_dummy_gdb_enums; + +const VALUE RUBY_FL_USER19 = FL_USER19; + +int +ruby_debug_print_indent(int level, int debug_level, int indent_level) +{ + if (level < debug_level) { + fprintf(stderr, "%*s", indent_level, ""); + fflush(stderr); + return Qtrue; + } + return Qfalse; +} + +void +ruby_debug_printf(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); +} + +VALUE +ruby_debug_print_value(int level, int debug_level, const char *header, VALUE obj) +{ + if (level < debug_level) { + VALUE str; + str = rb_inspect(obj); + fprintf(stderr, "DBG> %s: %s\n", header, + obj == -1 ? 
"" : StringValueCStr(str)); + fflush(stderr); + } + return obj; +} + +void +ruby_debug_print_v(VALUE v) +{ + ruby_debug_print_value(0, 1, "", v); +} + +ID +ruby_debug_print_id(int level, int debug_level, const char *header, ID id) +{ + if (level < debug_level) { + fprintf(stderr, "DBG> %s: %s\n", header, rb_id2name(id)); + fflush(stderr); + } + return id; +} + +NODE * +ruby_debug_print_node(int level, int debug_level, const char *header, const NODE *node) +{ + if (level < debug_level) { + fprintf(stderr, "DBG> %s: %s (%lu)\n", header, + ruby_node_name(nd_type(node)), nd_line(node)); + } + return (NODE *)node; +} + +void +ruby_debug_breakpoint(void) +{ + /* */ +} + +#ifdef RUBY_DEBUG_ENV +static void +set_debug_option(const char *str, int len, void *arg) +{ +#define SET_WHEN(name, var) do { \ + if (len == sizeof(name) - 1 && \ + strncmp(str, name, len) == 0) { \ + extern int var; \ + var = 1; \ + return; \ + } \ + } while (0) + SET_WHEN("gc_stress", *ruby_initial_gc_stress_ptr); + SET_WHEN("core", ruby_enable_coredump); + fprintf(stderr, "unexpected debug option: %.*s\n", len, str); +} + +void +ruby_set_debug_option(const char *str) +{ + ruby_each_words(str, set_debug_option, 0); +} +#endif diff --git a/debug.h b/debug.h new file mode 100644 index 0000000..98caab1 --- /dev/null +++ b/debug.h @@ -0,0 +1,36 @@ +/********************************************************************** + + debug.h - YARV Debug function interface + + $Author: ko1 $ + created at: 04/08/25 02:33:49 JST + + Copyright (C) 2004-2007 Koichi Sasada + +**********************************************************************/ + +#ifndef RUBY_DEBUG_H +#define RUBY_DEBUG_H + +#include "ruby/ruby.h" +#include "node.h" + +#define dpv(h,v) ruby_debug_print_value(-1, 0, h, v) +#define dp(v) ruby_debug_print_value(-1, 0, "", v) +#define dpi(i) ruby_debug_print_id(-1, 0, "", i) +#define dpn(n) ruby_debug_print_node(-1, 0, "", n) + +#define bp() ruby_debug_breakpoint() + +VALUE ruby_debug_print_value(int 
level, int debug_level, const char *header, VALUE v); +ID ruby_debug_print_id(int level, int debug_level, const char *header, ID id); +NODE *ruby_debug_print_node(int level, int debug_level, const char *header, const NODE *node); +int ruby_debug_print_indent(int level, int debug_level, int indent_level); +void ruby_debug_breakpoint(void); +void ruby_debug_gc_check_func(void); + +#ifdef RUBY_DEBUG_ENV +void ruby_set_debug_option(const char *str); +#endif + +#endif /* RUBY_DEBUG_H */ diff --git a/defs/keywords b/defs/keywords new file mode 100644 index 0000000..f0d82a7 --- /dev/null +++ b/defs/keywords @@ -0,0 +1,51 @@ +%{ +struct kwtable {const char *name; int id[2]; enum lex_state_e state;}; +const struct kwtable *rb_reserved_word(const char *, unsigned int); +#ifndef RIPPER +%} + +struct kwtable; +%% +__ENCODING__, {keyword__ENCODING__, keyword__ENCODING__}, EXPR_END +__LINE__, {keyword__LINE__, keyword__LINE__}, EXPR_END +__FILE__, {keyword__FILE__, keyword__FILE__}, EXPR_END +BEGIN, {keyword_BEGIN, keyword_BEGIN}, EXPR_END +END, {keyword_END, keyword_END}, EXPR_END +alias, {keyword_alias, keyword_alias}, EXPR_FNAME +and, {keyword_and, keyword_and}, EXPR_VALUE +begin, {keyword_begin, keyword_begin}, EXPR_BEG +break, {keyword_break, keyword_break}, EXPR_MID +case, {keyword_case, keyword_case}, EXPR_VALUE +class, {keyword_class, keyword_class}, EXPR_CLASS +def, {keyword_def, keyword_def}, EXPR_FNAME +defined?, {keyword_defined, keyword_defined}, EXPR_ARG +do, {keyword_do, keyword_do}, EXPR_BEG +else, {keyword_else, keyword_else}, EXPR_BEG +elsif, {keyword_elsif, keyword_elsif}, EXPR_VALUE +end, {keyword_end, keyword_end}, EXPR_END +ensure, {keyword_ensure, keyword_ensure}, EXPR_BEG +false, {keyword_false, keyword_false}, EXPR_END +for, {keyword_for, keyword_for}, EXPR_VALUE +if, {keyword_if, modifier_if}, EXPR_VALUE +in, {keyword_in, keyword_in}, EXPR_VALUE +module, {keyword_module, keyword_module}, EXPR_VALUE +next, {keyword_next, keyword_next}, EXPR_MID +nil, 
{keyword_nil, keyword_nil}, EXPR_END +not, {keyword_not, keyword_not}, EXPR_ARG +or, {keyword_or, keyword_or}, EXPR_VALUE +redo, {keyword_redo, keyword_redo}, EXPR_END +rescue, {keyword_rescue, modifier_rescue}, EXPR_MID +retry, {keyword_retry, keyword_retry}, EXPR_END +return, {keyword_return, keyword_return}, EXPR_MID +self, {keyword_self, keyword_self}, EXPR_END +super, {keyword_super, keyword_super}, EXPR_ARG +then, {keyword_then, keyword_then}, EXPR_BEG +true, {keyword_true, keyword_true}, EXPR_END +undef, {keyword_undef, keyword_undef}, EXPR_FNAME +unless, {keyword_unless, modifier_unless}, EXPR_VALUE +until, {keyword_until, modifier_until}, EXPR_VALUE +when, {keyword_when, keyword_when}, EXPR_VALUE +while, {keyword_while, modifier_while}, EXPR_VALUE +yield, {keyword_yield, keyword_yield}, EXPR_ARG +%% +#endif diff --git a/defs/known_errors.def b/defs/known_errors.def new file mode 100644 index 0000000..9f09aa9 --- /dev/null +++ b/defs/known_errors.def @@ -0,0 +1,122 @@ +EPERM +ENOENT +ESRCH +EINTR +EIO +ENXIO +E2BIG +ENOEXEC +EBADF +ECHILD +EAGAIN +ENOMEM +EACCES +EFAULT +ENOTBLK +EBUSY +EEXIST +EXDEV +ENODEV +ENOTDIR +EISDIR +EINVAL +ENFILE +EMFILE +ENOTTY +ETXTBSY +EFBIG +ENOSPC +ESPIPE +EROFS +EMLINK +EPIPE +EDOM +ERANGE +EDEADLK +ENAMETOOLONG +ENOLCK +ENOSYS +ENOTEMPTY +ELOOP +EWOULDBLOCK +ENOMSG +EIDRM +ECHRNG +EL2NSYNC +EL3HLT +EL3RST +ELNRNG +EUNATCH +ENOCSI +EL2HLT +EBADE +EBADR +EXFULL +ENOANO +EBADRQC +EBADSLT +EDEADLOCK +EBFONT +ENOSTR +ENODATA +ETIME +ENOSR +ENONET +ENOPKG +EREMOTE +ENOLINK +EADV +ESRMNT +ECOMM +EPROTO +EMULTIHOP +EDOTDOT +EBADMSG +EOVERFLOW +ENOTUNIQ +EBADFD +EREMCHG +ELIBACC +ELIBBAD +ELIBSCN +ELIBMAX +ELIBEXEC +EILSEQ +ERESTART +ESTRPIPE +EUSERS +ENOTSOCK +EDESTADDRREQ +EMSGSIZE +EPROTOTYPE +ENOPROTOOPT +EPROTONOSUPPORT +ESOCKTNOSUPPORT +EOPNOTSUPP +EPFNOSUPPORT +EAFNOSUPPORT +EADDRINUSE +EADDRNOTAVAIL +ENETDOWN +ENETUNREACH +ENETRESET +ECONNABORTED +ECONNRESET +ENOBUFS +EISCONN +ENOTCONN +ESHUTDOWN +ETOOMANYREFS +ETIMEDOUT 
+ECONNREFUSED +EHOSTDOWN +EHOSTUNREACH +EALREADY +EINPROGRESS +ESTALE +EUCLEAN +ENOTNAM +ENAVAIL +EISNAM +EREMOTEIO +EDQUOT diff --git a/defs/lex.c.src b/defs/lex.c.src new file mode 100644 index 0000000..f0d82a7 --- /dev/null +++ b/defs/lex.c.src @@ -0,0 +1,51 @@ +%{ +struct kwtable {const char *name; int id[2]; enum lex_state_e state;}; +const struct kwtable *rb_reserved_word(const char *, unsigned int); +#ifndef RIPPER +%} + +struct kwtable; +%% +__ENCODING__, {keyword__ENCODING__, keyword__ENCODING__}, EXPR_END +__LINE__, {keyword__LINE__, keyword__LINE__}, EXPR_END +__FILE__, {keyword__FILE__, keyword__FILE__}, EXPR_END +BEGIN, {keyword_BEGIN, keyword_BEGIN}, EXPR_END +END, {keyword_END, keyword_END}, EXPR_END +alias, {keyword_alias, keyword_alias}, EXPR_FNAME +and, {keyword_and, keyword_and}, EXPR_VALUE +begin, {keyword_begin, keyword_begin}, EXPR_BEG +break, {keyword_break, keyword_break}, EXPR_MID +case, {keyword_case, keyword_case}, EXPR_VALUE +class, {keyword_class, keyword_class}, EXPR_CLASS +def, {keyword_def, keyword_def}, EXPR_FNAME +defined?, {keyword_defined, keyword_defined}, EXPR_ARG +do, {keyword_do, keyword_do}, EXPR_BEG +else, {keyword_else, keyword_else}, EXPR_BEG +elsif, {keyword_elsif, keyword_elsif}, EXPR_VALUE +end, {keyword_end, keyword_end}, EXPR_END +ensure, {keyword_ensure, keyword_ensure}, EXPR_BEG +false, {keyword_false, keyword_false}, EXPR_END +for, {keyword_for, keyword_for}, EXPR_VALUE +if, {keyword_if, modifier_if}, EXPR_VALUE +in, {keyword_in, keyword_in}, EXPR_VALUE +module, {keyword_module, keyword_module}, EXPR_VALUE +next, {keyword_next, keyword_next}, EXPR_MID +nil, {keyword_nil, keyword_nil}, EXPR_END +not, {keyword_not, keyword_not}, EXPR_ARG +or, {keyword_or, keyword_or}, EXPR_VALUE +redo, {keyword_redo, keyword_redo}, EXPR_END +rescue, {keyword_rescue, modifier_rescue}, EXPR_MID +retry, {keyword_retry, keyword_retry}, EXPR_END +return, {keyword_return, keyword_return}, EXPR_MID +self, {keyword_self, keyword_self}, 
EXPR_END +super, {keyword_super, keyword_super}, EXPR_ARG +then, {keyword_then, keyword_then}, EXPR_BEG +true, {keyword_true, keyword_true}, EXPR_END +undef, {keyword_undef, keyword_undef}, EXPR_FNAME +unless, {keyword_unless, modifier_unless}, EXPR_VALUE +until, {keyword_until, modifier_until}, EXPR_VALUE +when, {keyword_when, keyword_when}, EXPR_VALUE +while, {keyword_while, modifier_while}, EXPR_VALUE +yield, {keyword_yield, keyword_yield}, EXPR_ARG +%% +#endif diff --git a/defs/opt_insn_unif.def b/defs/opt_insn_unif.def new file mode 100644 index 0000000..1ac613d --- /dev/null +++ b/defs/opt_insn_unif.def @@ -0,0 +1,29 @@ +# +# a definition of instruction unification +# +# + +__END__ + +putobject putobject +putobject putstring +putobject setlocal +putobject setdynamic + +putstring putstring +putstring putobject +putstring setlocal +putstring setdynamic + +# putnil end + +dup setlocal + +# from tarai +getlocal getlocal +# getlocal send + +# from tak, ackermann +getlocal putobject + + diff --git a/defs/opt_operand.def b/defs/opt_operand.def new file mode 100644 index 0000000..5ca1d74 --- /dev/null +++ b/defs/opt_operand.def @@ -0,0 +1,59 @@ +# +# configration file for operand union optimization +# +# format: +# [insn name] op1, op2 ... 
+# +# wildcard: * +# + +__END__ + +getlocal 2 +getlocal 3 +getlocal 4 + +setlocal 2 +setlocal 3 +setlocal 4 + +getdynamic *, 0 +getdynamic 1, 0 +getdynamic 2, 0 +getdynamic 3, 0 +getdynamic 4, 0 + +setdynamic *, 0 +setdynamic 1, 0 +setdynamic 2, 0 +setdynamic 3, 0 +setdynamic 4, 0 + +putobject INT2FIX(0) +putobject INT2FIX(1) +putobject Qtrue +putobject Qfalse + +# CALL +send *, *, Qfalse, 0, * +send *, 0, Qfalse, 0, * +send *, 1, Qfalse, 0, * +send *, 2, Qfalse, 0, * +send *, 3, Qfalse, 0, * + +# FCALL +send *, *, Qfalse, 0x04, * +send *, 0, Qfalse, 0x04, * +send *, 1, Qfalse, 0x04, * +send *, 2, Qfalse, 0x04, * +send *, 3, Qfalse, 0x04, * + +# VCALL +send *, 0, Qfalse, 0x0c, * + + +__END__ + + + + diff --git a/dir.c b/dir.c new file mode 100644 index 0000000..7fc12ea --- /dev/null +++ b/dir.c @@ -0,0 +1,1976 @@ +/********************************************************************** + + dir.c - + + $Author: usa $ + created at: Wed Jan 5 09:51:01 JST 1994 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. + Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/encoding.h" + +#include +#include + +#ifdef HAVE_UNISTD_H +#include +#endif + +#if defined HAVE_DIRENT_H && !defined _WIN32 +# include +# define NAMLEN(dirent) strlen((dirent)->d_name) +#elif defined HAVE_DIRECT_H && !defined _WIN32 +# include +# define NAMLEN(dirent) strlen((dirent)->d_name) +#else +# define dirent direct +# if !defined __NeXT__ +# define NAMLEN(dirent) (dirent)->d_namlen +# else +# /* On some versions of NextStep, d_namlen is always zero, so avoid it. 
*/ +# define NAMLEN(dirent) strlen((dirent)->d_name) +# endif +# if HAVE_SYS_NDIR_H +# include +# endif +# if HAVE_SYS_DIR_H +# include +# endif +# if HAVE_NDIR_H +# include +# endif +# ifdef _WIN32 +# include "win32/dir.h" +# endif +#endif + +#include + +#ifndef HAVE_STDLIB_H +char *getenv(); +#endif + +#ifndef HAVE_STRING_H +char *strchr(char*,char); +#endif + +#include + +#include "ruby/util.h" + +#if !defined HAVE_LSTAT && !defined lstat +#define lstat stat +#endif + +#define FNM_NOESCAPE 0x01 +#define FNM_PATHNAME 0x02 +#define FNM_DOTMATCH 0x04 +#define FNM_CASEFOLD 0x08 +#if CASEFOLD_FILESYSTEM +#define FNM_SYSCASE FNM_CASEFOLD +#else +#define FNM_SYSCASE 0 +#endif + +#define FNM_NOMATCH 1 +#define FNM_ERROR 2 + +# define Next(p, e, enc) (p + rb_enc_mbclen(p, e, enc)) +# define Inc(p, e, enc) ((p) = Next(p, e, enc)) + +static char * +bracket( + const char *p, /* pattern (next to '[') */ + const char *pend, + const char *s, /* string */ + const char *send, + int flags, + rb_encoding *enc) +{ + const int nocase = flags & FNM_CASEFOLD; + const int escape = !(flags & FNM_NOESCAPE); + unsigned int c1, c2; + int r; + int ok = 0, not = 0; + + if (*p == '!' 
|| *p == '^') { + not = 1; + p++; + } + + while (*p != ']') { + const char *t1 = p; + if (escape && *t1 == '\\') + t1++; + if (!*t1) + return NULL; + p = t1 + (r = rb_enc_mbclen(t1, pend, enc)); + if (p[0] == '-' && p[1] != ']') { + const char *t2 = p + 1; + int r2; + if (escape && *t2 == '\\') + t2++; + if (!*t2) + return NULL; + p = t2 + (r2 = rb_enc_mbclen(t2, pend, enc)); + if (ok) continue; + if ((r <= (send-s) && memcmp(t1, s, r) == 0) || + (r2 <= (send-s) && memcmp(t2, s, r2) == 0)) { /* the upper bound at t2 is r2 bytes long, so compare over r2, not r */ + ok = 1; + continue; + } + c1 = rb_enc_codepoint(s, send, enc); + if (nocase) c1 = rb_enc_toupper(c1, enc); + c2 = rb_enc_codepoint(t1, pend, enc); + if (nocase) c2 = rb_enc_toupper(c2, enc); + if (c1 < c2) continue; + c2 = rb_enc_codepoint(t2, pend, enc); + if (nocase) c2 = rb_enc_toupper(c2, enc); + if (c1 > c2) continue; + } + else { + if (ok) continue; + if (r <= (send-s) && memcmp(t1, s, r) == 0) { + ok = 1; + continue; + } + if (!nocase) continue; + c1 = rb_enc_toupper(rb_enc_codepoint(s, send, enc), enc); + c2 = rb_enc_toupper(rb_enc_codepoint(t1, pend, enc), enc); /* fold the set member at t1; p has already advanced past it */ + if (c1 != c2) continue; + } + ok = 1; + } + + return ok == not ? NULL : (char *)p + 1; +} + +/* If FNM_PATHNAME is set, only path element will be matched. (up to '/' or '\0') + Otherwise, entire string will be matched. + End marker itself won't be compared. + And if function succeeds, *pcur reaches end marker. +*/ +#define UNESCAPE(p) (escape && *(p) == '\\' ?
(p) + 1 : (p)) +#define ISEND(p) (!*(p) || (pathname && *(p) == '/')) +#define RETURN(val) return *pcur = p, *scur = s, (val); + +static int +fnmatch_helper( + const char **pcur, /* pattern */ + const char **scur, /* string */ + int flags, + rb_encoding *enc) +{ + const int period = !(flags & FNM_DOTMATCH); + const int pathname = flags & FNM_PATHNAME; + const int escape = !(flags & FNM_NOESCAPE); + const int nocase = flags & FNM_CASEFOLD; + + const char *ptmp = 0; + const char *stmp = 0; + + const char *p = *pcur; + const char *pend = p + strlen(p); + const char *s = *scur; + const char *send = s + strlen(s); + + int r; + + if (period && *s == '.' && *UNESCAPE(p) != '.') /* leading period */ + RETURN(FNM_NOMATCH); + + while (1) { + switch (*p) { + case '*': + do { p++; } while (*p == '*'); + if (ISEND(UNESCAPE(p))) { + p = UNESCAPE(p); + RETURN(0); + } + if (ISEND(s)) + RETURN(FNM_NOMATCH); + ptmp = p; + stmp = s; + continue; + + case '?': + if (ISEND(s)) + RETURN(FNM_NOMATCH); + p++; + Inc(s, send, enc); + continue; + + case '[': { + const char *t; + if (ISEND(s)) + RETURN(FNM_NOMATCH); + if ((t = bracket(p + 1, pend, s, send, flags, enc)) != 0) { + p = t; + Inc(s, send, enc); + continue; + } + goto failed; + } + } + + /* ordinary */ + p = UNESCAPE(p); + if (ISEND(s)) + RETURN(ISEND(p) ? 
0 : FNM_NOMATCH); + if (ISEND(p)) + goto failed; + r = rb_enc_precise_mbclen(p, pend, enc); + if (!MBCLEN_CHARFOUND_P(r)) + goto failed; + if (r <= (send-s) && memcmp(p, s, r) == 0) { + p += r; + s += r; + continue; + } + if (!nocase) goto failed; + if (rb_enc_toupper(rb_enc_codepoint(p, pend, enc), enc) != + rb_enc_toupper(rb_enc_codepoint(s, send, enc), enc)) + goto failed; + p += r; + Inc(s, send, enc); + continue; + + failed: /* try next '*' position */ + if (ptmp && stmp) { + p = ptmp; + Inc(stmp, send, enc); /* !ISEND(*stmp) */ + s = stmp; + continue; + } + RETURN(FNM_NOMATCH); + } +} + +static int +fnmatch( + const char *pattern, + rb_encoding *enc, + const char *string, + int flags) +{ + const char *p = pattern; + const char *s = string; + const char *send = s + strlen(string); + const int period = !(flags & FNM_DOTMATCH); + const int pathname = flags & FNM_PATHNAME; + + const char *ptmp = 0; + const char *stmp = 0; + + if (pathname) { + while (1) { + if (p[0] == '*' && p[1] == '*' && p[2] == '/') { + do { p += 3; } while (p[0] == '*' && p[1] == '*' && p[2] == '/'); + ptmp = p; + stmp = s; + } + if (fnmatch_helper(&p, &s, flags, enc) == 0) { + while (*s && *s != '/') Inc(s, send, enc); + if (*p && *s) { + p++; + s++; + continue; + } + if (!*p && !*s) + return 0; + } + /* failed : try next recursion */ + if (ptmp && stmp && !(period && *stmp == '.')) { + while (*stmp && *stmp != '/') Inc(stmp, send, enc); + if (*stmp) { + p = ptmp; + stmp++; + s = stmp; + continue; + } + } + return FNM_NOMATCH; + } + } + else + return fnmatch_helper(&p, &s, flags, enc); +} + +VALUE rb_cDir; + +struct dir_data { + DIR *dir; + VALUE path; + rb_encoding *enc; +}; + +static void +mark_dir(struct dir_data *dir) +{ + rb_gc_mark(dir->path); +} + +static void +free_dir(struct dir_data *dir) +{ + if (dir) { + if (dir->dir) closedir(dir->dir); + } + xfree(dir); +} + +static VALUE dir_close(VALUE); + +static VALUE +dir_s_alloc(VALUE klass) +{ + struct dir_data *dirp; + VALUE obj = 
Data_Make_Struct(klass, struct dir_data, mark_dir, free_dir, dirp); + + dirp->dir = NULL; + dirp->path = Qnil; + dirp->enc = NULL; + + return obj; +} + +/* + * call-seq: + * Dir.new( string ) -> aDir + * + * Returns a new directory object for the named directory. + */ +static VALUE +dir_initialize(int argc, VALUE *argv, VALUE dir) +{ + struct dir_data *dp; + rb_encoding *fsenc; + VALUE dirname, opt; + static VALUE sym_enc; + + if (!sym_enc) { + sym_enc = ID2SYM(rb_intern("encoding")); + } + fsenc = rb_filesystem_encoding(); + + rb_scan_args(argc, argv, "11", &dirname, &opt); + + if (!NIL_P(opt)) { + VALUE v, enc=Qnil; + opt = rb_convert_type(opt, T_HASH, "Hash", "to_hash"); + + v = rb_hash_aref(opt, sym_enc); + if (!NIL_P(v)) enc = v; + + if (!NIL_P(enc)) { + fsenc = rb_to_encoding(enc); + } + } + + FilePathValue(dirname); + + Data_Get_Struct(dir, struct dir_data, dp); + if (dp->dir) closedir(dp->dir); + dp->dir = NULL; + dp->path = Qnil; + dp->enc = fsenc; + dp->dir = opendir(RSTRING_PTR(dirname)); + if (dp->dir == NULL) { + if (errno == EMFILE || errno == ENFILE) { + rb_gc(); + dp->dir = opendir(RSTRING_PTR(dirname)); + } + if (dp->dir == NULL) { + rb_sys_fail(RSTRING_PTR(dirname)); + } + } + dp->path = rb_str_dup_frozen(dirname); + + return dir; +} + +/* + * call-seq: + * Dir.open( string ) => aDir + * Dir.open( string ) {| aDir | block } => anObject + * + * With no block, open is a synonym for + * Dir::new. If a block is present, it is passed + * aDir as a parameter. The directory is closed at the end of + * the block, and Dir::open returns the value of the + * block. 
+ */ +static VALUE +dir_s_open(int argc, VALUE *argv, VALUE klass) +{ + struct dir_data *dp; + VALUE dir = Data_Make_Struct(klass, struct dir_data, mark_dir, free_dir, dp); + + dir_initialize(argc, argv, dir); + if (rb_block_given_p()) { + return rb_ensure(rb_yield, dir, dir_close, dir); + } + + return dir; +} + +static void +dir_closed(void) +{ + rb_raise(rb_eIOError, "closed directory"); +} + +static void +dir_check(VALUE dir) +{ + if (!OBJ_UNTRUSTED(dir) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: operation on trusted Dir"); + rb_check_frozen(dir); +} + +#define GetDIR(obj, dirp) do {\ + dir_check(obj); /* check the macro argument itself instead of capturing a caller-scope variable named dir */\ + Data_Get_Struct(obj, struct dir_data, dirp);\ + if ((dirp)->dir == NULL) dir_closed();\ +} while (0) + + +/* + * call-seq: + * dir.inspect => string + * + * Return a string describing this Dir object. + */ +static VALUE +dir_inspect(VALUE dir) +{ + struct dir_data *dirp; + + Data_Get_Struct(dir, struct dir_data, dirp); + if (!NIL_P(dirp->path)) { + const char *c = rb_obj_classname(dir); + return rb_sprintf("#<%s:%s>", c, RSTRING_PTR(dirp->path)); + } + return rb_funcall(dir, rb_intern("to_s"), 0, 0); +} + +/* + * call-seq: + * dir.path => string or nil + * + * Returns the path parameter passed to dir's constructor. + * + * d = Dir.new("..") + * d.path #=> ".."
+ */ +static VALUE +dir_path(VALUE dir) +{ + struct dir_data *dirp; + + Data_Get_Struct(dir, struct dir_data, dirp); + if (NIL_P(dirp->path)) return Qnil; + return rb_str_dup(dirp->path); +} + +#if defined HAVE_READDIR_R +# define READDIR(dir, enc, entry, dp) (readdir_r(dir, entry, &(dp)) == 0 && dp != 0) +#else +# define READDIR(dir, enc, entry, dp) ((dp = readdir(dir)) != 0) +#endif +#if defined HAVE_READDIR_R +# define IF_HAVE_READDIR_R(something) something +#else +# define IF_HAVE_READDIR_R(something) /* nothing */ +#endif + +#if defined SIZEOF_STRUCT_DIRENT_TOO_SMALL +# include +# define NAME_MAX_FOR_STRUCT_DIRENT 255 +# if defined NAME_MAX +# if NAME_MAX_FOR_STRUCT_DIRENT < NAME_MAX +# undef NAME_MAX_FOR_STRUCT_DIRENT +# define NAME_MAX_FOR_STRUCT_DIRENT NAME_MAX +# endif +# endif +# if defined _POSIX_NAME_MAX +# if NAME_MAX_FOR_STRUCT_DIRENT < _POSIX_NAME_MAX +# undef NAME_MAX_FOR_STRUCT_DIRENT +# define NAME_MAX_FOR_STRUCT_DIRENT _POSIX_NAME_MAX +# endif +# endif +# if defined _XOPEN_NAME_MAX +# if NAME_MAX_FOR_STRUCT_DIRENT < _XOPEN_NAME_MAX +# undef NAME_MAX_FOR_STRUCT_DIRENT +# define NAME_MAX_FOR_STRUCT_DIRENT _XOPEN_NAME_MAX +# endif +# endif +# define DEFINE_STRUCT_DIRENT \ + union { \ + struct dirent dirent; \ + char dummy[offsetof(struct dirent, d_name) + \ + NAME_MAX_FOR_STRUCT_DIRENT + 1]; \ + } +# define STRUCT_DIRENT(entry) ((entry).dirent) +#else +# define DEFINE_STRUCT_DIRENT struct dirent +# define STRUCT_DIRENT(entry) (entry) +#endif + +/* + * call-seq: + * dir.read => string or nil + * + * Reads the next entry from dir and returns it as a string. + * Returns nil at the end of the stream. + * + * d = Dir.new("testdir") + * d.read #=> "." + * d.read #=> ".." 
+ * d.read #=> "config.h" + */ +static VALUE +dir_read(VALUE dir) +{ + struct dir_data *dirp; + struct dirent *dp; + IF_HAVE_READDIR_R(DEFINE_STRUCT_DIRENT entry); + + GetDIR(dir, dirp); + errno = 0; + if (READDIR(dirp->dir, dirp->enc, &STRUCT_DIRENT(entry), dp)) { + return rb_external_str_new_with_enc(dp->d_name, NAMLEN(dp), dirp->enc); + } + else if (errno == 0) { /* end of stream */ + return Qnil; + } + else { + rb_sys_fail(0); + } + return Qnil; /* not reached */ +} + +/* + * call-seq: + * dir.each { |filename| block } => dir + * + * Calls the block once for each entry in this directory, passing the + * filename of each entry as a parameter to the block. + * + * d = Dir.new("testdir") + * d.each {|x| puts "Got #{x}" } + * + * produces: + * + * Got . + * Got .. + * Got config.h + * Got main.rb + */ +static VALUE +dir_each(VALUE dir) +{ + struct dir_data *dirp; + struct dirent *dp; + IF_HAVE_READDIR_R(DEFINE_STRUCT_DIRENT entry); + + RETURN_ENUMERATOR(dir, 0, 0); + GetDIR(dir, dirp); + rewinddir(dirp->dir); + while (READDIR(dirp->dir, dirp->enc, &STRUCT_DIRENT(entry), dp)) { + rb_yield(rb_external_str_new_with_enc(dp->d_name, NAMLEN(dp), dirp->enc)); + if (dirp->dir == NULL) dir_closed(); + } + return dir; +} + +/* + * call-seq: + * dir.pos => integer + * dir.tell => integer + * + * Returns the current position in dir. See also + * Dir#seek. + * + * d = Dir.new("testdir") + * d.tell #=> 0 + * d.read #=> "." + * d.tell #=> 12 + */ +static VALUE +dir_tell(VALUE dir) +{ +#ifdef HAVE_TELLDIR + struct dir_data *dirp; + long pos; + + GetDIR(dir, dirp); + pos = telldir(dirp->dir); + return rb_int2inum(pos); +#else + rb_notimplement(); +#endif +} + +/* + * call-seq: + * dir.seek( integer ) => dir + * + * Seeks to a particular location in dir. integer + * must be a value returned by Dir#tell. + * + * d = Dir.new("testdir") #=> # + * d.read #=> "." + * i = d.tell #=> 12 + * d.read #=> ".." + * d.seek(i) #=> # + * d.read #=> ".." 
+ */ +static VALUE +dir_seek(VALUE dir, VALUE pos) +{ + struct dir_data *dirp; + off_t p = NUM2OFFT(pos); + + GetDIR(dir, dirp); +#ifdef HAVE_SEEKDIR + seekdir(dirp->dir, p); + return dir; +#else + rb_notimplement(); +#endif +} + +/* + * call-seq: + * dir.pos( integer ) => integer + * + * Synonym for Dir#seek, but returns the position + * parameter. + * + * d = Dir.new("testdir") #=> # + * d.read #=> "." + * i = d.pos #=> 12 + * d.read #=> ".." + * d.pos = i #=> 12 + * d.read #=> ".." + */ +static VALUE +dir_set_pos(VALUE dir, VALUE pos) +{ + dir_seek(dir, pos); + return pos; +} + +/* + * call-seq: + * dir.rewind => dir + * + * Repositions dir to the first entry. + * + * d = Dir.new("testdir") + * d.read #=> "." + * d.rewind #=> # + * d.read #=> "." + */ +static VALUE +dir_rewind(VALUE dir) +{ + struct dir_data *dirp; + + if (rb_safe_level() >= 4 && !OBJ_UNTRUSTED(dir)) { + rb_raise(rb_eSecurityError, "Insecure: can't close"); + } + GetDIR(dir, dirp); + rewinddir(dirp->dir); + return dir; +} + +/* + * call-seq: + * dir.close => nil + * + * Closes the directory stream. Any further attempts to access + * dir will raise an IOError. 
+ * + * d = Dir.new("testdir") + * d.close #=> nil + */ +static VALUE +dir_close(VALUE dir) +{ + struct dir_data *dirp; + + GetDIR(dir, dirp); + closedir(dirp->dir); + dirp->dir = NULL; + + return Qnil; +} + +static void +dir_chdir(VALUE path) +{ + if (chdir(RSTRING_PTR(path)) < 0) + rb_sys_fail(RSTRING_PTR(path)); +} + +static int chdir_blocking = 0; +static VALUE chdir_thread = Qnil; + +struct chdir_data { + VALUE old_path, new_path; + int done; +}; + +static VALUE +chdir_yield(struct chdir_data *args) +{ + dir_chdir(args->new_path); + args->done = Qtrue; + chdir_blocking++; + if (chdir_thread == Qnil) + chdir_thread = rb_thread_current(); + return rb_yield(args->new_path); +} + +static VALUE +chdir_restore(struct chdir_data *args) +{ + if (args->done) { + chdir_blocking--; + if (chdir_blocking == 0) + chdir_thread = Qnil; + dir_chdir(args->old_path); + } + return Qnil; +} + +/* + * call-seq: + * Dir.chdir( [ string] ) => 0 + * Dir.chdir( [ string] ) {| path | block } => anObject + * + * Changes the current working directory of the process to the given + * string. When called without an argument, changes the directory to + * the value of the environment variable HOME, or + * LOGDIR. SystemCallError (probably + * Errno::ENOENT) if the target directory does not exist. + * + * If a block is given, it is passed the name of the new current + * directory, and the block is executed with that as the current + * directory. The original working directory is restored when the block + * exits. The return value of chdir is the value of the + * block. chdir blocks can be nested, but in a + * multi-threaded program an error will be raised if a thread attempts + * to open a chdir block while another thread has one + * open. 
+ * + * Dir.chdir("/var/spool/mail") + * puts Dir.pwd + * Dir.chdir("/tmp") do + * puts Dir.pwd + * Dir.chdir("/usr") do + * puts Dir.pwd + * end + * puts Dir.pwd + * end + * puts Dir.pwd + * + * produces: + * + * /var/spool/mail + * /tmp + * /usr + * /tmp + * /var/spool/mail + */ +static VALUE +dir_s_chdir(int argc, VALUE *argv, VALUE obj) +{ + VALUE path = Qnil; + + rb_secure(2); + if (rb_scan_args(argc, argv, "01", &path) == 1) { + FilePathValue(path); + } + else { + const char *dist = getenv("HOME"); + if (!dist) { + dist = getenv("LOGDIR"); + if (!dist) rb_raise(rb_eArgError, "HOME/LOGDIR not set"); + } + path = rb_str_new2(dist); + } + + if (chdir_blocking > 0) { + if (!rb_block_given_p() || rb_thread_current() != chdir_thread) + rb_warn("conflicting chdir during another chdir block"); + } + + if (rb_block_given_p()) { + struct chdir_data args; + char *cwd = my_getcwd(); + + args.old_path = rb_tainted_str_new2(cwd); xfree(cwd); + args.new_path = path; + args.done = Qfalse; + return rb_ensure(chdir_yield, (VALUE)&args, chdir_restore, (VALUE)&args); + } + dir_chdir(path); + + return INT2FIX(0); +} + +/* + * call-seq: + * Dir.getwd => string + * Dir.pwd => string + * + * Returns the path to the current working directory of this process as + * a string. + * + * Dir.chdir("/tmp") #=> 0 + * Dir.getwd #=> "/tmp" + */ +static VALUE +dir_s_getwd(VALUE dir) +{ + char *path; + VALUE cwd; + + rb_secure(4); + path = my_getcwd(); + cwd = rb_tainted_str_new2(path); + rb_enc_associate(cwd, rb_filesystem_encoding()); + + xfree(path); + return cwd; +} + +static void +check_dirname(volatile VALUE *dir) +{ + char *path, *pend; + + rb_secure(2); + FilePathValue(*dir); + path = RSTRING_PTR(*dir); + if (path && *(pend = rb_path_end(rb_path_skip_prefix(path)))) { + *dir = rb_str_new(path, pend - path); + } +} + +/* + * call-seq: + * Dir.chroot( string ) => 0 + * + * Changes this process's idea of the file system root. Only a + * privileged process may make this call. 
Not available on all + * platforms. On Unix systems, see chroot(2) for more + * information. + */ +static VALUE +dir_s_chroot(VALUE dir, VALUE path) +{ +#if defined(HAVE_CHROOT) && !defined(__CHECKER__) + check_dirname(&path); + + if (chroot(RSTRING_PTR(path)) == -1) + rb_sys_fail(RSTRING_PTR(path)); + + return INT2FIX(0); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + +/* + * call-seq: + * Dir.mkdir( string [, integer] ) => 0 + * + * Makes a new directory named by string, with permissions + * specified by the optional parameter anInteger. The + * permissions may be modified by the value of + * File::umask, and are ignored on NT. Raises a + * SystemCallError if the directory cannot be created. See + * also the discussion of permissions in the class documentation for + * File. + * + */ +static VALUE +dir_s_mkdir(int argc, VALUE *argv, VALUE obj) +{ + VALUE path, vmode; + int mode; + + if (rb_scan_args(argc, argv, "11", &path, &vmode) == 2) { + mode = NUM2INT(vmode); + } + else { + mode = 0777; + } + + check_dirname(&path); + if (mkdir(RSTRING_PTR(path), mode) == -1) + rb_sys_fail(RSTRING_PTR(path)); + + return INT2FIX(0); +} + +/* + * call-seq: + * Dir.delete( string ) => 0 + * Dir.rmdir( string ) => 0 + * Dir.unlink( string ) => 0 + * + * Deletes the named directory. Raises a subclass of + * SystemCallError if the directory isn't empty. 
+ */ +static VALUE +dir_s_rmdir(VALUE obj, VALUE dir) +{ + check_dirname(&dir); + if (rmdir(RSTRING_PTR(dir)) < 0) + rb_sys_fail(RSTRING_PTR(dir)); + + return INT2FIX(0); +} + +static void +sys_warning_1(const char* mesg) +{ + rb_sys_warning("%s", mesg); +} + +#define GLOB_VERBOSE (1UL << (sizeof(int) * CHAR_BIT - 1)) +#define sys_warning(val) \ + (void)((flags & GLOB_VERBOSE) && rb_protect((VALUE (*)(VALUE))sys_warning_1, (VALUE)(val), 0)) + +#define GLOB_ALLOC(type) (type *)malloc(sizeof(type)) +#define GLOB_ALLOC_N(type, n) (type *)malloc(sizeof(type) * (n)) +#define GLOB_FREE(ptr) free(ptr) +#define GLOB_JUMP_TAG(status) ((status == -1) ? rb_memerror() : rb_jump_tag(status)) + +/* + * ENOTDIR can be returned by stat(2) if a non-leaf element of the path + * is not a directory. + */ +#define to_be_ignored(e) ((e) == ENOENT || (e) == ENOTDIR) + +/* System call with warning */ +static int +do_stat(const char *path, struct stat *pst, int flags) + +{ + int ret = stat(path, pst); + if (ret < 0 && !to_be_ignored(errno)) + sys_warning(path); + + return ret; +} + +static int +do_lstat(const char *path, struct stat *pst, int flags) +{ + int ret = lstat(path, pst); + if (ret < 0 && !to_be_ignored(errno)) + sys_warning(path); + + return ret; +} + +static DIR * +do_opendir(const char *path, int flags) +{ + DIR *dirp = opendir(path); + if (dirp == NULL && !to_be_ignored(errno)) + sys_warning(path); + + return dirp; +} + +/* Return nonzero if S has any special globbing chars in it. 
*/ +static int +has_magic(const char *s, int flags, rb_encoding *enc) +{ + const int escape = !(flags & FNM_NOESCAPE); + const int nocase = flags & FNM_CASEFOLD; + + register const char *p = s; + register const char *pend = p + strlen(p); + register char c; + + while ((c = *p++) != 0) { + switch (c) { + case '*': + case '?': + case '[': + return 1; + + case '\\': + if (escape && !(c = *p++)) + return 0; + continue; + + default: + if (!FNM_SYSCASE && ISALPHA(c) && nocase) + return 1; + } + + p = Next(p-1, pend, enc); + } + + return 0; +} + +/* Find separator in globbing pattern. */ +static char * +find_dirsep(const char *s, int flags, rb_encoding *enc) +{ + const int escape = !(flags & FNM_NOESCAPE); + + register const char *p = s; + register const char *pend = p + strlen(p); + register char c; + int open = 0; + + while ((c = *p++) != 0) { + switch (c) { + case '[': + open = 1; + continue; + case ']': + open = 0; + continue; + + case '/': + if (!open) + return (char *)p-1; + continue; + + case '\\': + if (escape && !(c = *p++)) + return (char *)p-1; + continue; + } + + p = Next(p-1, pend, enc); + } + + return (char *)p-1; +} + +/* Remove escaping backslashes */ +static void +remove_backslashes(char *p, rb_encoding *enc) +{ + register const char *pend = p + strlen(p); + char *t = p; + char *s = p; + + while (*p) { + if (*p == '\\') { + if (t != s) + memmove(t, s, p - s); + t += p - s; + s = ++p; + if (!*p) break; + } + Inc(p, pend, enc); + } + + while (*p++); + + if (t != s) + memmove(t, s, p - s); /* move '\0' too */ +} + +/* Globing pattern */ +enum glob_pattern_type { PLAIN, MAGICAL, RECURSIVE, MATCH_ALL, MATCH_DIR }; + +struct glob_pattern { + char *str; + enum glob_pattern_type type; + struct glob_pattern *next; +}; + +static void glob_free_pattern(struct glob_pattern *list); + +static struct glob_pattern * +glob_make_pattern(const char *p, int flags, rb_encoding *enc) +{ + struct glob_pattern *list, *tmp, **tail = &list; + int dirsep = 0; /* pattern is 
terminated with '/' */ + + while (*p) { + tmp = GLOB_ALLOC(struct glob_pattern); + if (!tmp) goto error; + if (p[0] == '*' && p[1] == '*' && p[2] == '/') { + /* fold continuous RECURSIVEs (needed in glob_helper) */ + do { p += 3; } while (p[0] == '*' && p[1] == '*' && p[2] == '/'); + tmp->type = RECURSIVE; + tmp->str = 0; + dirsep = 1; + } + else { + const char *m = find_dirsep(p, flags, enc); + char *buf = GLOB_ALLOC_N(char, m-p+1); + if (!buf) { + GLOB_FREE(tmp); + goto error; + } + memcpy(buf, p, m-p); + buf[m-p] = '\0'; + tmp->type = has_magic(buf, flags, enc) ? MAGICAL : PLAIN; + tmp->str = buf; + if (*m) { + dirsep = 1; + p = m + 1; + } + else { + dirsep = 0; + p = m; + } + } + *tail = tmp; + tail = &tmp->next; + } + + tmp = GLOB_ALLOC(struct glob_pattern); + if (!tmp) { + error: + *tail = 0; + glob_free_pattern(list); + return 0; + } + tmp->type = dirsep ? MATCH_DIR : MATCH_ALL; + tmp->str = 0; + *tail = tmp; + tmp->next = 0; + + return list; +} + +static void +glob_free_pattern(struct glob_pattern *list) +{ + while (list) { + struct glob_pattern *tmp = list; + list = list->next; + if (tmp->str) + GLOB_FREE(tmp->str); + GLOB_FREE(tmp); + } +} + +static char * +join_path(const char *path, int dirsep, const char *name) +{ + long len = strlen(path); + long len2 = strlen(name)+(dirsep?1:0)+1; + char *buf = GLOB_ALLOC_N(char, len+len2); + + if (!buf) return 0; + memcpy(buf, path, len); + if (dirsep) { + buf[len++] = '/'; + } + buf[len] = '\0'; + strlcat(buf+len, name, len2); + return buf; +} + +enum answer { YES, NO, UNKNOWN }; + +#ifndef S_ISDIR +# define S_ISDIR(m) ((m & S_IFMT) == S_IFDIR) +#endif + +#ifndef S_ISLNK +# ifndef S_IFLNK +# define S_ISLNK(m) (0) +# else +# define S_ISLNK(m) ((m & S_IFMT) == S_IFLNK) +# endif +#endif + +struct glob_args { + void (*func)(const char *, VALUE, void *); + const char *path; + VALUE value; + rb_encoding *enc; +}; + +static VALUE +glob_func_caller(VALUE val) +{ + struct glob_args *args = (struct glob_args *)val; + + 
(*args->func)(args->path, args->value, args->enc); + return Qnil; +} + +#define glob_call_func(func, path, arg, enc) (*func)(path, arg, enc) + +static int +glob_helper( + const char *path, + int dirsep, /* '/' should be placed before appending child entry's name to 'path'. */ + enum answer exist, /* Does 'path' indicate an existing entry? */ + enum answer isdir, /* Does 'path' indicate a directory or a symlink to a directory? */ + struct glob_pattern **beg, + struct glob_pattern **end, + int flags, + ruby_glob_func *func, + VALUE arg, + rb_encoding *enc) +{ + struct stat st; + int status = 0; + struct glob_pattern **cur, **new_beg, **new_end; + int plain = 0, magical = 0, recursive = 0, match_all = 0, match_dir = 0; + int escape = !(flags & FNM_NOESCAPE); + + for (cur = beg; cur < end; ++cur) { + struct glob_pattern *p = *cur; + if (p->type == RECURSIVE) { + recursive = 1; + p = p->next; + } + switch (p->type) { + case PLAIN: + plain = 1; + break; + case MAGICAL: + magical = 1; + break; + case MATCH_ALL: + match_all = 1; + break; + case MATCH_DIR: + match_dir = 1; + break; + case RECURSIVE: + rb_bug("continuous RECURSIVEs"); + } + } + + if (*path) { + if (match_all && exist == UNKNOWN) { + if (do_lstat(path, &st, flags) == 0) { + exist = YES; + isdir = S_ISDIR(st.st_mode) ? YES : S_ISLNK(st.st_mode) ? UNKNOWN : NO; + } + else { + exist = NO; + isdir = NO; + } + } + if (match_dir && isdir == UNKNOWN) { + if (do_stat(path, &st, flags) == 0) { + exist = YES; + isdir = S_ISDIR(st.st_mode) ? 
YES : NO; + } + else { + exist = NO; + isdir = NO; + } + } + if (match_all && exist == YES) { + status = glob_call_func(func, path, arg, enc); + if (status) return status; + } + if (match_dir && isdir == YES) { + char *tmp = join_path(path, dirsep, ""); + if (!tmp) return -1; + status = glob_call_func(func, tmp, arg, enc); + GLOB_FREE(tmp); + if (status) return status; + } + } + + if (exist == NO || isdir == NO) return 0; + + if (magical || recursive) { + struct dirent *dp; + DIR *dirp; + IF_HAVE_READDIR_R(DEFINE_STRUCT_DIRENT entry); + dirp = do_opendir(*path ? path : ".", flags); + if (dirp == NULL) return 0; + + while (READDIR(dirp, enc, &STRUCT_DIRENT(entry), dp)) { + char *buf = join_path(path, dirsep, dp->d_name); + enum answer new_isdir = UNKNOWN; + + if (!buf) { + status = -1; + break; + } + if (recursive && strcmp(dp->d_name, ".") != 0 && strcmp(dp->d_name, "..") != 0 + && fnmatch("*", rb_usascii_encoding(), dp->d_name, flags) == 0) { +#ifndef _WIN32 + if (do_lstat(buf, &st, flags) == 0) + new_isdir = S_ISDIR(st.st_mode) ? YES : S_ISLNK(st.st_mode) ? UNKNOWN : NO; + else + new_isdir = NO; +#else + new_isdir = dp->d_isdir ? (!dp->d_isrep ? 
YES : UNKNOWN) : NO; +#endif + } + + new_beg = new_end = GLOB_ALLOC_N(struct glob_pattern *, (end - beg) * 2); + if (!new_beg) { + GLOB_FREE(buf); + status = -1; + break; + } + + for (cur = beg; cur < end; ++cur) { + struct glob_pattern *p = *cur; + if (p->type == RECURSIVE) { + if (new_isdir == YES) /* not symlink but real directory */ + *new_end++ = p; /* append recursive pattern */ + p = p->next; /* 0 times recursion */ + } + if (p->type == PLAIN || p->type == MAGICAL) { + if (fnmatch(p->str, enc, dp->d_name, flags) == 0) + *new_end++ = p->next; + } + } + + status = glob_helper(buf, 1, YES, new_isdir, new_beg, new_end, + flags, func, arg, enc); + GLOB_FREE(buf); + GLOB_FREE(new_beg); + if (status) break; + } + + closedir(dirp); + } + else if (plain) { + struct glob_pattern **copy_beg, **copy_end, **cur2; + + copy_beg = copy_end = GLOB_ALLOC_N(struct glob_pattern *, end - beg); + if (!copy_beg) return -1; + for (cur = beg; cur < end; ++cur) + *copy_end++ = (*cur)->type == PLAIN ? *cur : 0; + + for (cur = copy_beg; cur < copy_end; ++cur) { + if (*cur) { + char *buf; + char *name; + size_t len = strlen((*cur)->str) + 1; + name = GLOB_ALLOC_N(char, len); + if (!name) { + status = -1; + break; + } + memcpy(name, (*cur)->str, len); + if (escape) remove_backslashes(name, enc); + + new_beg = new_end = GLOB_ALLOC_N(struct glob_pattern *, end - beg); + if (!new_beg) { + GLOB_FREE(name); + status = -1; + break; + } + *new_end++ = (*cur)->next; + for (cur2 = cur + 1; cur2 < copy_end; ++cur2) { + if (*cur2 && fnmatch((*cur2)->str, enc, name, flags) == 0) { + *new_end++ = (*cur2)->next; + *cur2 = 0; + } + } + + buf = join_path(path, dirsep, name); + GLOB_FREE(name); + if (!buf) { + GLOB_FREE(new_beg); + status = -1; + break; + } + status = glob_helper(buf, 1, UNKNOWN, UNKNOWN, new_beg, + new_end, flags, func, arg, enc); + GLOB_FREE(buf); + GLOB_FREE(new_beg); + if (status) break; + } + } + + GLOB_FREE(copy_beg); + } + + return status; +} + +static int +ruby_glob0(const char 
*path, int flags, ruby_glob_func *func, VALUE arg, rb_encoding *enc) +{ + struct glob_pattern *list; + const char *root, *start; + char *buf; + int n; + int status; + + start = root = path; + flags |= FNM_SYSCASE; +#if defined DOSISH + root = rb_path_skip_prefix(root); +#endif + + if (root && *root == '/') root++; + + n = root - start; + buf = GLOB_ALLOC_N(char, n + 1); + if (!buf) return -1; + MEMCPY(buf, start, char, n); + buf[n] = '\0'; + + list = glob_make_pattern(root, flags, enc); + if (!list) { + GLOB_FREE(buf); + return -1; + } + status = glob_helper(buf, 0, UNKNOWN, UNKNOWN, &list, &list + 1, flags, func, arg, enc); + glob_free_pattern(list); + GLOB_FREE(buf); + + return status; +} + +int +ruby_glob(const char *path, int flags, ruby_glob_func *func, VALUE arg) +{ + return ruby_glob0(path, flags & ~GLOB_VERBOSE, func, arg, + rb_ascii8bit_encoding()); +} + +static int +rb_glob_caller(const char *path, VALUE a, void *enc) +{ + int status; + struct glob_args *args = (struct glob_args *)a; + + args->path = path; + rb_protect(glob_func_caller, a, &status); + return status; +} + +static int +rb_glob2(const char *path, int flags, + void (*func)(const char *, VALUE, void *), VALUE arg, + rb_encoding* enc) +{ + struct glob_args args; + + args.func = func; + args.value = arg; + args.enc = enc; + + if (flags & FNM_SYSCASE) { + rb_warning("Dir.glob() ignores File::FNM_CASEFOLD"); + } + + return ruby_glob0(path, flags | GLOB_VERBOSE, rb_glob_caller, (VALUE)&args, + enc); +} + +void +rb_glob(const char *path, void (*func)(const char *, VALUE, void *), VALUE arg) +{ + int status = rb_glob2(path, 0, func, arg, rb_ascii8bit_encoding()); + if (status) GLOB_JUMP_TAG(status); +} + +static void +push_pattern(const char *path, VALUE ary, void *enc) +{ + rb_ary_push(ary, rb_external_str_new_with_enc(path, strlen(path), enc)); +} + +static int +ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg, + rb_encoding *enc) +{ + const int escape = !(flags & 
FNM_NOESCAPE); + const char *p = str; + const char *pend = p + strlen(p); + const char *s = p; + const char *lbrace = 0, *rbrace = 0; + int nest = 0, status = 0; + + while (*p) { + if (*p == '{' && nest++ == 0) { + lbrace = p; + } + if (*p == '}' && --nest <= 0) { + rbrace = p; + break; + } + if (*p == '\\' && escape) { + if (!*++p) break; + } + Inc(p, pend, enc); + } + + if (lbrace && rbrace) { + char *buf = GLOB_ALLOC_N(char, strlen(s) + 1); + long shift; + + if (!buf) return -1; + memcpy(buf, s, lbrace-s); + shift = (lbrace-s); + p = lbrace; + while (p < rbrace) { + const char *t = ++p; + nest = 0; + while (p < rbrace && !(*p == ',' && nest == 0)) { + if (*p == '{') nest++; + if (*p == '}') nest--; + if (*p == '\\' && escape) { + if (++p == rbrace) break; + } + Inc(p, pend, enc); + } + memcpy(buf+shift, t, p-t); + strcpy(buf+shift+(p-t), rbrace+1); + status = ruby_brace_expand(buf, flags, func, arg, enc); + if (status) break; + } + GLOB_FREE(buf); + } + else if (!lbrace && !rbrace) { + status = (*func)(s, arg, enc); + } + + return status; +} + +struct brace_args { + ruby_glob_func *func; + VALUE value; + int flags; +}; + +static int +glob_brace(const char *path, VALUE val, void *enc) +{ + struct brace_args *arg = (struct brace_args *)val; + + return ruby_glob0(path, arg->flags, arg->func, arg->value, enc); +} + +static int +ruby_brace_glob0(const char *str, int flags, ruby_glob_func *func, VALUE arg, + rb_encoding* enc) +{ + struct brace_args args; + + args.func = func; + args.value = arg; + args.flags = flags; + return ruby_brace_expand(str, flags, glob_brace, (VALUE)&args, enc); +} + +int +ruby_brace_glob(const char *str, int flags, ruby_glob_func *func, VALUE arg) +{ + return ruby_brace_glob0(str, flags & ~GLOB_VERBOSE, func, arg, + rb_ascii8bit_encoding()); +} + +static int +push_glob(VALUE ary, VALUE str, int flags) +{ + struct glob_args args; + rb_encoding *enc = rb_enc_get(str); + + if (enc == rb_usascii_encoding()) enc = rb_filesystem_encoding(); + 
args.func = push_pattern; + args.value = ary; + args.enc = enc; + + return ruby_brace_glob0(StringValuePtr(str), flags | GLOB_VERBOSE, + rb_glob_caller, (VALUE)&args, enc); +} + +static VALUE +rb_push_glob(VALUE str, int flags) /* '\0' is delimiter */ +{ + long offset = 0; + VALUE ary; + + StringValue(str); + ary = rb_ary_new(); + + while (offset < RSTRING_LEN(str)) { + char *p, *pend; + int status; + p = RSTRING_PTR(str) + offset; + status = push_glob(ary, rb_enc_str_new(p, strlen(p), rb_enc_get(str)), + flags); + if (status) GLOB_JUMP_TAG(status); + if (offset >= RSTRING_LEN(str)) break; + p += strlen(p) + 1; + pend = RSTRING_PTR(str) + RSTRING_LEN(str); + while (p < pend && !*p) + p++; + offset = p - RSTRING_PTR(str); + } + + return ary; +} + +static VALUE +dir_globs(long argc, VALUE *argv, int flags) +{ + VALUE ary = rb_ary_new(); + long i; + + for (i = 0; i < argc; ++i) { + int status; + VALUE str = argv[i]; + SafeStringValue(str); + status = push_glob(ary, str, flags); + if (status) GLOB_JUMP_TAG(status); + } + + return ary; +} + +/* + * call-seq: + * Dir[ array ] => array + * Dir[ string [, string ...] ] => array + * + * Equivalent to calling + * Dir.glob(array,0) and + * Dir.glob([string,...],0). + * + */ +static VALUE +dir_s_aref(int argc, VALUE *argv, VALUE obj) +{ + if (argc == 1) { + return rb_push_glob(argv[0], 0); + } + return dir_globs(argc, argv, 0); +} + +/* + * call-seq: + * Dir.glob( pattern, [flags] ) => array + * Dir.glob( pattern, [flags] ) {| filename | block } => nil + * + * Returns the filenames found by expanding pattern which is + * an +Array+ of the patterns or the pattern +String+, either as an + * array or as parameters to the block. Note that this pattern + * is not a regexp (it's closer to a shell glob). See + * File::fnmatch for the meaning of the flags + * parameter. Note that case sensitivity depends on your system (so + * File::FNM_CASEFOLD is ignored) + * + * *:: Matches any file. 
Can be restricted by + * other values in the glob. * + * will match all files; c* will + * match all files beginning with + * c; *c will match + * all files ending with c; and + * \*c\* will match all files that + * have c in them (including at + * the beginning or end). Equivalent to + * / .* /x in regexp. + * **:: Matches directories recursively. + * ?:: Matches any one character. Equivalent to + * /.{1}/ in regexp. + * [set]:: Matches any one character in +set+. + * Behaves exactly like character sets in + * Regexp, including set negation + * ([^a-z]). + * {p,q}:: Matches either literal p or + * literal q. Matching literals + * may be more than one character in length. + * More than two literals may be specified. + * Equivalent to pattern alternation in + * regexp. + * \:: Escapes the next metacharacter. + * + * Dir["config.?"] #=> ["config.h"] + * Dir.glob("config.?") #=> ["config.h"] + * Dir.glob("*.[a-z][a-z]") #=> ["main.rb"] + * Dir.glob("*.[^r]*") #=> ["config.h"] + * Dir.glob("*.{rb,h}") #=> ["main.rb", "config.h"] + * Dir.glob("*") #=> ["config.h", "main.rb"] + * Dir.glob("*", File::FNM_DOTMATCH) #=> [".", "..", "config.h", "main.rb"] + * + * rbfiles = File.join("**", "*.rb") + * Dir.glob(rbfiles) #=> ["main.rb", + * # "lib/song.rb", + * # "lib/song/karaoke.rb"] + * libdirs = File.join("**", "lib") + * Dir.glob(libdirs) #=> ["lib"] + * + * librbfiles = File.join("**", "lib", "**", "*.rb") + * Dir.glob(librbfiles) #=> ["lib/song.rb", + * # "lib/song/karaoke.rb"] + * + * librbfiles = File.join("**", "lib", "*.rb") + * Dir.glob(librbfiles) #=> ["lib/song.rb"] + */ +static VALUE +dir_s_glob(int argc, VALUE *argv, VALUE obj) +{ + VALUE str, rflags, ary; + int flags; + + if (rb_scan_args(argc, argv, "11", &str, &rflags) == 2) + flags = NUM2INT(rflags); + else + flags = 0; + + ary = rb_check_array_type(str); + if (NIL_P(ary)) { + ary = rb_push_glob(str, flags); + } + else { + volatile VALUE v = ary; + ary = dir_globs(RARRAY_LEN(v), RARRAY_PTR(v), flags); + } + 
+ if (rb_block_given_p()) { + rb_ary_each(ary); + return Qnil; + } + return ary; +} + +static VALUE +dir_open_dir(int argc, VALUE *argv) +{ + VALUE dir = rb_funcall2(rb_cDir, rb_intern("open"), argc, argv); + + if (TYPE(dir) != T_DATA || + RDATA(dir)->dfree != (RUBY_DATA_FUNC)free_dir) { + rb_raise(rb_eTypeError, "wrong argument type %s (expected Dir)", + rb_obj_classname(dir)); + } + return dir; +} + + +/* + * call-seq: + * Dir.foreach( dirname ) {| filename | block } => nil + * + * Calls the block once for each entry in the named directory, passing + * the filename of each entry as a parameter to the block. + * + * Dir.foreach("testdir") {|x| puts "Got #{x}" } + * + * produces: + * + * Got . + * Got .. + * Got config.h + * Got main.rb + * + */ +static VALUE +dir_foreach(int argc, VALUE *argv, VALUE io) +{ + VALUE dir; + + RETURN_ENUMERATOR(io, argc, argv); + dir = dir_open_dir(argc, argv); + rb_ensure(dir_each, dir, dir_close, dir); + return Qnil; +} + +/* + * call-seq: + * Dir.entries( dirname ) => array + * + * Returns an array containing all of the filenames in the given + * directory. Will raise a SystemCallError if the named + * directory doesn't exist. + * + * Dir.entries("testdir") #=> [".", "..", "config.h", "main.rb"] + * + */ +static VALUE +dir_entries(int argc, VALUE *argv, VALUE io) +{ + VALUE dir; + + dir = dir_open_dir(argc, argv); + return rb_ensure(rb_Array, dir, dir_close, dir); +} + +/* + * call-seq: + * File.fnmatch( pattern, path, [flags] ) => (true or false) + * File.fnmatch?( pattern, path, [flags] ) => (true or false) + * + * Returns true if path matches against pattern The + * pattern is not a regular expression; instead it follows rules + * similar to shell filename globbing. It may contain the following + * metacharacters: + * + * *:: Matches any file. Can be restricted by + * other values in the glob. 
* + * will match all files; c* will + * match all files beginning with + * c; *c will match + * all files ending with c; and + * *c* will match all files that + * have c in them (including at + * the beginning or end). Equivalent to + * / .* /x in regexp. + * **:: Matches directories recursively or files + * expansively. + * ?:: Matches any one character. Equivalent to + * /.{1}/ in regexp. + * [set]:: Matches any one character in +set+. + * Behaves exactly like character sets in + * Regexp, including set negation + * ([^a-z]). + * \:: Escapes the next metacharacter. + * + * flags is a bitwise OR of the FNM_xxx + * parameters. The same glob pattern and flags are used by + * Dir::glob. + * + * File.fnmatch('cat', 'cat') #=> true # match entire string + * File.fnmatch('cat', 'category') #=> false # pattern matches only part of the string + * File.fnmatch('c{at,ub}s', 'cats') #=> false # { } isn't supported + * + * File.fnmatch('c?t', 'cat') #=> true # '?' match only 1 character + * File.fnmatch('c??t', 'cat') #=> false # ditto + * File.fnmatch('c*', 'cats') #=> true # '*' match 0 or more characters + * File.fnmatch('c*t', 'c/a/b/t') #=> true # ditto + * File.fnmatch('ca[a-z]', 'cat') #=> true # inclusive bracket expression + * File.fnmatch('ca[^t]', 'cat') #=> false # exclusive bracket expression ('^' or '!') + * + * File.fnmatch('cat', 'CAT') #=> false # case sensitive + * File.fnmatch('cat', 'CAT', File::FNM_CASEFOLD) #=> true # case insensitive + * + * File.fnmatch('?', '/', File::FNM_PATHNAME) #=> false # wildcard doesn't match '/' on FNM_PATHNAME + * File.fnmatch('*', '/', File::FNM_PATHNAME) #=> false # ditto + * File.fnmatch('[/]', '/', File::FNM_PATHNAME) #=> false # ditto + * + * File.fnmatch('\?', '?') #=> true # escaped wildcard becomes ordinary + * File.fnmatch('\a', 'a') #=> true # escaped ordinary remains ordinary + * File.fnmatch('\a', '\a', File::FNM_NOESCAPE) #=> true # FNM_NOESCAPE makes '\' ordinary + * File.fnmatch('[\?]', '?') #=> true # can escape inside
bracket expression + * + * File.fnmatch('*', '.profile') #=> false # wildcard doesn't match leading + * File.fnmatch('*', '.profile', File::FNM_DOTMATCH) #=> true # period by default. + * File.fnmatch('.*', '.profile') #=> true + * + * rbfiles = '**' '/' '*.rb' # you don't have to do like this. just write in single string. + * File.fnmatch(rbfiles, 'main.rb') #=> false + * File.fnmatch(rbfiles, './main.rb') #=> false + * File.fnmatch(rbfiles, 'lib/song.rb') #=> true + * File.fnmatch('**.rb', 'main.rb') #=> true + * File.fnmatch('**.rb', './main.rb') #=> false + * File.fnmatch('**.rb', 'lib/song.rb') #=> true + * File.fnmatch('*', 'dave/.profile') #=> true + * + * pattern = '*' '/' '*' + * File.fnmatch(pattern, 'dave/.profile', File::FNM_PATHNAME) #=> false + * File.fnmatch(pattern, 'dave/.profile', File::FNM_PATHNAME | File::FNM_DOTMATCH) #=> true + * + * pattern = '**' '/' 'foo' + * File.fnmatch(pattern, 'a/b/c/foo', File::FNM_PATHNAME) #=> true + * File.fnmatch(pattern, '/a/b/c/foo', File::FNM_PATHNAME) #=> true + * File.fnmatch(pattern, 'c:/a/b/c/foo', File::FNM_PATHNAME) #=> true + * File.fnmatch(pattern, 'a/.b/c/foo', File::FNM_PATHNAME) #=> false + * File.fnmatch(pattern, 'a/.b/c/foo', File::FNM_PATHNAME | File::FNM_DOTMATCH) #=> true + */ +static VALUE +file_s_fnmatch(int argc, VALUE *argv, VALUE obj) +{ + VALUE pattern, path; + VALUE rflags; + int flags; + + if (rb_scan_args(argc, argv, "21", &pattern, &path, &rflags) == 3) + flags = NUM2INT(rflags); + else + flags = 0; + + StringValue(pattern); + FilePathStringValue(path); + + if (fnmatch(RSTRING_PTR(pattern), rb_enc_get(pattern), RSTRING_PTR(path), + flags) == 0) + return Qtrue; + + return Qfalse; +} + +/* + * Objects of class Dir are directory streams representing + * directories in the underlying file system. They provide a variety of + * ways to list directories and their contents. See also + * File. 
+ * + * The directory used in these examples contains the two regular files + * (config.h and main.rb), the parent + * directory (..), and the directory itself + * (.). + */ +void +Init_Dir(void) +{ + rb_cDir = rb_define_class("Dir", rb_cObject); + + rb_include_module(rb_cDir, rb_mEnumerable); + + rb_define_alloc_func(rb_cDir, dir_s_alloc); + rb_define_singleton_method(rb_cDir, "open", dir_s_open, -1); + rb_define_singleton_method(rb_cDir, "foreach", dir_foreach, -1); + rb_define_singleton_method(rb_cDir, "entries", dir_entries, -1); + + rb_define_method(rb_cDir,"initialize", dir_initialize, -1); + rb_define_method(rb_cDir,"path", dir_path, 0); + rb_define_method(rb_cDir,"inspect", dir_inspect, 0); + rb_define_method(rb_cDir,"read", dir_read, 0); + rb_define_method(rb_cDir,"each", dir_each, 0); + rb_define_method(rb_cDir,"rewind", dir_rewind, 0); + rb_define_method(rb_cDir,"tell", dir_tell, 0); + rb_define_method(rb_cDir,"seek", dir_seek, 1); + rb_define_method(rb_cDir,"pos", dir_tell, 0); + rb_define_method(rb_cDir,"pos=", dir_set_pos, 1); + rb_define_method(rb_cDir,"close", dir_close, 0); + + rb_define_singleton_method(rb_cDir,"chdir", dir_s_chdir, -1); + rb_define_singleton_method(rb_cDir,"getwd", dir_s_getwd, 0); + rb_define_singleton_method(rb_cDir,"pwd", dir_s_getwd, 0); + rb_define_singleton_method(rb_cDir,"chroot", dir_s_chroot, 1); + rb_define_singleton_method(rb_cDir,"mkdir", dir_s_mkdir, -1); + rb_define_singleton_method(rb_cDir,"rmdir", dir_s_rmdir, 1); + rb_define_singleton_method(rb_cDir,"delete", dir_s_rmdir, 1); + rb_define_singleton_method(rb_cDir,"unlink", dir_s_rmdir, 1); + + rb_define_singleton_method(rb_cDir,"glob", dir_s_glob, -1); + rb_define_singleton_method(rb_cDir,"[]", dir_s_aref, -1); + rb_define_singleton_method(rb_cDir,"exist?", rb_file_directory_p, 1); /* in file.c */ + rb_define_singleton_method(rb_cDir,"exists?", rb_file_directory_p, 1); /* in file.c */ + + rb_define_singleton_method(rb_cFile,"fnmatch", file_s_fnmatch, -1); + 
rb_define_singleton_method(rb_cFile,"fnmatch?", file_s_fnmatch, -1); + + rb_file_const("FNM_NOESCAPE", INT2FIX(FNM_NOESCAPE)); + rb_file_const("FNM_PATHNAME", INT2FIX(FNM_PATHNAME)); + rb_file_const("FNM_DOTMATCH", INT2FIX(FNM_DOTMATCH)); + rb_file_const("FNM_CASEFOLD", INT2FIX(FNM_CASEFOLD)); + rb_file_const("FNM_SYSCASE", INT2FIX(FNM_SYSCASE)); +} diff --git a/dln.c b/dln.c new file mode 100644 index 0000000..a8f3b72 --- /dev/null +++ b/dln.c @@ -0,0 +1,1693 @@ +/********************************************************************** + + dln.c - + + $Author: yugui $ + created at: Tue Jan 18 17:05:06 JST 1994 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "dln.h" + +#ifdef HAVE_STDLIB_H +# include +#endif + +#ifdef __CHECKER__ +#undef HAVE_DLOPEN +#undef USE_DLN_A_OUT +#undef USE_DLN_DLOPEN +#endif + +#ifdef USE_DLN_A_OUT +char *dln_argv0; +#endif + +#if defined(HAVE_ALLOCA_H) +#include +#endif + +#ifdef HAVE_STRING_H +# include +#else +# include +#endif + +#ifndef xmalloc +void *xmalloc(); +void *xcalloc(); +void *xrealloc(); +#endif + +#define free(x) xfree(x) + +#include +#if defined(_WIN32) +#include "missing/file.h" +#endif +#include +#include + +#ifndef S_ISDIR +# define S_ISDIR(m) ((m & S_IFMT) == S_IFDIR) +#endif + +#ifdef HAVE_SYS_PARAM_H +# include +#endif +#ifndef MAXPATHLEN +# define MAXPATHLEN 1024 +#endif + +#ifdef HAVE_UNISTD_H +# include +#endif + +#ifndef _WIN32 +char *getenv(); +#endif + +#if defined(__APPLE__) && defined(__MACH__) /* Mac OS X */ +# if defined(HAVE_DLOPEN) + /* Mac OS X with dlopen (10.3 or later) */ +# define MACOSX_DLOPEN +# else +# define MACOSX_DYLD +# endif +#endif + +#ifdef __BEOS__ +# include +#endif + +#ifndef NO_DLN_LOAD + +#if defined(HAVE_DLOPEN) && !defined(USE_DLN_A_OUT) && !defined(_AIX) && !defined(MACOSX_DYLD) && !defined(_UNICOSMP) +/* dynamic load with dlopen() */ +# define USE_DLN_DLOPEN +#endif + 
+#ifndef FUNCNAME_PATTERN +# if defined(__hp9000s300) || (defined(__NetBSD__) && !defined(__ELF__)) || defined(__BORLANDC__) || (defined(__FreeBSD__) && !defined(__ELF__)) || (defined(__OpenBSD__) && !defined(__ELF__)) || defined(NeXT) || defined(__WATCOMC__) || defined(MACOSX_DYLD) +# define FUNCNAME_PATTERN "_Init_%s" +# else +# define FUNCNAME_PATTERN "Init_%s" +# endif +#endif + +static int +init_funcname_len(char **buf, const char *file) +{ + char *p; + const char *slash; + int len; + + /* Load the file as an object one */ + for (slash = file-1; *file; file++) /* Find position of last '/' */ + if (*file == '/') slash = file; + + len = strlen(FUNCNAME_PATTERN) + strlen(slash + 1); + *buf = xmalloc(len); + snprintf(*buf, len, FUNCNAME_PATTERN, slash + 1); + for (p = *buf; *p; p++) { /* Delete suffix if it exists */ + if (*p == '.') { + *p = '\0'; break; + } + } + return p - *buf; +} + +#define init_funcname(buf, file) do {\ + int len = init_funcname_len(buf, file);\ + char *tmp = ALLOCA_N(char, len+1);\ + if (!tmp) {\ + free(*buf);\ + rb_memerror();\ + }\ + strcpy(tmp, *buf);\ + free(*buf);\ + *buf = tmp;\ +} while (0) + +#ifdef USE_DLN_A_OUT + +#ifndef LIBC_NAME +# define LIBC_NAME "libc.a" +#endif + +#ifndef DLN_DEFAULT_LIB_PATH +# define DLN_DEFAULT_LIB_PATH "/lib:/usr/lib:/usr/local/lib:." 
+#endif + +#include + +static int dln_errno; + +#define DLN_ENOEXEC ENOEXEC /* Exec format error */ +#define DLN_ECONFL 1201 /* Symbol name conflict */ +#define DLN_ENOINIT 1202 /* No initializer given */ +#define DLN_EUNDEF 1203 /* Undefine symbol remains */ +#define DLN_ENOTLIB 1204 /* Not a library file */ +#define DLN_EBADLIB 1205 /* Malformed library file */ +#define DLN_EINIT 1206 /* Not initialized */ + +static int dln_init_p = 0; + +#include +#include +#ifndef N_COMM +# define N_COMM 0x12 +#endif +#ifndef N_MAGIC +# define N_MAGIC(x) (x).a_magic +#endif + +#define INVALID_OBJECT(h) (N_MAGIC(h) != OMAGIC) + +#include "ruby/util.h" +#include "ruby/st.h" + +static st_table *sym_tbl; +static st_table *undef_tbl; + +static int load_lib(); + +static int +load_header(int fd, struct exec *hdrp, long disp) +{ + int size; + + lseek(fd, disp, 0); + size = read(fd, hdrp, sizeof(struct exec)); + if (size == -1) { + dln_errno = errno; + return -1; + } + if (size != sizeof(struct exec) || N_BADMAG(*hdrp)) { + dln_errno = DLN_ENOEXEC; + return -1; + } + return 0; +} + +#if defined(sequent) +#define RELOC_SYMBOL(r) ((r)->r_symbolnum) +#define RELOC_MEMORY_SUB_P(r) ((r)->r_bsr) +#define RELOC_PCREL_P(r) ((r)->r_pcrel || (r)->r_bsr) +#define RELOC_TARGET_SIZE(r) ((r)->r_length) +#endif + +/* Default macros */ +#ifndef RELOC_ADDRESS +#define RELOC_ADDRESS(r) ((r)->r_address) +#define RELOC_EXTERN_P(r) ((r)->r_extern) +#define RELOC_SYMBOL(r) ((r)->r_symbolnum) +#define RELOC_MEMORY_SUB_P(r) 0 +#define RELOC_PCREL_P(r) ((r)->r_pcrel) +#define RELOC_TARGET_SIZE(r) ((r)->r_length) +#endif + +#if defined(sun) && defined(sparc) +/* Sparc (Sun 4) macros */ +# undef relocation_info +# define relocation_info reloc_info_sparc +# define R_RIGHTSHIFT(r) (reloc_r_rightshift[(r)->r_type]) +# define R_BITSIZE(r) (reloc_r_bitsize[(r)->r_type]) +# define R_LENGTH(r) (reloc_r_length[(r)->r_type]) +static int reloc_r_rightshift[] = { + 0, 0, 0, 0, 0, 0, 2, 2, 10, 0, 0, 0, 0, 0, 0, +}; +static 
int reloc_r_bitsize[] = { + 8, 16, 32, 8, 16, 32, 30, 22, 22, 22, 13, 10, 32, 32, 16, +}; +static int reloc_r_length[] = { + 0, 1, 2, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +}; +# define R_PCREL(r) \ + ((r)->r_type >= RELOC_DISP8 && (r)->r_type <= RELOC_WDISP22) +# define R_SYMBOL(r) ((r)->r_index) +#endif + +#if defined(sequent) +#define R_SYMBOL(r) ((r)->r_symbolnum) +#define R_MEMORY_SUB(r) ((r)->r_bsr) +#define R_PCREL(r) ((r)->r_pcrel || (r)->r_bsr) +#define R_LENGTH(r) ((r)->r_length) +#endif + +#ifndef R_SYMBOL +# define R_SYMBOL(r) ((r)->r_symbolnum) +# define R_MEMORY_SUB(r) 0 +# define R_PCREL(r) ((r)->r_pcrel) +# define R_LENGTH(r) ((r)->r_length) +#endif + +static struct relocation_info * +load_reloc(int fd, struct exec *hdrp, long disp) +{ + struct relocation_info *reloc; + int size; + + lseek(fd, disp + N_TXTOFF(*hdrp) + hdrp->a_text + hdrp->a_data, 0); + size = hdrp->a_trsize + hdrp->a_drsize; + reloc = (struct relocation_info*)xmalloc(size); + if (reloc == NULL) { + dln_errno = errno; + return NULL; + } + + if (read(fd, reloc, size) != size) { + dln_errno = errno; + free(reloc); + return NULL; + } + + return reloc; +} + +static struct nlist * +load_sym(int fd, struct exec *hdrp, long disp) +{ + struct nlist * buffer; + struct nlist * sym; + struct nlist * end; + long displ; + int size; + + lseek(fd, N_SYMOFF(*hdrp) + hdrp->a_syms + disp, 0); + if (read(fd, &size, sizeof(int)) != sizeof(int)) { + goto err_noexec; + } + + buffer = (struct nlist*)xmalloc(hdrp->a_syms + size); + if (buffer == NULL) { + dln_errno = errno; + return NULL; + } + + lseek(fd, disp + N_SYMOFF(*hdrp), 0); + if (read(fd, buffer, hdrp->a_syms + size) != hdrp->a_syms + size) { + free(buffer); + goto err_noexec; + } + + sym = buffer; + end = sym + hdrp->a_syms / sizeof(struct nlist); + displ = (long)buffer + (long)(hdrp->a_syms); + + while (sym < end) { + sym->n_un.n_name = (char*)sym->n_un.n_strx + displ; + sym++; + } + return buffer; + + err_noexec: + dln_errno = DLN_ENOEXEC; + 
return NULL; +} + +static st_table * +sym_hash(struct exec *hdrp, struct nlist *syms) +{ + st_table *tbl; + struct nlist *sym = syms; + struct nlist *end = syms + (hdrp->a_syms / sizeof(struct nlist)); + + tbl = st_init_strtable(); + if (tbl == NULL) { + dln_errno = errno; + return NULL; + } + + while (sym < end) { + st_insert(tbl, sym->n_un.n_name, sym); + sym++; + } + return tbl; +} + +static int +dln_init(const char *prog) +{ + char *file, fbuf[MAXPATHLEN]; + int fd; + struct exec hdr; + struct nlist *syms; + + if (dln_init_p == 1) return 0; + + file = dln_find_exe_r(prog, NULL, fbuf, sizeof(fbuf)); + if (file == NULL || (fd = open(file, O_RDONLY)) < 0) { + dln_errno = errno; + return -1; + } + + if (load_header(fd, &hdr, 0) == -1) return -1; + syms = load_sym(fd, &hdr, 0); + if (syms == NULL) { + close(fd); + return -1; + } + sym_tbl = sym_hash(&hdr, syms); + if (sym_tbl == NULL) { /* file may be start with #! */ + char c = '\0'; + char buf[MAXPATHLEN]; + char *p; + + free(syms); + lseek(fd, 0L, 0); + if (read(fd, &c, 1) == -1) { + dln_errno = errno; + return -1; + } + if (c != '#') goto err_noexec; + if (read(fd, &c, 1) == -1) { + dln_errno = errno; + return -1; + } + if (c != '!') goto err_noexec; + + p = buf; + /* skip forwarding spaces */ + while (read(fd, &c, 1) == 1) { + if (c == '\n') goto err_noexec; + if (c != '\t' && c != ' ') { + *p++ = c; + break; + } + } + /* read in command name */ + while (read(fd, p, 1) == 1) { + if (*p == '\n' || *p == '\t' || *p == ' ') break; + p++; + if (p-buf >= MAXPATHLEN) { + dln_errno = ENAMETOOLONG; + return -1; + } + } + *p = '\0'; + + return dln_init(buf); + } + dln_init_p = 1; + undef_tbl = st_init_strtable(); + close(fd); + return 0; + + err_noexec: + close(fd); + dln_errno = DLN_ENOEXEC; + return -1; +} + +static long +load_text_data(int fd, struct exec *hdrp, int bss, long disp) +{ + int size; + unsigned char* addr; + + lseek(fd, disp + N_TXTOFF(*hdrp), 0); + size = hdrp->a_text + hdrp->a_data; + + if (bss == -1) 
size += hdrp->a_bss; + else if (bss > 1) size += bss; + + addr = (unsigned char*)xmalloc(size); + if (addr == NULL) { + dln_errno = errno; + return 0; + } + + if (read(fd, addr, size) != size) { + dln_errno = errno; + free(addr); + return 0; + } + + if (bss == -1) { + memset(addr + hdrp->a_text + hdrp->a_data, 0, hdrp->a_bss); + } + else if (bss > 0) { + memset(addr + hdrp->a_text + hdrp->a_data, 0, bss); + } + + return (long)addr; +} + +static int +undef_print(char *key, char *value) +{ + fprintf(stderr, " %s\n", key); + return ST_CONTINUE; +} + +static void +dln_print_undef() +{ + fprintf(stderr, " Undefined symbols:\n"); + st_foreach(undef_tbl, undef_print, NULL); +} + +static void +dln_undefined() +{ + if (undef_tbl->num_entries > 0) { + fprintf(stderr, "dln: Calling undefined function\n"); + dln_print_undef(); + rb_exit(1); + } +} + +struct undef { + char *name; + struct relocation_info reloc; + long base; + char *addr; + union { + char c; + short s; + long l; + } u; +}; + +static st_table *reloc_tbl = NULL; +static void +link_undef(const char *name, long base, struct relocation_info *reloc) +{ + static int u_no = 0; + struct undef *obj; + char *addr = (char*)(reloc->r_address + base); + + obj = (struct undef*)xmalloc(sizeof(struct undef)); + obj->name = strdup(name); + obj->reloc = *reloc; + obj->base = base; + switch (R_LENGTH(reloc)) { + case 0: /* byte */ + obj->u.c = *addr; + break; + case 1: /* word */ + obj->u.s = *(short*)addr; + break; + case 2: /* long */ + obj->u.l = *(long*)addr; + break; + } + if (reloc_tbl == NULL) { + reloc_tbl = st_init_numtable(); + } + st_insert(reloc_tbl, u_no++, obj); +} + +struct reloc_arg { + const char *name; + long value; +}; + +static int +reloc_undef(int no, struct undef *undef, struct reloc_arg *arg) +{ + int datum; + char *address; +#if defined(sun) && defined(sparc) + unsigned int mask = 0; +#endif + + if (strcmp(arg->name, undef->name) != 0) return ST_CONTINUE; + address = (char*)(undef->base + 
undef->reloc.r_address); + datum = arg->value; + + if (R_PCREL(&(undef->reloc))) datum -= undef->base; +#if defined(sun) && defined(sparc) + datum += undef->reloc.r_addend; + datum >>= R_RIGHTSHIFT(&(undef->reloc)); + mask = (1 << R_BITSIZE(&(undef->reloc))) - 1; + mask |= mask -1; + datum &= mask; + switch (R_LENGTH(&(undef->reloc))) { + case 0: + *address = undef->u.c; + *address &= ~mask; + *address |= datum; + break; + case 1: + *(short *)address = undef->u.s; + *(short *)address &= ~mask; + *(short *)address |= datum; + break; + case 2: + *(long *)address = undef->u.l; + *(long *)address &= ~mask; + *(long *)address |= datum; + break; + } +#else + switch (R_LENGTH(&(undef->reloc))) { + case 0: /* byte */ + if (R_MEMORY_SUB(&(undef->reloc))) + *address = datum - *address; + else *address = undef->u.c + datum; + break; + case 1: /* word */ + if (R_MEMORY_SUB(&(undef->reloc))) + *(short*)address = datum - *(short*)address; + else *(short*)address = undef->u.s + datum; + break; + case 2: /* long */ + if (R_MEMORY_SUB(&(undef->reloc))) + *(long*)address = datum - *(long*)address; + else *(long*)address = undef->u.l + datum; + break; + } +#endif + free(undef->name); + free(undef); + return ST_DELETE; +} + +static void +unlink_undef(const char *name, long value) +{ + struct reloc_arg arg; + + arg.name = name; + arg.value = value; + st_foreach(reloc_tbl, reloc_undef, &arg); +} + +#ifdef N_INDR +struct indr_data { + char *name0, *name1; +}; + +static int +reloc_repl(int no, struct undef *undef, struct indr_data *data) +{ + if (strcmp(data->name0, undef->name) == 0) { + free(undef->name); + undef->name = strdup(data->name1); + } + return ST_CONTINUE; +} +#endif + +static int +load_1(int fd, long disp, const char *need_init) +{ + static const char *libc = LIBC_NAME; + struct exec hdr; + struct relocation_info *reloc = NULL; + long block = 0; + long new_common = 0; /* Length of new common */ + struct nlist *syms = NULL; + struct nlist *sym; + struct nlist *end; + int 
init_p = 0; + + if (load_header(fd, &hdr, disp) == -1) return -1; + if (INVALID_OBJECT(hdr)) { + dln_errno = DLN_ENOEXEC; + return -1; + } + reloc = load_reloc(fd, &hdr, disp); + if (reloc == NULL) return -1; + + syms = load_sym(fd, &hdr, disp); + if (syms == NULL) { + free(reloc); + return -1; + } + + sym = syms; + end = syms + (hdr.a_syms / sizeof(struct nlist)); + while (sym < end) { + struct nlist *old_sym; + int value = sym->n_value; + +#ifdef N_INDR + if (sym->n_type == (N_INDR | N_EXT)) { + char *key = sym->n_un.n_name; + + if (st_lookup(sym_tbl, sym[1].n_un.n_name, &old_sym)) { + if (st_delete(undef_tbl, (st_data_t*)&key, NULL)) { + unlink_undef(key, old_sym->n_value); + free(key); + } + } + else { + struct indr_data data; + + data.name0 = sym->n_un.n_name; + data.name1 = sym[1].n_un.n_name; + st_foreach(reloc_tbl, reloc_repl, &data); + + st_insert(undef_tbl, strdup(sym[1].n_un.n_name), NULL); + if (st_delete(undef_tbl, (st_data_t*)&key, NULL)) { + free(key); + } + } + sym += 2; + continue; + } +#endif + if (sym->n_type == (N_UNDF | N_EXT)) { + if (st_lookup(sym_tbl, sym->n_un.n_name, &old_sym) == 0) { + old_sym = NULL; + } + + if (value) { + if (old_sym) { + sym->n_type = N_EXT | N_COMM; + sym->n_value = old_sym->n_value; + } + else { + int rnd = + value >= sizeof(double) ? sizeof(double) - 1 + : value >= sizeof(long) ? 
sizeof(long) - 1 + : sizeof(short) - 1; + + sym->n_type = N_COMM; + new_common += rnd; + new_common &= ~(long)rnd; + sym->n_value = new_common; + new_common += value; + } + } + else { + if (old_sym) { + sym->n_type = N_EXT | N_COMM; + sym->n_value = old_sym->n_value; + } + else { + sym->n_value = (long)dln_undefined; + st_insert(undef_tbl, strdup(sym->n_un.n_name), NULL); + } + } + } + sym++; + } + + block = load_text_data(fd, &hdr, hdr.a_bss + new_common, disp); + if (block == 0) goto err_exit; + + sym = syms; + while (sym < end) { + struct nlist *new_sym; + char *key; + + switch (sym->n_type) { + case N_COMM: + sym->n_value += hdr.a_text + hdr.a_data; + case N_TEXT|N_EXT: + case N_DATA|N_EXT: + + sym->n_value += block; + + if (st_lookup(sym_tbl, sym->n_un.n_name, &new_sym) != 0 + && new_sym->n_value != (long)dln_undefined) { + dln_errno = DLN_ECONFL; + goto err_exit; + } + + key = sym->n_un.n_name; + if (st_delete(undef_tbl, (st_data_t*)&key, NULL) != 0) { + unlink_undef(key, sym->n_value); + free(key); + } + + new_sym = (struct nlist*)xmalloc(sizeof(struct nlist)); + *new_sym = *sym; + new_sym->n_un.n_name = strdup(sym->n_un.n_name); + st_insert(sym_tbl, new_sym->n_un.n_name, new_sym); + break; + + case N_TEXT: + case N_DATA: + sym->n_value += block; + break; + } + sym++; + } + + /* + * First comes the text-relocation + */ + { + struct relocation_info * rel = reloc; + struct relocation_info * rel_beg = reloc + + (hdr.a_trsize/sizeof(struct relocation_info)); + struct relocation_info * rel_end = reloc + + (hdr.a_trsize+hdr.a_drsize)/sizeof(struct relocation_info); + + while (rel < rel_end) { + char *address = (char*)(rel->r_address + block); + long datum = 0; +#if defined(sun) && defined(sparc) + unsigned int mask = 0; +#endif + + if(rel >= rel_beg) + address += hdr.a_text; + + if (rel->r_extern) { /* Look it up in symbol-table */ + sym = &(syms[R_SYMBOL(rel)]); + switch (sym->n_type) { + case N_EXT|N_UNDF: + link_undef(sym->n_un.n_name, block, rel); + case 
N_EXT|N_COMM: + case N_COMM: + datum = sym->n_value; + break; + default: + goto err_exit; + } + } /* end.. look it up */ + else { /* is static */ + switch (R_SYMBOL(rel)) { + case N_TEXT: + case N_DATA: + datum = block; + break; + case N_BSS: + datum = block + new_common; + break; + case N_ABS: + break; + } + } /* end .. is static */ + if (R_PCREL(rel)) datum -= block; + +#if defined(sun) && defined(sparc) + datum += rel->r_addend; + datum >>= R_RIGHTSHIFT(rel); + mask = (1 << R_BITSIZE(rel)) - 1; + mask |= mask -1; + datum &= mask; + + switch (R_LENGTH(rel)) { + case 0: + *address &= ~mask; + *address |= datum; + break; + case 1: + *(short *)address &= ~mask; + *(short *)address |= datum; + break; + case 2: + *(long *)address &= ~mask; + *(long *)address |= datum; + break; + } +#else + switch (R_LENGTH(rel)) { + case 0: /* byte */ + if (datum < -128 || datum > 127) goto err_exit; + *address += datum; + break; + case 1: /* word */ + *(short *)address += datum; + break; + case 2: /* long */ + *(long *)address += datum; + break; + } +#endif + rel++; + } + } + + if (need_init) { + int len; + char **libs_to_be_linked = 0; + char *buf; + + if (undef_tbl->num_entries > 0) { + if (load_lib(libc) == -1) goto err_exit; + } + + init_funcname(&buf, need_init); + len = strlen(buf); + + for (sym = syms; symn_un.n_name; + if (name[0] == '_' && sym->n_value >= block) { + if (strcmp(name+1, "dln_libs_to_be_linked") == 0) { + libs_to_be_linked = (char**)sym->n_value; + } + else if (strcmp(name+1, buf) == 0) { + init_p = 1; + ((int (*)())sym->n_value)(); + } + } + } + if (libs_to_be_linked && undef_tbl->num_entries > 0) { + while (*libs_to_be_linked) { + load_lib(*libs_to_be_linked); + libs_to_be_linked++; + } + } + } + free(reloc); + free(syms); + if (need_init) { + if (init_p == 0) { + dln_errno = DLN_ENOINIT; + return -1; + } + if (undef_tbl->num_entries > 0) { + if (load_lib(libc) == -1) goto err_exit; + if (undef_tbl->num_entries > 0) { + dln_errno = DLN_EUNDEF; + return -1; + 
} + } + } + return 0; + + err_exit: + if (syms) free(syms); + if (reloc) free(reloc); + if (block) free((char*)block); + return -1; +} + +static int target_offset; +static int +search_undef(const char *key, int value, st_table *lib_tbl) +{ + long offset; + + if (st_lookup(lib_tbl, key, &offset) == 0) return ST_CONTINUE; + target_offset = offset; + return ST_STOP; +} + +struct symdef { + int rb_str_index; + int lib_offset; +}; + +char *dln_librrb_ary_path = DLN_DEFAULT_LIB_PATH; + +static int +load_lib(const char *lib) +{ + char *path, *file, fbuf[MAXPATHLEN]; + char armagic[SARMAG]; + int fd, size; + struct ar_hdr ahdr; + st_table *lib_tbl = NULL; + int *data, nsym; + struct symdef *base; + char *name_base; + + if (dln_init_p == 0) { + dln_errno = DLN_ENOINIT; + return -1; + } + + if (undef_tbl->num_entries == 0) return 0; + dln_errno = DLN_EBADLIB; + + if (lib[0] == '-' && lib[1] == 'l') { + long len = strlen(lib) + 4; + char *p = alloca(len); + snprintf(p, len, "lib%s.a", lib+2); + lib = p; + } + + /* library search path: */ + /* look for environment variable DLN_LIBRARY_PATH first. */ + /* then variable dln_librrb_ary_path. */ + /* if path is still NULL, use "." for path. 
*/ + path = getenv("DLN_LIBRARY_PATH"); + if (path == NULL) path = dln_librrb_ary_path; + + file = dln_find_file_r(lib, path, fbuf, sizeof(fbuf)); + fd = open(file, O_RDONLY); + if (fd == -1) goto syserr; + size = read(fd, armagic, SARMAG); + if (size == -1) goto syserr; + + if (size != SARMAG) { + dln_errno = DLN_ENOTLIB; + goto badlib; + } + size = read(fd, &ahdr, sizeof(ahdr)); + if (size == -1) goto syserr; + if (size != sizeof(ahdr) || sscanf(ahdr.ar_size, "%d", &size) != 1) { + goto badlib; + } + + if (strncmp(ahdr.ar_name, "__.SYMDEF", 9) == 0) { + /* make hash table from __.SYMDEF */ + + lib_tbl = st_init_strtable(); + data = (int*)xmalloc(size); + if (data == NULL) goto syserr; + size = read(fd, data, size); + nsym = *data / sizeof(struct symdef); + base = (struct symdef*)(data + 1); + name_base = (char*)(base + nsym) + sizeof(int); + while (nsym > 0) { + char *name = name_base + base->rb_str_index; + + st_insert(lib_tbl, name, base->lib_offset + sizeof(ahdr)); + nsym--; + base++; + } + for (;;) { + target_offset = -1; + st_foreach(undef_tbl, search_undef, lib_tbl); + if (target_offset == -1) break; + if (load_1(fd, target_offset, 0) == -1) { + st_free_table(lib_tbl); + free(data); + goto badlib; + } + if (undef_tbl->num_entries == 0) break; + } + free(data); + st_free_table(lib_tbl); + } + else { + /* linear library, need to scan (FUTURE) */ + + for (;;) { + int offset = SARMAG; + int found = 0; + struct exec hdr; + struct nlist *syms, *sym, *end; + + while (undef_tbl->num_entries > 0) { + found = 0; + lseek(fd, offset, 0); + size = read(fd, &ahdr, sizeof(ahdr)); + if (size == -1) goto syserr; + if (size == 0) break; + if (size != sizeof(ahdr) + || sscanf(ahdr.ar_size, "%d", &size) != 1) { + goto badlib; + } + offset += sizeof(ahdr); + if (load_header(fd, &hdr, offset) == -1) + goto badlib; + syms = load_sym(fd, &hdr, offset); + if (syms == NULL) goto badlib; + sym = syms; + end = syms + (hdr.a_syms / sizeof(struct nlist)); + while (sym < end) { + if 
(sym->n_type == N_EXT|N_TEXT + && st_lookup(undef_tbl, sym->n_un.n_name, NULL)) { + break; + } + sym++; + } + if (sym < end) { + found++; + free(syms); + if (load_1(fd, offset, 0) == -1) { + goto badlib; + } + } + offset += size; + if (offset & 1) offset++; + } + if (found) break; + } + } + close(fd); + return 0; + + syserr: + dln_errno = errno; + badlib: + if (fd >= 0) close(fd); + return -1; +} + +static int +load(const char *file) +{ + int fd; + int result; + + if (dln_init_p == 0) { + if (dln_init(dln_argv0) == -1) return -1; + } + result = strlen(file); + if (file[result-1] == 'a') { + return load_lib(file); + } + + fd = open(file, O_RDONLY); + if (fd == -1) { + dln_errno = errno; + return -1; + } + result = load_1(fd, 0, file); + close(fd); + + return result; +} + +void* +dln_sym(const char *name) +{ + struct nlist *sym; + + if (st_lookup(sym_tbl, name, &sym)) + return (void*)sym->n_value; + return NULL; +} + +#endif /* USE_DLN_A_OUT */ + +#ifdef USE_DLN_DLOPEN +# include +#endif + +#ifdef __hpux +#include +#include "dl.h" +#endif + +#if defined(_AIX) +#include /* for isdigit() */ +#include /* for global errno */ +#include +#endif + +#ifdef NeXT +#if NS_TARGET_MAJOR < 4 +#include +#else +#include +#ifndef NSLINKMODULE_OPTION_BINDNOW +#define NSLINKMODULE_OPTION_BINDNOW 1 +#endif +#endif +#else +#ifdef MACOSX_DYLD +#include +#endif +#endif + +#if defined _WIN32 && !defined __CYGWIN__ +#include +#endif + +#if ! 
defined _AIX +static const char * +dln_strerror(void) +{ +#ifdef USE_DLN_A_OUT + char *strerror(); + + switch (dln_errno) { + case DLN_ECONFL: + return "Symbol name conflict"; + case DLN_ENOINIT: + return "No initializer given"; + case DLN_EUNDEF: + return "Unresolved symbols"; + case DLN_ENOTLIB: + return "Not a library file"; + case DLN_EBADLIB: + return "Malformed library file"; + case DLN_EINIT: + return "Not initialized"; + default: + return strerror(dln_errno); + } +#endif + +#ifdef USE_DLN_DLOPEN + return (char*)dlerror(); +#endif + +#if defined _WIN32 && !defined __CYGWIN__ + static char message[1024]; + int error = GetLastError(); + char *p = message; + p += sprintf(message, "%d: ", error); + FormatMessage( + FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + error, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + p, + sizeof message - strlen(message), + NULL); + + for (p = message; *p; p++) { + if (*p == '\n' || *p == '\r') + *p = ' '; + } + return message; +#endif +} +#endif + +#if defined(_AIX) && ! 
defined(_IA64) +static void +aix_loaderror(const char *pathname) +{ + char *message[1024], errbuf[1024]; + int i,j; + + static const struct errtab { + int errnum; + const char * errstr; + } load_errtab[] = { + {L_ERROR_TOOMANY, "too many errors, rest skipped."}, + {L_ERROR_NOLIB, "can't load library:"}, + {L_ERROR_UNDEF, "can't find symbol in library:"}, + {L_ERROR_RLDBAD, + "RLD index out of range or bad relocation type:"}, + {L_ERROR_FORMAT, "not a valid, executable xcoff file:"}, + {L_ERROR_MEMBER, + "file not an archive or does not contain requested member:"}, + {L_ERROR_TYPE, "symbol table mismatch:"}, + {L_ERROR_ALIGN, "text alignment in file is wrong."}, + {L_ERROR_SYSTEM, "System error:"}, + {L_ERROR_ERRNO, NULL} + }; + +#define LOAD_ERRTAB_LEN (sizeof(load_errtab)/sizeof(load_errtab[0])) +#define ERRBUF_APPEND(s) strncat(errbuf, s, sizeof(errbuf)-strlen(errbuf)-1) + + snprintf(errbuf, sizeof(errbuf), "load failed - %s ", pathname); + + message[0] = NULL; + if (!loadquery(L_GETMESSAGES, &message[0], sizeof(message))) + ERRBUF_APPEND(strerror(errno)); + for(i = 0; message[i] && *message[i]; i++) { + int nerr = atoi(message[i]); + for (j=0; j= MAXPATHLEN) rb_loaderror("filename too long"); + + /* Load the file as an object one */ + init_funcname(&buf, file); + + strcpy(winfile, file); + + /* Load file */ + if ((handle = LoadLibrary(winfile)) == NULL) { + error = dln_strerror(); + goto failed; + } + + if ((init_fct = (void(*)())GetProcAddress(handle, buf)) == NULL) { + rb_loaderror("%s - %s\n%s", dln_strerror(), buf, file); + } + + /* Call the init code */ + (*init_fct)(); + return handle; +#else +#ifdef USE_DLN_A_OUT + if (load(file) == -1) { + error = dln_strerror(); + goto failed; + } + return 0; +#else + + char *buf; + /* Load the file as an object one */ + init_funcname(&buf, file); + +#ifdef USE_DLN_DLOPEN +#define DLN_DEFINED + { + void *handle; + void (*init_fct)(); + +#ifndef RTLD_LAZY +# define RTLD_LAZY 1 +#endif +#ifdef __INTERIX +# undef 
RTLD_GLOBAL +#endif +#ifndef RTLD_GLOBAL +# define RTLD_GLOBAL 0 +#endif + + /* Load file */ + if ((handle = (void*)dlopen(file, RTLD_LAZY|RTLD_GLOBAL)) == NULL) { + error = dln_strerror(); + goto failed; + } + + init_fct = (void(*)())dlsym(handle, buf); + if (init_fct == NULL) { + error = DLN_ERROR(); + dlclose(handle); + goto failed; + } + /* Call the init code */ + (*init_fct)(); + + return handle; + } +#endif /* USE_DLN_DLOPEN */ + +#ifdef __hpux +#define DLN_DEFINED + { + shl_t lib = NULL; + int flags; + void (*init_fct)(); + + flags = BIND_DEFERRED; + lib = shl_load(file, flags, 0); + if (lib == NULL) { + extern int errno; + rb_loaderror("%s - %s", strerror(errno), file); + } + shl_findsym(&lib, buf, TYPE_PROCEDURE, (void*)&init_fct); + if (init_fct == NULL) { + shl_findsym(&lib, buf, TYPE_UNDEFINED, (void*)&init_fct); + if (init_fct == NULL) { + errno = ENOSYM; + rb_loaderror("%s - %s", strerror(ENOSYM), file); + } + } + (*init_fct)(); + return (void*)lib; + } +#endif /* hpux */ + +#if defined(_AIX) && ! defined(_IA64) +#define DLN_DEFINED + { + void (*init_fct)(); + + init_fct = (void(*)())load((char*)file, 1, 0); + if (init_fct == NULL) { + aix_loaderror(file); + } + if (loadbind(0, (void*)dln_load, (void*)init_fct) == -1) { + aix_loaderror(file); + } + (*init_fct)(); + return (void*)init_fct; + } +#endif /* _AIX */ + +#if defined(NeXT) || defined(MACOSX_DYLD) +#define DLN_DEFINED +/*---------------------------------------------------- + By SHIROYAMA Takayuki Psi@fortune.nest.or.jp + + Special Thanks... + Yu tomoak-i@is.aist-nara.ac.jp, + Mi hisho@tasihara.nest.or.jp, + sunshine@sunshineco.com, + and... 
Miss ARAI Akino(^^;) + ----------------------------------------------------*/ +#if defined(NeXT) && (NS_TARGET_MAJOR < 4)/* NeXTSTEP rld functions */ + + { + NXStream* s; + unsigned long init_address; + char *object_files[2] = {NULL, NULL}; + + void (*init_fct)(); + + object_files[0] = (char*)file; + + s = NXOpenFile(2,NX_WRITEONLY); + + /* Load object file, if return value ==0 , load failed*/ + if(rld_load(s, NULL, object_files, NULL) == 0) { + NXFlush(s); + NXClose(s); + rb_loaderror("Failed to load %.200s", file); + } + + /* lookup the initial function */ + if(rld_lookup(s, buf, &init_address) == 0) { + NXFlush(s); + NXClose(s); + rb_loaderror("Failed to lookup Init function %.200s", file); + } + + NXFlush(s); + NXClose(s); + + /* Cannot call *init_address directory, so copy this value to + funtion pointer */ + init_fct = (void(*)())init_address; + (*init_fct)(); + return (void*)init_address; + } +#else/* OPENSTEP dyld functions */ + { + int dyld_result; + NSObjectFileImage obj_file; /* handle, but not use it */ + /* "file" is module file name . + "buf" is pointer to initial function name with "_" . 
*/ + + void (*init_fct)(); + + + dyld_result = NSCreateObjectFileImageFromFile(file, &obj_file); + + if (dyld_result != NSObjectFileImageSuccess) { + rb_loaderror("Failed to load %.200s", file); + } + + NSLinkModule(obj_file, file, NSLINKMODULE_OPTION_BINDNOW); + + /* lookup the initial function */ + if(!NSIsSymbolNameDefined(buf)) { + rb_loaderror("Failed to lookup Init function %.200s",file); + } + init_fct = NSAddressOfSymbol(NSLookupAndBindSymbol(buf)); + (*init_fct)(); + + return (void*)init_fct; + } +#endif /* rld or dyld */ +#endif + +#ifdef __BEOS__ +# define DLN_DEFINED + { + status_t err_stat; /* BeOS error status code */ + image_id img_id; /* extention module unique id */ + void (*init_fct)(); /* initialize function for extention module */ + + /* load extention module */ + img_id = load_add_on(file); + if (img_id <= 0) { + rb_loaderror("Failed to load add_on %.200s error_code=%x", + file, img_id); + } + + /* find symbol for module initialize function. */ + /* The Be Book KernelKit Images section described to use + B_SYMBOL_TYPE_TEXT for symbol of function, not + B_SYMBOL_TYPE_CODE. Why ? */ + /* strcat(init_fct_symname, "__Fv"); */ /* parameter nothing. */ + /* "__Fv" dont need! The Be Book Bug ? */ + err_stat = get_image_symbol(img_id, buf, + B_SYMBOL_TYPE_TEXT, (void **)&init_fct); + + if (err_stat != B_NO_ERROR) { + char real_name[MAXPATHLEN]; + + strlcpy(real_name, buf, MAXPATHLEN); + strlcat(real_name, "__Fv", MAXPATHLEN); + err_stat = get_image_symbol(img_id, real_name, + B_SYMBOL_TYPE_TEXT, (void **)&init_fct); + } + + if ((B_BAD_IMAGE_ID == err_stat) || (B_BAD_INDEX == err_stat)) { + unload_add_on(img_id); + rb_loaderror("Failed to lookup Init function %.200s", file); + } + else if (B_NO_ERROR != err_stat) { + char errmsg[] = "Internal of BeOS version. %.200s (symbol_name = %s)"; + unload_add_on(img_id); + rb_loaderror(errmsg, strerror(err_stat), buf); + } + + /* call module initialize function. 
*/ + (*init_fct)(); + return (void*)img_id; + } +#endif /* __BEOS__*/ + +#ifndef DLN_DEFINED + rb_notimplement(); +#endif + +#endif /* USE_DLN_A_OUT */ +#endif +#if !defined(_AIX) && !defined(NeXT) + failed: + rb_loaderror("%s - %s", error, file); +#endif + +#endif /* NO_DLN_LOAD */ + return 0; /* dummy return */ +} + +static char *dln_find_1(const char *fname, const char *path, char *buf, int size, int exe_flag); + +char * +dln_find_exe_r(const char *fname, const char *path, char *buf, int size) +{ + if (!path) { + path = getenv(PATH_ENV); + } + + if (!path) { +#if defined(_WIN32) + path = "/usr/local/bin;/usr/ucb;/usr/bin;/bin;."; +#else + path = "/usr/local/bin:/usr/ucb:/usr/bin:/bin:."; +#endif + } + return dln_find_1(fname, path, buf, size, 1); +} + +char * +dln_find_file_r(const char *fname, const char *path, char *buf, int size) +{ + if (!path) path = "."; + return dln_find_1(fname, path, buf, size, 0); +} + +static char fbuf[MAXPATHLEN]; + +char * +dln_find_exe(const char *fname, const char *path) +{ + return dln_find_exe_r(fname, path, fbuf, sizeof(fbuf)); +} + +char * +dln_find_file(const char *fname, const char *path) +{ + return dln_find_file_r(fname, path, fbuf, sizeof(fbuf)); +} + +static char * +dln_find_1(const char *fname, const char *path, char *fbuf, int size, + int exe_flag /* non 0 if looking for executable. 
*/) +{ + register const char *dp; + register const char *ep; + register char *bp; + struct stat st; + int i, fspace; +#ifdef DOSISH + static const char extension[][5] = { + ".exe", ".com", ".cmd", ".bat", + }; + size_t j; + int is_abs = 0, has_path = 0; + const char *ext = 0; + const char *p = fname; +#endif + +#define RETURN_IF(expr) if (expr) return (char *)fname; + + RETURN_IF(!fname); +#ifdef DOSISH +# ifndef CharNext +# define CharNext(p) ((p)+1) +# endif +# ifdef DOSISH_DRIVE_LETTER + if (((p[0] | 0x20) - 'a') < 26 && p[1] == ':') { + p += 2; + is_abs = 1; + } +# endif + switch (*p) { + case '/': case '\\': + is_abs = 1; + p++; + } + has_path = is_abs; + while (*p) { + switch (*p) { + case '/': case '\\': + has_path = 1; + ext = 0; + p++; + break; + case '.': + ext = p; + p++; + break; + default: + p = CharNext(p); + } + } + if (ext) { + for (j = 0; STRCASECMP(ext, extension[j]); ) { + if (++j == sizeof(extension) / sizeof(extension[0])) { + ext = 0; + break; + } + } + } + ep = bp = 0; + if (!exe_flag) { + RETURN_IF(is_abs); + } + else if (has_path) { + RETURN_IF(ext); + i = p - fname; + if (i + 1 > size) goto toolong; + fspace = size - i - 1; + bp = fbuf; + ep = p; + memcpy(fbuf, fname, i + 1); + goto needs_extension; + } +#endif + + RETURN_IF(fname[0] == '/'); + RETURN_IF(strncmp("./", fname, 2) == 0 || strncmp("../", fname, 3) == 0); + RETURN_IF(exe_flag && strchr(fname, '/')); + +#undef RETURN_IF + + for (dp = path;; dp = ++ep) { + register int l; + + /* extract a component */ + ep = strchr(dp, PATH_SEP[0]); + if (ep == NULL) + ep = dp+strlen(dp); + + /* find the length of that component */ + l = ep - dp; + bp = fbuf; + fspace = size - 2; + if (l > 0) { + /* + ** If the length of the component is zero length, + ** start from the current directory. If the + ** component begins with "~", start from the + ** user's $HOME environment variable. Otherwise + ** take the path literally. 
+ */ + + if (*dp == '~' && (l == 1 || +#if defined(DOSISH) + dp[1] == '\\' || +#endif + dp[1] == '/')) { + char *home; + + home = getenv("HOME"); + if (home != NULL) { + i = strlen(home); + if ((fspace -= i) < 0) + goto toolong; + memcpy(bp, home, i); + bp += i; + } + dp++; + l--; + } + if (l > 0) { + if ((fspace -= l) < 0) + goto toolong; + memcpy(bp, dp, l); + bp += l; + } + + /* add a "/" between directory and filename */ + if (ep[-1] != '/') + *bp++ = '/'; + } + + /* now append the file name */ + i = strlen(fname); + if ((fspace -= i) < 0) { + toolong: + fprintf(stderr, "openpath: pathname too long (ignored)\n"); + *bp = '\0'; + fprintf(stderr, "\tDirectory \"%s\"\n", fbuf); + fprintf(stderr, "\tFile \"%s\"\n", fname); + goto next; + } + memcpy(bp, fname, i + 1); + +#if defined(DOSISH) + if (exe_flag && !ext) { + needs_extension: + for (j = 0; j < sizeof(extension) / sizeof(extension[0]); j++) { + if (fspace < strlen(extension[j])) { + fprintf(stderr, "openpath: pathname too long (ignored)\n"); + fprintf(stderr, "\tDirectory \"%.*s\"\n", (int) (bp - fbuf), fbuf); + fprintf(stderr, "\tFile \"%s%s\"\n", fname, extension[j]); + continue; + } + strcpy(bp + i, extension[j]); + if (stat(fbuf, &st) == 0) + return fbuf; + } + goto next; + } +#endif /* _WIN32 or __EMX__ */ + + if (stat(fbuf, &st) == 0) { + if (exe_flag == 0) return fbuf; + /* looking for executable */ + if (!S_ISDIR(st.st_mode) && eaccess(fbuf, X_OK) == 0) + return fbuf; + } + next: + /* if not, and no other alternatives, life is bleak */ + if (*ep == '\0') { + return NULL; + } + + /* otherwise try the next component in the search path */ + } +} diff --git a/dln.h b/dln.h new file mode 100644 index 0000000..1c51edd --- /dev/null +++ b/dln.h @@ -0,0 +1,41 @@ +/********************************************************************** + + dln.h - + + $Author: nobu $ + created at: Wed Jan 19 16:53:09 JST 1994 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + 
+**********************************************************************/ + +#ifndef DLN_H +#define DLN_H + +#ifdef __cplusplus +# ifndef HAVE_PROTOTYPES +# define HAVE_PROTOTYPES 1 +# endif +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +#undef _ +#ifdef HAVE_PROTOTYPES +# define _(args) args +#else +# define _(args) () +#endif /* _(args) expands to the argument list when prototypes are available, otherwise to an empty () */ + +DEPRECATED(char *dln_find_exe(const char*,const char*)); /* (fname, path); deprecated in favour of dln_find_exe_r */ +DEPRECATED(char *dln_find_file(const char*,const char*)); /* (fname, path); deprecated in favour of dln_find_file_r */ +char *dln_find_exe_r(const char*,const char*,char*,int); /* (fname, path, buf, size): caller supplies the result buffer */ +char *dln_find_file_r(const char*,const char*,char*,int); /* (fname, path, buf, size): caller supplies the result buffer */ + +#ifdef USE_DLN_A_OUT +extern char *dln_argv0; +#endif + +void *dln_load(const char*); /* argument is the file to load */ +#endif diff --git a/dmydln.c b/dmydln.c new file mode 100644 index 0000000..09ea068 --- /dev/null +++ b/dmydln.c @@ -0,0 +1,2 @@ +#define NO_DLN_LOAD 1 +#include "dln.c" /* compiles dln.c with NO_DLN_LOAD defined */ diff --git a/dmyencoding.c b/dmyencoding.c new file mode 100644 index 0000000..1bd1106 --- /dev/null +++ b/dmyencoding.c @@ -0,0 +1,2 @@ +#define NO_LOCALE_CHARMAP 1 +#include "encoding.c" /* compiles encoding.c with NO_LOCALE_CHARMAP defined */ diff --git a/dmyext.c b/dmyext.c new file mode 100644 index 0000000..4d273f7 --- /dev/null +++ b/dmyext.c @@ -0,0 +1,4 @@ +void +Init_ext(void) +{ /* intentionally empty: this variant of Init_ext initialises nothing */ +} diff --git a/enc/Makefile.in b/enc/Makefile.in new file mode 100644 index 0000000..8ba04f3 --- /dev/null +++ b/enc/Makefile.in @@ -0,0 +1,63 @@ +encsrcdir = @srcdir@ +topdir = .
+prefix = @prefix@ +exec_prefix = @exec_prefix@ +libdir = @libdir@ +top_srcdir = $(encsrcdir:/enc=) +srcdir = $(top_srcdir) +arch = @arch@ +EXTOUT = @EXTOUT@ +hdrdir = $(srcdir)/include +arch_hdrdir = $(EXTOUT)/include/$(arch) +ENCSODIR = $(EXTOUT)/$(arch)/enc +TRANSSODIR = $(ENCSODIR)/trans +DLEXT = @DLEXT@ +OBJEXT = @OBJEXT@ + +BUILTIN_ENCS = ascii.c us_ascii.c\ + unicode.c utf_8.c + +BUILTIN_TRANSES = newline.trans + +RUBY_SO_NAME = @RUBY_SO_NAME@ +LIBRUBY = @LIBRUBY@ +LIBRUBYARG_SHARED = @LIBRUBYARG_SHARED@ +LIBRUBYARG_STATIC = $(LIBRUBYARG_SHARED) + +empty = +CC = @CC@ +OUTFLAG = @OUTFLAG@$(empty) +COUTFLAG = @COUTFLAG@$(empty) +CFLAGS = $(CCDLFLAGS) @CFLAGS@ @ARCH_FLAG@ +cflags = @cflags@ +optflags = @optflags@ +debugflags = @debugflags@ +warnflags = @warnflags@ +CCDLFLAGS = @CCDLFLAGS@ +INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir) -I$(top_srcdir) +DEFS = @DEFS@ +CPPFLAGS = @CPPFLAGS@ -DONIG_ENC_REGISTER=rb_enc_register +LDFLAGS = @LDFLAGS@ +LDSHARED = @LDSHARED@ +ldflags = $(LDFLAGS) +dldflags = @DLDFLAGS@ +archflag = @ARCH_FLAG@ +DLDFLAGS = $(ldflags) $(dldflags) $(archflag) +RUBY = $(MINIRUBY) + +WORKDIRS = $(ENCSODIR) $(TRANSSODIR) enc enc/trans + +RM = @RM@ +MAKEDIRS = @MAKEDIRS@ + +.SUFFIXES: .trans + +all: make-workdir + +make-workdir: + $(MAKEDIRS) $(WORKDIRS) + +clean: + +distclean: clean clean-srcs + @$(RM) enc.mk diff --git a/enc/ascii.c b/enc/ascii.c new file mode 100644 index 0000000..3d62ec9 --- /dev/null +++ b/enc/ascii.c @@ -0,0 +1,93 @@ +/********************************************************************** + ascii.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +OnigEncodingDefine(ascii, ASCII) = { + onigenc_single_byte_mbc_enc_len, + "ASCII-8BIT",/* name */ + 1, /* max byte length */ + 1, /* min byte length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + onigenc_ascii_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("BINARY", "ASCII-8BIT") +ENC_REPLICATE("IBM437", "ASCII-8BIT") +ENC_ALIAS("CP437", "IBM437") +ENC_REPLICATE("IBM737", "ASCII-8BIT") +ENC_ALIAS("CP737", "IBM737") +ENC_REPLICATE("IBM775", "ASCII-8BIT") +ENC_ALIAS("CP775", "IBM775") +ENC_REPLICATE("CP850", "ASCII-8BIT") +ENC_ALIAS("IBM850", "CP850") +ENC_REPLICATE("IBM852", "ASCII-8BIT") +ENC_REPLICATE("CP852", "IBM852") +ENC_REPLICATE("IBM855", "ASCII-8BIT") +ENC_REPLICATE("CP855", "IBM855") +ENC_REPLICATE("IBM857", "ASCII-8BIT") +ENC_ALIAS("CP857", "IBM857") +ENC_REPLICATE("IBM860", "ASCII-8BIT") +ENC_ALIAS("CP860", "IBM860") +ENC_REPLICATE("IBM861", "ASCII-8BIT") +ENC_ALIAS("CP861", "IBM861") +ENC_REPLICATE("IBM862", "ASCII-8BIT") +ENC_ALIAS("CP862", "IBM862") +ENC_REPLICATE("IBM863", "ASCII-8BIT") +ENC_ALIAS("CP863", "IBM863") +ENC_REPLICATE("IBM864", "ASCII-8BIT") +ENC_ALIAS("CP864", "IBM864") +ENC_REPLICATE("IBM865", "ASCII-8BIT") +ENC_ALIAS("CP865", "IBM865") +ENC_REPLICATE("IBM866", "ASCII-8BIT") +ENC_ALIAS("CP866", "IBM866") +ENC_REPLICATE("IBM869", "ASCII-8BIT") +ENC_ALIAS("CP869", "IBM869") +ENC_REPLICATE("Windows-1258", "ASCII-8BIT") +ENC_ALIAS("CP1258", "Windows-1258") +ENC_REPLICATE("GB1988", "ASCII-8BIT") +ENC_REPLICATE("macCentEuro", "ASCII-8BIT") +ENC_REPLICATE("macCroatian", "ASCII-8BIT") +ENC_REPLICATE("macCyrillic", "ASCII-8BIT") 
+ENC_REPLICATE("macGreek", "ASCII-8BIT") +ENC_REPLICATE("macIceland", "ASCII-8BIT") +ENC_REPLICATE("macRoman", "ASCII-8BIT") +ENC_REPLICATE("macRomania", "ASCII-8BIT") +ENC_REPLICATE("macThai", "ASCII-8BIT") +ENC_REPLICATE("macTurkish", "ASCII-8BIT") +ENC_REPLICATE("macUkraine", "ASCII-8BIT") diff --git a/enc/big5.c b/enc/big5.c new file mode 100644 index 0000000..9993f47 --- /dev/null +++ b/enc/big5.c @@ -0,0 +1,216 @@ +/********************************************************************** + big5.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +static const int EncLen_BIG5[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1 } state_t; +#define A ACCEPT +#define F FAILURE +static const signed char trans[][0x100] = { + { /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F + }, + { 
/* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F + } +}; +#undef A +#undef F + +static int +big5_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) +{ /* Classifies the character starting at p by running its bytes through the trans[] state table: row 0 is indexed by the first byte, row 1 by the second. The result is one of the ONIGENC_CONSTRUCT_MBCLEN_* values (CHARFOUND, INVALID or NEEDMORE). */ + int firstbyte = *p++; + state_t s = trans[0][firstbyte]; +#define RETURN(n) \ + return s == ACCEPT ?
ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) : \ + ONIGENC_CONSTRUCT_MBCLEN_INVALID() + if (s < 0) RETURN(1); /* ACCEPT or FAILURE already decided by the first byte */ + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_BIG5[firstbyte]-1); /* input ends after the lead byte */ + s = trans[s][*p++]; + RETURN(2); +#undef RETURN +} + +static OnigCodePoint +big5_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc) +{ /* thin wrapper: forwards to onigenc_mbn_mbc_to_code */ + return onigenc_mbn_mbc_to_code(enc, p, end); +} + +static int +big5_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) +{ /* thin wrapper: forwards to onigenc_mb2_code_to_mbc */ + return onigenc_mb2_code_to_mbc(enc, code, buf); +} + +static int +big5_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, + UChar* lower, OnigEncoding enc) +{ /* thin wrapper: forwards to onigenc_mbn_mbc_case_fold */ + return onigenc_mbn_mbc_case_fold(enc, flag, + pp, end, lower); +} + +#if 0 +static int +big5_is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, OnigEncoding enc) +{ + return onigenc_mbn_is_mbc_ambiguous(enc, flag, pp, end); +} +#endif + +static int +big5_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc) +{ /* thin wrapper: forwards to onigenc_mb2_is_code_ctype */ + return onigenc_mb2_is_code_ctype(enc, code, ctype); +} + +static const char BIG5_CAN_BE_TRAIL_TABLE[256] = { /* indexed by byte value; non-zero marks a byte that BIG5_ISMB_TRAIL treats as a possible trail byte */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 +}; + +#define BIG5_ISMB_FIRST(byte) (EncLen_BIG5[byte] > 1) +#define
BIG5_ISMB_TRAIL(byte) BIG5_CAN_BE_TRAIL_TABLE[(byte)] + +static UChar* +big5_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc) +{ /* Returns a pointer at or before s, never before start, chosen as the head of the character that covers s. Only bytes that can be trail bytes trigger the backward scan. */ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + if (BIG5_ISMB_TRAIL(*p)) { + while (p > start) { /* step back over possible lead bytes; stop just after the first byte that is not one */ + if (! BIG5_ISMB_FIRST(*--p)) { + p++; + break; + } + } + } + len = enclen(enc, p, end); + if (p + len > s) return (UChar* )p; /* the character at p already covers s */ + p += len; + return (UChar* )(p + ((s - p) & ~1)); /* & ~1 clears the low bit: advance by the even part of the remaining distance */ +} + +static int +big5_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED) +{ /* FALSE when *s can be a trail byte, TRUE otherwise */ + const UChar c = *s; + + return (BIG5_ISMB_TRAIL(c) ? FALSE : TRUE); +} + +OnigEncodingDefine(big5, BIG5) = { + big5_mbc_enc_len, + "Big5", /* name */ + 2, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + big5_mbc_to_code, + onigenc_mb2_code_to_mbclen, + big5_code_to_mbc, + big5_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + big5_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + big5_left_adjust_char_head, + big5_is_allowed_reverse_match +}; +ENC_ALIAS("CP950", "BIG5") diff --git a/enc/cp949.c b/enc/cp949.c new file mode 100644 index 0000000..009443a --- /dev/null +++ b/enc/cp949.c @@ -0,0 +1,219 @@ +/********************************************************************** + cp949.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static const int EncLen_CP949[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +static const char CP949_CAN_BE_TRAIL_TABLE[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 +}; + +#define CP949_ISMB_FIRST(byte) (EncLen_CP949[byte] > 1) +#define CP949_ISMB_TRAIL(byte) CP949_CAN_BE_TRAIL_TABLE[(byte)] + +typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1 } state_t; +#define A ACCEPT +#define F FAILURE +static const signed char trans[][0x100] = { + { /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 8 */ A, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F + }, + { /* S1 0 1 2 3 4 5 6 7 
8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, A, A, A, A, A, A, F, F, F, F, F, + /* 6 */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, F, F, F, F, F, + /* 8 */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F + } +}; +#undef A +#undef F + +static int +cp949_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) +{ /* Classifies the character starting at p by running its bytes through the trans[] state table: row 0 is indexed by the first byte, row 1 by the second. The result is one of the ONIGENC_CONSTRUCT_MBCLEN_* values (CHARFOUND, INVALID or NEEDMORE). */ + int firstbyte = *p++; + state_t s = trans[0][firstbyte]; +#define RETURN(n) \ + return s == ACCEPT ?
ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) : \ + ONIGENC_CONSTRUCT_MBCLEN_INVALID() + if (s < 0) RETURN(1); /* ACCEPT or FAILURE already decided by the first byte */ + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_CP949[firstbyte]-1); /* input ends after the lead byte */ + s = trans[s][*p++]; + RETURN(2); +#undef RETURN +} + +static OnigCodePoint +cp949_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc) +{ /* thin wrapper: forwards to onigenc_mbn_mbc_to_code */ + return onigenc_mbn_mbc_to_code(enc, p, end); +} + +static int +cp949_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) +{ /* thin wrapper: forwards to onigenc_mb2_code_to_mbc */ + return onigenc_mb2_code_to_mbc(enc, code, buf); +} + +static int +cp949_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, + UChar* lower, OnigEncoding enc) +{ /* thin wrapper: forwards to onigenc_mbn_mbc_case_fold */ + return onigenc_mbn_mbc_case_fold(enc, flag, + pp, end, lower); +} + +#if 0 +static int +cp949_is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, OnigEncoding enc) +{ + return onigenc_mbn_is_mbc_ambiguous(enc, flag, pp, end); +} +#endif + +static int +cp949_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc) +{ /* thin wrapper: forwards to onigenc_mb2_is_code_ctype */ + return onigenc_mb2_is_code_ctype(enc, code, ctype); +} + +static UChar* +cp949_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc) +{ /* Returns a pointer at or before s, never before start, chosen as the head of the character that covers s. Only bytes that can be trail bytes trigger the backward scan. */ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + if (CP949_ISMB_TRAIL(*p)) { + while (p > start) { /* step back over possible lead bytes; stop just after the first byte that is not one */ + if (! CP949_ISMB_FIRST(*--p)) { + p++; + break; + } + } + } + len = enclen(enc, p, end); + if (p + len > s) return (UChar* )p; /* the character at p already covers s */ + p += len; + return (UChar* )(p + ((s - p) & ~1)); /* & ~1 clears the low bit: advance by the even part of the remaining distance */ +} + +static int +cp949_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED) +{ /* FALSE when *s can be a trail byte, TRUE otherwise */ + const UChar c = *s; + return (CP949_ISMB_TRAIL(c) ?
FALSE : TRUE); +} + +OnigEncodingDefine(cp949, CP949) = { + cp949_mbc_enc_len, + "CP949", /* name */ + 2, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + cp949_mbc_to_code, + onigenc_mb2_code_to_mbclen, + cp949_code_to_mbc, + cp949_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + cp949_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + cp949_left_adjust_char_head, + cp949_is_allowed_reverse_match +}; +/* + * Name: CP949 + * Link: http://www.microsoft.com/globaldev/reference/dbcs/949.mspx + * Link: http://en.wikipedia.org/wiki/EUC-KR#EUC-KR + */ diff --git a/enc/depend b/enc/depend new file mode 100644 index 0000000..91a3265 --- /dev/null +++ b/enc/depend @@ -0,0 +1,142 @@ +% CONFIG["DLDFLAGS"].sub!(/(\A|\s)(-\S+(?:\s*\w*)?\$\(TARGET\)\S*)/, '\1') +% dldflags = $2 +% enable_shared = CONFIG['ENABLE_SHARED'] == 'yes' +% deffile = (true if /\$\(DEFFILE\)/ =~ CONFIG["LINK_SO"]) +% encs = Dir.open($srcdir) {|d| d.grep(/.+\.c\z/)} - BUILTIN_ENCS +% encs.each {|e| e.chomp!(".c")} +% encs.reject! {|e| !ENC_PATTERNS.any? {|p| File.fnmatch?(p, e)}} if !ENC_PATTERNS.empty? +% encs.reject! {|e| NOENC_PATTERNS.any? {|p| File.fnmatch?(p, e)}} +% alphanumeric_order = proc {|e| e.scan(/(\d+)|(\D+)/).map {|n,a| a||[n.size,n.to_i]}.flatten} +% encs = encs.sort_by(&alphanumeric_order) +% encs.unshift(encs.delete("encdb")) +% atrans = [] +% trans = Dir.open($srcdir+"/trans") {|d| +% d.select {|e| +% if e.chomp!('.trans') +% atrans << e +% true +% elsif e.chomp!('.c') +% true +% end +% } +% } +% trans -= BUILTIN_TRANSES +% atrans -= BUILTIN_TRANSES +% trans.uniq! +% atrans = atrans.sort_by(&alphanumeric_order) +% trans = trans.sort_by(&alphanumeric_order) +% trans.unshift(trans.delete("transdb")) +% trans.compact! +% trans |= atrans +% trans.map! 
{|e| "trans/#{e}"} +% dependencies = encs + trans +% cleanlibs = Shellwords.shellwords(CONFIG["cleanlibs"] || "") +% cleanobjs = Shellwords.shellwords(CONFIG["cleanobjs"] || "") +% rule_subst = CONFIG["RULE_SUBST"] || "%s" +% transvpath = rule_subst.dup.sub!(/\{[^{}]+\}/, '$(TRANSVPATH)/') || "enc/trans/%s" +% transvpath_prefix = (rule_subst.dup.sub!(/\{[^{}]+\}/, '{$(TRANSVPATH)}') || "") % "" +% if File::ALT_SEPARATOR +% pathrep = proc {|path| path.gsub('/', File::ALT_SEPARATOR).gsub(/\$\(([@ +LIBPATH = <%=libpathflag($DEFLIBPATH)%> +LIBS = <%= +if enable_shared or RbConfig.expand(CONFIG["LIBRUBY"].dup) != RbConfig.expand(CONFIG["LIBRUBY_A"].dup) + CONFIG['LIBRUBYARG'] +else + '' +end %> <%=CONFIG['LIBS']%> $(EXTLIBS) + +ENCOBJS = <%=encs.map {|e|"enc/#{e}.$(OBJEXT)"}.join(" \\\n\t ")%><%="\n" if encs.size>1%> +ENCSOS = <%=encs.map {|e|"$(ENCSODIR)/#{e}.$(DLEXT)"}.join(" \\\n\t ")%><%="\n" if encs.size>1%> +ENCCLEANLIBS = <%=cleanlibs.map {|clean| + clean.gsub(/\$\*(\.\w+)?/) {"$(ENCOBJS#{$1 ? ":.#{CONFIG["OBJEXT"]}=#{$1}" : ""})"} +}.join(" ")%> +ENCCLEANOBJS = <%=cleanobjs.map {|clean| + clean.gsub(/\$\*(\.\w+)?/) {"$(ENCOBJS#{$1 ? ":.#{CONFIG["OBJEXT"]}=#{$1}" : ""})"} +}.join(" ")%> + +TRANSVPATH = $(srcdir)/enc/trans + +TRANSCSRCS = <%=atrans.map {|e| transvpath % "#{e}.c"}.join(" \\\n\t ")%><%="\n" if trans.size>1%> +TRANSOBJS = <%=trans.map {|e|"enc/#{e}.$(OBJEXT)"}.join(" \\\n\t ")%><%="\n" if trans.size>1%> +TRANSSOS = <%=trans.map {|e|"$(ENCSODIR)/#{e}.$(DLEXT)"}.join(" \\\n\t ")%><%="\n" if trans.size>1%> +TRANSCLEANLIBS = <%=cleanlibs.map {|clean| + clean.gsub(/\$\*(\.\w+)?/) {"$(TRANSOBJS#{$1 ? ":.#{CONFIG["OBJEXT"]}=#{$1}" : ""})"} +}.join(" ")%> +TRANSCLEANOBJS = <%=cleanobjs.map {|clean| + clean.gsub(/\$\*(\.\w+)?/) {"$(TRANSOBJS#{$1 ? 
":.#{CONFIG["OBJEXT"]}=#{$1}" : ""})"} +}.join(" ")%> + +encs: all +all: enc trans +enc: $(ENCSOS) +trans: $(TRANSSOS) + +srcs: $(TRANSCSRCS) + +<%=transvpath_prefix%>.trans<%=transvpath_prefix%>.c: + $(MINIRUBY) "$(srcdir)/tool/transcode-tblgen.rb" -vo "$@" "$<" + +% unless encs.empty? or trans.empty? + +% unless encs.empty? +$(ENCOBJS): regenc.h oniguruma.h config.h defines.h +% end +% unless trans.empty? +$(TRANSOBJS): ruby.h intern.h config.h defines.h missing.h encoding.h oniguruma.h st.h transcode_data.h +% end +% atrans.each do |e| +% src = "#{e}.trans" + +<%=transvpath % "#{e}.c"%>: <%= transvpath % "#{e}.trans"%> +% src = [*IO.read(File.join($srcdir, "trans", src)).scan(/^\s*require\s+[\'\"]([^\'\"]*)/).flatten.map{|c|c+".rb"}] +<%=transvpath % "#{e}.c"%>: <%= src.map {|e| transvpath % "#{e}"}.join(" ")%> $(srcdir)/tool/transcode-tblgen.rb +% end + +% end +% link_so = LINK_SO.gsub(/\n/, "\n\t") +% link_so.gsub!(/(-(?:implib|pdb):\S+)-\$\(arch\)\./, '\1.') +% dependencies.each do |e| +% obj = "enc/#{e}.$(OBJEXT)" +% df = ("enc/#{e}.def" if deffile) +$(ENCSODIR)/<%=e%>.$(DLEXT): <%=obj%> +% cmd = link_so.sub(/\$\(OBJS\)/) {obj} +% base = File.basename(e) +% cmd.sub!(/(?=\$\(DLDFLAGS\))/) {dldflags.sub(/\$\(TARGET\)/) {base} + " "} if dldflags +% if df + echo> <%=df%> EXPORTS + echo>> <%=df%> <%=EXPORT_PREFIX%>Init_<%=base%> +% cmd.sub!(/\$\(DEFFILE\)/) {df} +% cmd.gsub!(/-(?:implib|pdb):/) {|s|"#{s}enc/#{e.sub(/[^\/]+\z/, '')}"} +% end + @$(MAKEDIRS) "$(@D)" + <%=cmd%> + +% end +% dependencies.each do |e| +<%="enc/#{e}.$(OBJEXT)"%>: <%="$(encsrcdir)/#{e}.c"%> + -@$(MAKEDIRS) "$(@D)" + <%=COMPILE_C.gsub(/\$(\()?<(\:[^)]+)?(\))?/){"$(encsrcdir)/#{e}.c"}%> + +% end + +enc/encdb.$(OBJEXT): encdb.h +enc/trans/transdb.$(OBJEXT): transdb.h + +clean: +% %w[$(ENCSOS) $(ENCOBJS) $(ENCCLEANOBJS) $(ENCCLEANLIBS) $(TRANSSOS) $(TRANSOBJS) $(TRANSCLEANOBJS) $(TRANSCLEANLIBS)].each do |clean| + @$(RM) <%=pathrep[clean]%> +% end +% %w[$(TRANSSODIR) $(ENCSODIR)].each do|dir| 
+ @-rmdir <%=pathrep[dir]%> +% end + +clean-srcs: + @$(RM) <%=pathrep['$(TRANSCSRCS)']%> +% %w[enc/trans enc].each do|dir| + @-rmdir <%=pathrep[dir]%> +% end diff --git a/enc/emacs_mule.c b/enc/emacs_mule.c new file mode 100644 index 0000000..be7f842 --- /dev/null +++ b/enc/emacs_mule.c @@ -0,0 +1,340 @@ +/********************************************************************** + emacs_mule.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" + + +#define emacsmule_islead(c) ((UChar )((c) - 0x81) > 0x9d - 0x81) + +/* + CHARACTER := ASCII_CHAR | MULTIBYTE_CHAR + MULTIBYTE_CHAR := PRIMARY_CHAR_1 | PRIMARY_CHAR_2 + | SECONDARY_CHAR_1 | SECONDARY_CHAR_2 + PRIMARY_CHAR_1 := LEADING_CODE_PRI C1 + PRIMARY_CHAR_2 := LEADING_CODE_PRI C1 C2 + SECONDARY_CHAR_1 := LEADING_CODE_SEC LEADING_CODE_EXT C1 + SECONDARY_CHAR_2 := LEADING_CODE_SEC LEADING_CODE_EXT C1 C2 + ASCII_CHAR := 0 | 1 | ... | 127 + LEADING_CODE_PRI := 129 | 130 | ... | 153 + LEADING_CODE_SEC := 154 | 155 | 156 | 157 + C1, C2, LEADING_CODE_EXT := 160 | 161 | ... | 255 + */ + +static const int EncLen_EmacsMule[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1, S2, S3, S4, S5, S6 } state_t; +#define A ACCEPT +#define F FAILURE +static const signed char trans[][0x100] = { + { /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, 
A, A, A, A, A, A, A, A, A, A, A, + /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 8 */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 9 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 5, 6, F, F, + /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A + }, + { /* S2 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, 
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + }, + { /* S3 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S4 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 
+ /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S5 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ 2, 2, 2, 2, 2, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S6 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, 
F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, F + }, + +}; +#undef A +#undef F + +static int +mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) +{ + int firstbyte = *p++; + state_t s; + s = trans[0][firstbyte]; + if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1) : + ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EmacsMule[firstbyte]-1); + s = trans[s][*p++]; + if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2) : + ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EmacsMule[firstbyte]-2); + s = trans[s][*p++]; + if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(3) : + ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EmacsMule[firstbyte]-3); + s = trans[s][*p++]; + return s == ACCEPT ? 
ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4) :
+                       ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+}
+
+/*
+ * Decode the character starting at p into a code point.
+ *
+ * The code point is simply the character's bytes packed big-endian:
+ * the first byte, then each following byte shifted in from the right.
+ * The byte count comes from enclen(); decoding stops early if `end`
+ * is reached, returning whatever has been accumulated so far.
+ */
+static OnigCodePoint
+mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
+{
+  int c, i, len;
+  OnigCodePoint n;
+
+  len = enclen(enc, p, end);
+  n = (OnigCodePoint )*p++;
+  if (len == 1) return n;
+
+  for (i = 1; i < len; i++) {
+    if (p >= end) break;   /* truncated input: keep the partial value */
+    c = *p++;
+    n <<= 8;  n += c;
+  }
+  return n;
+}
+
+/*
+ * Return the number of bytes needed to encode `code`.
+ *
+ * ASCII code points take 1 byte.  Otherwise the length is chosen from the
+ * most significant byte that is >= 0x80 (4, 3 or 2 bytes).  Any other
+ * value yields ONIGERR_INVALID_CODE_POINT_VALUE.
+ *
+ * NOTE(review): the `code > 0xffffffff` branch can only be taken if
+ * OnigCodePoint is wider than 32 bits - confirm against its typedef.
+ */
+static int
+code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
+{
+  if (ONIGENC_IS_CODE_ASCII(code)) return 1;
+  else if (code > 0xffffffff) return 0;
+  else if ((code & 0xff000000) >= 0x80000000) return 4;
+  else if ((code &   0xff0000) >=   0x800000) return 3;
+  else if ((code &     0xff00) >=     0x8000) return 2;
+  else
+    return ONIGERR_INVALID_CODE_POINT_VALUE;
+}
+
+/*
+ * Encode `code` into buf and return the number of bytes written.
+ *
+ * Each of the three upper bytes is emitted only when it is non-zero; the
+ * low byte is always emitted.  The bytes just written are then re-measured
+ * with enclen(): if the measured length differs from the number of bytes
+ * written, ONIGERR_INVALID_CODE_POINT_VALUE is returned instead.
+ */
+static int
+code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc)
+{
+  UChar *p = buf;
+
+  if ((code & 0xff000000) != 0) *p++ = (UChar )(((code >> 24) & 0xff));
+  if ((code &   0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff));
+  if ((code &     0xff00) != 0) *p++ = (UChar )(((code >>  8) & 0xff));
+  *p++ = (UChar )(code & 0xff);
+
+  if (enclen(enc, buf, p) != (p - buf))
+    return ONIGERR_INVALID_CODE_POINT_VALUE;
+  return p - buf;
+}
+
+/*
+ * Case-fold one character from *pp into `lower` and advance *pp past it.
+ *
+ * ASCII bytes are lower-cased with ONIGENC_ASCII_CODE_TO_LOWER_CASE.
+ * Multibyte characters have no case mapping here and are copied verbatim.
+ * Returns the number of bytes stored in `lower`.  `flag` is not consulted.
+ */
+static int
+mbc_case_fold(OnigCaseFoldType flag,
+	      const UChar** pp, const UChar* end, UChar* lower,
+	      OnigEncoding enc)
+{
+  int len;
+  const UChar* p = *pp;
+
+  if (ONIGENC_IS_MBC_ASCII(p)) {
+    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+    (*pp)++;
+    return 1;
+  }
+  else {
+    int i;
+
+    /* Use enclen(), as mbc_to_code()/code_to_mbc() above and euc_jp.c's
+     * mbc_case_fold() do.  Calling mbc_enc_len() directly hands back its
+     * INVALID / NEEDMORE status codes for malformed or truncated input;
+     * used as a length, such a value would skip the copy loop and move
+     * *pp backwards instead of forwards. */
+    len = enclen(enc, p, end);
+    for (i = 0; i < len; i++) {
+      *lower++ = *p++;
+    }
+    (*pp) += len;
+    return len; /* return byte length of converted char to lower */
+  }
+}
+
+/*
+ * Given a position s inside [start, end), return the position this
+ * encoding treats as the head of the character containing s: walk
+ * backwards from s until emacsmule_islead() is true or start is reached.
+ * `end` and `enc` are not used.
+ *
+ * NOTE(review): emacsmule_islead(), defined near the top of this file, is
+ * true for bytes *outside* 0x81..0x9d.  Per the grammar comment there,
+ * 0x81..0x9d (129..157) are the leading codes and 0xa0..0xff the trailing
+ * bytes, so this loop steps back over leading codes and stops on trailing
+ * bytes.  That looks inverted - confirm before relying on the result for
+ * multibyte input.  Behaviour is intentionally left unchanged here.
+ */
+static UChar*
+left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc)
+{
+  const UChar *p;
+
+  if (s <= start) return (UChar* )s;
+  p = s;
+
+  while (!emacsmule_islead(*p) && p > start) p--;
+  return (UChar* )p;
+}
+
+/*
+ * Character-type test for a code point.
+ *
+ * Code points below 128 are classified by ONIGENC_IS_ASCII_CODE_CTYPE.
+ * Every other code point reports TRUE for *any* ctype as long as
+ * code_to_mbclen() says it encodes to more than one byte, and FALSE
+ * otherwise (including when code_to_mbclen() returns an error code).
+ */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED)
+{
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+  else
+    return (code_to_mbclen(code, enc) > 1 ? TRUE : FALSE);
+}
+
+/*
+ * Name:    Emacs-Mule
+ * Link:    http://www.m17n.org/mule/pricai96/mule.en.html
+ */
+/* Positional initializer: entries must stay in this order. */
+OnigEncodingDefine(emacs_mule, Emacs_Mule) = {
+  mbc_enc_len,
+  "Emacs-Mule",   /* name */
+  4,          /* max enc length */
+  1,          /* min enc length */
+  onigenc_is_mbc_newline_0x0a,
+  mbc_to_code,
+  code_to_mbclen,
+  code_to_mbc,
+  mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  is_code_ctype,
+  onigenc_not_support_get_ctype_code_range,
+  left_adjust_char_head,
+  onigenc_always_true_is_allowed_reverse_match,
+  0
+};
+
+ENC_REPLICATE("stateless-ISO-2022-JP", "Emacs-Mule")
diff --git a/enc/encdb.c b/enc/encdb.c
new file mode 100644
index 0000000..cfdbdbb
--- /dev/null
+++ b/enc/encdb.c
@@ -0,0 +1,25 @@
+/**********************************************************************
+
+  enc/encdb.c -
+
+  $Author: nobu $
+  created at: Mon Apr  7 15:51:31 2008
+
+  Copyright (C) 2008 Yukihiro Matsumoto
+
+**********************************************************************/
+
+int rb_encdb_replicate(const char *alias, const char *orig);
+int rb_encdb_alias(const char *alias, const char *orig);
+int rb_encdb_dummy(const char *name);
+void rb_encdb_declare(const char *name);
+/* Map the ENC_* declarations onto the registration functions above. */
+#define ENC_REPLICATE(name, orig) rb_encdb_replicate(name, orig)
+#define ENC_ALIAS(name, orig) rb_encdb_alias(name, orig)
+#define ENC_DUMMY(name) rb_encdb_dummy(name)
+#define ENC_DEFINE(name) rb_encdb_declare(name)
+
+/*
+ * The function body is the contents of encdb.h, included in place with
+ * the ENC_* macros above in effect.
+ * NOTE(review): encdb.h is not part of this file; presumably it is a
+ * generated list of ENC_* statements - confirm against the build rules.
+ */
+void
+Init_encdb(void)
+{
+#include "encdb.h"
+}
diff --git a/enc/euc_jp.c b/enc/euc_jp.c
new file mode 100644
index 0000000..21f30ad
--- /dev/null
+++ b/enc/euc_jp.c
@@ -0,0 +1,388 @@
+/**********************************************************************
+  euc_jp.c -  Oniguruma (regular expression library)
+**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" + + +#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) + +static const int EncLen_EUCJP[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1, S2 } state_t; +#define A ACCEPT +#define F FAILURE +static const signed char trans[][0x100] = { + { /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, 1, 2, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, + /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F + }, + { /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F + }, + { /* S2 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, F
+  },
+
+};
+#undef A
+#undef F
+/*
+ * Measure the character starting at p by running its bytes through the
+ * `trans` state table, beginning in state 0.
+ *
+ * A negative state is terminal: ACCEPT reports a complete character of
+ * the number of bytes consumed so far, anything else reports an invalid
+ * sequence.  If the input ends (p == e) before a terminal state is
+ * reached, the result is NEEDMORE with the count of bytes still missing,
+ * taken from EncLen_EUCJP[firstbyte] minus the bytes already consumed.
+ * At most three bytes are examined.
+ */
+static int
+mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
+{
+  int firstbyte = *p++;
+  state_t s;
+  s = trans[0][firstbyte];
+  if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1) :
+                                  ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EUCJP[firstbyte]-1);
+  s = trans[s][*p++];
+  if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2) :
+                                  ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EUCJP[firstbyte]-2);
+  s = trans[s][*p++];
+  return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(3) :
+                       ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+}
+/*
+ * Decode the character starting at p into a code point by packing its
+ * bytes big-endian.  The byte count comes from enclen(); decoding stops
+ * early if `end` is reached and returns the value accumulated so far.
+ */
+static OnigCodePoint
+mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
+{
+  int c, i, len;
+  OnigCodePoint n;
+
+  len = enclen(enc, p, end);
+  n = (OnigCodePoint )*p++;
+  if (len == 1) return n;
+
+  for (i = 1; i < len; i++) {
+    if (p >= end) break;   /* truncated input: keep the partial value */
+    c = *p++;
+    n <<= 8;  n += c;
+  }
+  return n;
+}
+/*
+ * Return the number of bytes needed to encode `code`: 1 for ASCII,
+ * 0 if the value does not fit in 24 bits, otherwise 3 or 2 depending on
+ * the most significant byte that is >= 0x80.  Any other value yields
+ * ONIGERR_INVALID_CODE_POINT_VALUE.
+ */
+static int
+code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
+{
+  if (ONIGENC_IS_CODE_ASCII(code)) return 1;
+  else if (code > 0xffffff) return 0;
+  else if ((code & 0xff0000) >= 0x800000) return 3;
+  else if ((code &   0xff00) >=   0x8000) return 2;
+  else
+    return ONIGERR_INVALID_CODE_POINT_VALUE;
+}
+/* Compiled out by the #if 0 below: returns the first (most significant
+ * non-zero) byte that `code` would encode to. */
+#if 0
+static int
+code_to_mbc_first(OnigCodePoint code)
+{
+  int first;
+
+  if ((code & 0xff0000) != 0) {
+    first = (code >> 16) & 0xff;
+  }
+  else if ((code & 0xff00) != 0) {
+    first = (code >> 8) & 0xff;
+  }
+  else {
+    return (int )code;
+  }
+  return first;
+}
+#endif
+/*
+ * Encode `code` into buf and return the number of bytes written.
+ *
+ * The two upper bytes are emitted only when non-zero; the low byte is
+ * always emitted.  The written bytes are then re-measured with enclen();
+ * if the measured length differs from the number of bytes written,
+ * ONIGERR_INVALID_CODE_POINT_VALUE is returned instead.
+ */
+static int
+code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc)
+{
+  UChar *p = buf;
+
+  if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff));
+  if ((code &   0xff00) != 0) *p++ = (UChar )(((code >>  8) & 0xff));
+  *p++ = (UChar )(code & 0xff);
+
+#if 1
+  if (enclen(enc, buf, p) != (p - buf))
+    return ONIGERR_INVALID_CODE_POINT_VALUE;
+#endif
+  return p - buf;
+}
+/*
+ * Case-fold one character from *pp into `lower` and advance *pp past it.
+ *
+ * ASCII bytes are lower-cased with ONIGENC_ASCII_CODE_TO_LOWER_CASE;
+ * multibyte characters are copied verbatim, enclen() bytes at a time.
+ * Returns the number of bytes stored in `lower`.  `flag` is not consulted.
+ */
+static int
+mbc_case_fold(OnigCaseFoldType flag,
+	      const UChar** pp, const UChar* end, UChar* lower,
+	      OnigEncoding enc)
+{
+  int len;
+  const UChar* p = *pp;
+
+  if (ONIGENC_IS_MBC_ASCII(p)) {
+    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+    (*pp)++;
+    return 1;
+  }
+  else {
+    int i;
+
+    len = enclen(enc, p, end);
+    for (i = 0; i < len; i++) {
+      *lower++ = *p++;
+    }
+    (*pp) += len;
+    return len; /* return byte length of converted char to lower */
+  }
+}
+/*
+ * Given a position s inside [start, end), return the head of the
+ * character that contains s.
+ *
+ * Walks back from s to the nearest byte for which eucjp_islead() is true
+ * (per its definition earlier in this file: a byte outside 0xa1..0xfe),
+ * or to start.  If the character measured there with enclen() covers s,
+ * that position is the answer.  Otherwise that character is skipped and
+ * the remaining distance to s is rounded down to an even number of
+ * bytes - presumably because the bytes in between are taken to be
+ * two-byte characters (TODO confirm).
+ */
+static UChar*
+left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc)
+{
+  /* In this encoding
+     mb-trail bytes don't mix with single bytes.
+  */
+  const UChar *p;
+  int len;
+
+  if (s <= start) return (UChar* )s;
+  p = s;
+
+  while (!eucjp_islead(*p) && p > start) p--;
+  len = enclen(enc, p, end);
+  if (p + len > s) return (UChar* )p;
+  p += len;
+  return (UChar* )(p + ((s - p) & ~1));   /* ~1 clears the low bit: even offset */
+}
+/*
+ * Returns TRUE when the byte at s is <= 0x7e, or is 0x8e or 0x8f;
+ * FALSE for every other byte.  `end` and `enc` are not used.
+ */
+static int
+is_allowed_reverse_match(const UChar* s, const UChar* end, OnigEncoding enc ARG_UNUSED)
+{
+  const UChar c = *s;
+  if (c <= 0x7e || c == 0x8e || c == 0x8f)
+    return TRUE;
+  else
+    return FALSE;
+}
+
+/* File-local state for the named character properties registered below.
+ * PropertyInited is set to 1 by init_property_list(). */
+static int PropertyInited = 0;
+static const OnigCodePoint** PropertyList;
+static int PropertyListNum;
+static int PropertyListSize;
+static hash_table_type* PropertyNameTable;
+/* Code ranges for "Hiragana".  Layout as the data reads: a count of
+ * ranges, then that many (first, last) code point pairs. */
+static const OnigCodePoint CR_Hiragana[] = {
+  1,
+  0xa4a1, 0xa4f3
+}; /* CR_Hiragana */
+/* Code ranges for "Katakana"; same layout (3 ranges). */
+static const OnigCodePoint CR_Katakana[] = {
+  3,
+  0xa5a1, 0xa5f6,
+  0xaaa6, 0xaaaf,
+  0xaab1, 0xaadd
+}; /* CR_Katakana */
+/*
+ * Register the "Hiragana" and "Katakana" properties and mark the list as
+ * initialised.  Returns r.
+ *
+ * NOTE(review): r is never assigned explicitly and the `end:` label is
+ * never targeted explicitly in this function; presumably the
+ * PROPERTY_LIST_ADD_PROP macro (defined outside this file) assigns r and
+ * jumps to `end` on failure - confirm against its definition.
+ */
+static int
+init_property_list(void)
+{
+  int r;
+
+  PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana);
+  PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana);
+  PropertyInited = 1;
+
+ end:
+  return r;
+}
+/*
+ * Map a property name in [p, end) to a ctype value.
+ *
+ * The name is looked up in PropertyNameTable; when the lookup returns 0
+ * the request is passed on to onigenc_minimum_property_name_to_ctype(),
+ * otherwise the value stored by the lookup is returned.
+ * PROPERTY_LIST_INIT_CHECK is a macro defined outside this file -
+ * presumably it runs init_property_list() on first use (TODO confirm).
+ */
+static int
+property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
+{
+  st_data_t ctype;
+
+  PROPERTY_LIST_INIT_CHECK;
+
+  if (onig_st_lookup_strend(PropertyNameTable, p, end, &ctype) == 0) {
+    return onigenc_minimum_property_name_to_ctype(enc, p, end);
+  }
+
+  return ctype;
+}
+/*
+ * Character-type test for a code point.
+ *
+ * Standard ctypes (<= ONIGENC_MAX_STD_CTYPE): code points below 128 use
+ * ONIGENC_IS_ASCII_CODE_CTYPE; other code points are TRUE only when
+ * CTYPE_IS_WORD_GRAPH_PRINT(ctype) holds and code_to_mbclen() reports
+ * more than one byte, and fall through to FALSE otherwise.
+ *
+ * Larger ctypes index PropertyList after subtracting
+ * ONIGENC_MAX_STD_CTYPE + 1; an index past PropertyListNum returns
+ * ONIGERR_TYPE_BUG, otherwise the code is tested against that range list.
+ */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED)
+{
+  if (ctype <= ONIGENC_MAX_STD_CTYPE) {
+    if (code < 128)
+      return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+    else {
+      if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
+	return (code_to_mbclen(code, enc) > 1 ? TRUE : FALSE);
+      }
+    }
+  }
+  else {
+    PROPERTY_LIST_INIT_CHECK;
+
+    ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
+    if (ctype >= (unsigned int )PropertyListNum)
+      return ONIGERR_TYPE_BUG;
+
+    return onig_is_in_code_range((UChar* )PropertyList[ctype], code);
+  }
+
+  return FALSE;
+}
+/*
+ * Hand back the code range list for a property ctype.
+ *
+ * Standard ctypes return ONIG_NO_SUPPORT_CONFIG.  For larger ctypes,
+ * *sb_out is set to 0x80 and *ranges to the matching PropertyList entry,
+ * and 0 is returned; an index past PropertyListNum returns
+ * ONIGERR_TYPE_BUG (with *sb_out already written).
+ */
+static int
+get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
+		     const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED)
+{
+  if (ctype <= ONIGENC_MAX_STD_CTYPE) {
+    return ONIG_NO_SUPPORT_CONFIG;
+  }
+  else {
+    *sb_out = 0x80;
+
+    PROPERTY_LIST_INIT_CHECK;
+
+    ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
+    if (ctype >= (OnigCtype )PropertyListNum)
+      return ONIGERR_TYPE_BUG;
+
+    *ranges = PropertyList[ctype];
+    return 0;
+  }
+}
+
+/* Positional initializer: entries must stay in this order. */
+OnigEncodingDefine(euc_jp, EUC_JP) = {
+  mbc_enc_len,
+  "EUC-JP",   /* name */
+  3,          /* max enc length */
+  1,          /* min enc length */
+  onigenc_is_mbc_newline_0x0a,
+  mbc_to_code,
+  code_to_mbclen,
+  code_to_mbc,
+  mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  property_name_to_ctype,
+  is_code_ctype,
+  get_ctype_code_range,
+  left_adjust_char_head,
+  is_allowed_reverse_match,
+  0
+};
+/*
+ * Name: EUC-JP
+ * MIBenum: 18
+ * Link: http://www.iana.org/assignments/character-sets
+ * Link: http://home.m05.itscom.net/numa/cde/sjis-euc/sjis-euc.html
+ */
+ENC_ALIAS("eucJP", "EUC-JP") /* UI-OSF Application Platform Profile for Japanese Environment Version 1.1 */
+
+/*
+ * Name: eucJP-ms
+ * Link: http://home.m05.itscom.net/numa/cde/ucs-conv/ucs-conv.html
+ * Link: http://www2d.biglobe.ne.jp/~msyk/charcode/cp932/eucJP-ms.html
+ * Link:
http://ja.wikipedia.org/wiki/EUC-JP + */ +ENC_REPLICATE("eucJP-ms", "EUC-JP") /* TOG/JVC CDE/Motif Technical WG */ +ENC_ALIAS("euc-jp-ms", "eucJP-ms") + +/* + * Name: CP51932 + * Link: http://search.cpan.org/src/NARUSE/Encode-EUCJPMS-0.07/ucm/cp51932.ucm + * Link: http://legacy-encoding.sourceforge.jp/wiki/index.php?cp51932 + * Link: http://msyk.at.webry.info/200511/article_2.html + */ +ENC_REPLICATE("CP51932", "EUC-JP") diff --git a/enc/euc_kr.c b/enc/euc_kr.c new file mode 100644 index 0000000..f20a57e --- /dev/null +++ b/enc/euc_kr.c @@ -0,0 +1,192 @@ +/********************************************************************** + euc_kr.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static const int EncLen_EUCKR[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1 } state_t; +#define A ACCEPT +#define F FAILURE +static const signed char trans[][0x100] = { + { /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 
A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F + }, + { /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F + } +}; +#undef A +#undef F + +static int +euckr_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) +{ + int firstbyte = *p++; + state_t s = trans[0][firstbyte]; +#define RETURN(n) \ + return s == ACCEPT ? 
ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) : \
+                       ONIGENC_CONSTRUCT_MBCLEN_INVALID()
+  if (s < 0) RETURN(1);   /* terminal after one byte: complete char or invalid */
+  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EUCKR[firstbyte]-1);
+  s = trans[s][*p++];
+  RETURN(2);              /* second byte decides: two-byte char or invalid */
+#undef RETURN
+}
+/* Decode the character at p into a code point; delegates to the generic
+ * onigenc_mbn_mbc_to_code(). */
+static OnigCodePoint
+euckr_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
+{
+  return onigenc_mbn_mbc_to_code(enc, p, end);
+}
+/* Encode `code` into buf; delegates to the generic
+ * onigenc_mb2_code_to_mbc(). */
+static int
+euckr_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc)
+{
+  return onigenc_mb2_code_to_mbc(enc, code, buf);
+}
+/* Case-fold one character from *pp into `lower`; delegates to the generic
+ * onigenc_mbn_mbc_case_fold(). */
+static int
+euckr_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
+                    UChar* lower, OnigEncoding enc)
+{
+  return onigenc_mbn_mbc_case_fold(enc, flag,
+                                   pp, end, lower);
+}
+/* Compiled out by the #if 0 below. */
+#if 0
+static int
+euckr_is_mbc_ambiguous(OnigCaseFoldType flag,
+		       const UChar** pp, const UChar* end, OnigEncoding enc)
+{
+  return onigenc_mbn_is_mbc_ambiguous(enc, flag, pp, end);
+}
+#endif
+/* Character-type test for a code point; delegates to the generic
+ * onigenc_mb2_is_code_ctype(). */
+static int
+euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc)
+{
+  return onigenc_mb2_is_code_ctype(enc, code, ctype);
+}
+/* True for bytes below 0xa1 and for 0xff; used by
+ * euckr_left_adjust_char_head() as the stop condition when scanning
+ * backwards. */
+#define euckr_islead(c)    ((c) < 0xa1 || (c) == 0xff)
+/*
+ * Given a position s inside [start, end), return the head of the
+ * character that contains s.
+ *
+ * Walks back from s to the nearest byte for which euckr_islead() is true,
+ * or to start.  If the character measured there with enclen() covers s,
+ * that position is the answer.  Otherwise that character is skipped and
+ * the remaining distance to s is rounded down to an even number of
+ * bytes - presumably because the bytes in between are taken to be
+ * two-byte characters (TODO confirm).
+ */
+static UChar*
+euckr_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc)
+{
+  /* Assumed in this encoding,
+     mb-trail bytes don't mix with single bytes.
+  */
+  const UChar *p;
+  int len;
+
+  if (s <= start) return (UChar* )s;
+  p = s;
+
+  while (!euckr_islead(*p) && p > start) p--;
+  len = enclen(enc, p, end);
+  if (p + len > s) return (UChar* )p;
+  p += len;
+  return (UChar* )(p + ((s - p) & ~1));   /* ~1 clears the low bit: even offset */
+}
+/* Returns TRUE when the byte at s is <= 0x7e, FALSE otherwise. */
+static int
+euckr_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
+{
+  const UChar c = *s;
+  if (c <= 0x7e) return TRUE;
+  else           return FALSE;
+}
+/* Positional initializer: entries must stay in this order.  Unlike the
+ * euc_jp and emacs_mule tables elsewhere in this patch, no trailing 0
+ * entry is given here. */
+OnigEncodingDefine(euc_kr, EUC_KR) = {
+  euckr_mbc_enc_len,
+  "EUC-KR",   /* name */
+  2,          /* max enc length */
+  1,          /* min enc length */
+  onigenc_is_mbc_newline_0x0a,
+  euckr_mbc_to_code,
+  onigenc_mb2_code_to_mbclen,
+  euckr_code_to_mbc,
+  euckr_mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  euckr_is_code_ctype,
+  onigenc_not_support_get_ctype_code_range,
+  euckr_left_adjust_char_head,
+  euckr_is_allowed_reverse_match
+};
+ENC_ALIAS("eucKR", "EUC-KR")
diff --git a/enc/euc_tw.c b/enc/euc_tw.c
new file mode 100644
index 0000000..d025a0d
--- /dev/null
+++ b/enc/euc_tw.c
@@ -0,0 +1,234 @@
+/**********************************************************************
+  euc_tw.c -  Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static const int EncLen_EUCTW[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1, S2, S3 } state_t; +#define A ACCEPT +#define F FAILURE +static const signed char trans[][0x100] = { + { /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 2 */ A, A, A, A, A, 
A, A, A, A, A, A, A, A, A, A, A, + /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, 2, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F + }, + { /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F + }, + { /* S2 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, 
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* b */ 3, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S3 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F + } +}; +#undef A +#undef F + +static int +euctw_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) +{ + int firstbyte = *p++; + state_t s = trans[0][firstbyte]; +#define RETURN(n) \ + return s == ACCEPT ? 
ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) : \ + ONIGENC_CONSTRUCT_MBCLEN_INVALID() + if (s < 0) RETURN(1); + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EUCTW[firstbyte]-1); + s = trans[s][*p++]; + if (s < 0) RETURN(2); + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-2); + s = trans[s][*p++]; + if (s < 0) RETURN(3); + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-3); + s = trans[s][*p++]; + RETURN(4); +#undef RETURN +} + +static OnigCodePoint +euctw_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED) +{ + return onigenc_mbn_mbc_to_code(enc, p, end); +} + +static int +euctw_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) +{ + return onigenc_mb4_code_to_mbc(enc, code, buf); +} + +static int +euctw_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, + UChar* lower, OnigEncoding enc) +{ + return onigenc_mbn_mbc_case_fold(enc, flag, + pp, end, lower); +} + +#if 0 +static int +euctw_is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, OnigEncoding enc) +{ + return onigenc_mbn_is_mbc_ambiguous(enc, flag, pp, end); +} +#endif + +static int +euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc) +{ + return onigenc_mb4_is_code_ctype(enc, code, ctype); +} + +#define euctw_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) + +static UChar* +euctw_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc) +{ + /* Assumed in this encoding, + mb-trail bytes don't mix with single bytes. 
+ */ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + while (!euctw_islead(*p) && p > start) p--; + len = enclen(enc, p, end); + if (p + len > s) return (UChar* )p; + p += len; + return (UChar* )(p + ((s - p) & ~1)); +} + +static int +euctw_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED) +{ + const UChar c = *s; + if (c <= 0x7e) return TRUE; + else return FALSE; +} + +OnigEncodingDefine(euc_tw, EUC_TW) = { + euctw_mbc_enc_len, + "EUC-TW", /* name */ + 4, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + euctw_mbc_to_code, + onigenc_mb4_code_to_mbclen, + euctw_code_to_mbc, + euctw_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + euctw_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + euctw_left_adjust_char_head, + euctw_is_allowed_reverse_match +}; +ENC_ALIAS("eucTW", "EUC-TW") diff --git a/enc/gb18030.c b/enc/gb18030.c new file mode 100644 index 0000000..16c8c1c --- /dev/null +++ b/enc/gb18030.c @@ -0,0 +1,596 @@ +/********************************************************************** + gb18030.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2005-2007 KUBO Takehiro + * K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#if 1 +#define DEBUG_GB18030(arg) +#else +#define DEBUG_GB18030(arg) printf arg +#endif + +enum { + C1, /* one-byte char */ + C2, /* one-byte or second of two-byte char */ + C4, /* one-byte or second or fourth of four-byte char */ + CM /* first of two- or four-byte char or second of two-byte char */ +}; + +static const char GB18030_MAP[] = { + C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, + C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, + C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, + C4, C4, C4, C4, C4, C4, C4, C4, C4, C4, C1, C1, C1, C1, C1, C1, + C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, + C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, + C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, + C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C1, + C2, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, 
CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, C1 +}; + +typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1, S2, S3 } state_t; +#define A ACCEPT +#define F FAILURE +static const signed char trans[][0x100] = { + { /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 8 */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F + }, + { /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, F, F, F, F, F, F, + /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F, + /* 8 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* a */ A, A, A, 
A, A, A, A, A, A, A, A, A, A, A, A, A, + /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F + }, + { /* S2 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* 9 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* a */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* b */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* c */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* d */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* e */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* f */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, F + }, + { /* S3 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ A, A, A, A, A, A, A, A, A, A, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* b 
*/ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + } +}; +#undef A +#undef F + +static int +gb18030_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) +{ + int firstbyte = *p++; + state_t s = trans[0][firstbyte]; +#define RETURN(n) \ + return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) : \ + ONIGENC_CONSTRUCT_MBCLEN_INVALID() + if (s < 0) RETURN(1); + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(2-1); + s = trans[s][*p++]; + if (s < 0) RETURN(2); + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-2); + s = trans[s][*p++]; + if (s < 0) RETURN(3); + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-3); + s = trans[s][*p++]; + RETURN(4); +#undef RETURN +} + +static OnigCodePoint +gb18030_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc) +{ + int c, i, len; + OnigCodePoint n; + + len = enclen(enc, p, end); + n = (OnigCodePoint )(*p++); + if (len == 1) return n; + + for (i = 1; i < len; i++) { + if (p >= end) break; + c = *p++; + n <<= 8; n += c; + } + return n; +} + +static int +gb18030_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) +{ + return onigenc_mb4_code_to_mbc(enc, code, buf); +} + +static int +gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, + UChar* lower, OnigEncoding enc) +{ + return onigenc_mbn_mbc_case_fold(enc, flag, + pp, end, lower); +} + +#if 0 +static int +gb18030_is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, OnigEncoding enc) +{ + return onigenc_mbn_is_mbc_ambiguous(enc, flag, pp, end); +} +#endif + +static int +gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc) +{ + return onigenc_mb4_is_code_ctype(enc, code, ctype); +} + +enum state { + S_START, + 
S_one_C2, + S_one_C4, + S_one_CM, + + S_odd_CM_one_CX, + S_even_CM_one_CX, + + /* CMC4 : pair of "CM C4" */ + S_one_CMC4, + S_odd_CMC4, + S_one_C4_odd_CMC4, + S_even_CMC4, + S_one_C4_even_CMC4, + + S_odd_CM_odd_CMC4, + S_even_CM_odd_CMC4, + + S_odd_CM_even_CMC4, + S_even_CM_even_CMC4, + + /* C4CM : pair of "C4 CM" */ + S_odd_C4CM, + S_one_CM_odd_C4CM, + S_even_C4CM, + S_one_CM_even_C4CM, + + S_even_CM_odd_C4CM, + S_odd_CM_odd_C4CM, + S_even_CM_even_C4CM, + S_odd_CM_even_C4CM +}; + +static UChar* +gb18030_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc) +{ + const UChar *p; + enum state state = S_START; + + DEBUG_GB18030(("----------------\n")); + for (p = s; p >= start; p--) { + DEBUG_GB18030(("state %d --(%02x)-->\n", state, *p)); + switch (state) { + case S_START: + switch (GB18030_MAP[*p]) { + case C1: + return (UChar *)s; + case C2: + state = S_one_C2; /* C2 */ + break; + case C4: + state = S_one_C4; /* C4 */ + break; + case CM: + state = S_one_CM; /* CM */ + break; + } + break; + case S_one_C2: /* C2 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)s; + case CM: + state = S_odd_CM_one_CX; /* CM C2 */ + break; + } + break; + case S_one_C4: /* C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)s; + case CM: + state = S_one_CMC4; + break; + } + break; + case S_one_CM: /* CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)s; + case C4: + state = S_odd_C4CM; + break; + case CM: + state = S_odd_CM_one_CX; /* CM CM */ + break; + } + break; + + case S_odd_CM_one_CX: /* CM C2 */ /* CM CM */ /* CM CM CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 1); + case CM: + state = S_even_CM_one_CX; + break; + } + break; + case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)s; + case CM: + state = 
S_odd_CM_one_CX; + break; + } + break; + + case S_one_CMC4: /* CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 1); + case C4: + state = S_one_C4_odd_CMC4; /* C4 CM C4 */ + break; + case CM: + state = S_even_CM_one_CX; /* CM CM C4 */ + break; + } + break; + case S_odd_CMC4: /* CM C4 CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 1); + case C4: + state = S_one_C4_odd_CMC4; + break; + case CM: + state = S_odd_CM_odd_CMC4; + break; + } + break; + case S_one_C4_odd_CMC4: /* C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 1); + case CM: + state = S_even_CMC4; /* CM C4 CM C4 */ + break; + } + break; + case S_even_CMC4: /* CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 3); + case C4: + state = S_one_C4_even_CMC4; + break; + case CM: + state = S_odd_CM_even_CMC4; + break; + } + break; + case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 3); + case CM: + state = S_odd_CMC4; + break; + } + break; + + case S_odd_CM_odd_CMC4: /* CM CM C4 CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 3); + case CM: + state = S_even_CM_odd_CMC4; + break; + } + break; + case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 1); + case CM: + state = S_odd_CM_odd_CMC4; + break; + } + break; + + case S_odd_CM_even_CMC4: /* CM CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 1); + case CM: + state = S_even_CM_even_CMC4; + break; + } + break; + case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 3); + case CM: + state = S_odd_CM_even_CMC4; + break; + } + break; + + case S_odd_C4CM: /* C4 CM 
*/ /* C4 CM C4 CM C4 CM*/ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)s; + case CM: + state = S_one_CM_odd_C4CM; /* CM C4 CM */ + break; + } + break; + case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 2); /* |CM C4 CM */ + case C4: + state = S_even_C4CM; + break; + case CM: + state = S_even_CM_odd_C4CM; + break; + } + break; + case S_even_C4CM: /* C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 2); /* C4|CM C4 CM */ + case CM: + state = S_one_CM_even_C4CM; + break; + } + break; + case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 0); /*|CM C4 CM C4|CM */ + case C4: + state = S_odd_C4CM; + break; + case CM: + state = S_even_CM_even_C4CM; + break; + } + break; + + case S_even_CM_odd_C4CM: /* CM CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 0); /* |CM CM|C4|CM */ + case CM: + state = S_odd_CM_odd_C4CM; + break; + } + break; + case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 2); /* |CM CM|CM C4 CM */ + case CM: + state = S_even_CM_odd_C4CM; + break; + } + break; + + case S_even_CM_even_C4CM: /* CM CM C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 2); /* |CM CM|C4|CM C4 CM */ + case CM: + state = S_odd_CM_even_C4CM; + break; + } + break; + case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 0); /* |CM CM|CM C4 CM C4|CM */ + case CM: + state = S_even_CM_even_C4CM; + break; + } + break; + } + } + + DEBUG_GB18030(("state %d\n", state)); + switch (state) { + case S_START: return (UChar *)(s - 0); + case S_one_C2: return (UChar *)(s - 0); + case S_one_C4: return 
(UChar *)(s - 0); + case S_one_CM: return (UChar *)(s - 0); + + case S_odd_CM_one_CX: return (UChar *)(s - 1); + case S_even_CM_one_CX: return (UChar *)(s - 0); + + case S_one_CMC4: return (UChar *)(s - 1); + case S_odd_CMC4: return (UChar *)(s - 1); + case S_one_C4_odd_CMC4: return (UChar *)(s - 1); + case S_even_CMC4: return (UChar *)(s - 3); + case S_one_C4_even_CMC4: return (UChar *)(s - 3); + + case S_odd_CM_odd_CMC4: return (UChar *)(s - 3); + case S_even_CM_odd_CMC4: return (UChar *)(s - 1); + + case S_odd_CM_even_CMC4: return (UChar *)(s - 1); + case S_even_CM_even_CMC4: return (UChar *)(s - 3); + + case S_odd_C4CM: return (UChar *)(s - 0); + case S_one_CM_odd_C4CM: return (UChar *)(s - 2); + case S_even_C4CM: return (UChar *)(s - 2); + case S_one_CM_even_C4CM: return (UChar *)(s - 0); + + case S_even_CM_odd_C4CM: return (UChar *)(s - 0); + case S_odd_CM_odd_C4CM: return (UChar *)(s - 2); + case S_even_CM_even_C4CM: return (UChar *)(s - 2); + case S_odd_CM_even_C4CM: return (UChar *)(s - 0); + } + + return (UChar* )s; /* never come here. (escape warning) */ +} + +static int +gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED) +{ + return GB18030_MAP[*s] == C1 ? 
TRUE : FALSE; +} + +OnigEncodingDefine(gb18030, GB18030) = { + gb18030_mbc_enc_len, + "GB18030", /* name */ + 4, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + gb18030_mbc_to_code, + onigenc_mb4_code_to_mbclen, + gb18030_code_to_mbc, + gb18030_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + gb18030_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + gb18030_left_adjust_char_head, + gb18030_is_allowed_reverse_match +}; + diff --git a/enc/gb2312.c b/enc/gb2312.c new file mode 100644 index 0000000..6fc1573 --- /dev/null +++ b/enc/gb2312.c @@ -0,0 +1,13 @@ +#include <ruby/ruby.h> +#include <ruby/encoding.h> +#include "regenc.h" + +void +Init_gb2312(void) +{ + rb_enc_register("GB2312", rb_enc_find("EUC-KR")); +} + +ENC_ALIAS("EUC-CN", "GB2312") +ENC_ALIAS("eucCN", "GB2312") +ENC_REPLICATE("GB12345", "GB2312") diff --git a/enc/gbk.c b/enc/gbk.c new file mode 100644 index 0000000..3efb1bf --- /dev/null +++ b/enc/gbk.c @@ -0,0 +1,222 @@ +/********************************************************************** + gbk.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static const int EncLen_GBK[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +static const char GBK_CAN_BE_TRAIL_TABLE[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 +}; + +#define GBK_ISMB_FIRST(byte) (EncLen_GBK[byte] > 1) +#define GBK_ISMB_TRAIL(byte) GBK_CAN_BE_TRAIL_TABLE[(byte)] + +typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1 } state_t; +#define A ACCEPT +#define F FAILURE +static const signed char trans[][0x100] = { + { /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 8 */ A, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F + }, + { /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, 
F, F, F, F, F, F, F, F, F, F, + /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F, + /* 8 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F + } +}; +#undef A +#undef F + +static int +gbk_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) +{ + int firstbyte = *p++; + state_t s = trans[0][firstbyte]; +#define RETURN(n) \ + return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) : \ + ONIGENC_CONSTRUCT_MBCLEN_INVALID() + if (s < 0) RETURN(1); + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_GBK[firstbyte]-1); + s = trans[s][*p++]; + RETURN(2); +#undef RETURN +} + +static OnigCodePoint +gbk_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc) +{ + return onigenc_mbn_mbc_to_code(enc, p, end); +} + +static int +gbk_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) +{ + return onigenc_mb2_code_to_mbc(enc, code, buf); +} + +static int +gbk_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, + UChar* lower, OnigEncoding enc) +{ + return onigenc_mbn_mbc_case_fold(enc, flag, + pp, end, lower); +} + +#if 0 +static int +gbk_is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, OnigEncoding enc) +{ + return onigenc_mbn_is_mbc_ambiguous(enc, flag, pp, end); +} +#endif + +static int +gbk_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc) +{ + return onigenc_mb2_is_code_ctype(enc, 
code, ctype); +} + +static UChar* +gbk_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc) +{ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + if (GBK_ISMB_TRAIL(*p)) { + while (p > start) { + if (! GBK_ISMB_FIRST(*--p)) { + p++; + break; + } + } + } + len = enclen(enc, p, end); + if (p + len > s) return (UChar* )p; + p += len; + return (UChar* )(p + ((s - p) & ~1)); +} + +static int +gbk_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED) +{ + const UChar c = *s; + return (GBK_ISMB_TRAIL(c) ? FALSE : TRUE); +} + +OnigEncodingDefine(gbk, GBK) = { + gbk_mbc_enc_len, + "GBK", /* name */ + 2, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + gbk_mbc_to_code, + onigenc_mb2_code_to_mbclen, + gbk_code_to_mbc, + gbk_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + gbk_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + gbk_left_adjust_char_head, + gbk_is_allowed_reverse_match +}; +/* + * Name: GBK + * MIBenum: 113 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.iana.org/assignments/charset-reg/GBK + * Link: http://www.microsoft.com/globaldev/reference/dbcs/936.mspx + */ +ENC_ALIAS("CP936", "GBK") diff --git a/enc/iso_2022_jp.h b/enc/iso_2022_jp.h new file mode 100644 index 0000000..8155bdc --- /dev/null +++ b/enc/iso_2022_jp.h @@ -0,0 +1,6 @@ +#include "regenc.h" +/* dummy for unsupported, statefull encoding */ +ENC_DUMMY("ISO-2022-JP"); +ENC_ALIAS("ISO2022-JP", "ISO-2022-JP"); +ENC_REPLICATE("ISO-2022-JP-2", "ISO-2022-JP"); +ENC_ALIAS("ISO2022-JP2", "ISO-2022-JP-2"); diff --git a/enc/iso_8859_1.c b/enc/iso_8859_1.c new file mode 100644 index 0000000..b73f8ca --- /dev/null +++ b/enc/iso_8859_1.c @@ -0,0 +1,285 @@ +/********************************************************************** + 
iso8859_1.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \ + ((EncISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const unsigned short EncISO_8859_1_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg, + OnigEncoding enc ARG_UNUSED) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], + OnigEncoding enc ARG_UNUSED) +{ + if (0x41 <= *p && *p <= 0x5a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + if (*p == 0x53 && end > p + 1 + && (*(p+1) == 0x53 || *(p+1) == 0x73)) { /* SS */ + items[1].byte_len = 2; + items[1].code_len = 1; + items[1].code[0] = (OnigCodePoint )0xdf; + return 2; + } + else + return 1; + } + else if (0x61 <= *p && *p <= 0x7a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + if (*p == 0x73 && end > p + 1 + && (*(p+1) == 0x73 || *(p+1) == 0x53)) { /* ss */ + items[1].byte_len = 2; + items[1].code_len = 1; + 
items[1].code[0] = (OnigCodePoint )0xdf; + return 2; + } + else + return 1; + } + else if (0xc0 <= *p && *p <= 0xcf) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + return 1; + } + else if (0xd0 <= *p && *p <= 0xdf) { + if (*p == 0xdf) { + items[0].byte_len = 1; + items[0].code_len = 2; + items[0].code[0] = (OnigCodePoint )'s'; + items[0].code[1] = (OnigCodePoint )'s'; + + items[1].byte_len = 1; + items[1].code_len = 2; + items[1].code[0] = (OnigCodePoint )'S'; + items[1].code[1] = (OnigCodePoint )'S'; + + items[2].byte_len = 1; + items[2].code_len = 2; + items[2].code[0] = (OnigCodePoint )'s'; + items[2].code[1] = (OnigCodePoint )'S'; + + items[3].byte_len = 1; + items[3].code_len = 2; + items[3].code[0] = (OnigCodePoint )'S'; + items[3].code[1] = (OnigCodePoint )'s'; + + return 4; + } + else if (*p != 0xd7) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + return 1; + } + } + else if (0xe0 <= *p && *p <= 0xef) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + return 1; + } + else if (0xf0 <= *p && *p <= 0xfe) { + if (*p != 0xf7) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + return 1; + } + } + + return 0; +} + +static int +mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end ARG_UNUSED, + UChar* lower, OnigEncoding enc ARG_UNUSED) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = 
(EncISO_8859_1_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (*p >= 0xaa && *p <= 0xba) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED) +{ + if (code < 256) + return ENC_IS_ISO_8859_1_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingDefine(iso_8859_1, ISO_8859_1) = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-1", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("ISO8859-1", "ISO-8859-1") + +/* + * Name: windows-1252 + * MIBenum: 2252 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.microsoft.com/globaldev/reference/sbcs/1252.mspx + * Link: http://en.wikipedia.org/wiki/Windows-1252 + */ +ENC_REPLICATE("Windows-1252", "ISO-8859-1") +ENC_ALIAS("CP1252", "Windows-1252") diff --git a/enc/iso_8859_10.c b/enc/iso_8859_10.c new file mode 100644 index 0000000..1643278 --- /dev/null +++ b/enc/iso_8859_10.c @@ -0,0 +1,244 @@ +/********************************************************************** + iso8859_10.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_ISO_8859_10_TO_LOWER_CASE(c) EncISO_8859_10_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_10_CTYPE(code,ctype) \ + ((EncISO_8859_10_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_10_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\261', '\262', '\263', '\264', '\265', '\266', '\247', + '\270', '\271', '\272', '\273', '\274', '\255', '\276', '\277', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_10_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2, + 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0, + 0x30e2, 
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, + UChar* lower, OnigEncoding enc ARG_UNUSED) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_10_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_10_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + return TRUE; + } + + return (v != 0 ? 
TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED) +{ + if (code < 256) + return ENC_IS_ISO_8859_10_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xb1 }, + { 0xa2, 0xb2 }, + { 0xa3, 0xb3 }, + { 0xa4, 0xb4 }, + { 0xa5, 0xb5 }, + { 0xa6, 0xb6 }, + { 0xa8, 0xb8 }, + { 0xa9, 0xb9 }, + { 0xaa, 0xba }, + { 0xab, 0xbb }, + { 0xac, 0xbc }, + { 0xae, 0xbe }, + { 0xaf, 0xbf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg, + OnigEncoding enc ARG_UNUSED) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], + OnigEncoding enc ARG_UNUSED) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +OnigEncodingDefine(iso_8859_10, ISO_8859_10) = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-10", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + 
apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("ISO8859-10", "ISO-8859-10") diff --git a/enc/iso_8859_11.c b/enc/iso_8859_11.c new file mode 100644 index 0000000..257d9d8 --- /dev/null +++ b/enc/iso_8859_11.c @@ -0,0 +1,111 @@ +/********************************************************************** + iso8859_11.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \ + ((EncISO_8859_11_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const unsigned short EncISO_8859_11_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000 +}; + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED) +{ + if (code < 256) + return ENC_IS_ISO_8859_11_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingDefine(iso_8859_11, ISO_8859_11) = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-11", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("ISO8859-11", "ISO-8859-11") + +/* + * Name: TIS-620 + * MIBenum: 2259 + * Link: http://en.wikipedia.org/wiki/Thai_Industrial_Standard_620-2533 + */ +ENC_REPLICATE("TIS-620", "ISO-8859-11") + +/* + * Name: Windows-874 + * Link: http://www.microsoft.com/globaldev/reference/sbcs/874.mspx + */ 
+ENC_REPLICATE("Windows-874", "ISO-8859-11") +ENC_ALIAS("CP874", "Windows-874") diff --git a/enc/iso_8859_13.c b/enc/iso_8859_13.c new file mode 100644 index 0000000..f29cb2e --- /dev/null +++ b/enc/iso_8859_13.c @@ -0,0 +1,243 @@ +/********************************************************************** + iso8859_13.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_ISO_8859_13_TO_LOWER_CASE(c) EncISO_8859_13_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_13_CTYPE(code,ctype) \ + ((EncISO_8859_13_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_13_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_13_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x34a2, 0x00a0, 0x34a2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x34a2, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x01a0, 0x30e2, 0x00a0, 0x01a0, + 0x30e2, 
0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0
+};
+/* Case-fold the byte at *pp into lower[] and advance *pp by one. 0xdf yields "ss" (returns 2) when flag has INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR set; any other byte yields its EncISO_8859_13_ToLowerCaseTable entry (returns 1). */
+static int
+mbc_case_fold(OnigCaseFoldType flag,
+              const UChar** pp, const UChar* end ARG_UNUSED,
+              UChar* lower, OnigEncoding enc ARG_UNUSED)
+{
+  const UChar* p = *pp;
+
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    *lower++ = 's';
+    *lower   = 's';
+    (*pp)++;
+    return 2; /* two bytes written to lower[] */
+  }
+
+  *lower = ENC_ISO_8859_13_TO_LOWER_CASE(*p);
+  (*pp)++;
+  return 1; /* one byte written to lower[] */
+}
+/* Compiled out by the #if 0 below; not referenced by the encoding descriptor. */
+#if 0
+static int
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
+{
+  int v;
+  const UChar* p = *pp;
+
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    (*pp)++;
+    return TRUE;
+  }
+
+  (*pp)++;
+  v = (EncISO_8859_13_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  if ((v | BIT_CTYPE_LOWER) != 0) { /* NOTE(review): `|` makes this true whenever BIT_CTYPE_LOWER is non-zero; `&` was presumably intended */
+    /* 0xb5 is a lower-case letter that cannot be converted (0xdf is handled above). */
+    if (*p == 0xb5)
+      return FALSE;
+    else
+      return TRUE;
+  }
+
+  return (v != 0 ? TRUE : FALSE);
+}
+#endif
+/* Non-zero when code < 256 and its EncISO_8859_13_CtypeTable entry has the bit for ctype; FALSE for any code >= 256. */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED)
+{
+  if (code < 256)
+    return ENC_IS_ISO_8859_13_CTYPE(code, ctype);
+  else
+    return FALSE;
+}
+/* Byte pairs handed to the *_with_map helpers below. Lists every pair that EncISO_8859_13_ToLowerCaseTable folds: 0xa8/0xb8, 0xaa/0xba and 0xaf/0xbf as well as 0xc0-0xde (0xd7 folds to itself and has no entry). */
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+  { 0xa8, 0xb8 }, { 0xaa, 0xba }, { 0xaf, 0xbf },
+  { 0xc0, 0xe0 },
+  { 0xc1, 0xe1 },
+  { 0xc2, 0xe2 },
+  { 0xc3, 0xe3 },
+  { 0xc4, 0xe4 },
+  { 0xc5, 0xe5 },
+  { 0xc6, 0xe6 },
+  { 0xc7, 0xe7 },
+  { 0xc8, 0xe8 },
+  { 0xc9, 0xe9 },
+  { 0xca, 0xea },
+  { 0xcb, 0xeb },
+  { 0xcc, 0xec },
+  { 0xcd, 0xed },
+  { 0xce, 0xee },
+  { 0xcf, 0xef },
+  { 0xd0, 0xf0 },
+  { 0xd1, 0xf1 },
+  { 0xd2, 0xf2 },
+  { 0xd3, 0xf3 },
+  { 0xd4, 0xf4 },
+  { 0xd5, 0xf5 },
+  { 0xd6, 0xf6 },
+  { 0xd8, 0xf8 },
+  { 0xd9, 0xf9 },
+  { 0xda, 0xfa },
+  { 0xdb, 0xfb },
+  { 0xdc, 0xfc },
+  { 0xdd, 0xfd },
+  { 0xde, 0xfe }
+};
+/* Forwards flag, f and arg to onigenc_apply_all_case_fold_with_map together with CaseFoldMap and its entry count. */
+static int
+apply_all_case_fold(OnigCaseFoldType flag,
+                    OnigApplyAllCaseFoldFunc f, void* arg,
+                    OnigEncoding enc ARG_UNUSED)
+{
+  return onigenc_apply_all_case_fold_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, f, arg);
+}
+/* Forwards flag, p, end and items to onigenc_get_case_fold_codes_by_str_with_map together with CaseFoldMap and its entry count. */
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+                           const OnigUChar* p, const OnigUChar* end,
+                           OnigCaseFoldCodeItem items[],
+                           OnigEncoding enc ARG_UNUSED)
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, p, end, items);
+}
+/* Encoding descriptor for ISO-8859-13: min and max length 1; uses the static functions above alongside generic onigenc_* helpers. */
+OnigEncodingDefine(iso_8859_13, ISO_8859_13) = {
+  onigenc_single_byte_mbc_enc_len,
+  "ISO-8859-13",  /* name */
+  1,             /* max enc length */
+  1,             /* min enc length */
+  onigenc_is_mbc_newline_0x0a,
+  onigenc_single_byte_mbc_to_code,
+  onigenc_single_byte_code_to_mbclen,
+  onigenc_single_byte_code_to_mbc,
+  mbc_case_fold,
+  apply_all_case_fold,
+  get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  is_code_ctype,
+  onigenc_not_support_get_ctype_code_range,
+  onigenc_single_byte_left_adjust_char_head,
+  onigenc_always_true_is_allowed_reverse_match
+};
+ENC_ALIAS("ISO8859-13", "ISO-8859-13") + +/* + * Name: windows-1257 + * MIBenum: 2257 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.microsoft.com/globaldev/reference/sbcs/1257.mspx + * Link: http://en.wikipedia.org/wiki/Windows-1257 + */ +ENC_REPLICATE("Windows-1257", "ISO-8859-13") +ENC_ALIAS("CP1257", "Windows-1257") diff --git a/enc/iso_8859_14.c b/enc/iso_8859_14.c new file mode 100644 index 0000000..be83f93 --- /dev/null +++ b/enc/iso_8859_14.c @@ -0,0 +1,246 @@ +/********************************************************************** + iso8859_14.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_14_TO_LOWER_CASE(c) EncISO_8859_14_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_14_CTYPE(code,ctype) \ + ((EncISO_8859_14_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_14_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', 
'\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\242', '\242', '\243', '\245', '\245', '\253', '\247', + '\270', '\251', '\272', '\253', '\274', '\255', '\256', '\377', + '\261', '\261', '\263', '\263', '\265', '\265', '\266', '\271', + '\270', '\271', '\272', '\277', '\274', '\276', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_14_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+  0x0284, 0x34a2, 0x30e2, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x00a0,
+  0x34a2, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x34a2,
+  0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x00a0, 0x34a2,
+  0x30e2, 0x30e2, 0x30e2, 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
+};
+/* Case-fold the byte at *pp into lower[] and advance *pp by one. 0xdf yields "ss" (returns 2) when flag has INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR set; any other byte yields its EncISO_8859_14_ToLowerCaseTable entry (returns 1). */
+static int
+mbc_case_fold(OnigCaseFoldType flag,
+              const UChar** pp, const UChar* end ARG_UNUSED,
+              UChar* lower, OnigEncoding enc ARG_UNUSED)
+{
+  const UChar* p = *pp;
+
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    *lower++ = 's';
+    *lower   = 's';
+    (*pp)++;
+    return 2; /* two bytes written to lower[] */
+  }
+
+  *lower = ENC_ISO_8859_14_TO_LOWER_CASE(*p);
+  (*pp)++;
+  return 1; /* byte length of the lower-cased output */
+}
+/* Compiled out by the #if 0 below; the part of the encoding descriptor visible here does not reference it. */
+#if 0
+static int
+is_mbc_ambiguous(OnigCaseFoldType flag,
+                 const UChar** pp, const UChar* end)
+{
+  int v;
+  const UChar* p = *pp;
+
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    (*pp)++;
+    return TRUE;
+  }
+
+  (*pp)++;
+  v = (EncISO_8859_14_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  if ((v | BIT_CTYPE_LOWER) != 0) { /* NOTE(review): `|` makes this true whenever BIT_CTYPE_LOWER is non-zero; `&` was presumably intended */
+    return TRUE;
+  }
+
+  return (v != 0 ? TRUE : FALSE);
+}
+#endif
+/* Non-zero when code < 256 and its EncISO_8859_14_CtypeTable entry has the bit for ctype; FALSE for any code >= 256. */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED)
+{
+  if (code < 256)
+    return ENC_IS_ISO_8859_14_CTYPE(code, ctype);
+  else
+    return FALSE;
+}
+/* Byte pairs handed to the *_with_map helpers below; each entry pairs a byte with the value EncISO_8859_14_ToLowerCaseTable folds it to. */
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+  { 0xa1, 0xa2 },
+  { 0xa4, 0xa5 },
+  { 0xa6, 0xab },
+  { 0xa8, 0xb8 },
+  { 0xaa, 0xba },
+  { 0xac, 0xbc },
+  { 0xaf, 0xff },
+
+  { 0xb0, 0xb1 },
+  { 0xb2, 0xb3 },
+  { 0xb4, 0xb5 },
+  { 0xb7, 0xb9 },
+  { 0xbb, 0xbf },
+  { 0xbd, 0xbe },
+
+  { 0xc0, 0xe0 },
+  { 0xc1, 0xe1 },
+  { 0xc2, 0xe2 },
+  { 0xc3, 0xe3 },
+  { 0xc4, 0xe4 },
+  { 0xc5, 0xe5 },
+  { 0xc6, 0xe6 },
+  { 0xc7, 0xe7 },
+  { 0xc8, 0xe8 },
+  { 0xc9, 0xe9 },
+  { 0xca, 0xea },
+  { 0xcb, 0xeb },
+  { 0xcc, 0xec },
+  { 0xcd, 0xed },
+  { 0xce, 0xee },
+  { 0xcf, 0xef },
+
+  { 0xd0, 0xf0 },
+  { 0xd1, 0xf1 },
+  { 0xd2, 0xf2 },
+  { 0xd3, 0xf3 },
+  { 0xd4, 0xf4 },
+  { 0xd5, 0xf5 },
+  { 0xd6, 0xf6 },
+  { 0xd7, 0xf7 },
+  { 0xd8, 0xf8 },
+  { 0xd9, 0xf9 },
+  { 0xda, 0xfa },
+  { 0xdb, 0xfb },
+  { 0xdc, 0xfc },
+  { 0xdd, 0xfd },
+  { 0xde, 0xfe }
+};
+/* Forwards flag, f and arg to onigenc_apply_all_case_fold_with_map together with CaseFoldMap and its entry count. */
+static int
+apply_all_case_fold(OnigCaseFoldType flag,
+                    OnigApplyAllCaseFoldFunc f, void* arg,
+                    OnigEncoding enc ARG_UNUSED)
+{
+  return onigenc_apply_all_case_fold_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, f, arg);
+}
+/* Forwards flag, p, end and items to onigenc_get_case_fold_codes_by_str_with_map together with CaseFoldMap and its entry count. */
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+                           const OnigUChar* p, const OnigUChar* end,
+                           OnigCaseFoldCodeItem items[],
+                           OnigEncoding enc ARG_UNUSED)
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, p, end, items);
+}
+/* Encoding descriptor for ISO-8859-14: min and max length 1; uses the static functions above alongside generic onigenc_* helpers. */
+OnigEncodingDefine(iso_8859_14, ISO_8859_14) = {
+  onigenc_single_byte_mbc_enc_len,
+  "ISO-8859-14",  /* name */
+  1,             /* max enc length */
+  1,             /* min enc length */
+  onigenc_is_mbc_newline_0x0a,
+  onigenc_single_byte_mbc_to_code,
+  onigenc_single_byte_code_to_mbclen,
+  onigenc_single_byte_code_to_mbc,
+  mbc_case_fold,
+
apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("ISO8859-14", "ISO-8859-14") diff --git a/enc/iso_8859_15.c b/enc/iso_8859_15.c new file mode 100644 index 0000000..2ec41b6 --- /dev/null +++ b/enc/iso_8859_15.c @@ -0,0 +1,240 @@ +/********************************************************************** + iso8859_15.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_15_TO_LOWER_CASE(c) EncISO_8859_15_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \ + ((EncISO_8859_15_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_15_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', 
'\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\250', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\270', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_15_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+  0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0,
+  0x30e2, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x34a2, 0x30e2, 0x00a0, 0x01a0,
+  0x30e2, 0x10a0, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
+};
+/* Case-fold the byte at *pp into lower[] and advance *pp by one. 0xdf yields "ss" (returns 2) when flag has INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR set; any other byte yields its EncISO_8859_15_ToLowerCaseTable entry (returns 1). */
+static int
+mbc_case_fold(OnigCaseFoldType flag,
+              const UChar** pp, const UChar* end ARG_UNUSED,
+              UChar* lower, OnigEncoding enc ARG_UNUSED)
+{
+  const UChar* p = *pp;
+
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    *lower++ = 's';
+    *lower   = 's';
+    (*pp)++;
+    return 2; /* two bytes written to lower[] */
+  }
+
+  *lower = ENC_ISO_8859_15_TO_LOWER_CASE(*p);
+  (*pp)++;
+  return 1; /* byte length of the lower-cased output */
+}
+/* Compiled out by the #if 0 below; the part of the encoding descriptor visible here does not reference it. */
+#if 0
+static int
+is_mbc_ambiguous(OnigCaseFoldType flag,
+                 const UChar** pp, const UChar* end)
+{
+  int v;
+  const UChar* p = *pp;
+
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    (*pp)++;
+    return TRUE;
+  }
+
+  (*pp)++;
+  v = (EncISO_8859_15_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  if ((v | BIT_CTYPE_LOWER) != 0) { /* NOTE(review): `|` makes this true whenever BIT_CTYPE_LOWER is non-zero; `&` was presumably intended */
+    /* 0xaa, 0xb5 and 0xba are lower-case letters that cannot be converted (0xdf is handled above). */
+    if (*p == 0xaa || *p == 0xb5 || *p == 0xba)
+      return FALSE;
+    else
+      return TRUE;
+  }
+
+  return (v != 0 ? TRUE : FALSE);
+}
+#endif
+/* Non-zero when code < 256 and its EncISO_8859_15_CtypeTable entry has the bit for ctype; FALSE for any code >= 256. */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED)
+{
+  if (code < 256)
+    return ENC_IS_ISO_8859_15_CTYPE(code, ctype);
+  else
+    return FALSE;
+}
+/* Byte pairs handed to the *_with_map helpers below; each entry pairs a byte with the value EncISO_8859_15_ToLowerCaseTable folds it to (0xd7 folds to itself and has no entry). */
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+  { 0xa6, 0xa8 },
+
+  { 0xb4, 0xb8 },
+  { 0xbc, 0xbd },
+  { 0xbe, 0xff },
+
+  { 0xc0, 0xe0 },
+  { 0xc1, 0xe1 },
+  { 0xc2, 0xe2 },
+  { 0xc3, 0xe3 },
+  { 0xc4, 0xe4 },
+  { 0xc5, 0xe5 },
+  { 0xc6, 0xe6 },
+  { 0xc7, 0xe7 },
+  { 0xc8, 0xe8 },
+  { 0xc9, 0xe9 },
+  { 0xca, 0xea },
+  { 0xcb, 0xeb },
+  { 0xcc, 0xec },
+  { 0xcd, 0xed },
+  { 0xce, 0xee },
+  { 0xcf, 0xef },
+
+  { 0xd0, 0xf0 },
+  { 0xd1, 0xf1 },
+  { 0xd2, 0xf2 },
+  { 0xd3, 0xf3 },
+  { 0xd4, 0xf4 },
+  { 0xd5, 0xf5 },
+  { 0xd6, 0xf6 },
+  { 0xd8, 0xf8 },
+  { 0xd9, 0xf9 },
+  { 0xda, 0xfa },
+  { 0xdb, 0xfb },
+  { 0xdc, 0xfc },
+  { 0xdd, 0xfd },
+  { 0xde, 0xfe }
+};
+/* Forwards flag, f and arg to onigenc_apply_all_case_fold_with_map together with CaseFoldMap and its entry count. */
+static int
+apply_all_case_fold(OnigCaseFoldType flag,
+                    OnigApplyAllCaseFoldFunc f, void* arg,
+                    OnigEncoding enc ARG_UNUSED)
+{
+  return onigenc_apply_all_case_fold_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, f, arg);
+}
+/* Forwards flag, p, end and items to onigenc_get_case_fold_codes_by_str_with_map together with CaseFoldMap and its entry count. */
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+                           const OnigUChar* p, const OnigUChar* end,
+                           OnigCaseFoldCodeItem items[],
+                           OnigEncoding enc ARG_UNUSED)
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, p, end, items);
+}
+/* Encoding descriptor for ISO-8859-15: min and max length 1; uses the static functions above alongside generic onigenc_* helpers. */
+OnigEncodingDefine(iso_8859_15, ISO_8859_15) = {
+  onigenc_single_byte_mbc_enc_len,
+  "ISO-8859-15",  /* name */
+  1,             /* max enc length */
+  1,             /* min enc length */
+  onigenc_is_mbc_newline_0x0a,
+  onigenc_single_byte_mbc_to_code,
+  onigenc_single_byte_code_to_mbclen,
+  onigenc_single_byte_code_to_mbc,
+  mbc_case_fold,
+  apply_all_case_fold,
+  get_case_fold_codes_by_str,
+
onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("ISO8859-15", "ISO-8859-15") diff --git a/enc/iso_8859_16.c b/enc/iso_8859_16.c new file mode 100644 index 0000000..3ddc1d9 --- /dev/null +++ b/enc/iso_8859_16.c @@ -0,0 +1,242 @@ +/********************************************************************** + iso8859_16.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#include "regenc.h"
+
+#define ENC_ISO_8859_16_TO_LOWER_CASE(c) EncISO_8859_16_ToLowerCaseTable[c]
+#define ENC_IS_ISO_8859_16_CTYPE(code,ctype) \
+  ((EncISO_8859_16_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
+/* Byte -> case-folded byte, read by mbc_case_fold through ENC_ISO_8859_16_TO_LOWER_CASE. Bytes with no lower-case form map to themselves; that includes 0xa4 (EURO SIGN), which has no CaseFoldMap pair and so must not fold to 0xa5. */
+static const UChar EncISO_8859_16_ToLowerCaseTable[256] = {
+  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+  '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+  '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+  '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+  '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+  '\240', '\242', '\242', '\263', '\244', '\245', '\250', '\247',
+  '\250', '\251', '\272', '\253', '\256', '\255', '\256', '\277',
+  '\260', '\261', '\271', '\263', '\270', '\265', '\266', '\267',
+  '\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277',
+  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_16_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x01a0, 0x34a2, 0x00a0, + 0x30e2, 0x00a0, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x30e2, 0x34a2, + 0x00a0, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x01a0, + 0x30e2, 
0x30e2, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
+};
+/* Case-fold the byte at *pp into lower[] and advance *pp by one. 0xdf yields "ss" (returns 2) when flag has INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR set; any other byte yields its EncISO_8859_16_ToLowerCaseTable entry (returns 1). */
+static int
+mbc_case_fold(OnigCaseFoldType flag,
+              const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower,
+              OnigEncoding enc ARG_UNUSED)
+{
+  const UChar* p = *pp;
+
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    *lower++ = 's';
+    *lower   = 's';
+    (*pp)++;
+    return 2; /* two bytes written to lower[] */
+  }
+
+  *lower = ENC_ISO_8859_16_TO_LOWER_CASE(*p);
+  (*pp)++;
+  return 1; /* byte length of the lower-cased output */
+}
+/* Compiled out by the #if 0 below; the part of the encoding descriptor visible here does not reference it. */
+#if 0
+static int
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
+{
+  int v;
+  const UChar* p = *pp;
+
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    (*pp)++;
+    return TRUE;
+  }
+
+  (*pp)++;
+  v = (EncISO_8859_16_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  if ((v | BIT_CTYPE_LOWER) != 0) { /* NOTE(review): `|` makes this true whenever BIT_CTYPE_LOWER is non-zero; `&` was presumably intended */
+    return TRUE;
+  }
+
+  return (v != 0 ? TRUE : FALSE);
+}
+#endif
+/* Non-zero when code < 256 and its EncISO_8859_16_CtypeTable entry has the bit for ctype; FALSE for any code >= 256. */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED)
+{
+  if (code < 256)
+    return ENC_IS_ISO_8859_16_CTYPE(code, ctype);
+  else
+    return FALSE;
+}
+/* Byte pairs handed to the *_with_map helpers below; each entry pairs a byte with the value EncISO_8859_16_ToLowerCaseTable folds it to. */
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+  { 0xa1, 0xa2 },
+  { 0xa3, 0xb3 },
+  { 0xa6, 0xa8 },
+  { 0xaa, 0xba },
+  { 0xac, 0xae },
+  { 0xaf, 0xbf },
+
+  { 0xb2, 0xb9 },
+  { 0xb4, 0xb8 },
+  { 0xbc, 0xbd },
+  { 0xbe, 0xff },
+
+  { 0xc0, 0xe0 },
+  { 0xc1, 0xe1 },
+  { 0xc2, 0xe2 },
+  { 0xc3, 0xe3 },
+  { 0xc4, 0xe4 },
+  { 0xc5, 0xe5 },
+  { 0xc6, 0xe6 },
+  { 0xc7, 0xe7 },
+  { 0xc8, 0xe8 },
+  { 0xc9, 0xe9 },
+  { 0xca, 0xea },
+  { 0xcb, 0xeb },
+  { 0xcc, 0xec },
+  { 0xcd, 0xed },
+  { 0xce, 0xee },
+  { 0xcf, 0xef },
+
+  { 0xd0, 0xf0 },
+  { 0xd1, 0xf1 },
+  { 0xd2, 0xf2 },
+  { 0xd3, 0xf3 },
+  { 0xd4, 0xf4 },
+  { 0xd5, 0xf5 },
+  { 0xd6, 0xf6 },
+  { 0xd7, 0xf7 },
+  { 0xd8, 0xf8 },
+  { 0xd9, 0xf9 },
+  { 0xda, 0xfa },
+  { 0xdb, 0xfb },
+  { 0xdc, 0xfc },
+  { 0xdd, 0xfd },
+  { 0xde, 0xfe }
+};
+/* Forwards flag, f and arg to onigenc_apply_all_case_fold_with_map together with CaseFoldMap and its entry count. */
+static int
+apply_all_case_fold(OnigCaseFoldType flag,
+                    OnigApplyAllCaseFoldFunc f, void* arg,
+                    OnigEncoding enc ARG_UNUSED)
+{
+  return onigenc_apply_all_case_fold_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, f, arg);
+}
+/* Forwards flag, p, end and items to onigenc_get_case_fold_codes_by_str_with_map together with CaseFoldMap and its entry count. */
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+                           const OnigUChar* p, const OnigUChar* end,
+                           OnigCaseFoldCodeItem items[],
+                           OnigEncoding enc ARG_UNUSED)
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, p, end, items);
+}
+/* Encoding descriptor for ISO-8859-16: min and max length 1; uses the static functions above alongside generic onigenc_* helpers. */
+OnigEncodingDefine(iso_8859_16, ISO_8859_16) = {
+  onigenc_single_byte_mbc_enc_len,
+  "ISO-8859-16",  /* name */
+  1,             /* max enc length */
+  1,             /* min enc length */
+  onigenc_is_mbc_newline_0x0a,
+  onigenc_single_byte_mbc_to_code,
+  onigenc_single_byte_code_to_mbclen,
+  onigenc_single_byte_code_to_mbc,
+  mbc_case_fold,
+  apply_all_case_fold,
+  get_case_fold_codes_by_str,
+
onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("ISO8859-16", "ISO-8859-16") diff --git a/enc/iso_8859_2.c b/enc/iso_8859_2.c new file mode 100644 index 0000000..9783f1c --- /dev/null +++ b/enc/iso_8859_2.c @@ -0,0 +1,250 @@ +/********************************************************************** + iso8859_2.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_ISO_8859_2_TO_LOWER_CASE(c) EncISO_8859_2_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \ + ((EncISO_8859_2_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_2_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247', + '\250', '\271', '\272', '\273', '\274', '\255', '\276', '\277', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + 
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_2_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x00a0, 0x34a2, 0x00a0, 0x34a2, 0x34a2, 0x00a0, + 0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2, + 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x30e2, 0x00a0, + 0x00a0, 0x30e2, 
0x30e2, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0
+};
+/*
+ * Case-fold the single byte at *pp into `lower` and advance *pp by one.
+ * When INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is set in `flag`, byte 0xdf
+ * is written as the two bytes "ss", so `lower` needs room for 2 bytes;
+ * every other byte is mapped through EncISO_8859_2_ToLowerCaseTable.
+ * Returns the number of bytes written to `lower` (1 or 2).
+ * `end` is never consulted: *pp must already point at a readable byte.
+ */
+static int
+mbc_case_fold(OnigCaseFoldType flag,
+	      const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower,
+	      OnigEncoding enc ARG_UNUSED)
+{
+  const UChar* p = *pp;
+
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    *lower++ = 's';
+    *lower   = 's';
+    (*pp)++;
+    return 2;
+  }
+
+  *lower = ENC_ISO_8859_2_TO_LOWER_CASE(*p);
+  (*pp)++;
+  return 1; /* number of bytes written to lower */
+}
+/* Disabled with #if 0: the function below is never compiled. */
+#if 0
+static int
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
+{
+  int v;
+  const UChar* p = *pp;
+
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    (*pp)++;
+    return TRUE;
+  }
+
+  (*pp)++;
+  v = (EncISO_8859_2_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  if ((v | BIT_CTYPE_LOWER) != 0) { /* NOTE(review): '|' makes this test look always true; '&' presumably intended */
+    return TRUE;
+  }
+
+  return (v != 0 ?
TRUE : FALSE);
+}
+#endif
+/*
+ * Case pairs handed to the onigenc_*_with_map() helpers below.  Each
+ * entry is { upper, lower } and mirrors the non-ASCII mappings applied
+ * by EncISO_8859_2_ToLowerCaseTable.
+ */
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+  { 0xa1, 0xb1 },
+  { 0xa3, 0xb3 },
+  { 0xa5, 0xb5 },
+  { 0xa6, 0xb6 },
+  { 0xa9, 0xb9 },
+  { 0xaa, 0xba },
+  { 0xab, 0xbb },
+  { 0xac, 0xbc },
+  { 0xae, 0xbe },
+  { 0xaf, 0xbf },
+
+  { 0xc0, 0xe0 },
+  { 0xc1, 0xe1 },
+  { 0xc2, 0xe2 },
+  { 0xc3, 0xe3 },
+  { 0xc4, 0xe4 },
+  { 0xc5, 0xe5 },
+  { 0xc6, 0xe6 },
+  { 0xc7, 0xe7 },
+  { 0xc8, 0xe8 },
+  { 0xc9, 0xe9 },
+  { 0xca, 0xea },
+  { 0xcb, 0xeb },
+  { 0xcc, 0xec },
+  { 0xcd, 0xed },
+  { 0xce, 0xee },
+  { 0xcf, 0xef },
+
+  { 0xd0, 0xf0 },
+  { 0xd1, 0xf1 },
+  { 0xd2, 0xf2 },
+  { 0xd3, 0xf3 },
+  { 0xd4, 0xf4 },
+  { 0xd5, 0xf5 },
+  { 0xd6, 0xf6 },
+  { 0xd8, 0xf8 },
+  { 0xd9, 0xf9 },
+  { 0xda, 0xfa },
+  { 0xdb, 0xfb },
+  { 0xdc, 0xfc },
+  { 0xdd, 0xfd },
+  { 0xde, 0xfe }
+};
+/*
+ * Forward to onigenc_apply_all_case_fold_with_map(), passing the element
+ * count of CaseFoldMap, the map itself, and `flag`, `f`, `arg` unchanged.
+ * The helper's return value is returned as is.  `enc` is unused.
+ * NOTE(review): the literal 1 is an opaque flag of the helper; presumably
+ * it enables the 0xdf "ss" folding that mbc_case_fold() performs --
+ * confirm against the helper's definition.
+ */
+static int
+apply_all_case_fold(OnigCaseFoldType flag,
+		    OnigApplyAllCaseFoldFunc f, void* arg,
+		    OnigEncoding enc ARG_UNUSED)
+{
+  return onigenc_apply_all_case_fold_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, f, arg);
+}
+/*
+ * Forward to onigenc_get_case_fold_codes_by_str_with_map() with the same
+ * map, map size and literal flag as apply_all_case_fold(); `flag`, `p`,
+ * `end` and `items` are passed through unchanged and the helper's return
+ * value is returned as is.  `enc` is unused.
+ */
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+			   const OnigUChar* p, const OnigUChar* end,
+			   OnigCaseFoldCodeItem items[],
+			   OnigEncoding enc ARG_UNUSED)
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+	     sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+	     flag, p, end, items);
+}
+/*
+ * Report whether code point `code` belongs to character class `ctype`.
+ * Codes below 256 are looked up in EncISO_8859_2_CtypeTable through
+ * ENC_IS_ISO_8859_2_CTYPE; any larger code yields FALSE.  `enc` is unused.
+ */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED)
+{
+  if (code < 256) /* the ctype table has exactly 256 entries */
+    return ENC_IS_ISO_8859_2_CTYPE(code, ctype);
+  else
+    return FALSE;
+}
+/*
+ * Encoding descriptor for "ISO-8859-2": shared onigenc_* single-byte
+ * helpers combined with the static callbacks defined in this file.
+ */
+OnigEncodingDefine(iso_8859_2, ISO_8859_2) = {
+  onigenc_single_byte_mbc_enc_len,
+  "ISO-8859-2",  /* name */
+  1,             /* max enc length */
+  1,             /* min enc length */
+  onigenc_is_mbc_newline_0x0a,
+  onigenc_single_byte_mbc_to_code,
+  onigenc_single_byte_code_to_mbclen,
+  onigenc_single_byte_code_to_mbc,
+  mbc_case_fold,
+  apply_all_case_fold,
+  get_case_fold_codes_by_str,
+
onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("ISO8859-2", "ISO-8859-2") + +/* + * Name: windows-1250 + * MIBenum: 2250 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.microsoft.com/globaldev/reference/sbcs/1250.mspx + * Link: http://en.wikipedia.org/wiki/Windows-1250 + */ +ENC_REPLICATE("Windows-1250", "ISO-8859-2") +ENC_ALIAS("CP1250", "Windows-1250") diff --git a/enc/iso_8859_3.c b/enc/iso_8859_3.c new file mode 100644 index 0000000..81c0c74 --- /dev/null +++ b/enc/iso_8859_3.c @@ -0,0 +1,240 @@ +/********************************************************************** + iso8859_3.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_3_TO_LOWER_CASE(c) EncISO_8859_3_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_3_CTYPE(code,ctype) \ + ((EncISO_8859_3_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_3_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', 
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\261', '\242', '\243', '\244', '\245', '\266', '\247', + '\250', '\271', '\272', '\273', '\274', '\255', '\256', '\277', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\303', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\320', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_3_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 
0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+  0x0284, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x34a2, 0x00a0,
+  0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x0000, 0x34a2,
+  0x00a0, 0x30e2, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x30e2, 0x01a0,
+  0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x11a0, 0x0000, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x0000, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x0000, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0
+};
+/*
+ * Case-fold the single byte at *pp into `lower` and advance *pp by one.
+ * When INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is set in `flag`, byte 0xdf
+ * is written as the two bytes "ss", so `lower` needs room for 2 bytes;
+ * every other byte is mapped through EncISO_8859_3_ToLowerCaseTable.
+ * Returns the number of bytes written to `lower` (1 or 2).
+ * `end` is never consulted: *pp must already point at a readable byte.
+ */
+static int
+mbc_case_fold(OnigCaseFoldType flag,
+	      const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower,
+	      OnigEncoding enc ARG_UNUSED)
+{
+  const UChar* p = *pp;
+
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    *lower++ = 's';
+    *lower   = 's';
+    (*pp)++;
+    return 2;
+  }
+
+  *lower = ENC_ISO_8859_3_TO_LOWER_CASE(*p);
+  (*pp)++;
+  return 1; /* number of bytes written to lower */
+}
+/* Disabled with #if 0: the function below is never compiled. */
+#if 0
+static int
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
+{
+  int v;
+  const UChar* p = *pp;
+
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    (*pp)++;
+    return TRUE;
+  }
+
+  (*pp)++;
+  v = (EncISO_8859_3_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  if ((v | BIT_CTYPE_LOWER) != 0) { /* NOTE(review): '|' makes this test look always true; '&' presumably intended */
+    /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert.
*/
+    if (*p == 0xb5) /* NOTE(review): only 0xb5 is tested; the preceding comment also names 0xaa and 0xba */
+      return FALSE;
+    else
+      return TRUE;
+  }
+
+  return (v != 0 ? TRUE : FALSE);
+}
+#endif
+/*
+ * Report whether code point `code` belongs to character class `ctype`.
+ * Codes below 256 are looked up in EncISO_8859_3_CtypeTable through
+ * ENC_IS_ISO_8859_3_CTYPE; any larger code yields FALSE.  `enc` is unused.
+ */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED)
+{
+  if (code < 256) /* the ctype table has exactly 256 entries */
+    return ENC_IS_ISO_8859_3_CTYPE(code, ctype);
+  else
+    return FALSE;
+}
+/*
+ * Case pairs handed to the onigenc_*_with_map() helpers below.  Each
+ * entry is { upper, lower } and mirrors the non-ASCII mappings applied
+ * by EncISO_8859_3_ToLowerCaseTable (0xc3 and 0xd0 map to themselves
+ * there and therefore have no entry).
+ */
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+  { 0xa1, 0xb1 },
+  { 0xa6, 0xb6 },
+  { 0xa9, 0xb9 },
+  { 0xaa, 0xba },
+  { 0xab, 0xbb },
+  { 0xac, 0xbc },
+  { 0xaf, 0xbf },
+
+  { 0xc0, 0xe0 },
+  { 0xc1, 0xe1 },
+  { 0xc2, 0xe2 },
+  { 0xc4, 0xe4 },
+  { 0xc5, 0xe5 },
+  { 0xc6, 0xe6 },
+  { 0xc7, 0xe7 },
+  { 0xc8, 0xe8 },
+  { 0xc9, 0xe9 },
+  { 0xca, 0xea },
+  { 0xcb, 0xeb },
+  { 0xcc, 0xec },
+  { 0xcd, 0xed },
+  { 0xce, 0xee },
+  { 0xcf, 0xef },
+
+  { 0xd1, 0xf1 },
+  { 0xd2, 0xf2 },
+  { 0xd3, 0xf3 },
+  { 0xd4, 0xf4 },
+  { 0xd5, 0xf5 },
+  { 0xd6, 0xf6 },
+  { 0xd8, 0xf8 },
+  { 0xd9, 0xf9 },
+  { 0xda, 0xfa },
+  { 0xdb, 0xfb },
+  { 0xdc, 0xfc },
+  { 0xdd, 0xfd },
+  { 0xde, 0xfe }
+};
+
+/*
+ * Forward to onigenc_apply_all_case_fold_with_map(), passing the element
+ * count of CaseFoldMap, the map itself, and `flag`, `f`, `arg` unchanged.
+ * The helper's return value is returned as is.  `enc` is unused.
+ * NOTE(review): the literal 1 is an opaque flag of the helper; presumably
+ * it enables the 0xdf "ss" folding that mbc_case_fold() performs --
+ * confirm against the helper's definition.
+ */
+static int
+apply_all_case_fold(OnigCaseFoldType flag,
+		    OnigApplyAllCaseFoldFunc f, void* arg,
+		    OnigEncoding enc ARG_UNUSED)
+{
+  return onigenc_apply_all_case_fold_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, f, arg);
+}
+/*
+ * Forward to onigenc_get_case_fold_codes_by_str_with_map() with the same
+ * map, map size and literal flag as apply_all_case_fold(); `flag`, `p`,
+ * `end` and `items` are passed through unchanged and the helper's return
+ * value is returned as is.  `enc` is unused.
+ */
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+			   const OnigUChar* p, const OnigUChar* end,
+			   OnigCaseFoldCodeItem items[],
+			   OnigEncoding enc ARG_UNUSED)
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+	     sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+	     flag, p, end, items);
+}
+/*
+ * Encoding descriptor for "ISO-8859-3": shared onigenc_* single-byte
+ * helpers combined with the static callbacks defined in this file.
+ */
+OnigEncodingDefine(iso_8859_3, ISO_8859_3) = {
+  onigenc_single_byte_mbc_enc_len,
+  "ISO-8859-3",  /* name */
+  1,             /* max enc length */
+  1,             /* min enc length */
+  onigenc_is_mbc_newline_0x0a,
+  onigenc_single_byte_mbc_to_code,
+  onigenc_single_byte_code_to_mbclen,
+  onigenc_single_byte_code_to_mbc,
+  mbc_case_fold,
+  apply_all_case_fold,
+  get_case_fold_codes_by_str,
+
onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("ISO8859-3", "ISO-8859-3") diff --git a/enc/iso_8859_4.c b/enc/iso_8859_4.c new file mode 100644 index 0000000..ec71c4b --- /dev/null +++ b/enc/iso_8859_4.c @@ -0,0 +1,242 @@ +/********************************************************************** + iso8859_4.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_ISO_8859_4_TO_LOWER_CASE(c) EncISO_8859_4_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_4_CTYPE(code,ctype) \ + ((EncISO_8859_4_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_4_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247', + '\250', '\271', '\272', '\273', '\274', '\255', '\276', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\277', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + 
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_4_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x34a2, 0x34a2, 0x00a0, + 0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x00a0, + 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x30e2, 0x00a0, + 0x00a0, 0x30e2, 
0x30e2, 0x30e2, 0x30e2, 0x34a2, 0x30e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0
+};
+/*
+ * Case-fold the single byte at *pp into `lower` and advance *pp by one.
+ * When INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is set in `flag`, byte 0xdf
+ * is written as the two bytes "ss", so `lower` needs room for 2 bytes;
+ * every other byte is mapped through EncISO_8859_4_ToLowerCaseTable.
+ * Returns the number of bytes written to `lower` (1 or 2).
+ * `end` is never consulted: *pp must already point at a readable byte.
+ */
+static int
+mbc_case_fold(OnigCaseFoldType flag,
+	      const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower,
+	      OnigEncoding enc ARG_UNUSED)
+{
+  const UChar* p = *pp;
+
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    *lower++ = 's';
+    *lower   = 's';
+    (*pp)++;
+    return 2;
+  }
+
+  *lower = ENC_ISO_8859_4_TO_LOWER_CASE(*p);
+  (*pp)++;
+  return 1; /* number of bytes written to lower */
+}
+/* Disabled with #if 0: the function below is never compiled. */
+#if 0
+static int
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
+{
+  int v;
+  const UChar* p = *pp;
+
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    (*pp)++;
+    return TRUE;
+  }
+
+  (*pp)++;
+  v = (EncISO_8859_4_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  if ((v | BIT_CTYPE_LOWER) != 0) { /* NOTE(review): '|' makes this test look always true; '&' presumably intended */
+    if (*p == 0xa2) /* 0xa2 is flagged lower-case in the ctype table but has no upper-case partner in CaseFoldMap */
+      return FALSE;
+    else
+      return TRUE;
+  }
+
+  return (v != 0 ?
TRUE : FALSE);
+}
+#endif
+/*
+ * Report whether code point `code` belongs to character class `ctype`.
+ * Codes below 256 are looked up in EncISO_8859_4_CtypeTable through
+ * ENC_IS_ISO_8859_4_CTYPE; any larger code yields FALSE.  `enc` is unused.
+ */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED)
+{
+  if (code < 256) /* the ctype table has exactly 256 entries */
+    return ENC_IS_ISO_8859_4_CTYPE(code, ctype);
+  else
+    return FALSE;
+}
+/*
+ * Case pairs handed to the onigenc_*_with_map() helpers below.  Each
+ * entry is { upper, lower } and must list every non-ASCII mapping that
+ * EncISO_8859_4_ToLowerCaseTable applies; otherwise literal folding and
+ * map-driven folding disagree about which bytes are case variants.
+ */
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+  { 0xa1, 0xb1 },
+  { 0xa3, 0xb3 },
+  { 0xa5, 0xb5 },
+  { 0xa6, 0xb6 },
+  { 0xa9, 0xb9 },
+  { 0xaa, 0xba },
+  { 0xab, 0xbb },
+  { 0xac, 0xbc },
+  { 0xae, 0xbe },
+  { 0xbd, 0xbf }, /* ENG: the lower-case table maps 0xbd to 0xbf; the pair was missing here */
+
+  { 0xc0, 0xe0 },
+  { 0xc1, 0xe1 },
+  { 0xc2, 0xe2 },
+  { 0xc3, 0xe3 },
+  { 0xc4, 0xe4 },
+  { 0xc5, 0xe5 },
+  { 0xc6, 0xe6 },
+  { 0xc7, 0xe7 },
+  { 0xc8, 0xe8 },
+  { 0xc9, 0xe9 },
+  { 0xca, 0xea },
+  { 0xcb, 0xeb },
+  { 0xcc, 0xec },
+  { 0xcd, 0xed },
+  { 0xce, 0xee },
+  { 0xcf, 0xef },
+
+  { 0xd0, 0xf0 },
+  { 0xd1, 0xf1 },
+  { 0xd2, 0xf2 },
+  { 0xd3, 0xf3 },
+  { 0xd4, 0xf4 },
+  { 0xd5, 0xf5 },
+  { 0xd6, 0xf6 },
+  { 0xd8, 0xf8 },
+  { 0xd9, 0xf9 },
+  { 0xda, 0xfa },
+  { 0xdb, 0xfb },
+  { 0xdc, 0xfc },
+  { 0xdd, 0xfd },
+  { 0xde, 0xfe }
+};
+/*
+ * Forward to onigenc_apply_all_case_fold_with_map(), passing the element
+ * count of CaseFoldMap, the map itself, and `flag`, `f`, `arg` unchanged.
+ * The helper's return value is returned as is.  `enc` is unused.
+ * NOTE(review): the literal 1 is an opaque flag of the helper; presumably
+ * it enables the 0xdf "ss" folding that mbc_case_fold() performs --
+ * confirm against the helper's definition.
+ */
+static int
+apply_all_case_fold(OnigCaseFoldType flag,
+		    OnigApplyAllCaseFoldFunc f, void* arg,
+		    OnigEncoding enc ARG_UNUSED)
+{
+  return onigenc_apply_all_case_fold_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, f, arg);
+}
+/*
+ * Forward to onigenc_get_case_fold_codes_by_str_with_map() with the same
+ * map, map size and literal flag as apply_all_case_fold(); `flag`, `p`,
+ * `end` and `items` are passed through unchanged and the helper's return
+ * value is returned as is.  `enc` is unused.
+ */
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+			   const OnigUChar* p, const OnigUChar* end,
+			   OnigCaseFoldCodeItem items[],
+			   OnigEncoding enc ARG_UNUSED)
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+	     sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+	     flag, p, end, items);
+}
+/*
+ * Encoding descriptor for "ISO-8859-4": shared onigenc_* single-byte
+ * helpers combined with the static callbacks defined in this file.
+ */
+OnigEncodingDefine(iso_8859_4, ISO_8859_4) = {
+  onigenc_single_byte_mbc_enc_len,
+  "ISO-8859-4",  /* name */
+  1,             /* max enc length */
+  1,             /* min enc length */
+  onigenc_is_mbc_newline_0x0a,
+  onigenc_single_byte_mbc_to_code,
+  onigenc_single_byte_code_to_mbclen,
+  onigenc_single_byte_code_to_mbc,
+  mbc_case_fold,
+  apply_all_case_fold,
+  get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+
is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("ISO8859-4", "ISO-8859-4") diff --git a/enc/iso_8859_5.c b/enc/iso_8859_5.c new file mode 100644 index 0000000..1550763 --- /dev/null +++ b/enc/iso_8859_5.c @@ -0,0 +1,230 @@ +/********************************************************************** + iso8859_5.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_ISO_8859_5_TO_LOWER_CASE(c) EncISO_8859_5_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_5_CTYPE(code,ctype) \ + ((EncISO_8859_5_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_5_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\255', '\376', '\377', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + 
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_5_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower, + OnigEncoding enc ARG_UNUSED) +{ + const UChar* p = *pp; + + *lower = ENC_ISO_8859_5_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + (*pp)++; + v = (EncISO_8859_5_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + return (v != 0 ? 
TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED) +{ + if (code < 256) + return ENC_IS_ISO_8859_5_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xf1 }, + { 0xa2, 0xf2 }, + { 0xa3, 0xf3 }, + { 0xa4, 0xf4 }, + { 0xa5, 0xf5 }, + { 0xa6, 0xf6 }, + { 0xa7, 0xf7 }, + { 0xa8, 0xf8 }, + { 0xa9, 0xf9 }, + { 0xaa, 0xfa }, + { 0xab, 0xfb }, + { 0xac, 0xfc }, + { 0xae, 0xfe }, + { 0xaf, 0xff }, + + { 0xb0, 0xd0 }, + { 0xb1, 0xd1 }, + { 0xb2, 0xd2 }, + { 0xb3, 0xd3 }, + { 0xb4, 0xd4 }, + { 0xb5, 0xd5 }, + { 0xb6, 0xd6 }, + { 0xb7, 0xd7 }, + { 0xb8, 0xd8 }, + { 0xb9, 0xd9 }, + { 0xba, 0xda }, + { 0xbb, 0xdb }, + { 0xbc, 0xdc }, + { 0xbd, 0xdd }, + { 0xbe, 0xde }, + { 0xbf, 0xdf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg, + OnigEncoding enc ARG_UNUSED) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], + OnigEncoding enc ARG_UNUSED) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); +} + +OnigEncodingDefine(iso_8859_5, ISO_8859_5) = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-5", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + 
onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("ISO8859-5", "ISO-8859-5") diff --git a/enc/iso_8859_6.c b/enc/iso_8859_6.c new file mode 100644 index 0000000..7384061 --- /dev/null +++ b/enc/iso_8859_6.c @@ -0,0 +1,107 @@ +/********************************************************************** + iso8859_6.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \ + ((EncISO_8859_6_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const unsigned short EncISO_8859_6_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x0000, 0x0000, 0x0000, 0x00a0, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 0x01a0, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x01a0, 0x0000, 0x0000, 0x0000, 0x01a0, + 0x0000, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 +}; + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED) +{ + if (code < 256) + return ENC_IS_ISO_8859_6_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingDefine(iso_8859_6, ISO_8859_6) = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-6", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("ISO8859-6", "ISO-8859-6") + +/* + * Name: windows-1256 + * MIBenum: 2256 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.microsoft.com/globaldev/reference/sbcs/1256.mspx + * Link: http://en.wikipedia.org/wiki/Windows-1256 + */ +ENC_REPLICATE("Windows-1256", "ISO-8859-6") +ENC_ALIAS("CP1256", 
"Windows-1256") diff --git a/enc/iso_8859_7.c b/enc/iso_8859_7.c new file mode 100644 index 0000000..2f91617 --- /dev/null +++ b/enc/iso_8859_7.c @@ -0,0 +1,237 @@ +/********************************************************************** + iso8859_7.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_ISO_8859_7_TO_LOWER_CASE(c) EncISO_8859_7_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \ + ((EncISO_8859_7_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_7_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\334', '\267', + '\335', '\336', '\337', '\273', '\374', '\275', '\375', '\376', + '\300', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + 
'\360', '\361', '\322', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_7_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x01a0, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x0000, 0x01a0, 0x00a0, 0x01a0, 0x0000, 0x01a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x34a2, 0x01a0, + 0x34a2, 0x34a2, 
0x34a2, 0x01a0, 0x34a2, 0x10a0, 0x34a2, 0x34a2, + 0x30e2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x0000 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower, + OnigEncoding enc ARG_UNUSED) +{ + const UChar* p = *pp; + + *lower = ENC_ISO_8859_7_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + (*pp)++; + v = (EncISO_8859_7_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + if (*p == 0xc0 || *p == 0xe0) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? 
TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED) +{ + if (code < 256) + return ENC_IS_ISO_8859_7_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xb6, 0xdc }, + { 0xb8, 0xdd }, + { 0xb9, 0xde }, + { 0xba, 0xdf }, + { 0xbc, 0xfc }, + { 0xbe, 0xfd }, + { 0xbf, 0xfe }, + + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg, + OnigEncoding enc ARG_UNUSED) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], + OnigEncoding enc ARG_UNUSED) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); +} + + +OnigEncodingDefine(iso_8859_7, ISO_8859_7) = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-7", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + 
onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("ISO8859-7", "ISO-8859-7") + +/* + * Name: windows-1253 + * MIBenum: 2253 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.microsoft.com/globaldev/reference/sbcs/1253.mspx + * Link: http://en.wikipedia.org/wiki/Windows-1253 + */ +ENC_REPLICATE("Windows-1253", "ISO-8859-7") +ENC_ALIAS("CP1253", "Windows-1253") diff --git a/enc/iso_8859_8.c b/enc/iso_8859_8.c new file mode 100644 index 0000000..bd0754c --- /dev/null +++ b/enc/iso_8859_8.c @@ -0,0 +1,107 @@ +/********************************************************************** + iso8859_8.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \ + ((EncISO_8859_8_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const unsigned short EncISO_8859_8_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x00a0, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 +}; + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED) +{ + if (code < 256) + return ENC_IS_ISO_8859_8_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingDefine(iso_8859_8, ISO_8859_8) = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-8", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("ISO8859-8", "ISO-8859-8") + +/* + * Name: windows-1255 + * MIBenum: 2255 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.microsoft.com/globaldev/reference/sbcs/1255.mspx + * Link: http://en.wikipedia.org/wiki/Windows-1255 + */ +ENC_REPLICATE("Windows-1255", "ISO-8859-8") +ENC_ALIAS("CP1255", 
"Windows-1255") diff --git a/enc/iso_8859_9.c b/enc/iso_8859_9.c new file mode 100644 index 0000000..4d46e74 --- /dev/null +++ b/enc/iso_8859_9.c @@ -0,0 +1,243 @@ +/********************************************************************** + iso8859_9.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_ISO_8859_9_TO_LOWER_CASE(c) EncISO_8859_9_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_9_CTYPE(code,ctype) \ + ((EncISO_8859_9_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_9_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + 
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\335', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_9_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 
0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower, + OnigEncoding enc ARG_UNUSED) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_9_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_9_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xdf etc.. are lower case letter, but can't convert. */ + if (*p >= 0xaa && *p <= 0xba) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? 
TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED) +{ + if (code < 256) + return ENC_IS_ISO_8859_9_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg, + OnigEncoding enc ARG_UNUSED) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], + OnigEncoding enc ARG_UNUSED) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +OnigEncodingDefine(iso_8859_9, ISO_8859_9) = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-9", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; 
+ENC_ALIAS("ISO8859-9", "ISO-8859-9") + +/* + * Name: windows-1254 + * MIBenum: 2254 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.microsoft.com/globaldev/reference/sbcs/1254.mspx + * Link: http://en.wikipedia.org/wiki/Windows-1254 + */ +ENC_REPLICATE("Windows-1254", "ISO-8859-9") +ENC_ALIAS("CP1254", "Windows-1254") diff --git a/enc/koi8_r.c b/enc/koi8_r.c new file mode 100644 index 0000000..fb6e10e --- /dev/null +++ b/enc/koi8_r.c @@ -0,0 +1,219 @@ +/********************************************************************** + koi8_r.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_KOI8_R_TO_LOWER_CASE(c) EncKOI8_R_ToLowerCaseTable[c] +#define ENC_IS_KOI8_R_CTYPE(code,ctype) \ + ((EncKOI8_R_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncKOI8_R_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', 
'\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\243', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337' +}; + +static const unsigned short EncKOI8_R_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 
0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x30e2, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2 +}; + +/* Store the EncKOI8_R_ToLowerCaseTable mapping of the byte at *pp in *lower, advance *pp by one byte and return 1, the number of bytes written. flag, end and enc are unused. */ +static int +koi8_r_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, + UChar* lower, OnigEncoding enc ARG_UNUSED) +{ + const UChar* p = *pp; + + *lower = ENC_KOI8_R_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +/* Compiled out. Advances *pp by one byte and reports whether that byte carries an upper-case or lower-case bit in EncKOI8_R_CtypeTable. */ +static int +koi8_r_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, + const UChar* end, OnigEncoding enc ARG_UNUSED) +{ + int v; + const UChar* p = *pp; + + (*pp)++; + v = (EncKOI8_R_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + return (v != 0 ?
TRUE : FALSE); +} +#endif + +/* Return nonzero when code is below 256 and its EncKOI8_R_CtypeTable entry has the bit for ctype set (via ENC_IS_KOI8_R_CTYPE); any code of 256 or more yields FALSE. enc is unused. */ +static int +koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype, + OnigEncoding enc ARG_UNUSED) +{ + if (code < 256) + return ENC_IS_KOI8_R_CTYPE(code, ctype); + else + return FALSE; +} + +/* Byte pairs handed to the onigenc *_with_map helpers below: 0xa3 with 0xb3, then every value in 0xc0-0xdf paired with the value 0x20 above it. */ +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa3, 0xb3 }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + { 0xdf, 0xff } +}; + +/* Forward to onigenc_apply_all_case_fold_with_map with CaseFoldMap, its entry count and 0 as the third argument; the helper result is returned unchanged. enc is unused. */ +static int +koi8_r_apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, + void* arg, OnigEncoding enc ARG_UNUSED) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} + +/* Forward to onigenc_get_case_fold_codes_by_str_with_map with CaseFoldMap, its entry count and 0 as the third argument; the helper fills items and its result is returned unchanged. enc is unused. */ +static int +koi8_r_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); +} + +/* Encoding descriptor for KOI8-R: generic single-byte helpers from onigenc plus the koi8_r_* case-fold and ctype functions defined in this file. */ +OnigEncodingDefine(koi8_r, KOI8_R) = { + onigenc_single_byte_mbc_enc_len, + "KOI8-R", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + koi8_r_mbc_case_fold, + koi8_r_apply_all_case_fold, + koi8_r_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + koi8_r_is_code_ctype, + onigenc_not_support_get_ctype_code_range, +
onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("CP878", "KOI8-R") + diff --git a/enc/koi8_u.c b/enc/koi8_u.c new file mode 100644 index 0000000..9bd363d --- /dev/null +++ b/enc/koi8_u.c @@ -0,0 +1,221 @@ +/********************************************************************** + koi8_u.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_KOI8_U_TO_LOWER_CASE(c) EncKOI8_U_ToLowerCaseTable[c] +#define ENC_IS_KOI8_U_CTYPE(code,ctype) \ + ((EncKOI8_U_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncKOI8_U_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\243', '\244', '\265', '\246', '\247', + '\270', '\271', '\272', '\273', '\274', '\255', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', 
'\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337' +}; + +static const unsigned short EncKOI8_U_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x30e2, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x34a2, 0x00a0, 0x34a2, 0x34a2, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 
0x34a2, 0x00a0, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2 +}; + +/* Store the EncKOI8_U_ToLowerCaseTable mapping of the byte at *pp in *lower, advance *pp by one byte and return 1, the number of bytes written. flag, end and enc are unused. */ +static int +koi8_u_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, + UChar* lower, OnigEncoding enc ARG_UNUSED) +{ + const UChar* p = *pp; + + *lower = ENC_KOI8_U_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +/* Compiled out. Advances *pp by one byte and reports whether that byte carries an upper-case or lower-case bit in EncKOI8_U_CtypeTable. */ +static int +koi8_u_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, + const UChar* end, OnigEncoding enc ARG_UNUSED) +{ + int v; + const UChar* p = *pp; + + (*pp)++; + v = (EncKOI8_U_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + return (v != 0 ?
TRUE : FALSE); +} +#endif + +/* Return nonzero when code is below 256 and its EncKOI8_U_CtypeTable entry has the bit for ctype set (via ENC_IS_KOI8_U_CTYPE); any code of 256 or more yields FALSE. enc is unused. */ +static int +koi8_u_is_code_ctype(OnigCodePoint code, unsigned int ctype, + OnigEncoding enc ARG_UNUSED) +{ + if (code < 256) + return ENC_IS_KOI8_U_CTYPE(code, ctype); + else + return FALSE; +} + +/* Byte pairs handed to the onigenc *_with_map helpers below: 0xa3, 0xa4, 0xa6, 0xa7 and 0xad each paired with the value 0x10 above, then every value in 0xc0-0xdf paired with the value 0x20 above it. */ +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa3, 0xb3 }, + { 0xa4, 0xb4 }, + { 0xa6, 0xb6 }, + { 0xa7, 0xb7 }, + { 0xad, 0xbd }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + { 0xdf, 0xff } +}; + +/* Forward to onigenc_apply_all_case_fold_with_map with CaseFoldMap, its entry count and 0 as the third argument; the helper result is returned unchanged. enc is unused. */ +static int +koi8_u_apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, + void* arg, OnigEncoding enc ARG_UNUSED) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} + +/* Forward to onigenc_get_case_fold_codes_by_str_with_map with CaseFoldMap, its entry count and 0 as the third argument; the helper fills items and its result is returned unchanged. enc is unused. */ +static int +koi8_u_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); +} + +/* Encoding descriptor for KOI8-U: generic single-byte helpers from onigenc plus the koi8_u_* case-fold and ctype functions defined in this file. */ +OnigEncodingDefine(koi8_u, KOI8_U) = { + onigenc_single_byte_mbc_enc_len, + "KOI8-U", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + koi8_u_mbc_case_fold, + koi8_u_apply_all_case_fold, + koi8_u_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, +
koi8_u_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/enc/make_encmake.rb b/enc/make_encmake.rb new file mode 100644 index 0000000..107a387 --- /dev/null +++ b/enc/make_encmake.rb @@ -0,0 +1,50 @@ +#! ./miniruby + +dir = File.expand_path("../..", __FILE__) +$:.unshift(dir) +$:.unshift(".") +if $".grep(/mkmf/).empty? + $" << "mkmf.rb" + load File.expand_path("lib/mkmf.rb", dir) +end +require 'erb' + +CONFIG["MAKEDIRS"] ||= '@$(MINIRUBY) -run -e mkdir -- -p' + +BUILTIN_ENCS = [] +BUILTIN_TRANSES = [] +ENC_PATTERNS = [] +NOENC_PATTERNS = [] + +until ARGV.empty? + case ARGV[0] + when /\A--builtin-encs=/ + BUILTIN_ENCS.concat $'.split.map {|e| File.basename(e, '.*') << '.c'} + ARGV.shift + when /\A--builtin-transes=/ + BUILTIN_TRANSES.concat $'.split.map {|e| File.basename(e, '.*') } + ARGV.shift + when /\A--encs=/ + ENC_PATTERNS.concat $'.split + ARGV.shift + when /\A--no-encs=/ + NOENC_PATTERNS.concat $'.split + ARGV.shift + else + break + end +end + +if File.exist?(depend = File.join($srcdir, "depend")) + erb = ERB.new(File.read(depend), nil, '%') + erb.filename = depend + tmp = erb.result(binding) + dep = "\n#### depend ####\n\n" << depend_rules(tmp).join +else + dep = "" +end +mkin = File.read(File.join($srcdir, "Makefile.in")) +mkin.gsub!(/@(#{CONFIG.keys.join('|')})@/) {CONFIG[$1]} +open(ARGV[0], 'wb') {|f| + f.puts mkin, dep +} diff --git a/enc/prelude.rb b/enc/prelude.rb new file mode 100644 index 0000000..8ce59f9 --- /dev/null +++ b/enc/prelude.rb @@ -0,0 +1,6 @@ +%w'enc/encdb.so enc/trans/transdb.so'.each do |init| + begin + require(init) + rescue LoadError + end +end diff --git a/enc/shift_jis.c b/enc/shift_jis.c new file mode 100644 index 0000000..b4d8592 --- /dev/null +++ b/enc/shift_jis.c @@ -0,0 +1,399 @@ +/********************************************************************** + sjis.c - Oniguruma (regular expression library) 
+**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" + +static const int EncLen_SJIS[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1 +}; + +static const char SJIS_CAN_BE_TRAIL_TABLE[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 +}; + +#define SJIS_ISMB_FIRST(byte) (EncLen_SJIS[byte] > 1) +#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)] + +typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1 } state_t; +#define A ACCEPT +#define F FAILURE +static const signed char trans[][0x100] = { + { 
/* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 8 */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* a */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F, F, F + }, + { /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F, + /* 8 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, F, F, F + } +}; +#undef A +#undef F + +static int 
+mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) +{ + /* Run the first byte through trans[0]. A negative state settles the result at once: ACCEPT is a complete one-byte character, anything else is invalid. A non-negative state selects the trans row for the second byte; if the input stops at e before that byte, NEEDMORE is reported with EncLen_SJIS[firstbyte]-1, otherwise the second byte decides between a two-byte character and invalid. */ + int firstbyte = *p++; + state_t s; + s = trans[0][firstbyte]; + if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1) : + ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_SJIS[firstbyte]-1); + s = trans[s][*p++]; + return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2) : + ONIGENC_CONSTRUCT_MBCLEN_INVALID(); +} + +/* Number of bytes used to encode code: 1 for a code below 256 whose EncLen_SJIS entry is 1, 0 for any other code below 256, 2 for codes up to 0xffff, and ONIGERR_INVALID_CODE_POINT_VALUE above that. enc is unused. */ +static int +code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) +{ + if (code < 256) { + if (EncLen_SJIS[(int )code] == 1) + return 1; + else + return 0; + } + else if (code <= 0xffff) { + return 2; + } + else + return ONIGERR_INVALID_CODE_POINT_VALUE; +} + +/* Build the code point of the character at p: start with its first byte and, for a longer character, shift each following byte in below it (big-endian), stopping early if end is reached. The character length comes from enclen. */ +static OnigCodePoint +mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc) +{ + int c, i, len; + OnigCodePoint n; + + len = enclen(enc, p, end); + c = *p++; + n = c; + if (len == 1) return n; + + for (i = 1; i < len; i++) { + if (p >= end) break; + c = *p++; + n <<= 8; n += c; + } + return n; +} + +/* Write code to buf as one or two bytes and return the byte count; the high byte (bits 8-15) is written first and only when it is nonzero. The length cross-check against enclen is compiled out. */ +static int +code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) +{ + UChar *p = buf; + + if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff)); + *p++ = (UChar )(code & 0xff); + +#if 0 + if (enclen(enc, buf) != (p - buf)) + return REGERR_INVALID_CODE_POINT_VALUE; +#endif + return p - buf; +} + +/* Case-fold the character at *pp into lower and advance *pp past it. A character for which ONIGENC_IS_MBC_ASCII holds is lowered with ONIGENC_ASCII_CODE_TO_LOWER_CASE (returns 1); any other character is copied unchanged, enclen bytes of it, and that length is returned. flag is not read. */ +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, UChar* lower, + OnigEncoding enc) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + (*pp)++; + return 1; + } + else { + int i; + int len = enclen(enc, p, end); + + for (i = 0; i < len; i++) { + *lower++ = *p++; + } + (*pp) += len; + return len; /* return byte length of converted char to lower */ + } +} + +#if 0 +/* Compiled out; it refers to enc, which is not among its parameters. */ +static int +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + return onigenc_mbn_is_mbc_ambiguous(enc, flag, pp, end); + +} +#endif + +#if 0
+/* Compiled-out earlier variant of is_code_ctype without the enc parameter; the live version follows further down. */ +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { + return (code_to_mbclen(code) > 1 ? TRUE : FALSE); + } + } + + return FALSE; +} +#endif + +/* Return the first byte of the character that contains s, never going before start. If *s can be a trail byte, p is walked backwards while the preceding bytes can be lead bytes; the character length at the resulting p and the parity of the remaining distance to s then select the head. */ +static UChar* +left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc) +{ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + if (SJIS_ISMB_TRAIL(*p)) { + while (p > start) { + if (! SJIS_ISMB_FIRST(*--p)) { + p++; + break; + } + } + } + len = enclen(enc, p, end); + if (p + len > s) return (UChar* )p; + p += len; + return (UChar* )(p + ((s - p) & ~1)); +} + +/* Return TRUE when *s cannot be a trail byte according to SJIS_CAN_BE_TRAIL_TABLE, FALSE otherwise. end and enc are not read. */ +static int +is_allowed_reverse_match(const UChar* s, const UChar* end, OnigEncoding enc ARG_UNUSED) +{ + const UChar c = *s; + return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE); +} + + +/* State of the encoding-specific property registry. NOTE(review): presumably maintained by the PROPERTY_LIST_* macros, which are defined outside this file - confirm. */ +static int PropertyInited = 0; +static const OnigCodePoint** PropertyList; +static int PropertyListNum; +static int PropertyListSize; +static hash_table_type* PropertyNameTable; + +/* Code range tables: a leading count followed by that many from/to pairs. */ +static const OnigCodePoint CR_Hiragana[] = { + 1, + 0x829f, 0x82f1 +}; /* CR_Hiragana */ + +static const OnigCodePoint CR_Katakana[] = { + 4, + 0x00a6, 0x00af, + 0x00b1, 0x00dd, + 0x8340, 0x837e, + 0x8380, 0x8396, +}; /* CR_Katakana */ + +/* Register the Hiragana and Katakana range tables and mark the registry initialised. NOTE(review): r and the end label are not touched by the visible statements, so PROPERTY_LIST_ADD_PROP presumably assigns r and jumps to end on failure - confirm against the macro definition. */ +static int +init_property_list(void) +{ + int r; + + PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana); + PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana); + PropertyInited = 1; + + end: + return r; +} + +/* Map the property name in [p, end) to a ctype: look it up in PropertyNameTable and, when onig_st_lookup_strend returns 0, fall back to onigenc_minimum_property_name_to_ctype. */ +static int +property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) +{ + st_data_t ctype; + + PROPERTY_LIST_INIT_CHECK; + + if (onig_st_lookup_strend(PropertyNameTable, p, end, &ctype) == 0) { + return onigenc_minimum_property_name_to_ctype(enc, p, end); + } + + return ctype; +} + +/* Test whether code belongs to ctype. For ctypes up to ONIGENC_MAX_STD_CTYPE, codes below 128 use the ASCII ctype check and all other codes match only the word, graph and print classes; larger ctype values select an entry of PropertyList. */ +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc) +{ + if (ctype <= ONIGENC_MAX_STD_CTYPE) { + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); +
else { + if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { + return TRUE; + } + } + } + else { + PROPERTY_LIST_INIT_CHECK; + + /* ctype values above ONIGENC_MAX_STD_CTYPE index PropertyList; an index at or beyond PropertyListNum is reported as ONIGERR_TYPE_BUG. */ + ctype -= (ONIGENC_MAX_STD_CTYPE + 1); + if (ctype >= (unsigned int )PropertyListNum) + return ONIGERR_TYPE_BUG; + + return onig_is_in_code_range((UChar* )PropertyList[ctype], code); + } + + return FALSE; +} + +/* Supply the code range table for a property ctype. Standard ctypes (up to ONIGENC_MAX_STD_CTYPE) return ONIG_NO_SUPPORT_CONFIG. Otherwise *sb_out is set to 0x80, *ranges is pointed at the matching PropertyList entry and 0 is returned; an index at or beyond PropertyListNum returns ONIGERR_TYPE_BUG. enc is unused. */ +static int +get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, + const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED) +{ + if (ctype <= ONIGENC_MAX_STD_CTYPE) { + return ONIG_NO_SUPPORT_CONFIG; + } + else { + *sb_out = 0x80; + + PROPERTY_LIST_INIT_CHECK; + + ctype -= (ONIGENC_MAX_STD_CTYPE + 1); + if (ctype >= (OnigCtype )PropertyListNum) + return ONIGERR_TYPE_BUG; + + *ranges = PropertyList[ctype]; + return 0; + } +} + +/* Encoding descriptor for Shift_JIS: the functions defined in this file plus the ASCII-only case-fold helpers and the 0x0a newline test from onigenc. */ +OnigEncodingDefine(shift_jis, Shift_JIS) = { + mbc_enc_len, + "Shift_JIS", /* name */ + 2, /* max byte length */ + 1, /* min byte length */ + onigenc_is_mbc_newline_0x0a, + mbc_to_code, + code_to_mbclen, + code_to_mbc, + mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + property_name_to_ctype, + is_code_ctype, + get_ctype_code_range, + left_adjust_char_head, + is_allowed_reverse_match, + 0 +}; +/* + * Name: Shift_JIS + * MIBenum: 17 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://ja.wikipedia.org/wiki/Shift_JIS + */ +ENC_ALIAS("SJIS", "Shift_JIS") + +/* + * Name: Windows-31J + * MIBenum: 2024 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.microsoft.com/globaldev/reference/dbcs/932.mspx + * Link: http://ja.wikipedia.org/wiki/Windows-31J + * Link: http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-932-2000.ucm + */ +ENC_REPLICATE("Windows-31J", "Shift_JIS") +ENC_ALIAS("CP932", "Windows-31J") +ENC_ALIAS("csWindows31J", "Windows-31J") /* IANA. IE6 does not accept Windows-31J, only csWindows31J.
*/ + +/* + * Name: MacJapanese + * Link: http://unicode.org/Public/MAPPINGS/VENDORS/APPLE/JAPANESE.TXT + * Link: http://ja.wikipedia.org/wiki/MacJapanese + */ +ENC_REPLICATE("MacJapanese", "Shift_JIS") +ENC_ALIAS("MacJapan", "MacJapanese") diff --git a/enc/trans/newline.trans b/enc/trans/newline.trans new file mode 100644 index 0000000..a200ec0 --- /dev/null +++ b/enc/trans/newline.trans @@ -0,0 +1,135 @@ +#include "transcode_data.h" + +<% + map_normalize = {} + map_normalize["{00-ff}"] = :func_so + + transcode_generate_node(ActionMap.parse(map_normalize), "universal_newline") + + map_crlf = {} + map_crlf["{00-09,0b-ff}"] = :nomap + map_crlf["0a"] = "0d0a" + + transcode_generate_node(ActionMap.parse(map_crlf), "crlf_newline") + + map_cr = {} + map_cr["{00-09,0b-ff}"] = :nomap + map_cr["0a"] = "0d" + + transcode_generate_node(ActionMap.parse(map_cr), "cr_newline") +%> + +<%= transcode_generated_code %> + +#define STATE (sp[0]) +#define NORMAL 0 +#define JUST_AFTER_CR 1 + +/* no way to access this information, yet. 
*/ +#define NEWLINES_MET (sp[1]) +#define MET_LF 0x01 +#define MET_CRLF 0x02 +#define MET_CR 0x04 + +static int +universal_newline_init(void *statep) +{ + unsigned char *sp = statep; + STATE = NORMAL; + NEWLINES_MET = 0; + return 0; +} + +static ssize_t +fun_so_universal_newline(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) +{ + unsigned char *sp = statep; + int len; + if (s[0] == '\n') { + if (STATE == NORMAL) { + NEWLINES_MET |= MET_LF; + } + else { /* JUST_AFTER_CR */ + NEWLINES_MET |= MET_CRLF; + } + o[0] = '\n'; + len = 1; + STATE = NORMAL; + } + else { + len = 0; + if (STATE == JUST_AFTER_CR) { + o[0] = '\n'; + len = 1; + NEWLINES_MET |= MET_CR; + } + if (s[0] == '\r') { + STATE = JUST_AFTER_CR; + } + else { + o[len++] = s[0]; + STATE = NORMAL; + } + } + + return len; +} + +static ssize_t +universal_newline_finish(void *statep, unsigned char *o, size_t osize) +{ + unsigned char *sp = statep; + int len = 0; + if (STATE == JUST_AFTER_CR) { + o[0] = '\n'; + len = 1; + NEWLINES_MET |= MET_CR; + } + STATE = NORMAL; + return len; +} + +static const rb_transcoder +rb_universal_newline = { + "", "universal_newline", universal_newline, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 1, /* max_input */ + 2, /* max_output */ + asciicompat_converter, /* asciicompat_type */ + 2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */ + NULL, NULL, NULL, fun_so_universal_newline, + universal_newline_finish +}; + +static const rb_transcoder +rb_crlf_newline = { + "", "crlf_newline", crlf_newline, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 1, /* max_input */ + 2, /* max_output */ + asciicompat_converter, /* asciicompat_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ + NULL, NULL, NULL, NULL +}; + +static const rb_transcoder +rb_cr_newline = { + "", "cr_newline", cr_newline, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 1, /* max_input */ + 1, /* max_output */ + 
asciicompat_converter, /* asciicompat_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ + NULL, NULL, NULL, NULL +}; + +void +Init_newline(void) +{ + rb_register_transcoder(&rb_universal_newline); + rb_register_transcoder(&rb_crlf_newline); + rb_register_transcoder(&rb_cr_newline); +} diff --git a/enc/unicode.c b/enc/unicode.c new file mode 100644 index 0000000..2dfcbba --- /dev/null +++ b/enc/unicode.c @@ -0,0 +1,11359 @@ +/********************************************************************** + unicode.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regint.h" + +#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \ + ((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) +#if 0 +#define ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(code,cbit) \ + ((EncUNICODE_ISO_8859_1_CtypeTable[code] & (cbit)) != 0) +#endif + +static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 
0x0008, 0x0288, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +/* 'NEWLINE' */ +static const OnigCodePoint CR_NEWLINE[] = { + 1, + 0x000a, 0x000a +}; /* CR_NEWLINE */ + +/* 'Alpha': [[:Alpha:]] */ +static const OnigCodePoint CR_Alpha[] = { + 418, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0241, + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06ef, + 0x06fa, 
0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09f0, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a70, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 
0x0d61, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x135f, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x180b, 0x180d, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x1a00, 0x1a1b, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 
0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x20d0, 0x20eb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2ce4, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3005, 0x3006, + 0x302a, 0x302f, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 0xa48c, + 0xa800, 0xa827, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10400, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 
0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +}; /* CR_Alpha */ + +/* 'Blank': [[:Blank:]] */ +static const OnigCodePoint CR_Blank[] = { + 9, + 0x0009, 0x0009, + 0x0020, 0x0020, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; /* CR_Blank */ + +/* 'Cntrl': [[:Cntrl:]] */ +static const OnigCodePoint CR_Cntrl[] = { + 19, + 0x0000, 0x001f, + 0x007f, 0x009f, + 0x00ad, 0x00ad, + 0x0600, 0x0603, + 0x06dd, 0x06dd, + 0x070f, 0x070f, + 0x17b4, 0x17b5, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x2063, + 0x206a, 0x206f, + 0xd800, 0xf8ff, + 0xfeff, 0xfeff, + 0xfff9, 0xfffb, + 0x1d173, 0x1d17a, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Cntrl */ + +/* 'Digit': [[:Digit:]] */ +static const OnigCodePoint CR_Digit[] = { + 23, + 0x0030, 0x0039, + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be6, 0x0bef, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f29, + 0x1040, 0x1049, + 0x17e0, 0x17e9, + 0x1810, 
0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19d9, + 0xff10, 0xff19, + 0x104a0, 0x104a9, + 0x1d7ce, 0x1d7ff +}; /* CR_Digit */ + +/* 'Graph': [[:Graph:]] */ +static const OnigCodePoint CR_Graph[] = { + 424, + 0x0021, 0x007e, + 0x00a1, 0x0241, + 0x0250, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x037e, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05b9, + 0x05bb, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060b, 0x0615, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0970, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, 
+ 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fd1, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x137c, + 
0x1380, 0x1399, + 0x13a0, 0x13f4, + 0x1401, 0x1676, + 0x1681, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x19de, 0x1a1b, + 0x1a1e, 0x1a1f, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x200b, 0x2027, + 0x202a, 0x202e, + 0x2030, 0x205e, + 0x2060, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x2090, 0x2094, + 0x20a0, 0x20b5, + 0x20d0, 0x20eb, + 0x2100, 0x214c, + 0x2153, 0x2183, + 0x2190, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c6, + 0x27d0, 0x27eb, + 0x27f0, 0x2b13, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2cea, + 0x2cf9, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3001, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31c0, 0x31cf, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 0x32fe, + 0x3300, 0x4db5, + 
0x4dc0, 0x9fbb, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xa700, 0xa716, + 0xa800, 0xa82b, + 0xac00, 0xd7a3, + 0xe000, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe19, + 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1018a, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x103c3, + 0x103c8, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d200, 0x1d245, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Graph */ + +/* 'Lower': 
[[:Lower:]] */ +static const OnigCodePoint CR_Lower[] = { + 480, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00df, 0x00f6, + 0x00f8, 0x00ff, + 0x0101, 0x0101, + 0x0103, 0x0103, + 0x0105, 0x0105, + 0x0107, 0x0107, + 0x0109, 0x0109, + 0x010b, 0x010b, + 0x010d, 0x010d, + 0x010f, 0x010f, + 0x0111, 0x0111, + 0x0113, 0x0113, + 0x0115, 0x0115, + 0x0117, 0x0117, + 0x0119, 0x0119, + 0x011b, 0x011b, + 0x011d, 0x011d, + 0x011f, 0x011f, + 0x0121, 0x0121, + 0x0123, 0x0123, + 0x0125, 0x0125, + 0x0127, 0x0127, + 0x0129, 0x0129, + 0x012b, 0x012b, + 0x012d, 0x012d, + 0x012f, 0x012f, + 0x0131, 0x0131, + 0x0133, 0x0133, + 0x0135, 0x0135, + 0x0137, 0x0138, + 0x013a, 0x013a, + 0x013c, 0x013c, + 0x013e, 0x013e, + 0x0140, 0x0140, + 0x0142, 0x0142, + 0x0144, 0x0144, + 0x0146, 0x0146, + 0x0148, 0x0149, + 0x014b, 0x014b, + 0x014d, 0x014d, + 0x014f, 0x014f, + 0x0151, 0x0151, + 0x0153, 0x0153, + 0x0155, 0x0155, + 0x0157, 0x0157, + 0x0159, 0x0159, + 0x015b, 0x015b, + 0x015d, 0x015d, + 0x015f, 0x015f, + 0x0161, 0x0161, + 0x0163, 0x0163, + 0x0165, 0x0165, + 0x0167, 0x0167, + 0x0169, 0x0169, + 0x016b, 0x016b, + 0x016d, 0x016d, + 0x016f, 0x016f, + 0x0171, 0x0171, + 0x0173, 0x0173, + 0x0175, 0x0175, + 0x0177, 0x0177, + 0x017a, 0x017a, + 0x017c, 0x017c, + 0x017e, 0x0180, + 0x0183, 0x0183, + 0x0185, 0x0185, + 0x0188, 0x0188, + 0x018c, 0x018d, + 0x0192, 0x0192, + 0x0195, 0x0195, + 0x0199, 0x019b, + 0x019e, 0x019e, + 0x01a1, 0x01a1, + 0x01a3, 0x01a3, + 0x01a5, 0x01a5, + 0x01a8, 0x01a8, + 0x01aa, 0x01ab, + 0x01ad, 0x01ad, + 0x01b0, 0x01b0, + 0x01b4, 0x01b4, + 0x01b6, 0x01b6, + 0x01b9, 0x01ba, + 0x01bd, 0x01bf, + 0x01c6, 0x01c6, + 0x01c9, 0x01c9, + 0x01cc, 0x01cc, + 0x01ce, 0x01ce, + 0x01d0, 0x01d0, + 0x01d2, 0x01d2, + 0x01d4, 0x01d4, + 0x01d6, 0x01d6, + 0x01d8, 0x01d8, + 0x01da, 0x01da, + 0x01dc, 0x01dd, + 0x01df, 0x01df, + 0x01e1, 0x01e1, + 0x01e3, 0x01e3, + 0x01e5, 0x01e5, + 0x01e7, 0x01e7, + 0x01e9, 0x01e9, + 0x01eb, 0x01eb, + 0x01ed, 0x01ed, + 0x01ef, 0x01f0, + 
0x01f3, 0x01f3, + 0x01f5, 0x01f5, + 0x01f9, 0x01f9, + 0x01fb, 0x01fb, + 0x01fd, 0x01fd, + 0x01ff, 0x01ff, + 0x0201, 0x0201, + 0x0203, 0x0203, + 0x0205, 0x0205, + 0x0207, 0x0207, + 0x0209, 0x0209, + 0x020b, 0x020b, + 0x020d, 0x020d, + 0x020f, 0x020f, + 0x0211, 0x0211, + 0x0213, 0x0213, + 0x0215, 0x0215, + 0x0217, 0x0217, + 0x0219, 0x0219, + 0x021b, 0x021b, + 0x021d, 0x021d, + 0x021f, 0x021f, + 0x0221, 0x0221, + 0x0223, 0x0223, + 0x0225, 0x0225, + 0x0227, 0x0227, + 0x0229, 0x0229, + 0x022b, 0x022b, + 0x022d, 0x022d, + 0x022f, 0x022f, + 0x0231, 0x0231, + 0x0233, 0x0239, + 0x023c, 0x023c, + 0x023f, 0x0240, + 0x0250, 0x02af, + 0x0390, 0x0390, + 0x03ac, 0x03ce, + 0x03d0, 0x03d1, + 0x03d5, 0x03d7, + 0x03d9, 0x03d9, + 0x03db, 0x03db, + 0x03dd, 0x03dd, + 0x03df, 0x03df, + 0x03e1, 0x03e1, + 0x03e3, 0x03e3, + 0x03e5, 0x03e5, + 0x03e7, 0x03e7, + 0x03e9, 0x03e9, + 0x03eb, 0x03eb, + 0x03ed, 0x03ed, + 0x03ef, 0x03f3, + 0x03f5, 0x03f5, + 0x03f8, 0x03f8, + 0x03fb, 0x03fc, + 0x0430, 0x045f, + 0x0461, 0x0461, + 0x0463, 0x0463, + 0x0465, 0x0465, + 0x0467, 0x0467, + 0x0469, 0x0469, + 0x046b, 0x046b, + 0x046d, 0x046d, + 0x046f, 0x046f, + 0x0471, 0x0471, + 0x0473, 0x0473, + 0x0475, 0x0475, + 0x0477, 0x0477, + 0x0479, 0x0479, + 0x047b, 0x047b, + 0x047d, 0x047d, + 0x047f, 0x047f, + 0x0481, 0x0481, + 0x048b, 0x048b, + 0x048d, 0x048d, + 0x048f, 0x048f, + 0x0491, 0x0491, + 0x0493, 0x0493, + 0x0495, 0x0495, + 0x0497, 0x0497, + 0x0499, 0x0499, + 0x049b, 0x049b, + 0x049d, 0x049d, + 0x049f, 0x049f, + 0x04a1, 0x04a1, + 0x04a3, 0x04a3, + 0x04a5, 0x04a5, + 0x04a7, 0x04a7, + 0x04a9, 0x04a9, + 0x04ab, 0x04ab, + 0x04ad, 0x04ad, + 0x04af, 0x04af, + 0x04b1, 0x04b1, + 0x04b3, 0x04b3, + 0x04b5, 0x04b5, + 0x04b7, 0x04b7, + 0x04b9, 0x04b9, + 0x04bb, 0x04bb, + 0x04bd, 0x04bd, + 0x04bf, 0x04bf, + 0x04c2, 0x04c2, + 0x04c4, 0x04c4, + 0x04c6, 0x04c6, + 0x04c8, 0x04c8, + 0x04ca, 0x04ca, + 0x04cc, 0x04cc, + 0x04ce, 0x04ce, + 0x04d1, 0x04d1, + 0x04d3, 0x04d3, + 0x04d5, 0x04d5, + 0x04d7, 0x04d7, + 0x04d9, 0x04d9, + 
0x04db, 0x04db, + 0x04dd, 0x04dd, + 0x04df, 0x04df, + 0x04e1, 0x04e1, + 0x04e3, 0x04e3, + 0x04e5, 0x04e5, + 0x04e7, 0x04e7, + 0x04e9, 0x04e9, + 0x04eb, 0x04eb, + 0x04ed, 0x04ed, + 0x04ef, 0x04ef, + 0x04f1, 0x04f1, + 0x04f3, 0x04f3, + 0x04f5, 0x04f5, + 0x04f7, 0x04f7, + 0x04f9, 0x04f9, + 0x0501, 0x0501, + 0x0503, 0x0503, + 0x0505, 0x0505, + 0x0507, 0x0507, + 0x0509, 0x0509, + 0x050b, 0x050b, + 0x050d, 0x050d, + 0x050f, 0x050f, + 0x0561, 0x0587, + 0x1d00, 0x1d2b, + 0x1d62, 0x1d77, + 0x1d79, 0x1d9a, + 0x1e01, 0x1e01, + 0x1e03, 0x1e03, + 0x1e05, 0x1e05, + 0x1e07, 0x1e07, + 0x1e09, 0x1e09, + 0x1e0b, 0x1e0b, + 0x1e0d, 0x1e0d, + 0x1e0f, 0x1e0f, + 0x1e11, 0x1e11, + 0x1e13, 0x1e13, + 0x1e15, 0x1e15, + 0x1e17, 0x1e17, + 0x1e19, 0x1e19, + 0x1e1b, 0x1e1b, + 0x1e1d, 0x1e1d, + 0x1e1f, 0x1e1f, + 0x1e21, 0x1e21, + 0x1e23, 0x1e23, + 0x1e25, 0x1e25, + 0x1e27, 0x1e27, + 0x1e29, 0x1e29, + 0x1e2b, 0x1e2b, + 0x1e2d, 0x1e2d, + 0x1e2f, 0x1e2f, + 0x1e31, 0x1e31, + 0x1e33, 0x1e33, + 0x1e35, 0x1e35, + 0x1e37, 0x1e37, + 0x1e39, 0x1e39, + 0x1e3b, 0x1e3b, + 0x1e3d, 0x1e3d, + 0x1e3f, 0x1e3f, + 0x1e41, 0x1e41, + 0x1e43, 0x1e43, + 0x1e45, 0x1e45, + 0x1e47, 0x1e47, + 0x1e49, 0x1e49, + 0x1e4b, 0x1e4b, + 0x1e4d, 0x1e4d, + 0x1e4f, 0x1e4f, + 0x1e51, 0x1e51, + 0x1e53, 0x1e53, + 0x1e55, 0x1e55, + 0x1e57, 0x1e57, + 0x1e59, 0x1e59, + 0x1e5b, 0x1e5b, + 0x1e5d, 0x1e5d, + 0x1e5f, 0x1e5f, + 0x1e61, 0x1e61, + 0x1e63, 0x1e63, + 0x1e65, 0x1e65, + 0x1e67, 0x1e67, + 0x1e69, 0x1e69, + 0x1e6b, 0x1e6b, + 0x1e6d, 0x1e6d, + 0x1e6f, 0x1e6f, + 0x1e71, 0x1e71, + 0x1e73, 0x1e73, + 0x1e75, 0x1e75, + 0x1e77, 0x1e77, + 0x1e79, 0x1e79, + 0x1e7b, 0x1e7b, + 0x1e7d, 0x1e7d, + 0x1e7f, 0x1e7f, + 0x1e81, 0x1e81, + 0x1e83, 0x1e83, + 0x1e85, 0x1e85, + 0x1e87, 0x1e87, + 0x1e89, 0x1e89, + 0x1e8b, 0x1e8b, + 0x1e8d, 0x1e8d, + 0x1e8f, 0x1e8f, + 0x1e91, 0x1e91, + 0x1e93, 0x1e93, + 0x1e95, 0x1e9b, + 0x1ea1, 0x1ea1, + 0x1ea3, 0x1ea3, + 0x1ea5, 0x1ea5, + 0x1ea7, 0x1ea7, + 0x1ea9, 0x1ea9, + 0x1eab, 0x1eab, + 0x1ead, 0x1ead, + 0x1eaf, 0x1eaf, + 
0x1eb1, 0x1eb1, + 0x1eb3, 0x1eb3, + 0x1eb5, 0x1eb5, + 0x1eb7, 0x1eb7, + 0x1eb9, 0x1eb9, + 0x1ebb, 0x1ebb, + 0x1ebd, 0x1ebd, + 0x1ebf, 0x1ebf, + 0x1ec1, 0x1ec1, + 0x1ec3, 0x1ec3, + 0x1ec5, 0x1ec5, + 0x1ec7, 0x1ec7, + 0x1ec9, 0x1ec9, + 0x1ecb, 0x1ecb, + 0x1ecd, 0x1ecd, + 0x1ecf, 0x1ecf, + 0x1ed1, 0x1ed1, + 0x1ed3, 0x1ed3, + 0x1ed5, 0x1ed5, + 0x1ed7, 0x1ed7, + 0x1ed9, 0x1ed9, + 0x1edb, 0x1edb, + 0x1edd, 0x1edd, + 0x1edf, 0x1edf, + 0x1ee1, 0x1ee1, + 0x1ee3, 0x1ee3, + 0x1ee5, 0x1ee5, + 0x1ee7, 0x1ee7, + 0x1ee9, 0x1ee9, + 0x1eeb, 0x1eeb, + 0x1eed, 0x1eed, + 0x1eef, 0x1eef, + 0x1ef1, 0x1ef1, + 0x1ef3, 0x1ef3, + 0x1ef5, 0x1ef5, + 0x1ef7, 0x1ef7, + 0x1ef9, 0x1ef9, + 0x1f00, 0x1f07, + 0x1f10, 0x1f15, + 0x1f20, 0x1f27, + 0x1f30, 0x1f37, + 0x1f40, 0x1f45, + 0x1f50, 0x1f57, + 0x1f60, 0x1f67, + 0x1f70, 0x1f7d, + 0x1f80, 0x1f87, + 0x1f90, 0x1f97, + 0x1fa0, 0x1fa7, + 0x1fb0, 0x1fb4, + 0x1fb6, 0x1fb7, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fc7, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, + 0x1fe0, 0x1fe7, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ff7, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x210a, 0x210a, + 0x210e, 0x210f, + 0x2113, 0x2113, + 0x212f, 0x212f, + 0x2134, 0x2134, + 0x2139, 0x2139, + 0x213c, 0x213d, + 0x2146, 0x2149, + 0x2c30, 0x2c5e, + 0x2c81, 0x2c81, + 0x2c83, 0x2c83, + 0x2c85, 0x2c85, + 0x2c87, 0x2c87, + 0x2c89, 0x2c89, + 0x2c8b, 0x2c8b, + 0x2c8d, 0x2c8d, + 0x2c8f, 0x2c8f, + 0x2c91, 0x2c91, + 0x2c93, 0x2c93, + 0x2c95, 0x2c95, + 0x2c97, 0x2c97, + 0x2c99, 0x2c99, + 0x2c9b, 0x2c9b, + 0x2c9d, 0x2c9d, + 0x2c9f, 0x2c9f, + 0x2ca1, 0x2ca1, + 0x2ca3, 0x2ca3, + 0x2ca5, 0x2ca5, + 0x2ca7, 0x2ca7, + 0x2ca9, 0x2ca9, + 0x2cab, 0x2cab, + 0x2cad, 0x2cad, + 0x2caf, 0x2caf, + 0x2cb1, 0x2cb1, + 0x2cb3, 0x2cb3, + 0x2cb5, 0x2cb5, + 0x2cb7, 0x2cb7, + 0x2cb9, 0x2cb9, + 0x2cbb, 0x2cbb, + 0x2cbd, 0x2cbd, + 0x2cbf, 0x2cbf, + 0x2cc1, 0x2cc1, + 0x2cc3, 0x2cc3, + 0x2cc5, 0x2cc5, + 0x2cc7, 0x2cc7, + 0x2cc9, 0x2cc9, + 0x2ccb, 0x2ccb, + 0x2ccd, 0x2ccd, + 0x2ccf, 0x2ccf, + 0x2cd1, 0x2cd1, + 0x2cd3, 0x2cd3, + 
0x2cd5, 0x2cd5, + 0x2cd7, 0x2cd7, + 0x2cd9, 0x2cd9, + 0x2cdb, 0x2cdb, + 0x2cdd, 0x2cdd, + 0x2cdf, 0x2cdf, + 0x2ce1, 0x2ce1, + 0x2ce3, 0x2ce4, + 0x2d00, 0x2d25, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff41, 0xff5a, + 0x10428, 0x1044f, + 0x1d41a, 0x1d433, + 0x1d44e, 0x1d454, + 0x1d456, 0x1d467, + 0x1d482, 0x1d49b, + 0x1d4b6, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d4cf, + 0x1d4ea, 0x1d503, + 0x1d51e, 0x1d537, + 0x1d552, 0x1d56b, + 0x1d586, 0x1d59f, + 0x1d5ba, 0x1d5d3, + 0x1d5ee, 0x1d607, + 0x1d622, 0x1d63b, + 0x1d656, 0x1d66f, + 0x1d68a, 0x1d6a5, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6e1, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d71b, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d755, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d78f, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9 +}; /* CR_Lower */ + +/* 'Print': [[:Print:]] */ +static const OnigCodePoint CR_Print[] = { + 423, + 0x0009, 0x000d, + 0x0020, 0x007e, + 0x0085, 0x0085, + 0x00a0, 0x0241, + 0x0250, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x037e, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05b9, + 0x05bb, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060b, 0x0615, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0970, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 
0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 
0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fd1, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, + 0x13a0, 0x13f4, + 0x1401, 0x1676, + 0x1680, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x19de, 0x1a1b, + 0x1a1e, 0x1a1f, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2000, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x2090, 0x2094, + 0x20a0, 0x20b5, + 0x20d0, 0x20eb, + 0x2100, 0x214c, + 0x2153, 0x2183, + 0x2190, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 
0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c6, + 0x27d0, 0x27eb, + 0x27f0, 0x2b13, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2cea, + 0x2cf9, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3000, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31c0, 0x31cf, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fbb, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xa700, 0xa716, + 0xa800, 0xa82b, + 0xac00, 0xd7a3, + 0xe000, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe19, + 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1018a, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x103c3, + 0x103c8, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58, + 0x1d000, 0x1d0f5, + 0x1d100, 
0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d200, 0x1d245, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Print */ + +/* 'Punct': [[:Punct:]] */ +static const OnigCodePoint CR_Punct[] = { + 96, + 0x0021, 0x0023, + 0x0025, 0x002a, + 0x002c, 0x002f, + 0x003a, 0x003b, + 0x003f, 0x0040, + 0x005b, 0x005d, + 0x005f, 0x005f, + 0x007b, 0x007b, + 0x007d, 0x007d, + 0x00a1, 0x00a1, + 0x00ab, 0x00ab, + 0x00b7, 0x00b7, + 0x00bb, 0x00bb, + 0x00bf, 0x00bf, + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x055a, 0x055f, + 0x0589, 0x058a, + 0x05be, 0x05be, + 0x05c0, 0x05c0, + 0x05c3, 0x05c3, + 0x05c6, 0x05c6, + 0x05f3, 0x05f4, + 0x060c, 0x060d, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x066a, 0x066d, + 0x06d4, 0x06d4, + 0x0700, 0x070d, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0df4, 0x0df4, + 0x0e4f, 0x0e4f, + 0x0e5a, 0x0e5b, + 0x0f04, 0x0f12, + 0x0f3a, 0x0f3d, + 0x0f85, 0x0f85, + 0x0fd0, 0x0fd1, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1361, 0x1368, + 0x166d, 0x166e, + 0x169b, 0x169c, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 0x180a, + 0x1944, 0x1945, + 0x19de, 0x19df, + 0x1a1e, 0x1a1f, + 0x2010, 0x2027, + 0x2030, 0x2043, + 0x2045, 0x2051, + 0x2053, 0x205e, + 0x207d, 0x207e, + 0x208d, 0x208e, + 0x2329, 0x232a, + 0x23b4, 0x23b6, + 0x2768, 0x2775, + 0x27c5, 0x27c6, + 0x27e6, 0x27eb, + 0x2983, 0x2998, + 0x29d8, 0x29db, + 0x29fc, 0x29fd, + 0x2cf9, 0x2cfc, + 0x2cfe, 0x2cff, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x3001, 0x3003, + 0x3008, 
0x3011, + 0x3014, 0x301f, + 0x3030, 0x3030, + 0x303d, 0x303d, + 0x30a0, 0x30a0, + 0x30fb, 0x30fb, + 0xfd3e, 0xfd3f, + 0xfe10, 0xfe19, + 0xfe30, 0xfe52, + 0xfe54, 0xfe61, + 0xfe63, 0xfe63, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff0a, + 0xff0c, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3b, 0xff3d, + 0xff3f, 0xff3f, + 0xff5b, 0xff5b, + 0xff5d, 0xff5d, + 0xff5f, 0xff65, + 0x10100, 0x10101, + 0x1039f, 0x1039f, + 0x10a50, 0x10a58 +}; /* CR_Punct */ + +/* 'Space': [[:Space:]] */ +static const OnigCodePoint CR_Space[] = { + 11, + 0x0009, 0x000d, + 0x0020, 0x0020, + 0x0085, 0x0085, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x2028, 0x2029, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; /* CR_Space */ + +/* 'Upper': [[:Upper:]] */ +static const OnigCodePoint CR_Upper[] = { + 476, + 0x0041, 0x005a, + 0x00c0, 0x00d6, + 0x00d8, 0x00de, + 0x0100, 0x0100, + 0x0102, 0x0102, + 0x0104, 0x0104, + 0x0106, 0x0106, + 0x0108, 0x0108, + 0x010a, 0x010a, + 0x010c, 0x010c, + 0x010e, 0x010e, + 0x0110, 0x0110, + 0x0112, 0x0112, + 0x0114, 0x0114, + 0x0116, 0x0116, + 0x0118, 0x0118, + 0x011a, 0x011a, + 0x011c, 0x011c, + 0x011e, 0x011e, + 0x0120, 0x0120, + 0x0122, 0x0122, + 0x0124, 0x0124, + 0x0126, 0x0126, + 0x0128, 0x0128, + 0x012a, 0x012a, + 0x012c, 0x012c, + 0x012e, 0x012e, + 0x0130, 0x0130, + 0x0132, 0x0132, + 0x0134, 0x0134, + 0x0136, 0x0136, + 0x0139, 0x0139, + 0x013b, 0x013b, + 0x013d, 0x013d, + 0x013f, 0x013f, + 0x0141, 0x0141, + 0x0143, 0x0143, + 0x0145, 0x0145, + 0x0147, 0x0147, + 0x014a, 0x014a, + 0x014c, 0x014c, + 0x014e, 0x014e, + 0x0150, 0x0150, + 0x0152, 0x0152, + 0x0154, 0x0154, + 0x0156, 0x0156, + 0x0158, 0x0158, + 0x015a, 0x015a, + 0x015c, 0x015c, + 0x015e, 0x015e, + 0x0160, 0x0160, + 0x0162, 0x0162, + 0x0164, 0x0164, + 0x0166, 0x0166, + 0x0168, 0x0168, + 0x016a, 0x016a, + 0x016c, 0x016c, + 0x016e, 0x016e, + 0x0170, 0x0170, + 0x0172, 0x0172, + 0x0174, 0x0174, + 0x0176, 0x0176, + 0x0178, 0x0179, + 
0x017b, 0x017b, + 0x017d, 0x017d, + 0x0181, 0x0182, + 0x0184, 0x0184, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a2, 0x01a2, + 0x01a4, 0x01a4, + 0x01a6, 0x01a7, + 0x01a9, 0x01a9, + 0x01ac, 0x01ac, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b5, 0x01b5, + 0x01b7, 0x01b8, + 0x01bc, 0x01bc, + 0x01c4, 0x01c4, + 0x01c7, 0x01c7, + 0x01ca, 0x01ca, + 0x01cd, 0x01cd, + 0x01cf, 0x01cf, + 0x01d1, 0x01d1, + 0x01d3, 0x01d3, + 0x01d5, 0x01d5, + 0x01d7, 0x01d7, + 0x01d9, 0x01d9, + 0x01db, 0x01db, + 0x01de, 0x01de, + 0x01e0, 0x01e0, + 0x01e2, 0x01e2, + 0x01e4, 0x01e4, + 0x01e6, 0x01e6, + 0x01e8, 0x01e8, + 0x01ea, 0x01ea, + 0x01ec, 0x01ec, + 0x01ee, 0x01ee, + 0x01f1, 0x01f1, + 0x01f4, 0x01f4, + 0x01f6, 0x01f8, + 0x01fa, 0x01fa, + 0x01fc, 0x01fc, + 0x01fe, 0x01fe, + 0x0200, 0x0200, + 0x0202, 0x0202, + 0x0204, 0x0204, + 0x0206, 0x0206, + 0x0208, 0x0208, + 0x020a, 0x020a, + 0x020c, 0x020c, + 0x020e, 0x020e, + 0x0210, 0x0210, + 0x0212, 0x0212, + 0x0214, 0x0214, + 0x0216, 0x0216, + 0x0218, 0x0218, + 0x021a, 0x021a, + 0x021c, 0x021c, + 0x021e, 0x021e, + 0x0220, 0x0220, + 0x0222, 0x0222, + 0x0224, 0x0224, + 0x0226, 0x0226, + 0x0228, 0x0228, + 0x022a, 0x022a, + 0x022c, 0x022c, + 0x022e, 0x022e, + 0x0230, 0x0230, + 0x0232, 0x0232, + 0x023a, 0x023b, + 0x023d, 0x023e, + 0x0241, 0x0241, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03d2, 0x03d4, + 0x03d8, 0x03d8, + 0x03da, 0x03da, + 0x03dc, 0x03dc, + 0x03de, 0x03de, + 0x03e0, 0x03e0, + 0x03e2, 0x03e2, + 0x03e4, 0x03e4, + 0x03e6, 0x03e6, + 0x03e8, 0x03e8, + 0x03ea, 0x03ea, + 0x03ec, 0x03ec, + 0x03ee, 0x03ee, + 0x03f4, 0x03f4, + 0x03f7, 0x03f7, + 0x03f9, 0x03fa, + 0x03fd, 0x042f, + 0x0460, 0x0460, + 0x0462, 0x0462, + 0x0464, 0x0464, + 0x0466, 0x0466, + 0x0468, 0x0468, + 0x046a, 0x046a, + 0x046c, 0x046c, + 0x046e, 0x046e, + 0x0470, 0x0470, + 0x0472, 0x0472, + 0x0474, 0x0474, + 0x0476, 0x0476, + 
0x0478, 0x0478, + 0x047a, 0x047a, + 0x047c, 0x047c, + 0x047e, 0x047e, + 0x0480, 0x0480, + 0x048a, 0x048a, + 0x048c, 0x048c, + 0x048e, 0x048e, + 0x0490, 0x0490, + 0x0492, 0x0492, + 0x0494, 0x0494, + 0x0496, 0x0496, + 0x0498, 0x0498, + 0x049a, 0x049a, + 0x049c, 0x049c, + 0x049e, 0x049e, + 0x04a0, 0x04a0, + 0x04a2, 0x04a2, + 0x04a4, 0x04a4, + 0x04a6, 0x04a6, + 0x04a8, 0x04a8, + 0x04aa, 0x04aa, + 0x04ac, 0x04ac, + 0x04ae, 0x04ae, + 0x04b0, 0x04b0, + 0x04b2, 0x04b2, + 0x04b4, 0x04b4, + 0x04b6, 0x04b6, + 0x04b8, 0x04b8, + 0x04ba, 0x04ba, + 0x04bc, 0x04bc, + 0x04be, 0x04be, + 0x04c0, 0x04c1, + 0x04c3, 0x04c3, + 0x04c5, 0x04c5, + 0x04c7, 0x04c7, + 0x04c9, 0x04c9, + 0x04cb, 0x04cb, + 0x04cd, 0x04cd, + 0x04d0, 0x04d0, + 0x04d2, 0x04d2, + 0x04d4, 0x04d4, + 0x04d6, 0x04d6, + 0x04d8, 0x04d8, + 0x04da, 0x04da, + 0x04dc, 0x04dc, + 0x04de, 0x04de, + 0x04e0, 0x04e0, + 0x04e2, 0x04e2, + 0x04e4, 0x04e4, + 0x04e6, 0x04e6, + 0x04e8, 0x04e8, + 0x04ea, 0x04ea, + 0x04ec, 0x04ec, + 0x04ee, 0x04ee, + 0x04f0, 0x04f0, + 0x04f2, 0x04f2, + 0x04f4, 0x04f4, + 0x04f6, 0x04f6, + 0x04f8, 0x04f8, + 0x0500, 0x0500, + 0x0502, 0x0502, + 0x0504, 0x0504, + 0x0506, 0x0506, + 0x0508, 0x0508, + 0x050a, 0x050a, + 0x050c, 0x050c, + 0x050e, 0x050e, + 0x0531, 0x0556, + 0x10a0, 0x10c5, + 0x1e00, 0x1e00, + 0x1e02, 0x1e02, + 0x1e04, 0x1e04, + 0x1e06, 0x1e06, + 0x1e08, 0x1e08, + 0x1e0a, 0x1e0a, + 0x1e0c, 0x1e0c, + 0x1e0e, 0x1e0e, + 0x1e10, 0x1e10, + 0x1e12, 0x1e12, + 0x1e14, 0x1e14, + 0x1e16, 0x1e16, + 0x1e18, 0x1e18, + 0x1e1a, 0x1e1a, + 0x1e1c, 0x1e1c, + 0x1e1e, 0x1e1e, + 0x1e20, 0x1e20, + 0x1e22, 0x1e22, + 0x1e24, 0x1e24, + 0x1e26, 0x1e26, + 0x1e28, 0x1e28, + 0x1e2a, 0x1e2a, + 0x1e2c, 0x1e2c, + 0x1e2e, 0x1e2e, + 0x1e30, 0x1e30, + 0x1e32, 0x1e32, + 0x1e34, 0x1e34, + 0x1e36, 0x1e36, + 0x1e38, 0x1e38, + 0x1e3a, 0x1e3a, + 0x1e3c, 0x1e3c, + 0x1e3e, 0x1e3e, + 0x1e40, 0x1e40, + 0x1e42, 0x1e42, + 0x1e44, 0x1e44, + 0x1e46, 0x1e46, + 0x1e48, 0x1e48, + 0x1e4a, 0x1e4a, + 0x1e4c, 0x1e4c, + 0x1e4e, 0x1e4e, + 0x1e50, 0x1e50, + 
0x1e52, 0x1e52, + 0x1e54, 0x1e54, + 0x1e56, 0x1e56, + 0x1e58, 0x1e58, + 0x1e5a, 0x1e5a, + 0x1e5c, 0x1e5c, + 0x1e5e, 0x1e5e, + 0x1e60, 0x1e60, + 0x1e62, 0x1e62, + 0x1e64, 0x1e64, + 0x1e66, 0x1e66, + 0x1e68, 0x1e68, + 0x1e6a, 0x1e6a, + 0x1e6c, 0x1e6c, + 0x1e6e, 0x1e6e, + 0x1e70, 0x1e70, + 0x1e72, 0x1e72, + 0x1e74, 0x1e74, + 0x1e76, 0x1e76, + 0x1e78, 0x1e78, + 0x1e7a, 0x1e7a, + 0x1e7c, 0x1e7c, + 0x1e7e, 0x1e7e, + 0x1e80, 0x1e80, + 0x1e82, 0x1e82, + 0x1e84, 0x1e84, + 0x1e86, 0x1e86, + 0x1e88, 0x1e88, + 0x1e8a, 0x1e8a, + 0x1e8c, 0x1e8c, + 0x1e8e, 0x1e8e, + 0x1e90, 0x1e90, + 0x1e92, 0x1e92, + 0x1e94, 0x1e94, + 0x1ea0, 0x1ea0, + 0x1ea2, 0x1ea2, + 0x1ea4, 0x1ea4, + 0x1ea6, 0x1ea6, + 0x1ea8, 0x1ea8, + 0x1eaa, 0x1eaa, + 0x1eac, 0x1eac, + 0x1eae, 0x1eae, + 0x1eb0, 0x1eb0, + 0x1eb2, 0x1eb2, + 0x1eb4, 0x1eb4, + 0x1eb6, 0x1eb6, + 0x1eb8, 0x1eb8, + 0x1eba, 0x1eba, + 0x1ebc, 0x1ebc, + 0x1ebe, 0x1ebe, + 0x1ec0, 0x1ec0, + 0x1ec2, 0x1ec2, + 0x1ec4, 0x1ec4, + 0x1ec6, 0x1ec6, + 0x1ec8, 0x1ec8, + 0x1eca, 0x1eca, + 0x1ecc, 0x1ecc, + 0x1ece, 0x1ece, + 0x1ed0, 0x1ed0, + 0x1ed2, 0x1ed2, + 0x1ed4, 0x1ed4, + 0x1ed6, 0x1ed6, + 0x1ed8, 0x1ed8, + 0x1eda, 0x1eda, + 0x1edc, 0x1edc, + 0x1ede, 0x1ede, + 0x1ee0, 0x1ee0, + 0x1ee2, 0x1ee2, + 0x1ee4, 0x1ee4, + 0x1ee6, 0x1ee6, + 0x1ee8, 0x1ee8, + 0x1eea, 0x1eea, + 0x1eec, 0x1eec, + 0x1eee, 0x1eee, + 0x1ef0, 0x1ef0, + 0x1ef2, 0x1ef2, + 0x1ef4, 0x1ef4, + 0x1ef6, 0x1ef6, + 0x1ef8, 0x1ef8, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f5f, + 0x1f68, 0x1f6f, + 0x1fb8, 0x1fbb, + 0x1fc8, 0x1fcb, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210b, 0x210d, + 0x2110, 0x2112, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x2130, 0x2131, + 0x2133, 0x2133, + 0x213e, 0x213f, + 0x2145, 0x2145, + 0x2c00, 0x2c2e, + 0x2c80, 0x2c80, + 0x2c82, 0x2c82, + 
0x2c84, 0x2c84, + 0x2c86, 0x2c86, + 0x2c88, 0x2c88, + 0x2c8a, 0x2c8a, + 0x2c8c, 0x2c8c, + 0x2c8e, 0x2c8e, + 0x2c90, 0x2c90, + 0x2c92, 0x2c92, + 0x2c94, 0x2c94, + 0x2c96, 0x2c96, + 0x2c98, 0x2c98, + 0x2c9a, 0x2c9a, + 0x2c9c, 0x2c9c, + 0x2c9e, 0x2c9e, + 0x2ca0, 0x2ca0, + 0x2ca2, 0x2ca2, + 0x2ca4, 0x2ca4, + 0x2ca6, 0x2ca6, + 0x2ca8, 0x2ca8, + 0x2caa, 0x2caa, + 0x2cac, 0x2cac, + 0x2cae, 0x2cae, + 0x2cb0, 0x2cb0, + 0x2cb2, 0x2cb2, + 0x2cb4, 0x2cb4, + 0x2cb6, 0x2cb6, + 0x2cb8, 0x2cb8, + 0x2cba, 0x2cba, + 0x2cbc, 0x2cbc, + 0x2cbe, 0x2cbe, + 0x2cc0, 0x2cc0, + 0x2cc2, 0x2cc2, + 0x2cc4, 0x2cc4, + 0x2cc6, 0x2cc6, + 0x2cc8, 0x2cc8, + 0x2cca, 0x2cca, + 0x2ccc, 0x2ccc, + 0x2cce, 0x2cce, + 0x2cd0, 0x2cd0, + 0x2cd2, 0x2cd2, + 0x2cd4, 0x2cd4, + 0x2cd6, 0x2cd6, + 0x2cd8, 0x2cd8, + 0x2cda, 0x2cda, + 0x2cdc, 0x2cdc, + 0x2cde, 0x2cde, + 0x2ce0, 0x2ce0, + 0x2ce2, 0x2ce2, + 0xff21, 0xff3a, + 0x10400, 0x10427, + 0x1d400, 0x1d419, + 0x1d434, 0x1d44d, + 0x1d468, 0x1d481, + 0x1d49c, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b5, + 0x1d4d0, 0x1d4e9, + 0x1d504, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d538, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d56c, 0x1d585, + 0x1d5a0, 0x1d5b9, + 0x1d5d4, 0x1d5ed, + 0x1d608, 0x1d621, + 0x1d63c, 0x1d655, + 0x1d670, 0x1d689, + 0x1d6a8, 0x1d6c0, + 0x1d6e2, 0x1d6fa, + 0x1d71c, 0x1d734, + 0x1d756, 0x1d76e, + 0x1d790, 0x1d7a8 +}; /* CR_Upper */ + +/* 'XDigit': [[:XDigit:]] */ +static const OnigCodePoint CR_XDigit[] = { + 3, + 0x0030, 0x0039, + 0x0041, 0x0046, + 0x0061, 0x0066 +}; /* CR_XDigit */ + +/* 'Word': [[:Word:]] */ +static const OnigCodePoint CR_Word[] = { + 464, + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x005f, 0x005f, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b2, 0x00b3, + 0x00b5, 0x00b5, + 0x00b9, 0x00ba, + 0x00bc, 0x00be, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0241, + 0x0250, 0x02c1, + 0x02c6, 
0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x09f4, 0x09f9, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 
0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bf2, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f20, 0x0f33, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1049, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 
0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x135f, + 0x1369, 0x137c, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x180b, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x1a00, 0x1a1b, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x203f, 0x2040, + 0x2054, 0x2054, + 0x2070, 0x2071, + 0x2074, 0x2079, + 0x207f, 0x2089, + 0x2090, 0x2094, + 0x20d0, 0x20eb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x2153, 0x2183, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2ce4, + 0x2cfd, 0x2cfd, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3005, 0x3007, + 0x3021, 0x302f, + 0x3031, 0x3035, + 0x3038, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 
0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3192, 0x3195, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3220, 0x3229, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 0xa48c, + 0xa800, 0xa827, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe33, 0xfe34, + 0xfe4d, 0xfe4f, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff3f, 0xff3f, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10107, 0x10133, + 0x10140, 0x10178, + 0x1018a, 0x1018a, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x103d1, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 
0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +}; /* CR_Word */ + +/* 'Alnum': [[:Alnum:]] */ +static const OnigCodePoint CR_Alnum[] = { + 436, + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0241, + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 
0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bef, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f20, 0x0f29, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 
0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1049, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x135f, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x17e0, 0x17e9, + 0x180b, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x1a00, 0x1a1b, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x20d0, 0x20eb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2ce4, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 
0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3005, 0x3006, + 0x302a, 0x302f, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 0xa48c, + 0xa800, 0xa827, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 
0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +}; /* CR_Alnum */ + +/* 'ASCII': [[:ASCII:]] */ +static const OnigCodePoint CR_ASCII[] = { + 1, + 0x0000, 0x007f +}; /* CR_ASCII */ + +#ifdef USE_UNICODE_PROPERTIES + +/* 'Any': - */ +static const OnigCodePoint CR_Any[] = { + 1, + 0x0000, 0x10ffff +}; /* CR_Any */ + +/* 'Assigned': - */ +static const OnigCodePoint CR_Assigned[] = { + 420, + 0x0000, 0x0241, + 0x0250, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x037e, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05b9, + 0x05bb, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060b, 0x0615, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0970, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 
0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fd1, + 0x1000, 0x1021, + 0x1023, 
0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, + 0x13a0, 0x13f4, + 0x1401, 0x1676, + 0x1680, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x19de, 0x1a1b, + 0x1a1e, 0x1a1f, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2000, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x2090, 0x2094, + 0x20a0, 0x20b5, + 0x20d0, 0x20eb, + 0x2100, 0x214c, + 0x2153, 0x2183, + 0x2190, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c6, + 0x27d0, 0x27eb, + 0x27f0, 0x2b13, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2cea, + 0x2cf9, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 
0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3000, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31c0, 0x31cf, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fbb, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xa700, 0xa716, + 0xa800, 0xa82b, + 0xac00, 0xd7a3, + 0xd800, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe19, + 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1018a, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x103c3, + 0x103c8, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d200, 0x1d245, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 
0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Assigned */ + +/* 'C': Major Category */ +static const OnigCodePoint CR_C[] = { + 422, + 0x0000, 0x001f, + 0x007f, 0x009f, + 0x00ad, 0x00ad, + 0x0242, 0x024f, + 0x0370, 0x0373, + 0x0376, 0x0379, + 0x037b, 0x037d, + 0x037f, 0x0383, + 0x038b, 0x038b, + 0x038d, 0x038d, + 0x03a2, 0x03a2, + 0x03cf, 0x03cf, + 0x0487, 0x0487, + 0x04cf, 0x04cf, + 0x04fa, 0x04ff, + 0x0510, 0x0530, + 0x0557, 0x0558, + 0x0560, 0x0560, + 0x0588, 0x0588, + 0x058b, 0x0590, + 0x05ba, 0x05ba, + 0x05c8, 0x05cf, + 0x05eb, 0x05ef, + 0x05f5, 0x060a, + 0x0616, 0x061a, + 0x061c, 0x061d, + 0x0620, 0x0620, + 0x063b, 0x063f, + 0x065f, 0x065f, + 0x06dd, 0x06dd, + 0x070e, 0x070f, + 0x074b, 0x074c, + 0x076e, 0x077f, + 0x07b2, 0x0900, + 0x093a, 0x093b, + 0x094e, 0x094f, + 0x0955, 0x0957, + 0x0971, 0x097c, + 0x097e, 0x0980, + 0x0984, 0x0984, + 0x098d, 0x098e, + 0x0991, 0x0992, + 0x09a9, 0x09a9, + 0x09b1, 0x09b1, + 0x09b3, 0x09b5, + 0x09ba, 0x09bb, + 0x09c5, 0x09c6, + 0x09c9, 0x09ca, + 0x09cf, 0x09d6, + 0x09d8, 0x09db, + 0x09de, 0x09de, + 0x09e4, 0x09e5, + 0x09fb, 0x0a00, + 0x0a04, 0x0a04, + 0x0a0b, 0x0a0e, + 0x0a11, 0x0a12, + 0x0a29, 0x0a29, + 0x0a31, 0x0a31, + 0x0a34, 0x0a34, + 0x0a37, 0x0a37, + 0x0a3a, 0x0a3b, + 0x0a3d, 0x0a3d, + 0x0a43, 0x0a46, + 0x0a49, 0x0a4a, + 0x0a4e, 0x0a58, + 0x0a5d, 0x0a5d, + 0x0a5f, 0x0a65, + 0x0a75, 0x0a80, + 0x0a84, 0x0a84, + 0x0a8e, 0x0a8e, + 0x0a92, 0x0a92, + 0x0aa9, 0x0aa9, + 0x0ab1, 0x0ab1, + 0x0ab4, 0x0ab4, + 0x0aba, 0x0abb, + 0x0ac6, 0x0ac6, + 0x0aca, 0x0aca, + 0x0ace, 0x0acf, + 0x0ad1, 0x0adf, + 0x0ae4, 0x0ae5, + 0x0af0, 0x0af0, + 0x0af2, 0x0b00, + 0x0b04, 0x0b04, + 0x0b0d, 0x0b0e, 
+ 0x0b11, 0x0b12, + 0x0b29, 0x0b29, + 0x0b31, 0x0b31, + 0x0b34, 0x0b34, + 0x0b3a, 0x0b3b, + 0x0b44, 0x0b46, + 0x0b49, 0x0b4a, + 0x0b4e, 0x0b55, + 0x0b58, 0x0b5b, + 0x0b5e, 0x0b5e, + 0x0b62, 0x0b65, + 0x0b72, 0x0b81, + 0x0b84, 0x0b84, + 0x0b8b, 0x0b8d, + 0x0b91, 0x0b91, + 0x0b96, 0x0b98, + 0x0b9b, 0x0b9b, + 0x0b9d, 0x0b9d, + 0x0ba0, 0x0ba2, + 0x0ba5, 0x0ba7, + 0x0bab, 0x0bad, + 0x0bba, 0x0bbd, + 0x0bc3, 0x0bc5, + 0x0bc9, 0x0bc9, + 0x0bce, 0x0bd6, + 0x0bd8, 0x0be5, + 0x0bfb, 0x0c00, + 0x0c04, 0x0c04, + 0x0c0d, 0x0c0d, + 0x0c11, 0x0c11, + 0x0c29, 0x0c29, + 0x0c34, 0x0c34, + 0x0c3a, 0x0c3d, + 0x0c45, 0x0c45, + 0x0c49, 0x0c49, + 0x0c4e, 0x0c54, + 0x0c57, 0x0c5f, + 0x0c62, 0x0c65, + 0x0c70, 0x0c81, + 0x0c84, 0x0c84, + 0x0c8d, 0x0c8d, + 0x0c91, 0x0c91, + 0x0ca9, 0x0ca9, + 0x0cb4, 0x0cb4, + 0x0cba, 0x0cbb, + 0x0cc5, 0x0cc5, + 0x0cc9, 0x0cc9, + 0x0cce, 0x0cd4, + 0x0cd7, 0x0cdd, + 0x0cdf, 0x0cdf, + 0x0ce2, 0x0ce5, + 0x0cf0, 0x0d01, + 0x0d04, 0x0d04, + 0x0d0d, 0x0d0d, + 0x0d11, 0x0d11, + 0x0d29, 0x0d29, + 0x0d3a, 0x0d3d, + 0x0d44, 0x0d45, + 0x0d49, 0x0d49, + 0x0d4e, 0x0d56, + 0x0d58, 0x0d5f, + 0x0d62, 0x0d65, + 0x0d70, 0x0d81, + 0x0d84, 0x0d84, + 0x0d97, 0x0d99, + 0x0db2, 0x0db2, + 0x0dbc, 0x0dbc, + 0x0dbe, 0x0dbf, + 0x0dc7, 0x0dc9, + 0x0dcb, 0x0dce, + 0x0dd5, 0x0dd5, + 0x0dd7, 0x0dd7, + 0x0de0, 0x0df1, + 0x0df5, 0x0e00, + 0x0e3b, 0x0e3e, + 0x0e5c, 0x0e80, + 0x0e83, 0x0e83, + 0x0e85, 0x0e86, + 0x0e89, 0x0e89, + 0x0e8b, 0x0e8c, + 0x0e8e, 0x0e93, + 0x0e98, 0x0e98, + 0x0ea0, 0x0ea0, + 0x0ea4, 0x0ea4, + 0x0ea6, 0x0ea6, + 0x0ea8, 0x0ea9, + 0x0eac, 0x0eac, + 0x0eba, 0x0eba, + 0x0ebe, 0x0ebf, + 0x0ec5, 0x0ec5, + 0x0ec7, 0x0ec7, + 0x0ece, 0x0ecf, + 0x0eda, 0x0edb, + 0x0ede, 0x0eff, + 0x0f48, 0x0f48, + 0x0f6b, 0x0f70, + 0x0f8c, 0x0f8f, + 0x0f98, 0x0f98, + 0x0fbd, 0x0fbd, + 0x0fcd, 0x0fce, + 0x0fd2, 0x0fff, + 0x1022, 0x1022, + 0x1028, 0x1028, + 0x102b, 0x102b, + 0x1033, 0x1035, + 0x103a, 0x103f, + 0x105a, 0x109f, + 0x10c6, 0x10cf, + 0x10fd, 0x10ff, + 0x115a, 0x115e, + 0x11a3, 0x11a7, + 
0x11fa, 0x11ff, + 0x1249, 0x1249, + 0x124e, 0x124f, + 0x1257, 0x1257, + 0x1259, 0x1259, + 0x125e, 0x125f, + 0x1289, 0x1289, + 0x128e, 0x128f, + 0x12b1, 0x12b1, + 0x12b6, 0x12b7, + 0x12bf, 0x12bf, + 0x12c1, 0x12c1, + 0x12c6, 0x12c7, + 0x12d7, 0x12d7, + 0x1311, 0x1311, + 0x1316, 0x1317, + 0x135b, 0x135e, + 0x137d, 0x137f, + 0x139a, 0x139f, + 0x13f5, 0x1400, + 0x1677, 0x167f, + 0x169d, 0x169f, + 0x16f1, 0x16ff, + 0x170d, 0x170d, + 0x1715, 0x171f, + 0x1737, 0x173f, + 0x1754, 0x175f, + 0x176d, 0x176d, + 0x1771, 0x1771, + 0x1774, 0x177f, + 0x17b4, 0x17b5, + 0x17de, 0x17df, + 0x17ea, 0x17ef, + 0x17fa, 0x17ff, + 0x180f, 0x180f, + 0x181a, 0x181f, + 0x1878, 0x187f, + 0x18aa, 0x18ff, + 0x191d, 0x191f, + 0x192c, 0x192f, + 0x193c, 0x193f, + 0x1941, 0x1943, + 0x196e, 0x196f, + 0x1975, 0x197f, + 0x19aa, 0x19af, + 0x19ca, 0x19cf, + 0x19da, 0x19dd, + 0x1a1c, 0x1a1d, + 0x1a20, 0x1cff, + 0x1dc4, 0x1dff, + 0x1e9c, 0x1e9f, + 0x1efa, 0x1eff, + 0x1f16, 0x1f17, + 0x1f1e, 0x1f1f, + 0x1f46, 0x1f47, + 0x1f4e, 0x1f4f, + 0x1f58, 0x1f58, + 0x1f5a, 0x1f5a, + 0x1f5c, 0x1f5c, + 0x1f5e, 0x1f5e, + 0x1f7e, 0x1f7f, + 0x1fb5, 0x1fb5, + 0x1fc5, 0x1fc5, + 0x1fd4, 0x1fd5, + 0x1fdc, 0x1fdc, + 0x1ff0, 0x1ff1, + 0x1ff5, 0x1ff5, + 0x1fff, 0x1fff, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x206f, + 0x2072, 0x2073, + 0x208f, 0x208f, + 0x2095, 0x209f, + 0x20b6, 0x20cf, + 0x20ec, 0x20ff, + 0x214d, 0x2152, + 0x2184, 0x218f, + 0x23dc, 0x23ff, + 0x2427, 0x243f, + 0x244b, 0x245f, + 0x269d, 0x269f, + 0x26b2, 0x2700, + 0x2705, 0x2705, + 0x270a, 0x270b, + 0x2728, 0x2728, + 0x274c, 0x274c, + 0x274e, 0x274e, + 0x2753, 0x2755, + 0x2757, 0x2757, + 0x275f, 0x2760, + 0x2795, 0x2797, + 0x27b0, 0x27b0, + 0x27bf, 0x27bf, + 0x27c7, 0x27cf, + 0x27ec, 0x27ef, + 0x2b14, 0x2bff, + 0x2c2f, 0x2c2f, + 0x2c5f, 0x2c7f, + 0x2ceb, 0x2cf8, + 0x2d26, 0x2d2f, + 0x2d66, 0x2d6e, + 0x2d70, 0x2d7f, + 0x2d97, 0x2d9f, + 0x2da7, 0x2da7, + 0x2daf, 0x2daf, + 0x2db7, 0x2db7, + 0x2dbf, 0x2dbf, + 0x2dc7, 0x2dc7, + 0x2dcf, 0x2dcf, + 0x2dd7, 0x2dd7, + 
0x2ddf, 0x2dff, + 0x2e18, 0x2e1b, + 0x2e1e, 0x2e7f, + 0x2e9a, 0x2e9a, + 0x2ef4, 0x2eff, + 0x2fd6, 0x2fef, + 0x2ffc, 0x2fff, + 0x3040, 0x3040, + 0x3097, 0x3098, + 0x3100, 0x3104, + 0x312d, 0x3130, + 0x318f, 0x318f, + 0x31b8, 0x31bf, + 0x31d0, 0x31ef, + 0x321f, 0x321f, + 0x3244, 0x324f, + 0x32ff, 0x32ff, + 0x4db6, 0x4dbf, + 0x9fbc, 0x9fff, + 0xa48d, 0xa48f, + 0xa4c7, 0xa6ff, + 0xa717, 0xa7ff, + 0xa82c, 0xabff, + 0xd7a4, 0xf8ff, + 0xfa2e, 0xfa2f, + 0xfa6b, 0xfa6f, + 0xfada, 0xfaff, + 0xfb07, 0xfb12, + 0xfb18, 0xfb1c, + 0xfb37, 0xfb37, + 0xfb3d, 0xfb3d, + 0xfb3f, 0xfb3f, + 0xfb42, 0xfb42, + 0xfb45, 0xfb45, + 0xfbb2, 0xfbd2, + 0xfd40, 0xfd4f, + 0xfd90, 0xfd91, + 0xfdc8, 0xfdef, + 0xfdfe, 0xfdff, + 0xfe1a, 0xfe1f, + 0xfe24, 0xfe2f, + 0xfe53, 0xfe53, + 0xfe67, 0xfe67, + 0xfe6c, 0xfe6f, + 0xfe75, 0xfe75, + 0xfefd, 0xff00, + 0xffbf, 0xffc1, + 0xffc8, 0xffc9, + 0xffd0, 0xffd1, + 0xffd8, 0xffd9, + 0xffdd, 0xffdf, + 0xffe7, 0xffe7, + 0xffef, 0xfffb, + 0xfffe, 0xffff, + 0x1000c, 0x1000c, + 0x10027, 0x10027, + 0x1003b, 0x1003b, + 0x1003e, 0x1003e, + 0x1004e, 0x1004f, + 0x1005e, 0x1007f, + 0x100fb, 0x100ff, + 0x10103, 0x10106, + 0x10134, 0x10136, + 0x1018b, 0x102ff, + 0x1031f, 0x1031f, + 0x10324, 0x1032f, + 0x1034b, 0x1037f, + 0x1039e, 0x1039e, + 0x103c4, 0x103c7, + 0x103d6, 0x103ff, + 0x1049e, 0x1049f, + 0x104aa, 0x107ff, + 0x10806, 0x10807, + 0x10809, 0x10809, + 0x10836, 0x10836, + 0x10839, 0x1083b, + 0x1083d, 0x1083e, + 0x10840, 0x109ff, + 0x10a04, 0x10a04, + 0x10a07, 0x10a0b, + 0x10a14, 0x10a14, + 0x10a18, 0x10a18, + 0x10a34, 0x10a37, + 0x10a3b, 0x10a3e, + 0x10a48, 0x10a4f, + 0x10a59, 0x1cfff, + 0x1d0f6, 0x1d0ff, + 0x1d127, 0x1d129, + 0x1d173, 0x1d17a, + 0x1d1de, 0x1d1ff, + 0x1d246, 0x1d2ff, + 0x1d357, 0x1d3ff, + 0x1d455, 0x1d455, + 0x1d49d, 0x1d49d, + 0x1d4a0, 0x1d4a1, + 0x1d4a3, 0x1d4a4, + 0x1d4a7, 0x1d4a8, + 0x1d4ad, 0x1d4ad, + 0x1d4ba, 0x1d4ba, + 0x1d4bc, 0x1d4bc, + 0x1d4c4, 0x1d4c4, + 0x1d506, 0x1d506, + 0x1d50b, 0x1d50c, + 0x1d515, 0x1d515, + 0x1d51d, 0x1d51d, + 
0x1d53a, 0x1d53a, + 0x1d53f, 0x1d53f, + 0x1d545, 0x1d545, + 0x1d547, 0x1d549, + 0x1d551, 0x1d551, + 0x1d6a6, 0x1d6a7, + 0x1d7ca, 0x1d7cd, + 0x1d800, 0x1ffff, + 0x2a6d7, 0x2f7ff, + 0x2fa1e, 0xe00ff, + 0xe01f0, 0x10ffff +}; /* CR_C */ + +/* 'Cc': General Category */ +static const OnigCodePoint CR_Cc[] = { + 2, + 0x0000, 0x001f, + 0x007f, 0x009f +}; /* CR_Cc */ + +/* 'Cf': General Category */ +static const OnigCodePoint CR_Cf[] = { + 14, + 0x00ad, 0x00ad, + 0x0600, 0x0603, + 0x06dd, 0x06dd, + 0x070f, 0x070f, + 0x17b4, 0x17b5, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x2063, + 0x206a, 0x206f, + 0xfeff, 0xfeff, + 0xfff9, 0xfffb, + 0x1d173, 0x1d17a, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f +}; /* CR_Cf */ + +/* 'Cn': General Category */ +static const OnigCodePoint CR_Cn[] = { + 420, + 0x0242, 0x024f, + 0x0370, 0x0373, + 0x0376, 0x0379, + 0x037b, 0x037d, + 0x037f, 0x0383, + 0x038b, 0x038b, + 0x038d, 0x038d, + 0x03a2, 0x03a2, + 0x03cf, 0x03cf, + 0x0487, 0x0487, + 0x04cf, 0x04cf, + 0x04fa, 0x04ff, + 0x0510, 0x0530, + 0x0557, 0x0558, + 0x0560, 0x0560, + 0x0588, 0x0588, + 0x058b, 0x0590, + 0x05ba, 0x05ba, + 0x05c8, 0x05cf, + 0x05eb, 0x05ef, + 0x05f5, 0x05ff, + 0x0604, 0x060a, + 0x0616, 0x061a, + 0x061c, 0x061d, + 0x0620, 0x0620, + 0x063b, 0x063f, + 0x065f, 0x065f, + 0x070e, 0x070e, + 0x074b, 0x074c, + 0x076e, 0x077f, + 0x07b2, 0x0900, + 0x093a, 0x093b, + 0x094e, 0x094f, + 0x0955, 0x0957, + 0x0971, 0x097c, + 0x097e, 0x0980, + 0x0984, 0x0984, + 0x098d, 0x098e, + 0x0991, 0x0992, + 0x09a9, 0x09a9, + 0x09b1, 0x09b1, + 0x09b3, 0x09b5, + 0x09ba, 0x09bb, + 0x09c5, 0x09c6, + 0x09c9, 0x09ca, + 0x09cf, 0x09d6, + 0x09d8, 0x09db, + 0x09de, 0x09de, + 0x09e4, 0x09e5, + 0x09fb, 0x0a00, + 0x0a04, 0x0a04, + 0x0a0b, 0x0a0e, + 0x0a11, 0x0a12, + 0x0a29, 0x0a29, + 0x0a31, 0x0a31, + 0x0a34, 0x0a34, + 0x0a37, 0x0a37, + 0x0a3a, 0x0a3b, + 0x0a3d, 0x0a3d, + 0x0a43, 0x0a46, + 0x0a49, 0x0a4a, + 0x0a4e, 0x0a58, + 0x0a5d, 0x0a5d, + 0x0a5f, 0x0a65, + 0x0a75, 0x0a80, + 0x0a84, 0x0a84, + 0x0a8e, 0x0a8e, + 
0x0a92, 0x0a92, + 0x0aa9, 0x0aa9, + 0x0ab1, 0x0ab1, + 0x0ab4, 0x0ab4, + 0x0aba, 0x0abb, + 0x0ac6, 0x0ac6, + 0x0aca, 0x0aca, + 0x0ace, 0x0acf, + 0x0ad1, 0x0adf, + 0x0ae4, 0x0ae5, + 0x0af0, 0x0af0, + 0x0af2, 0x0b00, + 0x0b04, 0x0b04, + 0x0b0d, 0x0b0e, + 0x0b11, 0x0b12, + 0x0b29, 0x0b29, + 0x0b31, 0x0b31, + 0x0b34, 0x0b34, + 0x0b3a, 0x0b3b, + 0x0b44, 0x0b46, + 0x0b49, 0x0b4a, + 0x0b4e, 0x0b55, + 0x0b58, 0x0b5b, + 0x0b5e, 0x0b5e, + 0x0b62, 0x0b65, + 0x0b72, 0x0b81, + 0x0b84, 0x0b84, + 0x0b8b, 0x0b8d, + 0x0b91, 0x0b91, + 0x0b96, 0x0b98, + 0x0b9b, 0x0b9b, + 0x0b9d, 0x0b9d, + 0x0ba0, 0x0ba2, + 0x0ba5, 0x0ba7, + 0x0bab, 0x0bad, + 0x0bba, 0x0bbd, + 0x0bc3, 0x0bc5, + 0x0bc9, 0x0bc9, + 0x0bce, 0x0bd6, + 0x0bd8, 0x0be5, + 0x0bfb, 0x0c00, + 0x0c04, 0x0c04, + 0x0c0d, 0x0c0d, + 0x0c11, 0x0c11, + 0x0c29, 0x0c29, + 0x0c34, 0x0c34, + 0x0c3a, 0x0c3d, + 0x0c45, 0x0c45, + 0x0c49, 0x0c49, + 0x0c4e, 0x0c54, + 0x0c57, 0x0c5f, + 0x0c62, 0x0c65, + 0x0c70, 0x0c81, + 0x0c84, 0x0c84, + 0x0c8d, 0x0c8d, + 0x0c91, 0x0c91, + 0x0ca9, 0x0ca9, + 0x0cb4, 0x0cb4, + 0x0cba, 0x0cbb, + 0x0cc5, 0x0cc5, + 0x0cc9, 0x0cc9, + 0x0cce, 0x0cd4, + 0x0cd7, 0x0cdd, + 0x0cdf, 0x0cdf, + 0x0ce2, 0x0ce5, + 0x0cf0, 0x0d01, + 0x0d04, 0x0d04, + 0x0d0d, 0x0d0d, + 0x0d11, 0x0d11, + 0x0d29, 0x0d29, + 0x0d3a, 0x0d3d, + 0x0d44, 0x0d45, + 0x0d49, 0x0d49, + 0x0d4e, 0x0d56, + 0x0d58, 0x0d5f, + 0x0d62, 0x0d65, + 0x0d70, 0x0d81, + 0x0d84, 0x0d84, + 0x0d97, 0x0d99, + 0x0db2, 0x0db2, + 0x0dbc, 0x0dbc, + 0x0dbe, 0x0dbf, + 0x0dc7, 0x0dc9, + 0x0dcb, 0x0dce, + 0x0dd5, 0x0dd5, + 0x0dd7, 0x0dd7, + 0x0de0, 0x0df1, + 0x0df5, 0x0e00, + 0x0e3b, 0x0e3e, + 0x0e5c, 0x0e80, + 0x0e83, 0x0e83, + 0x0e85, 0x0e86, + 0x0e89, 0x0e89, + 0x0e8b, 0x0e8c, + 0x0e8e, 0x0e93, + 0x0e98, 0x0e98, + 0x0ea0, 0x0ea0, + 0x0ea4, 0x0ea4, + 0x0ea6, 0x0ea6, + 0x0ea8, 0x0ea9, + 0x0eac, 0x0eac, + 0x0eba, 0x0eba, + 0x0ebe, 0x0ebf, + 0x0ec5, 0x0ec5, + 0x0ec7, 0x0ec7, + 0x0ece, 0x0ecf, + 0x0eda, 0x0edb, + 0x0ede, 0x0eff, + 0x0f48, 0x0f48, + 0x0f6b, 0x0f70, + 0x0f8c, 0x0f8f, + 
0x0f98, 0x0f98, + 0x0fbd, 0x0fbd, + 0x0fcd, 0x0fce, + 0x0fd2, 0x0fff, + 0x1022, 0x1022, + 0x1028, 0x1028, + 0x102b, 0x102b, + 0x1033, 0x1035, + 0x103a, 0x103f, + 0x105a, 0x109f, + 0x10c6, 0x10cf, + 0x10fd, 0x10ff, + 0x115a, 0x115e, + 0x11a3, 0x11a7, + 0x11fa, 0x11ff, + 0x1249, 0x1249, + 0x124e, 0x124f, + 0x1257, 0x1257, + 0x1259, 0x1259, + 0x125e, 0x125f, + 0x1289, 0x1289, + 0x128e, 0x128f, + 0x12b1, 0x12b1, + 0x12b6, 0x12b7, + 0x12bf, 0x12bf, + 0x12c1, 0x12c1, + 0x12c6, 0x12c7, + 0x12d7, 0x12d7, + 0x1311, 0x1311, + 0x1316, 0x1317, + 0x135b, 0x135e, + 0x137d, 0x137f, + 0x139a, 0x139f, + 0x13f5, 0x1400, + 0x1677, 0x167f, + 0x169d, 0x169f, + 0x16f1, 0x16ff, + 0x170d, 0x170d, + 0x1715, 0x171f, + 0x1737, 0x173f, + 0x1754, 0x175f, + 0x176d, 0x176d, + 0x1771, 0x1771, + 0x1774, 0x177f, + 0x17de, 0x17df, + 0x17ea, 0x17ef, + 0x17fa, 0x17ff, + 0x180f, 0x180f, + 0x181a, 0x181f, + 0x1878, 0x187f, + 0x18aa, 0x18ff, + 0x191d, 0x191f, + 0x192c, 0x192f, + 0x193c, 0x193f, + 0x1941, 0x1943, + 0x196e, 0x196f, + 0x1975, 0x197f, + 0x19aa, 0x19af, + 0x19ca, 0x19cf, + 0x19da, 0x19dd, + 0x1a1c, 0x1a1d, + 0x1a20, 0x1cff, + 0x1dc4, 0x1dff, + 0x1e9c, 0x1e9f, + 0x1efa, 0x1eff, + 0x1f16, 0x1f17, + 0x1f1e, 0x1f1f, + 0x1f46, 0x1f47, + 0x1f4e, 0x1f4f, + 0x1f58, 0x1f58, + 0x1f5a, 0x1f5a, + 0x1f5c, 0x1f5c, + 0x1f5e, 0x1f5e, + 0x1f7e, 0x1f7f, + 0x1fb5, 0x1fb5, + 0x1fc5, 0x1fc5, + 0x1fd4, 0x1fd5, + 0x1fdc, 0x1fdc, + 0x1ff0, 0x1ff1, + 0x1ff5, 0x1ff5, + 0x1fff, 0x1fff, + 0x2064, 0x2069, + 0x2072, 0x2073, + 0x208f, 0x208f, + 0x2095, 0x209f, + 0x20b6, 0x20cf, + 0x20ec, 0x20ff, + 0x214d, 0x2152, + 0x2184, 0x218f, + 0x23dc, 0x23ff, + 0x2427, 0x243f, + 0x244b, 0x245f, + 0x269d, 0x269f, + 0x26b2, 0x2700, + 0x2705, 0x2705, + 0x270a, 0x270b, + 0x2728, 0x2728, + 0x274c, 0x274c, + 0x274e, 0x274e, + 0x2753, 0x2755, + 0x2757, 0x2757, + 0x275f, 0x2760, + 0x2795, 0x2797, + 0x27b0, 0x27b0, + 0x27bf, 0x27bf, + 0x27c7, 0x27cf, + 0x27ec, 0x27ef, + 0x2b14, 0x2bff, + 0x2c2f, 0x2c2f, + 0x2c5f, 0x2c7f, + 0x2ceb, 0x2cf8, + 
0x2d26, 0x2d2f, + 0x2d66, 0x2d6e, + 0x2d70, 0x2d7f, + 0x2d97, 0x2d9f, + 0x2da7, 0x2da7, + 0x2daf, 0x2daf, + 0x2db7, 0x2db7, + 0x2dbf, 0x2dbf, + 0x2dc7, 0x2dc7, + 0x2dcf, 0x2dcf, + 0x2dd7, 0x2dd7, + 0x2ddf, 0x2dff, + 0x2e18, 0x2e1b, + 0x2e1e, 0x2e7f, + 0x2e9a, 0x2e9a, + 0x2ef4, 0x2eff, + 0x2fd6, 0x2fef, + 0x2ffc, 0x2fff, + 0x3040, 0x3040, + 0x3097, 0x3098, + 0x3100, 0x3104, + 0x312d, 0x3130, + 0x318f, 0x318f, + 0x31b8, 0x31bf, + 0x31d0, 0x31ef, + 0x321f, 0x321f, + 0x3244, 0x324f, + 0x32ff, 0x32ff, + 0x4db6, 0x4dbf, + 0x9fbc, 0x9fff, + 0xa48d, 0xa48f, + 0xa4c7, 0xa6ff, + 0xa717, 0xa7ff, + 0xa82c, 0xabff, + 0xd7a4, 0xd7ff, + 0xfa2e, 0xfa2f, + 0xfa6b, 0xfa6f, + 0xfada, 0xfaff, + 0xfb07, 0xfb12, + 0xfb18, 0xfb1c, + 0xfb37, 0xfb37, + 0xfb3d, 0xfb3d, + 0xfb3f, 0xfb3f, + 0xfb42, 0xfb42, + 0xfb45, 0xfb45, + 0xfbb2, 0xfbd2, + 0xfd40, 0xfd4f, + 0xfd90, 0xfd91, + 0xfdc8, 0xfdef, + 0xfdfe, 0xfdff, + 0xfe1a, 0xfe1f, + 0xfe24, 0xfe2f, + 0xfe53, 0xfe53, + 0xfe67, 0xfe67, + 0xfe6c, 0xfe6f, + 0xfe75, 0xfe75, + 0xfefd, 0xfefe, + 0xff00, 0xff00, + 0xffbf, 0xffc1, + 0xffc8, 0xffc9, + 0xffd0, 0xffd1, + 0xffd8, 0xffd9, + 0xffdd, 0xffdf, + 0xffe7, 0xffe7, + 0xffef, 0xfff8, + 0xfffe, 0xffff, + 0x1000c, 0x1000c, + 0x10027, 0x10027, + 0x1003b, 0x1003b, + 0x1003e, 0x1003e, + 0x1004e, 0x1004f, + 0x1005e, 0x1007f, + 0x100fb, 0x100ff, + 0x10103, 0x10106, + 0x10134, 0x10136, + 0x1018b, 0x102ff, + 0x1031f, 0x1031f, + 0x10324, 0x1032f, + 0x1034b, 0x1037f, + 0x1039e, 0x1039e, + 0x103c4, 0x103c7, + 0x103d6, 0x103ff, + 0x1049e, 0x1049f, + 0x104aa, 0x107ff, + 0x10806, 0x10807, + 0x10809, 0x10809, + 0x10836, 0x10836, + 0x10839, 0x1083b, + 0x1083d, 0x1083e, + 0x10840, 0x109ff, + 0x10a04, 0x10a04, + 0x10a07, 0x10a0b, + 0x10a14, 0x10a14, + 0x10a18, 0x10a18, + 0x10a34, 0x10a37, + 0x10a3b, 0x10a3e, + 0x10a48, 0x10a4f, + 0x10a59, 0x1cfff, + 0x1d0f6, 0x1d0ff, + 0x1d127, 0x1d129, + 0x1d1de, 0x1d1ff, + 0x1d246, 0x1d2ff, + 0x1d357, 0x1d3ff, + 0x1d455, 0x1d455, + 0x1d49d, 0x1d49d, + 0x1d4a0, 0x1d4a1, + 0x1d4a3, 
0x1d4a4, + 0x1d4a7, 0x1d4a8, + 0x1d4ad, 0x1d4ad, + 0x1d4ba, 0x1d4ba, + 0x1d4bc, 0x1d4bc, + 0x1d4c4, 0x1d4c4, + 0x1d506, 0x1d506, + 0x1d50b, 0x1d50c, + 0x1d515, 0x1d515, + 0x1d51d, 0x1d51d, + 0x1d53a, 0x1d53a, + 0x1d53f, 0x1d53f, + 0x1d545, 0x1d545, + 0x1d547, 0x1d549, + 0x1d551, 0x1d551, + 0x1d6a6, 0x1d6a7, + 0x1d7ca, 0x1d7cd, + 0x1d800, 0x1ffff, + 0x2a6d7, 0x2f7ff, + 0x2fa1e, 0xe0000, + 0xe0002, 0xe001f, + 0xe0080, 0xe00ff, + 0xe01f0, 0xeffff, + 0xffffe, 0xfffff, + 0x10fffe, 0x10ffff +}; /* CR_Cn */ + +/* 'Co': General Category */ +static const OnigCodePoint CR_Co[] = { + 3, + 0xe000, 0xf8ff, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Co */ + +/* 'Cs': General Category */ +static const OnigCodePoint CR_Cs[] = { + 1, + 0xd800, 0xdfff +}; /* CR_Cs */ + +/* 'L': Major Category */ +static const OnigCodePoint CR_L[] = { + 347, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0241, + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x0481, + 0x048a, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0621, 0x063a, + 0x0640, 0x064a, + 0x066e, 0x066f, + 0x0671, 0x06d3, + 0x06d5, 0x06d5, + 0x06e5, 0x06e6, + 0x06ee, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x0710, + 0x0712, 0x072f, + 0x074d, 0x076d, + 0x0780, 0x07a5, + 0x07b1, 0x07b1, + 0x0904, 0x0939, + 0x093d, 0x093d, + 0x0950, 0x0950, + 0x0958, 0x0961, + 0x097d, 0x097d, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bd, 0x09bd, + 0x09ce, 0x09ce, + 0x09dc, 0x09dd, + 0x09df, 0x09e1, + 0x09f0, 0x09f1, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 
0x0a38, 0x0a39, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a72, 0x0a74, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abd, 0x0abd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae1, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3d, 0x0b3d, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b83, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c60, 0x0c61, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbd, 0x0cbd, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d60, 0x0d61, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0e01, 0x0e30, + 0x0e32, 0x0e33, + 0x0e40, 0x0e46, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb0, + 0x0eb2, 0x0eb3, + 0x0ebd, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f40, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f88, 0x0f8b, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x1050, 0x1055, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 
0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dc, + 0x1820, 0x1877, + 0x1880, 0x18a8, + 0x1900, 0x191c, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19c1, 0x19c7, + 0x1a00, 0x1a16, + 0x1d00, 0x1dbf, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2ce4, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3005, 0x3006, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 0xa48c, + 0xa800, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa822, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb1d, + 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff21, 0xff3a, + 
0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10400, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a00, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d +}; /* CR_L */ + +/* 'Ll': General Category */ +static const OnigCodePoint CR_Ll[] = { + 480, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00df, 0x00f6, + 0x00f8, 0x00ff, + 0x0101, 0x0101, + 0x0103, 0x0103, + 0x0105, 0x0105, + 0x0107, 0x0107, + 0x0109, 0x0109, + 0x010b, 0x010b, + 0x010d, 0x010d, + 0x010f, 0x010f, + 0x0111, 0x0111, + 0x0113, 0x0113, + 0x0115, 0x0115, + 0x0117, 0x0117, + 0x0119, 0x0119, + 0x011b, 0x011b, + 0x011d, 0x011d, + 0x011f, 0x011f, + 0x0121, 0x0121, + 0x0123, 0x0123, + 0x0125, 0x0125, + 0x0127, 0x0127, + 0x0129, 0x0129, + 0x012b, 0x012b, + 0x012d, 0x012d, + 0x012f, 0x012f, + 0x0131, 0x0131, + 0x0133, 0x0133, + 0x0135, 0x0135, + 0x0137, 0x0138, + 0x013a, 0x013a, + 0x013c, 0x013c, + 0x013e, 0x013e, + 0x0140, 0x0140, + 0x0142, 
0x0142, + 0x0144, 0x0144, + 0x0146, 0x0146, + 0x0148, 0x0149, + 0x014b, 0x014b, + 0x014d, 0x014d, + 0x014f, 0x014f, + 0x0151, 0x0151, + 0x0153, 0x0153, + 0x0155, 0x0155, + 0x0157, 0x0157, + 0x0159, 0x0159, + 0x015b, 0x015b, + 0x015d, 0x015d, + 0x015f, 0x015f, + 0x0161, 0x0161, + 0x0163, 0x0163, + 0x0165, 0x0165, + 0x0167, 0x0167, + 0x0169, 0x0169, + 0x016b, 0x016b, + 0x016d, 0x016d, + 0x016f, 0x016f, + 0x0171, 0x0171, + 0x0173, 0x0173, + 0x0175, 0x0175, + 0x0177, 0x0177, + 0x017a, 0x017a, + 0x017c, 0x017c, + 0x017e, 0x0180, + 0x0183, 0x0183, + 0x0185, 0x0185, + 0x0188, 0x0188, + 0x018c, 0x018d, + 0x0192, 0x0192, + 0x0195, 0x0195, + 0x0199, 0x019b, + 0x019e, 0x019e, + 0x01a1, 0x01a1, + 0x01a3, 0x01a3, + 0x01a5, 0x01a5, + 0x01a8, 0x01a8, + 0x01aa, 0x01ab, + 0x01ad, 0x01ad, + 0x01b0, 0x01b0, + 0x01b4, 0x01b4, + 0x01b6, 0x01b6, + 0x01b9, 0x01ba, + 0x01bd, 0x01bf, + 0x01c6, 0x01c6, + 0x01c9, 0x01c9, + 0x01cc, 0x01cc, + 0x01ce, 0x01ce, + 0x01d0, 0x01d0, + 0x01d2, 0x01d2, + 0x01d4, 0x01d4, + 0x01d6, 0x01d6, + 0x01d8, 0x01d8, + 0x01da, 0x01da, + 0x01dc, 0x01dd, + 0x01df, 0x01df, + 0x01e1, 0x01e1, + 0x01e3, 0x01e3, + 0x01e5, 0x01e5, + 0x01e7, 0x01e7, + 0x01e9, 0x01e9, + 0x01eb, 0x01eb, + 0x01ed, 0x01ed, + 0x01ef, 0x01f0, + 0x01f3, 0x01f3, + 0x01f5, 0x01f5, + 0x01f9, 0x01f9, + 0x01fb, 0x01fb, + 0x01fd, 0x01fd, + 0x01ff, 0x01ff, + 0x0201, 0x0201, + 0x0203, 0x0203, + 0x0205, 0x0205, + 0x0207, 0x0207, + 0x0209, 0x0209, + 0x020b, 0x020b, + 0x020d, 0x020d, + 0x020f, 0x020f, + 0x0211, 0x0211, + 0x0213, 0x0213, + 0x0215, 0x0215, + 0x0217, 0x0217, + 0x0219, 0x0219, + 0x021b, 0x021b, + 0x021d, 0x021d, + 0x021f, 0x021f, + 0x0221, 0x0221, + 0x0223, 0x0223, + 0x0225, 0x0225, + 0x0227, 0x0227, + 0x0229, 0x0229, + 0x022b, 0x022b, + 0x022d, 0x022d, + 0x022f, 0x022f, + 0x0231, 0x0231, + 0x0233, 0x0239, + 0x023c, 0x023c, + 0x023f, 0x0240, + 0x0250, 0x02af, + 0x0390, 0x0390, + 0x03ac, 0x03ce, + 0x03d0, 0x03d1, + 0x03d5, 0x03d7, + 0x03d9, 0x03d9, + 0x03db, 0x03db, + 0x03dd, 0x03dd, + 0x03df, 
0x03df, + 0x03e1, 0x03e1, + 0x03e3, 0x03e3, + 0x03e5, 0x03e5, + 0x03e7, 0x03e7, + 0x03e9, 0x03e9, + 0x03eb, 0x03eb, + 0x03ed, 0x03ed, + 0x03ef, 0x03f3, + 0x03f5, 0x03f5, + 0x03f8, 0x03f8, + 0x03fb, 0x03fc, + 0x0430, 0x045f, + 0x0461, 0x0461, + 0x0463, 0x0463, + 0x0465, 0x0465, + 0x0467, 0x0467, + 0x0469, 0x0469, + 0x046b, 0x046b, + 0x046d, 0x046d, + 0x046f, 0x046f, + 0x0471, 0x0471, + 0x0473, 0x0473, + 0x0475, 0x0475, + 0x0477, 0x0477, + 0x0479, 0x0479, + 0x047b, 0x047b, + 0x047d, 0x047d, + 0x047f, 0x047f, + 0x0481, 0x0481, + 0x048b, 0x048b, + 0x048d, 0x048d, + 0x048f, 0x048f, + 0x0491, 0x0491, + 0x0493, 0x0493, + 0x0495, 0x0495, + 0x0497, 0x0497, + 0x0499, 0x0499, + 0x049b, 0x049b, + 0x049d, 0x049d, + 0x049f, 0x049f, + 0x04a1, 0x04a1, + 0x04a3, 0x04a3, + 0x04a5, 0x04a5, + 0x04a7, 0x04a7, + 0x04a9, 0x04a9, + 0x04ab, 0x04ab, + 0x04ad, 0x04ad, + 0x04af, 0x04af, + 0x04b1, 0x04b1, + 0x04b3, 0x04b3, + 0x04b5, 0x04b5, + 0x04b7, 0x04b7, + 0x04b9, 0x04b9, + 0x04bb, 0x04bb, + 0x04bd, 0x04bd, + 0x04bf, 0x04bf, + 0x04c2, 0x04c2, + 0x04c4, 0x04c4, + 0x04c6, 0x04c6, + 0x04c8, 0x04c8, + 0x04ca, 0x04ca, + 0x04cc, 0x04cc, + 0x04ce, 0x04ce, + 0x04d1, 0x04d1, + 0x04d3, 0x04d3, + 0x04d5, 0x04d5, + 0x04d7, 0x04d7, + 0x04d9, 0x04d9, + 0x04db, 0x04db, + 0x04dd, 0x04dd, + 0x04df, 0x04df, + 0x04e1, 0x04e1, + 0x04e3, 0x04e3, + 0x04e5, 0x04e5, + 0x04e7, 0x04e7, + 0x04e9, 0x04e9, + 0x04eb, 0x04eb, + 0x04ed, 0x04ed, + 0x04ef, 0x04ef, + 0x04f1, 0x04f1, + 0x04f3, 0x04f3, + 0x04f5, 0x04f5, + 0x04f7, 0x04f7, + 0x04f9, 0x04f9, + 0x0501, 0x0501, + 0x0503, 0x0503, + 0x0505, 0x0505, + 0x0507, 0x0507, + 0x0509, 0x0509, + 0x050b, 0x050b, + 0x050d, 0x050d, + 0x050f, 0x050f, + 0x0561, 0x0587, + 0x1d00, 0x1d2b, + 0x1d62, 0x1d77, + 0x1d79, 0x1d9a, + 0x1e01, 0x1e01, + 0x1e03, 0x1e03, + 0x1e05, 0x1e05, + 0x1e07, 0x1e07, + 0x1e09, 0x1e09, + 0x1e0b, 0x1e0b, + 0x1e0d, 0x1e0d, + 0x1e0f, 0x1e0f, + 0x1e11, 0x1e11, + 0x1e13, 0x1e13, + 0x1e15, 0x1e15, + 0x1e17, 0x1e17, + 0x1e19, 0x1e19, + 0x1e1b, 0x1e1b, + 0x1e1d, 
0x1e1d, + 0x1e1f, 0x1e1f, + 0x1e21, 0x1e21, + 0x1e23, 0x1e23, + 0x1e25, 0x1e25, + 0x1e27, 0x1e27, + 0x1e29, 0x1e29, + 0x1e2b, 0x1e2b, + 0x1e2d, 0x1e2d, + 0x1e2f, 0x1e2f, + 0x1e31, 0x1e31, + 0x1e33, 0x1e33, + 0x1e35, 0x1e35, + 0x1e37, 0x1e37, + 0x1e39, 0x1e39, + 0x1e3b, 0x1e3b, + 0x1e3d, 0x1e3d, + 0x1e3f, 0x1e3f, + 0x1e41, 0x1e41, + 0x1e43, 0x1e43, + 0x1e45, 0x1e45, + 0x1e47, 0x1e47, + 0x1e49, 0x1e49, + 0x1e4b, 0x1e4b, + 0x1e4d, 0x1e4d, + 0x1e4f, 0x1e4f, + 0x1e51, 0x1e51, + 0x1e53, 0x1e53, + 0x1e55, 0x1e55, + 0x1e57, 0x1e57, + 0x1e59, 0x1e59, + 0x1e5b, 0x1e5b, + 0x1e5d, 0x1e5d, + 0x1e5f, 0x1e5f, + 0x1e61, 0x1e61, + 0x1e63, 0x1e63, + 0x1e65, 0x1e65, + 0x1e67, 0x1e67, + 0x1e69, 0x1e69, + 0x1e6b, 0x1e6b, + 0x1e6d, 0x1e6d, + 0x1e6f, 0x1e6f, + 0x1e71, 0x1e71, + 0x1e73, 0x1e73, + 0x1e75, 0x1e75, + 0x1e77, 0x1e77, + 0x1e79, 0x1e79, + 0x1e7b, 0x1e7b, + 0x1e7d, 0x1e7d, + 0x1e7f, 0x1e7f, + 0x1e81, 0x1e81, + 0x1e83, 0x1e83, + 0x1e85, 0x1e85, + 0x1e87, 0x1e87, + 0x1e89, 0x1e89, + 0x1e8b, 0x1e8b, + 0x1e8d, 0x1e8d, + 0x1e8f, 0x1e8f, + 0x1e91, 0x1e91, + 0x1e93, 0x1e93, + 0x1e95, 0x1e9b, + 0x1ea1, 0x1ea1, + 0x1ea3, 0x1ea3, + 0x1ea5, 0x1ea5, + 0x1ea7, 0x1ea7, + 0x1ea9, 0x1ea9, + 0x1eab, 0x1eab, + 0x1ead, 0x1ead, + 0x1eaf, 0x1eaf, + 0x1eb1, 0x1eb1, + 0x1eb3, 0x1eb3, + 0x1eb5, 0x1eb5, + 0x1eb7, 0x1eb7, + 0x1eb9, 0x1eb9, + 0x1ebb, 0x1ebb, + 0x1ebd, 0x1ebd, + 0x1ebf, 0x1ebf, + 0x1ec1, 0x1ec1, + 0x1ec3, 0x1ec3, + 0x1ec5, 0x1ec5, + 0x1ec7, 0x1ec7, + 0x1ec9, 0x1ec9, + 0x1ecb, 0x1ecb, + 0x1ecd, 0x1ecd, + 0x1ecf, 0x1ecf, + 0x1ed1, 0x1ed1, + 0x1ed3, 0x1ed3, + 0x1ed5, 0x1ed5, + 0x1ed7, 0x1ed7, + 0x1ed9, 0x1ed9, + 0x1edb, 0x1edb, + 0x1edd, 0x1edd, + 0x1edf, 0x1edf, + 0x1ee1, 0x1ee1, + 0x1ee3, 0x1ee3, + 0x1ee5, 0x1ee5, + 0x1ee7, 0x1ee7, + 0x1ee9, 0x1ee9, + 0x1eeb, 0x1eeb, + 0x1eed, 0x1eed, + 0x1eef, 0x1eef, + 0x1ef1, 0x1ef1, + 0x1ef3, 0x1ef3, + 0x1ef5, 0x1ef5, + 0x1ef7, 0x1ef7, + 0x1ef9, 0x1ef9, + 0x1f00, 0x1f07, + 0x1f10, 0x1f15, + 0x1f20, 0x1f27, + 0x1f30, 0x1f37, + 0x1f40, 0x1f45, + 0x1f50, 
0x1f57, + 0x1f60, 0x1f67, + 0x1f70, 0x1f7d, + 0x1f80, 0x1f87, + 0x1f90, 0x1f97, + 0x1fa0, 0x1fa7, + 0x1fb0, 0x1fb4, + 0x1fb6, 0x1fb7, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fc7, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, + 0x1fe0, 0x1fe7, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ff7, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x210a, 0x210a, + 0x210e, 0x210f, + 0x2113, 0x2113, + 0x212f, 0x212f, + 0x2134, 0x2134, + 0x2139, 0x2139, + 0x213c, 0x213d, + 0x2146, 0x2149, + 0x2c30, 0x2c5e, + 0x2c81, 0x2c81, + 0x2c83, 0x2c83, + 0x2c85, 0x2c85, + 0x2c87, 0x2c87, + 0x2c89, 0x2c89, + 0x2c8b, 0x2c8b, + 0x2c8d, 0x2c8d, + 0x2c8f, 0x2c8f, + 0x2c91, 0x2c91, + 0x2c93, 0x2c93, + 0x2c95, 0x2c95, + 0x2c97, 0x2c97, + 0x2c99, 0x2c99, + 0x2c9b, 0x2c9b, + 0x2c9d, 0x2c9d, + 0x2c9f, 0x2c9f, + 0x2ca1, 0x2ca1, + 0x2ca3, 0x2ca3, + 0x2ca5, 0x2ca5, + 0x2ca7, 0x2ca7, + 0x2ca9, 0x2ca9, + 0x2cab, 0x2cab, + 0x2cad, 0x2cad, + 0x2caf, 0x2caf, + 0x2cb1, 0x2cb1, + 0x2cb3, 0x2cb3, + 0x2cb5, 0x2cb5, + 0x2cb7, 0x2cb7, + 0x2cb9, 0x2cb9, + 0x2cbb, 0x2cbb, + 0x2cbd, 0x2cbd, + 0x2cbf, 0x2cbf, + 0x2cc1, 0x2cc1, + 0x2cc3, 0x2cc3, + 0x2cc5, 0x2cc5, + 0x2cc7, 0x2cc7, + 0x2cc9, 0x2cc9, + 0x2ccb, 0x2ccb, + 0x2ccd, 0x2ccd, + 0x2ccf, 0x2ccf, + 0x2cd1, 0x2cd1, + 0x2cd3, 0x2cd3, + 0x2cd5, 0x2cd5, + 0x2cd7, 0x2cd7, + 0x2cd9, 0x2cd9, + 0x2cdb, 0x2cdb, + 0x2cdd, 0x2cdd, + 0x2cdf, 0x2cdf, + 0x2ce1, 0x2ce1, + 0x2ce3, 0x2ce4, + 0x2d00, 0x2d25, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff41, 0xff5a, + 0x10428, 0x1044f, + 0x1d41a, 0x1d433, + 0x1d44e, 0x1d454, + 0x1d456, 0x1d467, + 0x1d482, 0x1d49b, + 0x1d4b6, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d4cf, + 0x1d4ea, 0x1d503, + 0x1d51e, 0x1d537, + 0x1d552, 0x1d56b, + 0x1d586, 0x1d59f, + 0x1d5ba, 0x1d5d3, + 0x1d5ee, 0x1d607, + 0x1d622, 0x1d63b, + 0x1d656, 0x1d66f, + 0x1d68a, 0x1d6a5, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6e1, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d71b, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d755, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d78f, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 
0x1d7c9 +}; /* CR_Ll */ + +/* 'Lm': General Category */ +static const OnigCodePoint CR_Lm[] = { + 26, + 0x02b0, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x037a, 0x037a, + 0x0559, 0x0559, + 0x0640, 0x0640, + 0x06e5, 0x06e6, + 0x0e46, 0x0e46, + 0x0ec6, 0x0ec6, + 0x10fc, 0x10fc, + 0x17d7, 0x17d7, + 0x1843, 0x1843, + 0x1d2c, 0x1d61, + 0x1d78, 0x1d78, + 0x1d9b, 0x1dbf, + 0x2090, 0x2094, + 0x2d6f, 0x2d6f, + 0x3005, 0x3005, + 0x3031, 0x3035, + 0x303b, 0x303b, + 0x309d, 0x309e, + 0x30fc, 0x30fe, + 0xa015, 0xa015, + 0xff70, 0xff70, + 0xff9e, 0xff9f +}; /* CR_Lm */ + +/* 'Lo': General Category */ +static const OnigCodePoint CR_Lo[] = { + 245, + 0x01bb, 0x01bb, + 0x01c0, 0x01c3, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0621, 0x063a, + 0x0641, 0x064a, + 0x066e, 0x066f, + 0x0671, 0x06d3, + 0x06d5, 0x06d5, + 0x06ee, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x0710, + 0x0712, 0x072f, + 0x074d, 0x076d, + 0x0780, 0x07a5, + 0x07b1, 0x07b1, + 0x0904, 0x0939, + 0x093d, 0x093d, + 0x0950, 0x0950, + 0x0958, 0x0961, + 0x097d, 0x097d, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bd, 0x09bd, + 0x09ce, 0x09ce, + 0x09dc, 0x09dd, + 0x09df, 0x09e1, + 0x09f0, 0x09f1, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a72, 0x0a74, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abd, 0x0abd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae1, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3d, 0x0b3d, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b83, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 
0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c60, 0x0c61, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbd, 0x0cbd, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d60, 0x0d61, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0e01, 0x0e30, + 0x0e32, 0x0e33, + 0x0e40, 0x0e45, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb0, + 0x0eb2, 0x0eb3, + 0x0ebd, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f40, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f88, 0x0f8b, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x1050, 0x1055, + 0x10d0, 0x10fa, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x17dc, 0x17dc, + 0x1820, 0x1842, + 0x1844, 0x1877, + 0x1880, 0x18a8, + 0x1900, 0x191c, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19c1, 0x19c7, + 0x1a00, 0x1a16, + 0x2135, 0x2138, + 0x2d30, 0x2d65, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3006, 0x3006, + 0x303c, 0x303c, + 0x3041, 0x3096, + 0x309f, 0x309f, + 0x30a1, 0x30fa, + 0x30ff, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 
0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 0xa014, + 0xa016, 0xa48c, + 0xa800, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa822, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb1d, 0xfb1d, + 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff66, 0xff6f, + 0xff71, 0xff9d, + 0xffa0, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10450, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a00, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d +}; /* CR_Lo */ + +/* 'Lt': General Category */ +static const OnigCodePoint CR_Lt[] = { + 10, + 0x01c5, 0x01c5, + 0x01c8, 0x01c8, + 0x01cb, 0x01cb, + 0x01f2, 0x01f2, + 0x1f88, 0x1f8f, + 0x1f98, 0x1f9f, + 0x1fa8, 0x1faf, + 0x1fbc, 0x1fbc, + 0x1fcc, 0x1fcc, + 0x1ffc, 0x1ffc +}; /* CR_Lt */ + +/* 'Lu': General Category */ +static const OnigCodePoint CR_Lu[] = { + 476, + 0x0041, 0x005a, + 0x00c0, 0x00d6, + 0x00d8, 0x00de, + 0x0100, 0x0100, + 0x0102, 0x0102, + 0x0104, 0x0104, + 0x0106, 0x0106, + 0x0108, 0x0108, + 0x010a, 0x010a, + 0x010c, 0x010c, + 0x010e, 0x010e, + 0x0110, 0x0110, + 0x0112, 0x0112, + 0x0114, 0x0114, + 0x0116, 0x0116, + 0x0118, 0x0118, + 0x011a, 0x011a, + 0x011c, 0x011c, + 0x011e, 0x011e, + 0x0120, 0x0120, + 0x0122, 0x0122, + 0x0124, 0x0124, + 0x0126, 0x0126, + 0x0128, 0x0128, + 0x012a, 0x012a, + 0x012c, 0x012c, + 0x012e, 0x012e, + 0x0130, 0x0130, 
+ 0x0132, 0x0132, + 0x0134, 0x0134, + 0x0136, 0x0136, + 0x0139, 0x0139, + 0x013b, 0x013b, + 0x013d, 0x013d, + 0x013f, 0x013f, + 0x0141, 0x0141, + 0x0143, 0x0143, + 0x0145, 0x0145, + 0x0147, 0x0147, + 0x014a, 0x014a, + 0x014c, 0x014c, + 0x014e, 0x014e, + 0x0150, 0x0150, + 0x0152, 0x0152, + 0x0154, 0x0154, + 0x0156, 0x0156, + 0x0158, 0x0158, + 0x015a, 0x015a, + 0x015c, 0x015c, + 0x015e, 0x015e, + 0x0160, 0x0160, + 0x0162, 0x0162, + 0x0164, 0x0164, + 0x0166, 0x0166, + 0x0168, 0x0168, + 0x016a, 0x016a, + 0x016c, 0x016c, + 0x016e, 0x016e, + 0x0170, 0x0170, + 0x0172, 0x0172, + 0x0174, 0x0174, + 0x0176, 0x0176, + 0x0178, 0x0179, + 0x017b, 0x017b, + 0x017d, 0x017d, + 0x0181, 0x0182, + 0x0184, 0x0184, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a2, 0x01a2, + 0x01a4, 0x01a4, + 0x01a6, 0x01a7, + 0x01a9, 0x01a9, + 0x01ac, 0x01ac, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b5, 0x01b5, + 0x01b7, 0x01b8, + 0x01bc, 0x01bc, + 0x01c4, 0x01c4, + 0x01c7, 0x01c7, + 0x01ca, 0x01ca, + 0x01cd, 0x01cd, + 0x01cf, 0x01cf, + 0x01d1, 0x01d1, + 0x01d3, 0x01d3, + 0x01d5, 0x01d5, + 0x01d7, 0x01d7, + 0x01d9, 0x01d9, + 0x01db, 0x01db, + 0x01de, 0x01de, + 0x01e0, 0x01e0, + 0x01e2, 0x01e2, + 0x01e4, 0x01e4, + 0x01e6, 0x01e6, + 0x01e8, 0x01e8, + 0x01ea, 0x01ea, + 0x01ec, 0x01ec, + 0x01ee, 0x01ee, + 0x01f1, 0x01f1, + 0x01f4, 0x01f4, + 0x01f6, 0x01f8, + 0x01fa, 0x01fa, + 0x01fc, 0x01fc, + 0x01fe, 0x01fe, + 0x0200, 0x0200, + 0x0202, 0x0202, + 0x0204, 0x0204, + 0x0206, 0x0206, + 0x0208, 0x0208, + 0x020a, 0x020a, + 0x020c, 0x020c, + 0x020e, 0x020e, + 0x0210, 0x0210, + 0x0212, 0x0212, + 0x0214, 0x0214, + 0x0216, 0x0216, + 0x0218, 0x0218, + 0x021a, 0x021a, + 0x021c, 0x021c, + 0x021e, 0x021e, + 0x0220, 0x0220, + 0x0222, 0x0222, + 0x0224, 0x0224, + 0x0226, 0x0226, + 0x0228, 0x0228, + 0x022a, 0x022a, + 0x022c, 0x022c, + 0x022e, 0x022e, + 0x0230, 0x0230, + 0x0232, 0x0232, + 0x023a, 0x023b, + 0x023d, 0x023e, + 0x0241, 0x0241, + 
0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03d2, 0x03d4, + 0x03d8, 0x03d8, + 0x03da, 0x03da, + 0x03dc, 0x03dc, + 0x03de, 0x03de, + 0x03e0, 0x03e0, + 0x03e2, 0x03e2, + 0x03e4, 0x03e4, + 0x03e6, 0x03e6, + 0x03e8, 0x03e8, + 0x03ea, 0x03ea, + 0x03ec, 0x03ec, + 0x03ee, 0x03ee, + 0x03f4, 0x03f4, + 0x03f7, 0x03f7, + 0x03f9, 0x03fa, + 0x03fd, 0x042f, + 0x0460, 0x0460, + 0x0462, 0x0462, + 0x0464, 0x0464, + 0x0466, 0x0466, + 0x0468, 0x0468, + 0x046a, 0x046a, + 0x046c, 0x046c, + 0x046e, 0x046e, + 0x0470, 0x0470, + 0x0472, 0x0472, + 0x0474, 0x0474, + 0x0476, 0x0476, + 0x0478, 0x0478, + 0x047a, 0x047a, + 0x047c, 0x047c, + 0x047e, 0x047e, + 0x0480, 0x0480, + 0x048a, 0x048a, + 0x048c, 0x048c, + 0x048e, 0x048e, + 0x0490, 0x0490, + 0x0492, 0x0492, + 0x0494, 0x0494, + 0x0496, 0x0496, + 0x0498, 0x0498, + 0x049a, 0x049a, + 0x049c, 0x049c, + 0x049e, 0x049e, + 0x04a0, 0x04a0, + 0x04a2, 0x04a2, + 0x04a4, 0x04a4, + 0x04a6, 0x04a6, + 0x04a8, 0x04a8, + 0x04aa, 0x04aa, + 0x04ac, 0x04ac, + 0x04ae, 0x04ae, + 0x04b0, 0x04b0, + 0x04b2, 0x04b2, + 0x04b4, 0x04b4, + 0x04b6, 0x04b6, + 0x04b8, 0x04b8, + 0x04ba, 0x04ba, + 0x04bc, 0x04bc, + 0x04be, 0x04be, + 0x04c0, 0x04c1, + 0x04c3, 0x04c3, + 0x04c5, 0x04c5, + 0x04c7, 0x04c7, + 0x04c9, 0x04c9, + 0x04cb, 0x04cb, + 0x04cd, 0x04cd, + 0x04d0, 0x04d0, + 0x04d2, 0x04d2, + 0x04d4, 0x04d4, + 0x04d6, 0x04d6, + 0x04d8, 0x04d8, + 0x04da, 0x04da, + 0x04dc, 0x04dc, + 0x04de, 0x04de, + 0x04e0, 0x04e0, + 0x04e2, 0x04e2, + 0x04e4, 0x04e4, + 0x04e6, 0x04e6, + 0x04e8, 0x04e8, + 0x04ea, 0x04ea, + 0x04ec, 0x04ec, + 0x04ee, 0x04ee, + 0x04f0, 0x04f0, + 0x04f2, 0x04f2, + 0x04f4, 0x04f4, + 0x04f6, 0x04f6, + 0x04f8, 0x04f8, + 0x0500, 0x0500, + 0x0502, 0x0502, + 0x0504, 0x0504, + 0x0506, 0x0506, + 0x0508, 0x0508, + 0x050a, 0x050a, + 0x050c, 0x050c, + 0x050e, 0x050e, + 0x0531, 0x0556, + 0x10a0, 0x10c5, + 0x1e00, 0x1e00, + 0x1e02, 0x1e02, + 0x1e04, 0x1e04, + 0x1e06, 0x1e06, + 0x1e08, 0x1e08, + 0x1e0a, 0x1e0a, + 
0x1e0c, 0x1e0c, + 0x1e0e, 0x1e0e, + 0x1e10, 0x1e10, + 0x1e12, 0x1e12, + 0x1e14, 0x1e14, + 0x1e16, 0x1e16, + 0x1e18, 0x1e18, + 0x1e1a, 0x1e1a, + 0x1e1c, 0x1e1c, + 0x1e1e, 0x1e1e, + 0x1e20, 0x1e20, + 0x1e22, 0x1e22, + 0x1e24, 0x1e24, + 0x1e26, 0x1e26, + 0x1e28, 0x1e28, + 0x1e2a, 0x1e2a, + 0x1e2c, 0x1e2c, + 0x1e2e, 0x1e2e, + 0x1e30, 0x1e30, + 0x1e32, 0x1e32, + 0x1e34, 0x1e34, + 0x1e36, 0x1e36, + 0x1e38, 0x1e38, + 0x1e3a, 0x1e3a, + 0x1e3c, 0x1e3c, + 0x1e3e, 0x1e3e, + 0x1e40, 0x1e40, + 0x1e42, 0x1e42, + 0x1e44, 0x1e44, + 0x1e46, 0x1e46, + 0x1e48, 0x1e48, + 0x1e4a, 0x1e4a, + 0x1e4c, 0x1e4c, + 0x1e4e, 0x1e4e, + 0x1e50, 0x1e50, + 0x1e52, 0x1e52, + 0x1e54, 0x1e54, + 0x1e56, 0x1e56, + 0x1e58, 0x1e58, + 0x1e5a, 0x1e5a, + 0x1e5c, 0x1e5c, + 0x1e5e, 0x1e5e, + 0x1e60, 0x1e60, + 0x1e62, 0x1e62, + 0x1e64, 0x1e64, + 0x1e66, 0x1e66, + 0x1e68, 0x1e68, + 0x1e6a, 0x1e6a, + 0x1e6c, 0x1e6c, + 0x1e6e, 0x1e6e, + 0x1e70, 0x1e70, + 0x1e72, 0x1e72, + 0x1e74, 0x1e74, + 0x1e76, 0x1e76, + 0x1e78, 0x1e78, + 0x1e7a, 0x1e7a, + 0x1e7c, 0x1e7c, + 0x1e7e, 0x1e7e, + 0x1e80, 0x1e80, + 0x1e82, 0x1e82, + 0x1e84, 0x1e84, + 0x1e86, 0x1e86, + 0x1e88, 0x1e88, + 0x1e8a, 0x1e8a, + 0x1e8c, 0x1e8c, + 0x1e8e, 0x1e8e, + 0x1e90, 0x1e90, + 0x1e92, 0x1e92, + 0x1e94, 0x1e94, + 0x1ea0, 0x1ea0, + 0x1ea2, 0x1ea2, + 0x1ea4, 0x1ea4, + 0x1ea6, 0x1ea6, + 0x1ea8, 0x1ea8, + 0x1eaa, 0x1eaa, + 0x1eac, 0x1eac, + 0x1eae, 0x1eae, + 0x1eb0, 0x1eb0, + 0x1eb2, 0x1eb2, + 0x1eb4, 0x1eb4, + 0x1eb6, 0x1eb6, + 0x1eb8, 0x1eb8, + 0x1eba, 0x1eba, + 0x1ebc, 0x1ebc, + 0x1ebe, 0x1ebe, + 0x1ec0, 0x1ec0, + 0x1ec2, 0x1ec2, + 0x1ec4, 0x1ec4, + 0x1ec6, 0x1ec6, + 0x1ec8, 0x1ec8, + 0x1eca, 0x1eca, + 0x1ecc, 0x1ecc, + 0x1ece, 0x1ece, + 0x1ed0, 0x1ed0, + 0x1ed2, 0x1ed2, + 0x1ed4, 0x1ed4, + 0x1ed6, 0x1ed6, + 0x1ed8, 0x1ed8, + 0x1eda, 0x1eda, + 0x1edc, 0x1edc, + 0x1ede, 0x1ede, + 0x1ee0, 0x1ee0, + 0x1ee2, 0x1ee2, + 0x1ee4, 0x1ee4, + 0x1ee6, 0x1ee6, + 0x1ee8, 0x1ee8, + 0x1eea, 0x1eea, + 0x1eec, 0x1eec, + 0x1eee, 0x1eee, + 0x1ef0, 0x1ef0, + 0x1ef2, 0x1ef2, + 
0x1ef4, 0x1ef4, + 0x1ef6, 0x1ef6, + 0x1ef8, 0x1ef8, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f5f, + 0x1f68, 0x1f6f, + 0x1fb8, 0x1fbb, + 0x1fc8, 0x1fcb, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210b, 0x210d, + 0x2110, 0x2112, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x2130, 0x2131, + 0x2133, 0x2133, + 0x213e, 0x213f, + 0x2145, 0x2145, + 0x2c00, 0x2c2e, + 0x2c80, 0x2c80, + 0x2c82, 0x2c82, + 0x2c84, 0x2c84, + 0x2c86, 0x2c86, + 0x2c88, 0x2c88, + 0x2c8a, 0x2c8a, + 0x2c8c, 0x2c8c, + 0x2c8e, 0x2c8e, + 0x2c90, 0x2c90, + 0x2c92, 0x2c92, + 0x2c94, 0x2c94, + 0x2c96, 0x2c96, + 0x2c98, 0x2c98, + 0x2c9a, 0x2c9a, + 0x2c9c, 0x2c9c, + 0x2c9e, 0x2c9e, + 0x2ca0, 0x2ca0, + 0x2ca2, 0x2ca2, + 0x2ca4, 0x2ca4, + 0x2ca6, 0x2ca6, + 0x2ca8, 0x2ca8, + 0x2caa, 0x2caa, + 0x2cac, 0x2cac, + 0x2cae, 0x2cae, + 0x2cb0, 0x2cb0, + 0x2cb2, 0x2cb2, + 0x2cb4, 0x2cb4, + 0x2cb6, 0x2cb6, + 0x2cb8, 0x2cb8, + 0x2cba, 0x2cba, + 0x2cbc, 0x2cbc, + 0x2cbe, 0x2cbe, + 0x2cc0, 0x2cc0, + 0x2cc2, 0x2cc2, + 0x2cc4, 0x2cc4, + 0x2cc6, 0x2cc6, + 0x2cc8, 0x2cc8, + 0x2cca, 0x2cca, + 0x2ccc, 0x2ccc, + 0x2cce, 0x2cce, + 0x2cd0, 0x2cd0, + 0x2cd2, 0x2cd2, + 0x2cd4, 0x2cd4, + 0x2cd6, 0x2cd6, + 0x2cd8, 0x2cd8, + 0x2cda, 0x2cda, + 0x2cdc, 0x2cdc, + 0x2cde, 0x2cde, + 0x2ce0, 0x2ce0, + 0x2ce2, 0x2ce2, + 0xff21, 0xff3a, + 0x10400, 0x10427, + 0x1d400, 0x1d419, + 0x1d434, 0x1d44d, + 0x1d468, 0x1d481, + 0x1d49c, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b5, + 0x1d4d0, 0x1d4e9, + 0x1d504, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d538, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d56c, 0x1d585, + 0x1d5a0, 0x1d5b9, + 0x1d5d4, 0x1d5ed, + 0x1d608, 0x1d621, + 
0x1d63c, 0x1d655, + 0x1d670, 0x1d689, + 0x1d6a8, 0x1d6c0, + 0x1d6e2, 0x1d6fa, + 0x1d71c, 0x1d734, + 0x1d756, 0x1d76e, + 0x1d790, 0x1d7a8 +}; /* CR_Lu */ + +/* 'M': Major Category */ +static const OnigCodePoint CR_M[] = { + 133, + 0x0300, 0x036f, + 0x0483, 0x0486, + 0x0488, 0x0489, + 0x0591, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x0610, 0x0615, + 0x064b, 0x065e, + 0x0670, 0x0670, + 0x06d6, 0x06dc, + 0x06de, 0x06e4, + 0x06e7, 0x06e8, + 0x06ea, 0x06ed, + 0x0711, 0x0711, + 0x0730, 0x074a, + 0x07a6, 0x07b0, + 0x0901, 0x0903, + 0x093c, 0x093c, + 0x093e, 0x094d, + 0x0951, 0x0954, + 0x0962, 0x0963, + 0x0981, 0x0983, + 0x09bc, 0x09bc, + 0x09be, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09e2, 0x09e3, + 0x0a01, 0x0a03, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a70, 0x0a71, + 0x0a81, 0x0a83, + 0x0abc, 0x0abc, + 0x0abe, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ae2, 0x0ae3, + 0x0b01, 0x0b03, + 0x0b3c, 0x0b3c, + 0x0b3e, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b82, 0x0b82, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c82, 0x0c83, + 0x0cbc, 0x0cbc, + 0x0cbe, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0d02, 0x0d03, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d82, 0x0d83, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e31, 0x0e31, + 0x0e34, 0x0e3a, + 0x0e47, 0x0e4e, + 0x0eb1, 0x0eb1, + 0x0eb4, 0x0eb9, + 0x0ebb, 0x0ebc, + 0x0ec8, 0x0ecd, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f3f, + 0x0f71, 0x0f84, + 0x0f86, 0x0f87, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1056, 0x1059, + 0x135f, 0x135f, + 0x1712, 0x1714, + 
0x1732, 0x1734, + 0x1752, 0x1753, + 0x1772, 0x1773, + 0x17b6, 0x17d3, + 0x17dd, 0x17dd, + 0x180b, 0x180d, + 0x18a9, 0x18a9, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x19b0, 0x19c0, + 0x19c8, 0x19c9, + 0x1a17, 0x1a1b, + 0x1dc0, 0x1dc3, + 0x20d0, 0x20eb, + 0x302a, 0x302f, + 0x3099, 0x309a, + 0xa802, 0xa802, + 0xa806, 0xa806, + 0xa80b, 0xa80b, + 0xa823, 0xa827, + 0xfb1e, 0xfb1e, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0x10a01, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a0f, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0xe0100, 0xe01ef +}; /* CR_M */ + +/* 'Mc': General Category */ +static const OnigCodePoint CR_Mc[] = { + 63, + 0x0903, 0x0903, + 0x093e, 0x0940, + 0x0949, 0x094c, + 0x0982, 0x0983, + 0x09be, 0x09c0, + 0x09c7, 0x09c8, + 0x09cb, 0x09cc, + 0x09d7, 0x09d7, + 0x0a03, 0x0a03, + 0x0a3e, 0x0a40, + 0x0a83, 0x0a83, + 0x0abe, 0x0ac0, + 0x0ac9, 0x0ac9, + 0x0acb, 0x0acc, + 0x0b02, 0x0b03, + 0x0b3e, 0x0b3e, + 0x0b40, 0x0b40, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4c, + 0x0b57, 0x0b57, + 0x0bbe, 0x0bbf, + 0x0bc1, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcc, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c41, 0x0c44, + 0x0c82, 0x0c83, + 0x0cbe, 0x0cbe, + 0x0cc0, 0x0cc4, + 0x0cc7, 0x0cc8, + 0x0cca, 0x0ccb, + 0x0cd5, 0x0cd6, + 0x0d02, 0x0d03, + 0x0d3e, 0x0d40, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4c, + 0x0d57, 0x0d57, + 0x0d82, 0x0d83, + 0x0dcf, 0x0dd1, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0f3e, 0x0f3f, + 0x0f7f, 0x0f7f, + 0x102c, 0x102c, + 0x1031, 0x1031, + 0x1038, 0x1038, + 0x1056, 0x1057, + 0x17b6, 0x17b6, + 0x17be, 0x17c5, + 0x17c7, 0x17c8, + 0x1923, 0x1926, + 0x1929, 0x192b, + 0x1930, 0x1931, + 0x1933, 0x1938, + 0x19b0, 0x19c0, + 0x19c8, 0x19c9, + 0x1a19, 0x1a1b, + 0xa802, 0xa802, + 0xa823, 0xa824, + 0xa827, 0xa827, + 0x1d165, 0x1d166, + 0x1d16d, 0x1d172 +}; /* CR_Mc */ + +/* 'Me': General Category */ +static const OnigCodePoint CR_Me[] = { + 4, + 0x0488, 0x0489, + 
0x06de, 0x06de, + 0x20dd, 0x20e0, + 0x20e2, 0x20e4 +}; /* CR_Me */ + +/* 'Mn': General Category */ +static const OnigCodePoint CR_Mn[] = { + 124, + 0x0300, 0x036f, + 0x0483, 0x0486, + 0x0591, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x0610, 0x0615, + 0x064b, 0x065e, + 0x0670, 0x0670, + 0x06d6, 0x06dc, + 0x06df, 0x06e4, + 0x06e7, 0x06e8, + 0x06ea, 0x06ed, + 0x0711, 0x0711, + 0x0730, 0x074a, + 0x07a6, 0x07b0, + 0x0901, 0x0902, + 0x093c, 0x093c, + 0x0941, 0x0948, + 0x094d, 0x094d, + 0x0951, 0x0954, + 0x0962, 0x0963, + 0x0981, 0x0981, + 0x09bc, 0x09bc, + 0x09c1, 0x09c4, + 0x09cd, 0x09cd, + 0x09e2, 0x09e3, + 0x0a01, 0x0a02, + 0x0a3c, 0x0a3c, + 0x0a41, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a70, 0x0a71, + 0x0a81, 0x0a82, + 0x0abc, 0x0abc, + 0x0ac1, 0x0ac5, + 0x0ac7, 0x0ac8, + 0x0acd, 0x0acd, + 0x0ae2, 0x0ae3, + 0x0b01, 0x0b01, + 0x0b3c, 0x0b3c, + 0x0b3f, 0x0b3f, + 0x0b41, 0x0b43, + 0x0b4d, 0x0b4d, + 0x0b56, 0x0b56, + 0x0b82, 0x0b82, + 0x0bc0, 0x0bc0, + 0x0bcd, 0x0bcd, + 0x0c3e, 0x0c40, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0cbc, 0x0cbc, + 0x0cbf, 0x0cbf, + 0x0cc6, 0x0cc6, + 0x0ccc, 0x0ccd, + 0x0d41, 0x0d43, + 0x0d4d, 0x0d4d, + 0x0dca, 0x0dca, + 0x0dd2, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0e31, 0x0e31, + 0x0e34, 0x0e3a, + 0x0e47, 0x0e4e, + 0x0eb1, 0x0eb1, + 0x0eb4, 0x0eb9, + 0x0ebb, 0x0ebc, + 0x0ec8, 0x0ecd, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f71, 0x0f7e, + 0x0f80, 0x0f84, + 0x0f86, 0x0f87, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x102d, 0x1030, + 0x1032, 0x1032, + 0x1036, 0x1037, + 0x1039, 0x1039, + 0x1058, 0x1059, + 0x135f, 0x135f, + 0x1712, 0x1714, + 0x1732, 0x1734, + 0x1752, 0x1753, + 0x1772, 0x1773, + 0x17b7, 0x17bd, + 0x17c6, 0x17c6, + 0x17c9, 0x17d3, + 0x17dd, 0x17dd, + 0x180b, 0x180d, + 0x18a9, 0x18a9, + 0x1920, 0x1922, + 0x1927, 0x1928, + 0x1932, 0x1932, + 0x1939, 0x193b, + 0x1a17, 0x1a18, + 0x1dc0, 0x1dc3, + 0x20d0, 0x20dc, 
+ 0x20e1, 0x20e1, + 0x20e5, 0x20eb, + 0x302a, 0x302f, + 0x3099, 0x309a, + 0xa806, 0xa806, + 0xa80b, 0xa80b, + 0xa825, 0xa826, + 0xfb1e, 0xfb1e, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0x10a01, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a0f, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x1d167, 0x1d169, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0xe0100, 0xe01ef +}; /* CR_Mn */ + +/* 'N': Major Category */ +static const OnigCodePoint CR_N[] = { + 53, + 0x0030, 0x0039, + 0x00b2, 0x00b3, + 0x00b9, 0x00b9, + 0x00bc, 0x00be, + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x09f4, 0x09f9, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be6, 0x0bf2, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f33, + 0x1040, 0x1049, + 0x1369, 0x137c, + 0x16ee, 0x16f0, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19d9, + 0x2070, 0x2070, + 0x2074, 0x2079, + 0x2080, 0x2089, + 0x2153, 0x2183, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x2cfd, 0x2cfd, + 0x3007, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303a, + 0x3192, 0x3195, + 0x3220, 0x3229, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0xff10, 0xff19, + 0x10107, 0x10133, + 0x10140, 0x10178, + 0x1018a, 0x1018a, + 0x10320, 0x10323, + 0x1034a, 0x1034a, + 0x103d1, 0x103d5, + 0x104a0, 0x104a9, + 0x10a40, 0x10a47, + 0x1d7ce, 0x1d7ff +}; /* CR_N */ + +/* 'Nd': General Category */ +static const OnigCodePoint CR_Nd[] = { + 23, + 0x0030, 0x0039, + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be6, 0x0bef, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f29, + 0x1040, 0x1049, + 0x17e0, 0x17e9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19d9, + 0xff10, 0xff19, + 0x104a0, 0x104a9, + 0x1d7ce, 0x1d7ff +}; /* CR_Nd */ + +/* 'Nl': 
General Category */ +static const OnigCodePoint CR_Nl[] = { + 8, + 0x16ee, 0x16f0, + 0x2160, 0x2183, + 0x3007, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303a, + 0x10140, 0x10174, + 0x1034a, 0x1034a, + 0x103d1, 0x103d5 +}; /* CR_Nl */ + +/* 'No': General Category */ +static const OnigCodePoint CR_No[] = { + 26, + 0x00b2, 0x00b3, + 0x00b9, 0x00b9, + 0x00bc, 0x00be, + 0x09f4, 0x09f9, + 0x0bf0, 0x0bf2, + 0x0f2a, 0x0f33, + 0x1369, 0x137c, + 0x17f0, 0x17f9, + 0x2070, 0x2070, + 0x2074, 0x2079, + 0x2080, 0x2089, + 0x2153, 0x215f, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x2cfd, 0x2cfd, + 0x3192, 0x3195, + 0x3220, 0x3229, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0x10107, 0x10133, + 0x10175, 0x10178, + 0x1018a, 0x1018a, + 0x10320, 0x10323, + 0x10a40, 0x10a47 +}; /* CR_No */ + +/* 'P': Major Category */ +static const OnigCodePoint CR_P[] = { + 96, + 0x0021, 0x0023, + 0x0025, 0x002a, + 0x002c, 0x002f, + 0x003a, 0x003b, + 0x003f, 0x0040, + 0x005b, 0x005d, + 0x005f, 0x005f, + 0x007b, 0x007b, + 0x007d, 0x007d, + 0x00a1, 0x00a1, + 0x00ab, 0x00ab, + 0x00b7, 0x00b7, + 0x00bb, 0x00bb, + 0x00bf, 0x00bf, + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x055a, 0x055f, + 0x0589, 0x058a, + 0x05be, 0x05be, + 0x05c0, 0x05c0, + 0x05c3, 0x05c3, + 0x05c6, 0x05c6, + 0x05f3, 0x05f4, + 0x060c, 0x060d, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x066a, 0x066d, + 0x06d4, 0x06d4, + 0x0700, 0x070d, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0df4, 0x0df4, + 0x0e4f, 0x0e4f, + 0x0e5a, 0x0e5b, + 0x0f04, 0x0f12, + 0x0f3a, 0x0f3d, + 0x0f85, 0x0f85, + 0x0fd0, 0x0fd1, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1361, 0x1368, + 0x166d, 0x166e, + 0x169b, 0x169c, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 0x180a, + 0x1944, 0x1945, + 0x19de, 0x19df, + 0x1a1e, 0x1a1f, + 0x2010, 0x2027, + 0x2030, 0x2043, + 0x2045, 0x2051, + 0x2053, 0x205e, + 0x207d, 0x207e, + 0x208d, 0x208e, + 0x2329, 0x232a, + 0x23b4, 0x23b6, + 0x2768, 0x2775, + 0x27c5, 0x27c6, + 0x27e6, 0x27eb, + 
0x2983, 0x2998, + 0x29d8, 0x29db, + 0x29fc, 0x29fd, + 0x2cf9, 0x2cfc, + 0x2cfe, 0x2cff, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x3001, 0x3003, + 0x3008, 0x3011, + 0x3014, 0x301f, + 0x3030, 0x3030, + 0x303d, 0x303d, + 0x30a0, 0x30a0, + 0x30fb, 0x30fb, + 0xfd3e, 0xfd3f, + 0xfe10, 0xfe19, + 0xfe30, 0xfe52, + 0xfe54, 0xfe61, + 0xfe63, 0xfe63, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff0a, + 0xff0c, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3b, 0xff3d, + 0xff3f, 0xff3f, + 0xff5b, 0xff5b, + 0xff5d, 0xff5d, + 0xff5f, 0xff65, + 0x10100, 0x10101, + 0x1039f, 0x1039f, + 0x10a50, 0x10a58 +}; /* CR_P */ + +/* 'Pc': General Category */ +static const OnigCodePoint CR_Pc[] = { + 6, + 0x005f, 0x005f, + 0x203f, 0x2040, + 0x2054, 0x2054, + 0xfe33, 0xfe34, + 0xfe4d, 0xfe4f, + 0xff3f, 0xff3f +}; /* CR_Pc */ + +/* 'Pd': General Category */ +static const OnigCodePoint CR_Pd[] = { + 12, + 0x002d, 0x002d, + 0x058a, 0x058a, + 0x1806, 0x1806, + 0x2010, 0x2015, + 0x2e17, 0x2e17, + 0x301c, 0x301c, + 0x3030, 0x3030, + 0x30a0, 0x30a0, + 0xfe31, 0xfe32, + 0xfe58, 0xfe58, + 0xfe63, 0xfe63, + 0xff0d, 0xff0d +}; /* CR_Pd */ + +/* 'Pe': General Category */ +static const OnigCodePoint CR_Pe[] = { + 65, + 0x0029, 0x0029, + 0x005d, 0x005d, + 0x007d, 0x007d, + 0x0f3b, 0x0f3b, + 0x0f3d, 0x0f3d, + 0x169c, 0x169c, + 0x2046, 0x2046, + 0x207e, 0x207e, + 0x208e, 0x208e, + 0x232a, 0x232a, + 0x23b5, 0x23b5, + 0x2769, 0x2769, + 0x276b, 0x276b, + 0x276d, 0x276d, + 0x276f, 0x276f, + 0x2771, 0x2771, + 0x2773, 0x2773, + 0x2775, 0x2775, + 0x27c6, 0x27c6, + 0x27e7, 0x27e7, + 0x27e9, 0x27e9, + 0x27eb, 0x27eb, + 0x2984, 0x2984, + 0x2986, 0x2986, + 0x2988, 0x2988, + 0x298a, 0x298a, + 0x298c, 0x298c, + 0x298e, 0x298e, + 0x2990, 0x2990, + 0x2992, 0x2992, + 0x2994, 0x2994, + 0x2996, 0x2996, + 0x2998, 0x2998, + 0x29d9, 0x29d9, + 0x29db, 0x29db, + 0x29fd, 0x29fd, + 0x3009, 0x3009, + 0x300b, 0x300b, + 0x300d, 0x300d, + 0x300f, 0x300f, + 0x3011, 0x3011, + 0x3015, 0x3015, + 0x3017, 0x3017, + 
0x3019, 0x3019, + 0x301b, 0x301b, + 0x301e, 0x301f, + 0xfd3f, 0xfd3f, + 0xfe18, 0xfe18, + 0xfe36, 0xfe36, + 0xfe38, 0xfe38, + 0xfe3a, 0xfe3a, + 0xfe3c, 0xfe3c, + 0xfe3e, 0xfe3e, + 0xfe40, 0xfe40, + 0xfe42, 0xfe42, + 0xfe44, 0xfe44, + 0xfe48, 0xfe48, + 0xfe5a, 0xfe5a, + 0xfe5c, 0xfe5c, + 0xfe5e, 0xfe5e, + 0xff09, 0xff09, + 0xff3d, 0xff3d, + 0xff5d, 0xff5d, + 0xff60, 0xff60, + 0xff63, 0xff63 +}; /* CR_Pe */ + +/* 'Pf': General Category */ +static const OnigCodePoint CR_Pf[] = { + 9, + 0x00bb, 0x00bb, + 0x2019, 0x2019, + 0x201d, 0x201d, + 0x203a, 0x203a, + 0x2e03, 0x2e03, + 0x2e05, 0x2e05, + 0x2e0a, 0x2e0a, + 0x2e0d, 0x2e0d, + 0x2e1d, 0x2e1d +}; /* CR_Pf */ + +/* 'Pi': General Category */ +static const OnigCodePoint CR_Pi[] = { + 10, + 0x00ab, 0x00ab, + 0x2018, 0x2018, + 0x201b, 0x201c, + 0x201f, 0x201f, + 0x2039, 0x2039, + 0x2e02, 0x2e02, + 0x2e04, 0x2e04, + 0x2e09, 0x2e09, + 0x2e0c, 0x2e0c, + 0x2e1c, 0x2e1c +}; /* CR_Pi */ + +/* 'Po': General Category */ +static const OnigCodePoint CR_Po[] = { + 88, + 0x0021, 0x0023, + 0x0025, 0x0027, + 0x002a, 0x002a, + 0x002c, 0x002c, + 0x002e, 0x002f, + 0x003a, 0x003b, + 0x003f, 0x0040, + 0x005c, 0x005c, + 0x00a1, 0x00a1, + 0x00b7, 0x00b7, + 0x00bf, 0x00bf, + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x055a, 0x055f, + 0x0589, 0x0589, + 0x05be, 0x05be, + 0x05c0, 0x05c0, + 0x05c3, 0x05c3, + 0x05c6, 0x05c6, + 0x05f3, 0x05f4, + 0x060c, 0x060d, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x066a, 0x066d, + 0x06d4, 0x06d4, + 0x0700, 0x070d, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0df4, 0x0df4, + 0x0e4f, 0x0e4f, + 0x0e5a, 0x0e5b, + 0x0f04, 0x0f12, + 0x0f85, 0x0f85, + 0x0fd0, 0x0fd1, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1361, 0x1368, + 0x166d, 0x166e, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 0x1805, + 0x1807, 0x180a, + 0x1944, 0x1945, + 0x19de, 0x19df, + 0x1a1e, 0x1a1f, + 0x2016, 0x2017, + 0x2020, 0x2027, + 0x2030, 0x2038, + 0x203b, 0x203e, + 0x2041, 0x2043, + 0x2047, 0x2051, + 0x2053, 0x2053, + 0x2055, 
0x205e, + 0x23b6, 0x23b6, + 0x2cf9, 0x2cfc, + 0x2cfe, 0x2cff, + 0x2e00, 0x2e01, + 0x2e06, 0x2e08, + 0x2e0b, 0x2e0b, + 0x2e0e, 0x2e16, + 0x3001, 0x3003, + 0x303d, 0x303d, + 0x30fb, 0x30fb, + 0xfe10, 0xfe16, + 0xfe19, 0xfe19, + 0xfe30, 0xfe30, + 0xfe45, 0xfe46, + 0xfe49, 0xfe4c, + 0xfe50, 0xfe52, + 0xfe54, 0xfe57, + 0xfe5f, 0xfe61, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff07, + 0xff0a, 0xff0a, + 0xff0c, 0xff0c, + 0xff0e, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3c, 0xff3c, + 0xff61, 0xff61, + 0xff64, 0xff65, + 0x10100, 0x10101, + 0x1039f, 0x1039f, + 0x10a50, 0x10a58 +}; /* CR_Po */ + +/* 'Ps': General Category */ +static const OnigCodePoint CR_Ps[] = { + 67, + 0x0028, 0x0028, + 0x005b, 0x005b, + 0x007b, 0x007b, + 0x0f3a, 0x0f3a, + 0x0f3c, 0x0f3c, + 0x169b, 0x169b, + 0x201a, 0x201a, + 0x201e, 0x201e, + 0x2045, 0x2045, + 0x207d, 0x207d, + 0x208d, 0x208d, + 0x2329, 0x2329, + 0x23b4, 0x23b4, + 0x2768, 0x2768, + 0x276a, 0x276a, + 0x276c, 0x276c, + 0x276e, 0x276e, + 0x2770, 0x2770, + 0x2772, 0x2772, + 0x2774, 0x2774, + 0x27c5, 0x27c5, + 0x27e6, 0x27e6, + 0x27e8, 0x27e8, + 0x27ea, 0x27ea, + 0x2983, 0x2983, + 0x2985, 0x2985, + 0x2987, 0x2987, + 0x2989, 0x2989, + 0x298b, 0x298b, + 0x298d, 0x298d, + 0x298f, 0x298f, + 0x2991, 0x2991, + 0x2993, 0x2993, + 0x2995, 0x2995, + 0x2997, 0x2997, + 0x29d8, 0x29d8, + 0x29da, 0x29da, + 0x29fc, 0x29fc, + 0x3008, 0x3008, + 0x300a, 0x300a, + 0x300c, 0x300c, + 0x300e, 0x300e, + 0x3010, 0x3010, + 0x3014, 0x3014, + 0x3016, 0x3016, + 0x3018, 0x3018, + 0x301a, 0x301a, + 0x301d, 0x301d, + 0xfd3e, 0xfd3e, + 0xfe17, 0xfe17, + 0xfe35, 0xfe35, + 0xfe37, 0xfe37, + 0xfe39, 0xfe39, + 0xfe3b, 0xfe3b, + 0xfe3d, 0xfe3d, + 0xfe3f, 0xfe3f, + 0xfe41, 0xfe41, + 0xfe43, 0xfe43, + 0xfe47, 0xfe47, + 0xfe59, 0xfe59, + 0xfe5b, 0xfe5b, + 0xfe5d, 0xfe5d, + 0xff08, 0xff08, + 0xff3b, 0xff3b, + 0xff5b, 0xff5b, + 0xff5f, 0xff5f, + 0xff62, 0xff62 +}; /* CR_Ps */ + +/* 'S': Major Category */ +static const OnigCodePoint CR_S[] = { + 162, + 
0x0024, 0x0024, + 0x002b, 0x002b, + 0x003c, 0x003e, + 0x005e, 0x005e, + 0x0060, 0x0060, + 0x007c, 0x007c, + 0x007e, 0x007e, + 0x00a2, 0x00a9, + 0x00ac, 0x00ac, + 0x00ae, 0x00b1, + 0x00b4, 0x00b4, + 0x00b6, 0x00b6, + 0x00b8, 0x00b8, + 0x00d7, 0x00d7, + 0x00f7, 0x00f7, + 0x02c2, 0x02c5, + 0x02d2, 0x02df, + 0x02e5, 0x02ed, + 0x02ef, 0x02ff, + 0x0374, 0x0375, + 0x0384, 0x0385, + 0x03f6, 0x03f6, + 0x0482, 0x0482, + 0x060b, 0x060b, + 0x060e, 0x060f, + 0x06e9, 0x06e9, + 0x06fd, 0x06fe, + 0x09f2, 0x09f3, + 0x09fa, 0x09fa, + 0x0af1, 0x0af1, + 0x0b70, 0x0b70, + 0x0bf3, 0x0bfa, + 0x0e3f, 0x0e3f, + 0x0f01, 0x0f03, + 0x0f13, 0x0f17, + 0x0f1a, 0x0f1f, + 0x0f34, 0x0f34, + 0x0f36, 0x0f36, + 0x0f38, 0x0f38, + 0x0fbe, 0x0fc5, + 0x0fc7, 0x0fcc, + 0x0fcf, 0x0fcf, + 0x1360, 0x1360, + 0x1390, 0x1399, + 0x17db, 0x17db, + 0x1940, 0x1940, + 0x19e0, 0x19ff, + 0x1fbd, 0x1fbd, + 0x1fbf, 0x1fc1, + 0x1fcd, 0x1fcf, + 0x1fdd, 0x1fdf, + 0x1fed, 0x1fef, + 0x1ffd, 0x1ffe, + 0x2044, 0x2044, + 0x2052, 0x2052, + 0x207a, 0x207c, + 0x208a, 0x208c, + 0x20a0, 0x20b5, + 0x2100, 0x2101, + 0x2103, 0x2106, + 0x2108, 0x2109, + 0x2114, 0x2114, + 0x2116, 0x2118, + 0x211e, 0x2123, + 0x2125, 0x2125, + 0x2127, 0x2127, + 0x2129, 0x2129, + 0x212e, 0x212e, + 0x2132, 0x2132, + 0x213a, 0x213b, + 0x2140, 0x2144, + 0x214a, 0x214c, + 0x2190, 0x2328, + 0x232b, 0x23b3, + 0x23b7, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x249c, 0x24e9, + 0x2500, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2767, + 0x2794, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c4, + 0x27d0, 0x27e5, + 0x27f0, 0x2982, + 0x2999, 0x29d7, + 0x29dc, 0x29fb, + 0x29fe, 0x2b13, + 0x2ce5, 0x2cea, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3004, 0x3004, + 0x3012, 0x3013, + 0x3020, 0x3020, + 0x3036, 0x3037, + 0x303e, 0x303f, + 0x309b, 0x309c, + 0x3190, 0x3191, + 0x3196, 0x319f, + 
0x31c0, 0x31cf, + 0x3200, 0x321e, + 0x322a, 0x3243, + 0x3250, 0x3250, + 0x3260, 0x327f, + 0x328a, 0x32b0, + 0x32c0, 0x32fe, + 0x3300, 0x33ff, + 0x4dc0, 0x4dff, + 0xa490, 0xa4c6, + 0xa700, 0xa716, + 0xa828, 0xa82b, + 0xfb29, 0xfb29, + 0xfdfc, 0xfdfd, + 0xfe62, 0xfe62, + 0xfe64, 0xfe66, + 0xfe69, 0xfe69, + 0xff04, 0xff04, + 0xff0b, 0xff0b, + 0xff1c, 0xff1e, + 0xff3e, 0xff3e, + 0xff40, 0xff40, + 0xff5c, 0xff5c, + 0xff5e, 0xff5e, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfffc, 0xfffd, + 0x10102, 0x10102, + 0x10137, 0x1013f, + 0x10179, 0x10189, + 0x103d0, 0x103d0, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d164, + 0x1d16a, 0x1d16c, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1dd, + 0x1d200, 0x1d241, + 0x1d245, 0x1d245, + 0x1d300, 0x1d356, + 0x1d6c1, 0x1d6c1, + 0x1d6db, 0x1d6db, + 0x1d6fb, 0x1d6fb, + 0x1d715, 0x1d715, + 0x1d735, 0x1d735, + 0x1d74f, 0x1d74f, + 0x1d76f, 0x1d76f, + 0x1d789, 0x1d789, + 0x1d7a9, 0x1d7a9, + 0x1d7c3, 0x1d7c3 +}; /* CR_S */ + +/* 'Sc': General Category */ +static const OnigCodePoint CR_Sc[] = { + 14, + 0x0024, 0x0024, + 0x00a2, 0x00a5, + 0x060b, 0x060b, + 0x09f2, 0x09f3, + 0x0af1, 0x0af1, + 0x0bf9, 0x0bf9, + 0x0e3f, 0x0e3f, + 0x17db, 0x17db, + 0x20a0, 0x20b5, + 0xfdfc, 0xfdfc, + 0xfe69, 0xfe69, + 0xff04, 0xff04, + 0xffe0, 0xffe1, + 0xffe5, 0xffe6 +}; /* CR_Sc */ + +/* 'Sk': General Category */ +static const OnigCodePoint CR_Sk[] = { + 23, + 0x005e, 0x005e, + 0x0060, 0x0060, + 0x00a8, 0x00a8, + 0x00af, 0x00af, + 0x00b4, 0x00b4, + 0x00b8, 0x00b8, + 0x02c2, 0x02c5, + 0x02d2, 0x02df, + 0x02e5, 0x02ed, + 0x02ef, 0x02ff, + 0x0374, 0x0375, + 0x0384, 0x0385, + 0x1fbd, 0x1fbd, + 0x1fbf, 0x1fc1, + 0x1fcd, 0x1fcf, + 0x1fdd, 0x1fdf, + 0x1fed, 0x1fef, + 0x1ffd, 0x1ffe, + 0x309b, 0x309c, + 0xa700, 0xa716, + 0xff3e, 0xff3e, + 0xff40, 0xff40, + 0xffe3, 0xffe3 +}; /* CR_Sk */ + +/* 'Sm': General Category */ +static const OnigCodePoint CR_Sm[] = { + 59, + 0x002b, 0x002b, + 0x003c, 0x003e, + 0x007c, 0x007c, + 0x007e, 0x007e, + 0x00ac, 0x00ac, + 
0x00b1, 0x00b1, + 0x00d7, 0x00d7, + 0x00f7, 0x00f7, + 0x03f6, 0x03f6, + 0x2044, 0x2044, + 0x2052, 0x2052, + 0x207a, 0x207c, + 0x208a, 0x208c, + 0x2140, 0x2144, + 0x214b, 0x214b, + 0x2190, 0x2194, + 0x219a, 0x219b, + 0x21a0, 0x21a0, + 0x21a3, 0x21a3, + 0x21a6, 0x21a6, + 0x21ae, 0x21ae, + 0x21ce, 0x21cf, + 0x21d2, 0x21d2, + 0x21d4, 0x21d4, + 0x21f4, 0x22ff, + 0x2308, 0x230b, + 0x2320, 0x2321, + 0x237c, 0x237c, + 0x239b, 0x23b3, + 0x25b7, 0x25b7, + 0x25c1, 0x25c1, + 0x25f8, 0x25ff, + 0x266f, 0x266f, + 0x27c0, 0x27c4, + 0x27d0, 0x27e5, + 0x27f0, 0x27ff, + 0x2900, 0x2982, + 0x2999, 0x29d7, + 0x29dc, 0x29fb, + 0x29fe, 0x2aff, + 0xfb29, 0xfb29, + 0xfe62, 0xfe62, + 0xfe64, 0xfe66, + 0xff0b, 0xff0b, + 0xff1c, 0xff1e, + 0xff5c, 0xff5c, + 0xff5e, 0xff5e, + 0xffe2, 0xffe2, + 0xffe9, 0xffec, + 0x1d6c1, 0x1d6c1, + 0x1d6db, 0x1d6db, + 0x1d6fb, 0x1d6fb, + 0x1d715, 0x1d715, + 0x1d735, 0x1d735, + 0x1d74f, 0x1d74f, + 0x1d76f, 0x1d76f, + 0x1d789, 0x1d789, + 0x1d7a9, 0x1d7a9, + 0x1d7c3, 0x1d7c3 +}; /* CR_Sm */ + +/* 'So': General Category */ +static const OnigCodePoint CR_So[] = { + 120, + 0x00a6, 0x00a7, + 0x00a9, 0x00a9, + 0x00ae, 0x00ae, + 0x00b0, 0x00b0, + 0x00b6, 0x00b6, + 0x0482, 0x0482, + 0x060e, 0x060f, + 0x06e9, 0x06e9, + 0x06fd, 0x06fe, + 0x09fa, 0x09fa, + 0x0b70, 0x0b70, + 0x0bf3, 0x0bf8, + 0x0bfa, 0x0bfa, + 0x0f01, 0x0f03, + 0x0f13, 0x0f17, + 0x0f1a, 0x0f1f, + 0x0f34, 0x0f34, + 0x0f36, 0x0f36, + 0x0f38, 0x0f38, + 0x0fbe, 0x0fc5, + 0x0fc7, 0x0fcc, + 0x0fcf, 0x0fcf, + 0x1360, 0x1360, + 0x1390, 0x1399, + 0x1940, 0x1940, + 0x19e0, 0x19ff, + 0x2100, 0x2101, + 0x2103, 0x2106, + 0x2108, 0x2109, + 0x2114, 0x2114, + 0x2116, 0x2118, + 0x211e, 0x2123, + 0x2125, 0x2125, + 0x2127, 0x2127, + 0x2129, 0x2129, + 0x212e, 0x212e, + 0x2132, 0x2132, + 0x213a, 0x213b, + 0x214a, 0x214a, + 0x214c, 0x214c, + 0x2195, 0x2199, + 0x219c, 0x219f, + 0x21a1, 0x21a2, + 0x21a4, 0x21a5, + 0x21a7, 0x21ad, + 0x21af, 0x21cd, + 0x21d0, 0x21d1, + 0x21d3, 0x21d3, + 0x21d5, 0x21f3, + 0x2300, 0x2307, + 0x230c, 
0x231f, + 0x2322, 0x2328, + 0x232b, 0x237b, + 0x237d, 0x239a, + 0x23b7, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x249c, 0x24e9, + 0x2500, 0x25b6, + 0x25b8, 0x25c0, + 0x25c2, 0x25f7, + 0x2600, 0x266e, + 0x2670, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2767, + 0x2794, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x2800, 0x28ff, + 0x2b00, 0x2b13, + 0x2ce5, 0x2cea, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3004, 0x3004, + 0x3012, 0x3013, + 0x3020, 0x3020, + 0x3036, 0x3037, + 0x303e, 0x303f, + 0x3190, 0x3191, + 0x3196, 0x319f, + 0x31c0, 0x31cf, + 0x3200, 0x321e, + 0x322a, 0x3243, + 0x3250, 0x3250, + 0x3260, 0x327f, + 0x328a, 0x32b0, + 0x32c0, 0x32fe, + 0x3300, 0x33ff, + 0x4dc0, 0x4dff, + 0xa490, 0xa4c6, + 0xa828, 0xa82b, + 0xfdfd, 0xfdfd, + 0xffe4, 0xffe4, + 0xffe8, 0xffe8, + 0xffed, 0xffee, + 0xfffc, 0xfffd, + 0x10102, 0x10102, + 0x10137, 0x1013f, + 0x10179, 0x10189, + 0x103d0, 0x103d0, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d164, + 0x1d16a, 0x1d16c, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1dd, + 0x1d200, 0x1d241, + 0x1d245, 0x1d245, + 0x1d300, 0x1d356 +}; /* CR_So */ + +/* 'Z': Major Category */ +static const OnigCodePoint CR_Z[] = { + 9, + 0x0020, 0x0020, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x2028, 0x2029, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; /* CR_Z */ + +/* 'Zl': General Category */ +static const OnigCodePoint CR_Zl[] = { + 1, + 0x2028, 0x2028 +}; /* CR_Zl */ + +/* 'Zp': General Category */ +static const OnigCodePoint CR_Zp[] = { + 1, + 0x2029, 0x2029 +}; /* CR_Zp */ + +/* 'Zs': General Category */ +static const OnigCodePoint CR_Zs[] = { + 8, + 0x0020, 0x0020, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; /* CR_Zs */ + 
+/* 'Arabic': Script */ +static const OnigCodePoint CR_Arabic[] = { + 17, + 0x060b, 0x060b, + 0x060d, 0x0615, + 0x061e, 0x061e, + 0x0621, 0x063a, + 0x0641, 0x064a, + 0x0656, 0x065e, + 0x066a, 0x066f, + 0x0671, 0x06dc, + 0x06de, 0x06ff, + 0x0750, 0x076d, + 0xfb50, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfc, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc +}; /* CR_Arabic */ + +/* 'Armenian': Script */ +static const OnigCodePoint CR_Armenian[] = { + 5, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x058a, 0x058a, + 0xfb13, 0xfb17 +}; /* CR_Armenian */ + +/* 'Bengali': Script */ +static const OnigCodePoint CR_Bengali[] = { + 14, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa +}; /* CR_Bengali */ + +/* 'Bopomofo': Script */ +static const OnigCodePoint CR_Bopomofo[] = { + 2, + 0x3105, 0x312c, + 0x31a0, 0x31b7 +}; /* CR_Bopomofo */ + +/* 'Braille': Script */ +static const OnigCodePoint CR_Braille[] = { + 1, + 0x2800, 0x28ff +}; /* CR_Braille */ + +/* 'Buginese': Script */ +static const OnigCodePoint CR_Buginese[] = { + 2, + 0x1a00, 0x1a1b, + 0x1a1e, 0x1a1f +}; /* CR_Buginese */ + +/* 'Buhid': Script */ +static const OnigCodePoint CR_Buhid[] = { + 1, + 0x1740, 0x1753 +}; /* CR_Buhid */ + +/* 'Canadian_Aboriginal': Script */ +static const OnigCodePoint CR_Canadian_Aboriginal[] = { + 1, + 0x1401, 0x1676 +}; /* CR_Canadian_Aboriginal */ + +/* 'Cherokee': Script */ +static const OnigCodePoint CR_Cherokee[] = { + 1, + 0x13a0, 0x13f4 +}; /* CR_Cherokee */ + +/* 'Common': Script */ +static const OnigCodePoint CR_Common[] = { + 126, + 0x0000, 0x0040, + 0x005b, 0x0060, + 0x007b, 0x00a9, + 0x00ab, 0x00b9, + 0x00bb, 0x00bf, + 0x00d7, 0x00d7, + 0x00f7, 0x00f7, + 0x02b9, 0x02df, + 0x02e5, 0x02ff, + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x0589, 0x0589, + 
0x0600, 0x0603, + 0x060c, 0x060c, + 0x061b, 0x061b, + 0x061f, 0x061f, + 0x0640, 0x0640, + 0x0660, 0x0669, + 0x06dd, 0x06dd, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0e3f, 0x0e3f, + 0x10fb, 0x10fb, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x2000, 0x200b, + 0x200e, 0x2063, + 0x206a, 0x2070, + 0x2074, 0x207e, + 0x2080, 0x208e, + 0x20a0, 0x20b5, + 0x2100, 0x2125, + 0x2127, 0x2129, + 0x212c, 0x214c, + 0x2153, 0x2183, + 0x2190, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c6, + 0x27d0, 0x27eb, + 0x27f0, 0x27ff, + 0x2900, 0x2b13, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x2ff0, 0x2ffb, + 0x3000, 0x3004, + 0x3006, 0x3006, + 0x3008, 0x3020, + 0x3030, 0x3037, + 0x303c, 0x303f, + 0x309b, 0x309c, + 0x30a0, 0x30a0, + 0x30fb, 0x30fc, + 0x3190, 0x319f, + 0x31c0, 0x31cf, + 0x3220, 0x3243, + 0x3250, 0x325f, + 0x327e, 0x32fe, + 0x3300, 0x33ff, + 0x4dc0, 0x4dff, + 0xa700, 0xa716, + 0xe000, 0xf8ff, + 0xfd3e, 0xfd3f, + 0xfdfd, 0xfdfd, + 0xfe10, 0xfe19, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfeff, 0xfeff, + 0xff01, 0xff20, + 0xff3b, 0xff40, + 0xff5b, 0xff65, + 0xff70, 0xff70, + 0xff9e, 0xff9f, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1013f, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d166, + 0x1d16a, 0x1d17a, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1dd, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 
0x1d6a5, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Common */ + +/* 'Coptic': Script */ +static const OnigCodePoint CR_Coptic[] = { + 3, + 0x03e2, 0x03ef, + 0x2c80, 0x2cea, + 0x2cf9, 0x2cff +}; /* CR_Coptic */ + +/* 'Cypriot': Script */ +static const OnigCodePoint CR_Cypriot[] = { + 6, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f +}; /* CR_Cypriot */ + +/* 'Cyrillic': Script */ +static const OnigCodePoint CR_Cyrillic[] = { + 6, + 0x0400, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x1d2b, 0x1d2b, + 0x1d78, 0x1d78 +}; /* CR_Cyrillic */ + +/* 'Deseret': Script */ +static const OnigCodePoint CR_Deseret[] = { + 1, + 0x10400, 0x1044f +}; /* CR_Deseret */ + +/* 'Devanagari': Script */ +static const OnigCodePoint CR_Devanagari[] = { + 6, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x097d, 0x097d +}; /* CR_Devanagari */ + +/* 'Ethiopic': Script */ +static const OnigCodePoint CR_Ethiopic[] = { + 27, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde +}; /* CR_Ethiopic */ + +/* 'Georgian': Script */ +static const OnigCodePoint CR_Georgian[] = { + 4, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x2d00, 0x2d25 +}; /* CR_Georgian */ + +/* 'Glagolitic': Script */ +static const OnigCodePoint CR_Glagolitic[] = { + 2, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e +}; /* CR_Glagolitic */ + +/* 'Gothic': Script */ +static const 
OnigCodePoint CR_Gothic[] = { + 1, + 0x10330, 0x1034a +}; /* CR_Gothic */ + +/* 'Greek': Script */ +static const OnigCodePoint CR_Greek[] = { + 31, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x0384, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03e1, + 0x03f0, 0x03ff, + 0x1d26, 0x1d2a, + 0x1d5d, 0x1d61, + 0x1d66, 0x1d6a, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2126, 0x2126, + 0x10140, 0x1018a, + 0x1d200, 0x1d245 +}; /* CR_Greek */ + +/* 'Gujarati': Script */ +static const OnigCodePoint CR_Gujarati[] = { + 14, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1 +}; /* CR_Gujarati */ + +/* 'Gurmukhi': Script */ +static const OnigCodePoint CR_Gurmukhi[] = { + 15, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74 +}; /* CR_Gurmukhi */ + +/* 'Han': Script */ +static const OnigCodePoint CR_Han[] = { + 14, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x3005, 0x3005, + 0x3007, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303b, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d +}; /* CR_Han */ + +/* 'Hangul': Script */ +static const OnigCodePoint CR_Hangul[] = { + 12, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x3131, 0x318e, + 0x3200, 0x321e, + 0x3260, 0x327d, + 0xac00, 0xd7a3, + 
0xffa0, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc +}; /* CR_Hangul */ + +/* 'Hanunoo': Script */ +static const OnigCodePoint CR_Hanunoo[] = { + 1, + 0x1720, 0x1734 +}; /* CR_Hanunoo */ + +/* 'Hebrew': Script */ +static const OnigCodePoint CR_Hebrew[] = { + 10, + 0x0591, 0x05b9, + 0x05bb, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfb4f +}; /* CR_Hebrew */ + +/* 'Hiragana': Script */ +static const OnigCodePoint CR_Hiragana[] = { + 2, + 0x3041, 0x3096, + 0x309d, 0x309f +}; /* CR_Hiragana */ + +/* 'Inherited': Script */ +static const OnigCodePoint CR_Inherited[] = { + 15, + 0x0300, 0x036f, + 0x064b, 0x0655, + 0x0670, 0x0670, + 0x1dc0, 0x1dc3, + 0x200c, 0x200d, + 0x20d0, 0x20eb, + 0x302a, 0x302f, + 0x3099, 0x309a, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0x1d167, 0x1d169, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0xe0100, 0xe01ef +}; /* CR_Inherited */ + +/* 'Kannada': Script */ +static const OnigCodePoint CR_Kannada[] = { + 13, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef +}; /* CR_Kannada */ + +/* 'Katakana': Script */ +static const OnigCodePoint CR_Katakana[] = { + 5, + 0x30a1, 0x30fa, + 0x30fd, 0x30ff, + 0x31f0, 0x31ff, + 0xff66, 0xff6f, + 0xff71, 0xff9d +}; /* CR_Katakana */ + +/* 'Kharoshthi': Script */ +static const OnigCodePoint CR_Kharoshthi[] = { + 8, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58 +}; /* CR_Kharoshthi */ + +/* 'Khmer': Script */ +static const OnigCodePoint CR_Khmer[] = { + 4, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x19e0, 0x19ff +}; /* CR_Khmer */ + +/* 'Lao': Script */ +static 
const OnigCodePoint CR_Lao[] = { + 18, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd +}; /* CR_Lao */ + +/* 'Latin': Script */ +static const OnigCodePoint CR_Latin[] = { + 23, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0241, + 0x0250, 0x02b8, + 0x02e0, 0x02e4, + 0x1d00, 0x1d25, + 0x1d2c, 0x1d5c, + 0x1d62, 0x1d65, + 0x1d6b, 0x1d77, + 0x1d79, 0x1dbf, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x212a, 0x212b, + 0xfb00, 0xfb06, + 0xff21, 0xff3a, + 0xff41, 0xff5a +}; /* CR_Latin */ + +/* 'Limbu': Script */ +static const OnigCodePoint CR_Limbu[] = { + 5, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x194f +}; /* CR_Limbu */ + +/* 'Linear_B': Script */ +static const OnigCodePoint CR_Linear_B[] = { + 7, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa +}; /* CR_Linear_B */ + +/* 'Malayalam': Script */ +static const OnigCodePoint CR_Malayalam[] = { + 11, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f +}; /* CR_Malayalam */ + +/* 'Mongolian': Script */ +static const OnigCodePoint CR_Mongolian[] = { + 4, + 0x1800, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9 +}; /* CR_Mongolian */ + +/* 'Myanmar': Script */ +static const OnigCodePoint CR_Myanmar[] = { + 6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059 +}; /* CR_Myanmar */ + +/* 
'New_Tai_Lue': Script */ +static const OnigCodePoint CR_New_Tai_Lue[] = { + 4, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x19de, 0x19df +}; /* CR_New_Tai_Lue */ + +/* 'Ogham': Script */ +static const OnigCodePoint CR_Ogham[] = { + 1, + 0x1680, 0x169c +}; /* CR_Ogham */ + +/* 'Old_Italic': Script */ +static const OnigCodePoint CR_Old_Italic[] = { + 2, + 0x10300, 0x1031e, + 0x10320, 0x10323 +}; /* CR_Old_Italic */ + +/* 'Old_Persian': Script */ +static const OnigCodePoint CR_Old_Persian[] = { + 2, + 0x103a0, 0x103c3, + 0x103c8, 0x103d5 +}; /* CR_Old_Persian */ + +/* 'Oriya': Script */ +static const OnigCodePoint CR_Oriya[] = { + 14, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71 +}; /* CR_Oriya */ + +/* 'Osmanya': Script */ +static const OnigCodePoint CR_Osmanya[] = { + 2, + 0x10480, 0x1049d, + 0x104a0, 0x104a9 +}; /* CR_Osmanya */ + +/* 'Runic': Script */ +static const OnigCodePoint CR_Runic[] = { + 2, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0 +}; /* CR_Runic */ + +/* 'Shavian': Script */ +static const OnigCodePoint CR_Shavian[] = { + 1, + 0x10450, 0x1047f +}; /* CR_Shavian */ + +/* 'Sinhala': Script */ +static const OnigCodePoint CR_Sinhala[] = { + 11, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4 +}; /* CR_Sinhala */ + +/* 'Syloti_Nagri': Script */ +static const OnigCodePoint CR_Syloti_Nagri[] = { + 1, + 0xa800, 0xa82b +}; /* CR_Syloti_Nagri */ + +/* 'Syriac': Script */ +static const OnigCodePoint CR_Syriac[] = { + 3, + 0x0700, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x074f +}; /* CR_Syriac */ + +/* 'Tagalog': Script */ +static const OnigCodePoint CR_Tagalog[] = { + 2, + 0x1700, 0x170c, + 0x170e, 0x1714 +}; /* 
CR_Tagalog */ + +/* 'Tagbanwa': Script */ +static const OnigCodePoint CR_Tagbanwa[] = { + 3, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773 +}; /* CR_Tagbanwa */ + +/* 'Tai_Le': Script */ +static const OnigCodePoint CR_Tai_Le[] = { + 2, + 0x1950, 0x196d, + 0x1970, 0x1974 +}; /* CR_Tai_Le */ + +/* 'Tamil': Script */ +static const OnigCodePoint CR_Tamil[] = { + 15, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa +}; /* CR_Tamil */ + +/* 'Telugu': Script */ +static const OnigCodePoint CR_Telugu[] = { + 12, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f +}; /* CR_Telugu */ + +/* 'Thaana': Script */ +static const OnigCodePoint CR_Thaana[] = { + 1, + 0x0780, 0x07b1 +}; /* CR_Thaana */ + +/* 'Thai': Script */ +static const OnigCodePoint CR_Thai[] = { + 2, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e5b +}; /* CR_Thai */ + +/* 'Tibetan': Script */ +static const OnigCodePoint CR_Tibetan[] = { + 7, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fd1 +}; /* CR_Tibetan */ + +/* 'Tifinagh': Script */ +static const OnigCodePoint CR_Tifinagh[] = { + 2, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f +}; /* CR_Tifinagh */ + +/* 'Ugaritic': Script */ +static const OnigCodePoint CR_Ugaritic[] = { + 2, + 0x10380, 0x1039d, + 0x1039f, 0x1039f +}; /* CR_Ugaritic */ + +/* 'Yi': Script */ +static const OnigCodePoint CR_Yi[] = { + 2, + 0xa000, 0xa48c, + 0xa490, 0xa4c6 +}; /* CR_Yi */ + + +#endif /* USE_UNICODE_PROPERTIES */ + + +typedef struct { + int n; + OnigCodePoint code[3]; +} CodePointList3; + +typedef struct { + OnigCodePoint from; + CodePointList3 
to; +} CaseFold_11_Type; + +typedef struct { + OnigCodePoint from; + CodePointList3 to; +} CaseUnfold_11_Type; + +typedef struct { + int n; + OnigCodePoint code[2]; +} CodePointList2; + +typedef struct { + OnigCodePoint from[2]; + CodePointList2 to; +} CaseUnfold_12_Type; + +typedef struct { + OnigCodePoint from[3]; + CodePointList2 to; +} CaseUnfold_13_Type; + +static const CaseFold_11_Type CaseFold[] = { + { 0x0041, {1, {0x0061}}}, + { 0x0042, {1, {0x0062}}}, + { 0x0043, {1, {0x0063}}}, + { 0x0044, {1, {0x0064}}}, + { 0x0045, {1, {0x0065}}}, + { 0x0046, {1, {0x0066}}}, + { 0x0047, {1, {0x0067}}}, + { 0x0048, {1, {0x0068}}}, + { 0x004a, {1, {0x006a}}}, + { 0x004b, {1, {0x006b}}}, + { 0x004c, {1, {0x006c}}}, + { 0x004d, {1, {0x006d}}}, + { 0x004e, {1, {0x006e}}}, + { 0x004f, {1, {0x006f}}}, + { 0x0050, {1, {0x0070}}}, + { 0x0051, {1, {0x0071}}}, + { 0x0052, {1, {0x0072}}}, + { 0x0053, {1, {0x0073}}}, + { 0x0054, {1, {0x0074}}}, + { 0x0055, {1, {0x0075}}}, + { 0x0056, {1, {0x0076}}}, + { 0x0057, {1, {0x0077}}}, + { 0x0058, {1, {0x0078}}}, + { 0x0059, {1, {0x0079}}}, + { 0x005a, {1, {0x007a}}}, + { 0x00b5, {1, {0x03bc}}}, + { 0x00c0, {1, {0x00e0}}}, + { 0x00c1, {1, {0x00e1}}}, + { 0x00c2, {1, {0x00e2}}}, + { 0x00c3, {1, {0x00e3}}}, + { 0x00c4, {1, {0x00e4}}}, + { 0x00c5, {1, {0x00e5}}}, + { 0x00c6, {1, {0x00e6}}}, + { 0x00c7, {1, {0x00e7}}}, + { 0x00c8, {1, {0x00e8}}}, + { 0x00c9, {1, {0x00e9}}}, + { 0x00ca, {1, {0x00ea}}}, + { 0x00cb, {1, {0x00eb}}}, + { 0x00cc, {1, {0x00ec}}}, + { 0x00cd, {1, {0x00ed}}}, + { 0x00ce, {1, {0x00ee}}}, + { 0x00cf, {1, {0x00ef}}}, + { 0x00d0, {1, {0x00f0}}}, + { 0x00d1, {1, {0x00f1}}}, + { 0x00d2, {1, {0x00f2}}}, + { 0x00d3, {1, {0x00f3}}}, + { 0x00d4, {1, {0x00f4}}}, + { 0x00d5, {1, {0x00f5}}}, + { 0x00d6, {1, {0x00f6}}}, + { 0x00d8, {1, {0x00f8}}}, + { 0x00d9, {1, {0x00f9}}}, + { 0x00da, {1, {0x00fa}}}, + { 0x00db, {1, {0x00fb}}}, + { 0x00dc, {1, {0x00fc}}}, + { 0x00dd, {1, {0x00fd}}}, + { 0x00de, {1, {0x00fe}}}, + { 0x00df, {2, 
{0x0073, 0x0073}}}, + { 0x0100, {1, {0x0101}}}, + { 0x0102, {1, {0x0103}}}, + { 0x0104, {1, {0x0105}}}, + { 0x0106, {1, {0x0107}}}, + { 0x0108, {1, {0x0109}}}, + { 0x010a, {1, {0x010b}}}, + { 0x010c, {1, {0x010d}}}, + { 0x010e, {1, {0x010f}}}, + { 0x0110, {1, {0x0111}}}, + { 0x0112, {1, {0x0113}}}, + { 0x0114, {1, {0x0115}}}, + { 0x0116, {1, {0x0117}}}, + { 0x0118, {1, {0x0119}}}, + { 0x011a, {1, {0x011b}}}, + { 0x011c, {1, {0x011d}}}, + { 0x011e, {1, {0x011f}}}, + { 0x0120, {1, {0x0121}}}, + { 0x0122, {1, {0x0123}}}, + { 0x0124, {1, {0x0125}}}, + { 0x0126, {1, {0x0127}}}, + { 0x0128, {1, {0x0129}}}, + { 0x012a, {1, {0x012b}}}, + { 0x012c, {1, {0x012d}}}, + { 0x012e, {1, {0x012f}}}, + { 0x0132, {1, {0x0133}}}, + { 0x0134, {1, {0x0135}}}, + { 0x0136, {1, {0x0137}}}, + { 0x0139, {1, {0x013a}}}, + { 0x013b, {1, {0x013c}}}, + { 0x013d, {1, {0x013e}}}, + { 0x013f, {1, {0x0140}}}, + { 0x0141, {1, {0x0142}}}, + { 0x0143, {1, {0x0144}}}, + { 0x0145, {1, {0x0146}}}, + { 0x0147, {1, {0x0148}}}, + { 0x0149, {2, {0x02bc, 0x006e}}}, + { 0x014a, {1, {0x014b}}}, + { 0x014c, {1, {0x014d}}}, + { 0x014e, {1, {0x014f}}}, + { 0x0150, {1, {0x0151}}}, + { 0x0152, {1, {0x0153}}}, + { 0x0154, {1, {0x0155}}}, + { 0x0156, {1, {0x0157}}}, + { 0x0158, {1, {0x0159}}}, + { 0x015a, {1, {0x015b}}}, + { 0x015c, {1, {0x015d}}}, + { 0x015e, {1, {0x015f}}}, + { 0x0160, {1, {0x0161}}}, + { 0x0162, {1, {0x0163}}}, + { 0x0164, {1, {0x0165}}}, + { 0x0166, {1, {0x0167}}}, + { 0x0168, {1, {0x0169}}}, + { 0x016a, {1, {0x016b}}}, + { 0x016c, {1, {0x016d}}}, + { 0x016e, {1, {0x016f}}}, + { 0x0170, {1, {0x0171}}}, + { 0x0172, {1, {0x0173}}}, + { 0x0174, {1, {0x0175}}}, + { 0x0176, {1, {0x0177}}}, + { 0x0178, {1, {0x00ff}}}, + { 0x0179, {1, {0x017a}}}, + { 0x017b, {1, {0x017c}}}, + { 0x017d, {1, {0x017e}}}, + { 0x017f, {1, {0x0073}}}, + { 0x0181, {1, {0x0253}}}, + { 0x0182, {1, {0x0183}}}, + { 0x0184, {1, {0x0185}}}, + { 0x0186, {1, {0x0254}}}, + { 0x0187, {1, {0x0188}}}, + { 0x0189, {1, {0x0256}}}, + { 0x018a, 
{1, {0x0257}}}, + { 0x018b, {1, {0x018c}}}, + { 0x018e, {1, {0x01dd}}}, + { 0x018f, {1, {0x0259}}}, + { 0x0190, {1, {0x025b}}}, + { 0x0191, {1, {0x0192}}}, + { 0x0193, {1, {0x0260}}}, + { 0x0194, {1, {0x0263}}}, + { 0x0196, {1, {0x0269}}}, + { 0x0197, {1, {0x0268}}}, + { 0x0198, {1, {0x0199}}}, + { 0x019c, {1, {0x026f}}}, + { 0x019d, {1, {0x0272}}}, + { 0x019f, {1, {0x0275}}}, + { 0x01a0, {1, {0x01a1}}}, + { 0x01a2, {1, {0x01a3}}}, + { 0x01a4, {1, {0x01a5}}}, + { 0x01a6, {1, {0x0280}}}, + { 0x01a7, {1, {0x01a8}}}, + { 0x01a9, {1, {0x0283}}}, + { 0x01ac, {1, {0x01ad}}}, + { 0x01ae, {1, {0x0288}}}, + { 0x01af, {1, {0x01b0}}}, + { 0x01b1, {1, {0x028a}}}, + { 0x01b2, {1, {0x028b}}}, + { 0x01b3, {1, {0x01b4}}}, + { 0x01b5, {1, {0x01b6}}}, + { 0x01b7, {1, {0x0292}}}, + { 0x01b8, {1, {0x01b9}}}, + { 0x01bc, {1, {0x01bd}}}, + { 0x01c4, {1, {0x01c6}}}, + { 0x01c5, {1, {0x01c6}}}, + { 0x01c7, {1, {0x01c9}}}, + { 0x01c8, {1, {0x01c9}}}, + { 0x01ca, {1, {0x01cc}}}, + { 0x01cb, {1, {0x01cc}}}, + { 0x01cd, {1, {0x01ce}}}, + { 0x01cf, {1, {0x01d0}}}, + { 0x01d1, {1, {0x01d2}}}, + { 0x01d3, {1, {0x01d4}}}, + { 0x01d5, {1, {0x01d6}}}, + { 0x01d7, {1, {0x01d8}}}, + { 0x01d9, {1, {0x01da}}}, + { 0x01db, {1, {0x01dc}}}, + { 0x01de, {1, {0x01df}}}, + { 0x01e0, {1, {0x01e1}}}, + { 0x01e2, {1, {0x01e3}}}, + { 0x01e4, {1, {0x01e5}}}, + { 0x01e6, {1, {0x01e7}}}, + { 0x01e8, {1, {0x01e9}}}, + { 0x01ea, {1, {0x01eb}}}, + { 0x01ec, {1, {0x01ed}}}, + { 0x01ee, {1, {0x01ef}}}, + { 0x01f0, {2, {0x006a, 0x030c}}}, + { 0x01f1, {1, {0x01f3}}}, + { 0x01f2, {1, {0x01f3}}}, + { 0x01f4, {1, {0x01f5}}}, + { 0x01f6, {1, {0x0195}}}, + { 0x01f7, {1, {0x01bf}}}, + { 0x01f8, {1, {0x01f9}}}, + { 0x01fa, {1, {0x01fb}}}, + { 0x01fc, {1, {0x01fd}}}, + { 0x01fe, {1, {0x01ff}}}, + { 0x0200, {1, {0x0201}}}, + { 0x0202, {1, {0x0203}}}, + { 0x0204, {1, {0x0205}}}, + { 0x0206, {1, {0x0207}}}, + { 0x0208, {1, {0x0209}}}, + { 0x020a, {1, {0x020b}}}, + { 0x020c, {1, {0x020d}}}, + { 0x020e, {1, {0x020f}}}, + { 0x0210, {1, 
{0x0211}}}, + { 0x0212, {1, {0x0213}}}, + { 0x0214, {1, {0x0215}}}, + { 0x0216, {1, {0x0217}}}, + { 0x0218, {1, {0x0219}}}, + { 0x021a, {1, {0x021b}}}, + { 0x021c, {1, {0x021d}}}, + { 0x021e, {1, {0x021f}}}, + { 0x0220, {1, {0x019e}}}, + { 0x0222, {1, {0x0223}}}, + { 0x0224, {1, {0x0225}}}, + { 0x0226, {1, {0x0227}}}, + { 0x0228, {1, {0x0229}}}, + { 0x022a, {1, {0x022b}}}, + { 0x022c, {1, {0x022d}}}, + { 0x022e, {1, {0x022f}}}, + { 0x0230, {1, {0x0231}}}, + { 0x0232, {1, {0x0233}}}, + { 0x023b, {1, {0x023c}}}, + { 0x023d, {1, {0x019a}}}, + { 0x0241, {1, {0x0294}}}, + { 0x0345, {1, {0x03b9}}}, + { 0x0386, {1, {0x03ac}}}, + { 0x0388, {1, {0x03ad}}}, + { 0x0389, {1, {0x03ae}}}, + { 0x038a, {1, {0x03af}}}, + { 0x038c, {1, {0x03cc}}}, + { 0x038e, {1, {0x03cd}}}, + { 0x038f, {1, {0x03ce}}}, + { 0x0390, {3, {0x03b9, 0x0308, 0x0301}}}, + { 0x0391, {1, {0x03b1}}}, + { 0x0392, {1, {0x03b2}}}, + { 0x0393, {1, {0x03b3}}}, + { 0x0394, {1, {0x03b4}}}, + { 0x0395, {1, {0x03b5}}}, + { 0x0396, {1, {0x03b6}}}, + { 0x0397, {1, {0x03b7}}}, + { 0x0398, {1, {0x03b8}}}, + { 0x0399, {1, {0x03b9}}}, + { 0x039a, {1, {0x03ba}}}, + { 0x039b, {1, {0x03bb}}}, + { 0x039c, {1, {0x03bc}}}, + { 0x039d, {1, {0x03bd}}}, + { 0x039e, {1, {0x03be}}}, + { 0x039f, {1, {0x03bf}}}, + { 0x03a0, {1, {0x03c0}}}, + { 0x03a1, {1, {0x03c1}}}, + { 0x03a3, {1, {0x03c3}}}, + { 0x03a4, {1, {0x03c4}}}, + { 0x03a5, {1, {0x03c5}}}, + { 0x03a6, {1, {0x03c6}}}, + { 0x03a7, {1, {0x03c7}}}, + { 0x03a8, {1, {0x03c8}}}, + { 0x03a9, {1, {0x03c9}}}, + { 0x03aa, {1, {0x03ca}}}, + { 0x03ab, {1, {0x03cb}}}, + { 0x03b0, {3, {0x03c5, 0x0308, 0x0301}}}, + { 0x03c2, {1, {0x03c3}}}, + { 0x03d0, {1, {0x03b2}}}, + { 0x03d1, {1, {0x03b8}}}, + { 0x03d5, {1, {0x03c6}}}, + { 0x03d6, {1, {0x03c0}}}, + { 0x03d8, {1, {0x03d9}}}, + { 0x03da, {1, {0x03db}}}, + { 0x03dc, {1, {0x03dd}}}, + { 0x03de, {1, {0x03df}}}, + { 0x03e0, {1, {0x03e1}}}, + { 0x03e2, {1, {0x03e3}}}, + { 0x03e4, {1, {0x03e5}}}, + { 0x03e6, {1, {0x03e7}}}, + { 0x03e8, {1, 
{0x03e9}}}, + { 0x03ea, {1, {0x03eb}}}, + { 0x03ec, {1, {0x03ed}}}, + { 0x03ee, {1, {0x03ef}}}, + { 0x03f0, {1, {0x03ba}}}, + { 0x03f1, {1, {0x03c1}}}, + { 0x03f4, {1, {0x03b8}}}, + { 0x03f5, {1, {0x03b5}}}, + { 0x03f7, {1, {0x03f8}}}, + { 0x03f9, {1, {0x03f2}}}, + { 0x03fa, {1, {0x03fb}}}, + { 0x0400, {1, {0x0450}}}, + { 0x0401, {1, {0x0451}}}, + { 0x0402, {1, {0x0452}}}, + { 0x0403, {1, {0x0453}}}, + { 0x0404, {1, {0x0454}}}, + { 0x0405, {1, {0x0455}}}, + { 0x0406, {1, {0x0456}}}, + { 0x0407, {1, {0x0457}}}, + { 0x0408, {1, {0x0458}}}, + { 0x0409, {1, {0x0459}}}, + { 0x040a, {1, {0x045a}}}, + { 0x040b, {1, {0x045b}}}, + { 0x040c, {1, {0x045c}}}, + { 0x040d, {1, {0x045d}}}, + { 0x040e, {1, {0x045e}}}, + { 0x040f, {1, {0x045f}}}, + { 0x0410, {1, {0x0430}}}, + { 0x0411, {1, {0x0431}}}, + { 0x0412, {1, {0x0432}}}, + { 0x0413, {1, {0x0433}}}, + { 0x0414, {1, {0x0434}}}, + { 0x0415, {1, {0x0435}}}, + { 0x0416, {1, {0x0436}}}, + { 0x0417, {1, {0x0437}}}, + { 0x0418, {1, {0x0438}}}, + { 0x0419, {1, {0x0439}}}, + { 0x041a, {1, {0x043a}}}, + { 0x041b, {1, {0x043b}}}, + { 0x041c, {1, {0x043c}}}, + { 0x041d, {1, {0x043d}}}, + { 0x041e, {1, {0x043e}}}, + { 0x041f, {1, {0x043f}}}, + { 0x0420, {1, {0x0440}}}, + { 0x0421, {1, {0x0441}}}, + { 0x0422, {1, {0x0442}}}, + { 0x0423, {1, {0x0443}}}, + { 0x0424, {1, {0x0444}}}, + { 0x0425, {1, {0x0445}}}, + { 0x0426, {1, {0x0446}}}, + { 0x0427, {1, {0x0447}}}, + { 0x0428, {1, {0x0448}}}, + { 0x0429, {1, {0x0449}}}, + { 0x042a, {1, {0x044a}}}, + { 0x042b, {1, {0x044b}}}, + { 0x042c, {1, {0x044c}}}, + { 0x042d, {1, {0x044d}}}, + { 0x042e, {1, {0x044e}}}, + { 0x042f, {1, {0x044f}}}, + { 0x0460, {1, {0x0461}}}, + { 0x0462, {1, {0x0463}}}, + { 0x0464, {1, {0x0465}}}, + { 0x0466, {1, {0x0467}}}, + { 0x0468, {1, {0x0469}}}, + { 0x046a, {1, {0x046b}}}, + { 0x046c, {1, {0x046d}}}, + { 0x046e, {1, {0x046f}}}, + { 0x0470, {1, {0x0471}}}, + { 0x0472, {1, {0x0473}}}, + { 0x0474, {1, {0x0475}}}, + { 0x0476, {1, {0x0477}}}, + { 0x0478, {1, {0x0479}}}, 
+ { 0x047a, {1, {0x047b}}}, + { 0x047c, {1, {0x047d}}}, + { 0x047e, {1, {0x047f}}}, + { 0x0480, {1, {0x0481}}}, + { 0x048a, {1, {0x048b}}}, + { 0x048c, {1, {0x048d}}}, + { 0x048e, {1, {0x048f}}}, + { 0x0490, {1, {0x0491}}}, + { 0x0492, {1, {0x0493}}}, + { 0x0494, {1, {0x0495}}}, + { 0x0496, {1, {0x0497}}}, + { 0x0498, {1, {0x0499}}}, + { 0x049a, {1, {0x049b}}}, + { 0x049c, {1, {0x049d}}}, + { 0x049e, {1, {0x049f}}}, + { 0x04a0, {1, {0x04a1}}}, + { 0x04a2, {1, {0x04a3}}}, + { 0x04a4, {1, {0x04a5}}}, + { 0x04a6, {1, {0x04a7}}}, + { 0x04a8, {1, {0x04a9}}}, + { 0x04aa, {1, {0x04ab}}}, + { 0x04ac, {1, {0x04ad}}}, + { 0x04ae, {1, {0x04af}}}, + { 0x04b0, {1, {0x04b1}}}, + { 0x04b2, {1, {0x04b3}}}, + { 0x04b4, {1, {0x04b5}}}, + { 0x04b6, {1, {0x04b7}}}, + { 0x04b8, {1, {0x04b9}}}, + { 0x04ba, {1, {0x04bb}}}, + { 0x04bc, {1, {0x04bd}}}, + { 0x04be, {1, {0x04bf}}}, + { 0x04c1, {1, {0x04c2}}}, + { 0x04c3, {1, {0x04c4}}}, + { 0x04c5, {1, {0x04c6}}}, + { 0x04c7, {1, {0x04c8}}}, + { 0x04c9, {1, {0x04ca}}}, + { 0x04cb, {1, {0x04cc}}}, + { 0x04cd, {1, {0x04ce}}}, + { 0x04d0, {1, {0x04d1}}}, + { 0x04d2, {1, {0x04d3}}}, + { 0x04d4, {1, {0x04d5}}}, + { 0x04d6, {1, {0x04d7}}}, + { 0x04d8, {1, {0x04d9}}}, + { 0x04da, {1, {0x04db}}}, + { 0x04dc, {1, {0x04dd}}}, + { 0x04de, {1, {0x04df}}}, + { 0x04e0, {1, {0x04e1}}}, + { 0x04e2, {1, {0x04e3}}}, + { 0x04e4, {1, {0x04e5}}}, + { 0x04e6, {1, {0x04e7}}}, + { 0x04e8, {1, {0x04e9}}}, + { 0x04ea, {1, {0x04eb}}}, + { 0x04ec, {1, {0x04ed}}}, + { 0x04ee, {1, {0x04ef}}}, + { 0x04f0, {1, {0x04f1}}}, + { 0x04f2, {1, {0x04f3}}}, + { 0x04f4, {1, {0x04f5}}}, + { 0x04f6, {1, {0x04f7}}}, + { 0x04f8, {1, {0x04f9}}}, + { 0x0500, {1, {0x0501}}}, + { 0x0502, {1, {0x0503}}}, + { 0x0504, {1, {0x0505}}}, + { 0x0506, {1, {0x0507}}}, + { 0x0508, {1, {0x0509}}}, + { 0x050a, {1, {0x050b}}}, + { 0x050c, {1, {0x050d}}}, + { 0x050e, {1, {0x050f}}}, + { 0x0531, {1, {0x0561}}}, + { 0x0532, {1, {0x0562}}}, + { 0x0533, {1, {0x0563}}}, + { 0x0534, {1, {0x0564}}}, + { 0x0535, 
{1, {0x0565}}}, + { 0x0536, {1, {0x0566}}}, + { 0x0537, {1, {0x0567}}}, + { 0x0538, {1, {0x0568}}}, + { 0x0539, {1, {0x0569}}}, + { 0x053a, {1, {0x056a}}}, + { 0x053b, {1, {0x056b}}}, + { 0x053c, {1, {0x056c}}}, + { 0x053d, {1, {0x056d}}}, + { 0x053e, {1, {0x056e}}}, + { 0x053f, {1, {0x056f}}}, + { 0x0540, {1, {0x0570}}}, + { 0x0541, {1, {0x0571}}}, + { 0x0542, {1, {0x0572}}}, + { 0x0543, {1, {0x0573}}}, + { 0x0544, {1, {0x0574}}}, + { 0x0545, {1, {0x0575}}}, + { 0x0546, {1, {0x0576}}}, + { 0x0547, {1, {0x0577}}}, + { 0x0548, {1, {0x0578}}}, + { 0x0549, {1, {0x0579}}}, + { 0x054a, {1, {0x057a}}}, + { 0x054b, {1, {0x057b}}}, + { 0x054c, {1, {0x057c}}}, + { 0x054d, {1, {0x057d}}}, + { 0x054e, {1, {0x057e}}}, + { 0x054f, {1, {0x057f}}}, + { 0x0550, {1, {0x0580}}}, + { 0x0551, {1, {0x0581}}}, + { 0x0552, {1, {0x0582}}}, + { 0x0553, {1, {0x0583}}}, + { 0x0554, {1, {0x0584}}}, + { 0x0555, {1, {0x0585}}}, + { 0x0556, {1, {0x0586}}}, + { 0x0587, {2, {0x0565, 0x0582}}}, + { 0x10a0, {1, {0x2d00}}}, + { 0x10a1, {1, {0x2d01}}}, + { 0x10a2, {1, {0x2d02}}}, + { 0x10a3, {1, {0x2d03}}}, + { 0x10a4, {1, {0x2d04}}}, + { 0x10a5, {1, {0x2d05}}}, + { 0x10a6, {1, {0x2d06}}}, + { 0x10a7, {1, {0x2d07}}}, + { 0x10a8, {1, {0x2d08}}}, + { 0x10a9, {1, {0x2d09}}}, + { 0x10aa, {1, {0x2d0a}}}, + { 0x10ab, {1, {0x2d0b}}}, + { 0x10ac, {1, {0x2d0c}}}, + { 0x10ad, {1, {0x2d0d}}}, + { 0x10ae, {1, {0x2d0e}}}, + { 0x10af, {1, {0x2d0f}}}, + { 0x10b0, {1, {0x2d10}}}, + { 0x10b1, {1, {0x2d11}}}, + { 0x10b2, {1, {0x2d12}}}, + { 0x10b3, {1, {0x2d13}}}, + { 0x10b4, {1, {0x2d14}}}, + { 0x10b5, {1, {0x2d15}}}, + { 0x10b6, {1, {0x2d16}}}, + { 0x10b7, {1, {0x2d17}}}, + { 0x10b8, {1, {0x2d18}}}, + { 0x10b9, {1, {0x2d19}}}, + { 0x10ba, {1, {0x2d1a}}}, + { 0x10bb, {1, {0x2d1b}}}, + { 0x10bc, {1, {0x2d1c}}}, + { 0x10bd, {1, {0x2d1d}}}, + { 0x10be, {1, {0x2d1e}}}, + { 0x10bf, {1, {0x2d1f}}}, + { 0x10c0, {1, {0x2d20}}}, + { 0x10c1, {1, {0x2d21}}}, + { 0x10c2, {1, {0x2d22}}}, + { 0x10c3, {1, {0x2d23}}}, + { 0x10c4, {1, 
{0x2d24}}}, + { 0x10c5, {1, {0x2d25}}}, + { 0x1e00, {1, {0x1e01}}}, + { 0x1e02, {1, {0x1e03}}}, + { 0x1e04, {1, {0x1e05}}}, + { 0x1e06, {1, {0x1e07}}}, + { 0x1e08, {1, {0x1e09}}}, + { 0x1e0a, {1, {0x1e0b}}}, + { 0x1e0c, {1, {0x1e0d}}}, + { 0x1e0e, {1, {0x1e0f}}}, + { 0x1e10, {1, {0x1e11}}}, + { 0x1e12, {1, {0x1e13}}}, + { 0x1e14, {1, {0x1e15}}}, + { 0x1e16, {1, {0x1e17}}}, + { 0x1e18, {1, {0x1e19}}}, + { 0x1e1a, {1, {0x1e1b}}}, + { 0x1e1c, {1, {0x1e1d}}}, + { 0x1e1e, {1, {0x1e1f}}}, + { 0x1e20, {1, {0x1e21}}}, + { 0x1e22, {1, {0x1e23}}}, + { 0x1e24, {1, {0x1e25}}}, + { 0x1e26, {1, {0x1e27}}}, + { 0x1e28, {1, {0x1e29}}}, + { 0x1e2a, {1, {0x1e2b}}}, + { 0x1e2c, {1, {0x1e2d}}}, + { 0x1e2e, {1, {0x1e2f}}}, + { 0x1e30, {1, {0x1e31}}}, + { 0x1e32, {1, {0x1e33}}}, + { 0x1e34, {1, {0x1e35}}}, + { 0x1e36, {1, {0x1e37}}}, + { 0x1e38, {1, {0x1e39}}}, + { 0x1e3a, {1, {0x1e3b}}}, + { 0x1e3c, {1, {0x1e3d}}}, + { 0x1e3e, {1, {0x1e3f}}}, + { 0x1e40, {1, {0x1e41}}}, + { 0x1e42, {1, {0x1e43}}}, + { 0x1e44, {1, {0x1e45}}}, + { 0x1e46, {1, {0x1e47}}}, + { 0x1e48, {1, {0x1e49}}}, + { 0x1e4a, {1, {0x1e4b}}}, + { 0x1e4c, {1, {0x1e4d}}}, + { 0x1e4e, {1, {0x1e4f}}}, + { 0x1e50, {1, {0x1e51}}}, + { 0x1e52, {1, {0x1e53}}}, + { 0x1e54, {1, {0x1e55}}}, + { 0x1e56, {1, {0x1e57}}}, + { 0x1e58, {1, {0x1e59}}}, + { 0x1e5a, {1, {0x1e5b}}}, + { 0x1e5c, {1, {0x1e5d}}}, + { 0x1e5e, {1, {0x1e5f}}}, + { 0x1e60, {1, {0x1e61}}}, + { 0x1e62, {1, {0x1e63}}}, + { 0x1e64, {1, {0x1e65}}}, + { 0x1e66, {1, {0x1e67}}}, + { 0x1e68, {1, {0x1e69}}}, + { 0x1e6a, {1, {0x1e6b}}}, + { 0x1e6c, {1, {0x1e6d}}}, + { 0x1e6e, {1, {0x1e6f}}}, + { 0x1e70, {1, {0x1e71}}}, + { 0x1e72, {1, {0x1e73}}}, + { 0x1e74, {1, {0x1e75}}}, + { 0x1e76, {1, {0x1e77}}}, + { 0x1e78, {1, {0x1e79}}}, + { 0x1e7a, {1, {0x1e7b}}}, + { 0x1e7c, {1, {0x1e7d}}}, + { 0x1e7e, {1, {0x1e7f}}}, + { 0x1e80, {1, {0x1e81}}}, + { 0x1e82, {1, {0x1e83}}}, + { 0x1e84, {1, {0x1e85}}}, + { 0x1e86, {1, {0x1e87}}}, + { 0x1e88, {1, {0x1e89}}}, + { 0x1e8a, {1, {0x1e8b}}}, 
+ { 0x1e8c, {1, {0x1e8d}}}, + { 0x1e8e, {1, {0x1e8f}}}, + { 0x1e90, {1, {0x1e91}}}, + { 0x1e92, {1, {0x1e93}}}, + { 0x1e94, {1, {0x1e95}}}, + { 0x1e96, {2, {0x0068, 0x0331}}}, + { 0x1e97, {2, {0x0074, 0x0308}}}, + { 0x1e98, {2, {0x0077, 0x030a}}}, + { 0x1e99, {2, {0x0079, 0x030a}}}, + { 0x1e9a, {2, {0x0061, 0x02be}}}, + { 0x1e9b, {1, {0x1e61}}}, + { 0x1ea0, {1, {0x1ea1}}}, + { 0x1ea2, {1, {0x1ea3}}}, + { 0x1ea4, {1, {0x1ea5}}}, + { 0x1ea6, {1, {0x1ea7}}}, + { 0x1ea8, {1, {0x1ea9}}}, + { 0x1eaa, {1, {0x1eab}}}, + { 0x1eac, {1, {0x1ead}}}, + { 0x1eae, {1, {0x1eaf}}}, + { 0x1eb0, {1, {0x1eb1}}}, + { 0x1eb2, {1, {0x1eb3}}}, + { 0x1eb4, {1, {0x1eb5}}}, + { 0x1eb6, {1, {0x1eb7}}}, + { 0x1eb8, {1, {0x1eb9}}}, + { 0x1eba, {1, {0x1ebb}}}, + { 0x1ebc, {1, {0x1ebd}}}, + { 0x1ebe, {1, {0x1ebf}}}, + { 0x1ec0, {1, {0x1ec1}}}, + { 0x1ec2, {1, {0x1ec3}}}, + { 0x1ec4, {1, {0x1ec5}}}, + { 0x1ec6, {1, {0x1ec7}}}, + { 0x1ec8, {1, {0x1ec9}}}, + { 0x1eca, {1, {0x1ecb}}}, + { 0x1ecc, {1, {0x1ecd}}}, + { 0x1ece, {1, {0x1ecf}}}, + { 0x1ed0, {1, {0x1ed1}}}, + { 0x1ed2, {1, {0x1ed3}}}, + { 0x1ed4, {1, {0x1ed5}}}, + { 0x1ed6, {1, {0x1ed7}}}, + { 0x1ed8, {1, {0x1ed9}}}, + { 0x1eda, {1, {0x1edb}}}, + { 0x1edc, {1, {0x1edd}}}, + { 0x1ede, {1, {0x1edf}}}, + { 0x1ee0, {1, {0x1ee1}}}, + { 0x1ee2, {1, {0x1ee3}}}, + { 0x1ee4, {1, {0x1ee5}}}, + { 0x1ee6, {1, {0x1ee7}}}, + { 0x1ee8, {1, {0x1ee9}}}, + { 0x1eea, {1, {0x1eeb}}}, + { 0x1eec, {1, {0x1eed}}}, + { 0x1eee, {1, {0x1eef}}}, + { 0x1ef0, {1, {0x1ef1}}}, + { 0x1ef2, {1, {0x1ef3}}}, + { 0x1ef4, {1, {0x1ef5}}}, + { 0x1ef6, {1, {0x1ef7}}}, + { 0x1ef8, {1, {0x1ef9}}}, + { 0x1f08, {1, {0x1f00}}}, + { 0x1f09, {1, {0x1f01}}}, + { 0x1f0a, {1, {0x1f02}}}, + { 0x1f0b, {1, {0x1f03}}}, + { 0x1f0c, {1, {0x1f04}}}, + { 0x1f0d, {1, {0x1f05}}}, + { 0x1f0e, {1, {0x1f06}}}, + { 0x1f0f, {1, {0x1f07}}}, + { 0x1f18, {1, {0x1f10}}}, + { 0x1f19, {1, {0x1f11}}}, + { 0x1f1a, {1, {0x1f12}}}, + { 0x1f1b, {1, {0x1f13}}}, + { 0x1f1c, {1, {0x1f14}}}, + { 0x1f1d, {1, {0x1f15}}}, 
+ { 0x1f28, {1, {0x1f20}}}, + { 0x1f29, {1, {0x1f21}}}, + { 0x1f2a, {1, {0x1f22}}}, + { 0x1f2b, {1, {0x1f23}}}, + { 0x1f2c, {1, {0x1f24}}}, + { 0x1f2d, {1, {0x1f25}}}, + { 0x1f2e, {1, {0x1f26}}}, + { 0x1f2f, {1, {0x1f27}}}, + { 0x1f38, {1, {0x1f30}}}, + { 0x1f39, {1, {0x1f31}}}, + { 0x1f3a, {1, {0x1f32}}}, + { 0x1f3b, {1, {0x1f33}}}, + { 0x1f3c, {1, {0x1f34}}}, + { 0x1f3d, {1, {0x1f35}}}, + { 0x1f3e, {1, {0x1f36}}}, + { 0x1f3f, {1, {0x1f37}}}, + { 0x1f48, {1, {0x1f40}}}, + { 0x1f49, {1, {0x1f41}}}, + { 0x1f4a, {1, {0x1f42}}}, + { 0x1f4b, {1, {0x1f43}}}, + { 0x1f4c, {1, {0x1f44}}}, + { 0x1f4d, {1, {0x1f45}}}, + { 0x1f50, {2, {0x03c5, 0x0313}}}, + { 0x1f52, {3, {0x03c5, 0x0313, 0x0300}}}, + { 0x1f54, {3, {0x03c5, 0x0313, 0x0301}}}, + { 0x1f56, {3, {0x03c5, 0x0313, 0x0342}}}, + { 0x1f59, {1, {0x1f51}}}, + { 0x1f5b, {1, {0x1f53}}}, + { 0x1f5d, {1, {0x1f55}}}, + { 0x1f5f, {1, {0x1f57}}}, + { 0x1f68, {1, {0x1f60}}}, + { 0x1f69, {1, {0x1f61}}}, + { 0x1f6a, {1, {0x1f62}}}, + { 0x1f6b, {1, {0x1f63}}}, + { 0x1f6c, {1, {0x1f64}}}, + { 0x1f6d, {1, {0x1f65}}}, + { 0x1f6e, {1, {0x1f66}}}, + { 0x1f6f, {1, {0x1f67}}}, + { 0x1f80, {2, {0x1f00, 0x03b9}}}, + { 0x1f81, {2, {0x1f01, 0x03b9}}}, + { 0x1f82, {2, {0x1f02, 0x03b9}}}, + { 0x1f83, {2, {0x1f03, 0x03b9}}}, + { 0x1f84, {2, {0x1f04, 0x03b9}}}, + { 0x1f85, {2, {0x1f05, 0x03b9}}}, + { 0x1f86, {2, {0x1f06, 0x03b9}}}, + { 0x1f87, {2, {0x1f07, 0x03b9}}}, + { 0x1f88, {2, {0x1f00, 0x03b9}}}, + { 0x1f89, {2, {0x1f01, 0x03b9}}}, + { 0x1f8a, {2, {0x1f02, 0x03b9}}}, + { 0x1f8b, {2, {0x1f03, 0x03b9}}}, + { 0x1f8c, {2, {0x1f04, 0x03b9}}}, + { 0x1f8d, {2, {0x1f05, 0x03b9}}}, + { 0x1f8e, {2, {0x1f06, 0x03b9}}}, + { 0x1f8f, {2, {0x1f07, 0x03b9}}}, + { 0x1f90, {2, {0x1f20, 0x03b9}}}, + { 0x1f91, {2, {0x1f21, 0x03b9}}}, + { 0x1f92, {2, {0x1f22, 0x03b9}}}, + { 0x1f93, {2, {0x1f23, 0x03b9}}}, + { 0x1f94, {2, {0x1f24, 0x03b9}}}, + { 0x1f95, {2, {0x1f25, 0x03b9}}}, + { 0x1f96, {2, {0x1f26, 0x03b9}}}, + { 0x1f97, {2, {0x1f27, 0x03b9}}}, + { 0x1f98, {2, 
{0x1f20, 0x03b9}}}, + { 0x1f99, {2, {0x1f21, 0x03b9}}}, + { 0x1f9a, {2, {0x1f22, 0x03b9}}}, + { 0x1f9b, {2, {0x1f23, 0x03b9}}}, + { 0x1f9c, {2, {0x1f24, 0x03b9}}}, + { 0x1f9d, {2, {0x1f25, 0x03b9}}}, + { 0x1f9e, {2, {0x1f26, 0x03b9}}}, + { 0x1f9f, {2, {0x1f27, 0x03b9}}}, + { 0x1fa0, {2, {0x1f60, 0x03b9}}}, + { 0x1fa1, {2, {0x1f61, 0x03b9}}}, + { 0x1fa2, {2, {0x1f62, 0x03b9}}}, + { 0x1fa3, {2, {0x1f63, 0x03b9}}}, + { 0x1fa4, {2, {0x1f64, 0x03b9}}}, + { 0x1fa5, {2, {0x1f65, 0x03b9}}}, + { 0x1fa6, {2, {0x1f66, 0x03b9}}}, + { 0x1fa7, {2, {0x1f67, 0x03b9}}}, + { 0x1fa8, {2, {0x1f60, 0x03b9}}}, + { 0x1fa9, {2, {0x1f61, 0x03b9}}}, + { 0x1faa, {2, {0x1f62, 0x03b9}}}, + { 0x1fab, {2, {0x1f63, 0x03b9}}}, + { 0x1fac, {2, {0x1f64, 0x03b9}}}, + { 0x1fad, {2, {0x1f65, 0x03b9}}}, + { 0x1fae, {2, {0x1f66, 0x03b9}}}, + { 0x1faf, {2, {0x1f67, 0x03b9}}}, + { 0x1fb2, {2, {0x1f70, 0x03b9}}}, + { 0x1fb3, {2, {0x03b1, 0x03b9}}}, + { 0x1fb4, {2, {0x03ac, 0x03b9}}}, + { 0x1fb6, {2, {0x03b1, 0x0342}}}, + { 0x1fb7, {3, {0x03b1, 0x0342, 0x03b9}}}, + { 0x1fb8, {1, {0x1fb0}}}, + { 0x1fb9, {1, {0x1fb1}}}, + { 0x1fba, {1, {0x1f70}}}, + { 0x1fbb, {1, {0x1f71}}}, + { 0x1fbc, {2, {0x03b1, 0x03b9}}}, + { 0x1fbe, {1, {0x03b9}}}, + { 0x1fc2, {2, {0x1f74, 0x03b9}}}, + { 0x1fc3, {2, {0x03b7, 0x03b9}}}, + { 0x1fc4, {2, {0x03ae, 0x03b9}}}, + { 0x1fc6, {2, {0x03b7, 0x0342}}}, + { 0x1fc7, {3, {0x03b7, 0x0342, 0x03b9}}}, + { 0x1fc8, {1, {0x1f72}}}, + { 0x1fc9, {1, {0x1f73}}}, + { 0x1fca, {1, {0x1f74}}}, + { 0x1fcb, {1, {0x1f75}}}, + { 0x1fcc, {2, {0x03b7, 0x03b9}}}, + { 0x1fd2, {3, {0x03b9, 0x0308, 0x0300}}}, + { 0x1fd3, {3, {0x03b9, 0x0308, 0x0301}}}, + { 0x1fd6, {2, {0x03b9, 0x0342}}}, + { 0x1fd7, {3, {0x03b9, 0x0308, 0x0342}}}, + { 0x1fd8, {1, {0x1fd0}}}, + { 0x1fd9, {1, {0x1fd1}}}, + { 0x1fda, {1, {0x1f76}}}, + { 0x1fdb, {1, {0x1f77}}}, + { 0x1fe2, {3, {0x03c5, 0x0308, 0x0300}}}, + { 0x1fe3, {3, {0x03c5, 0x0308, 0x0301}}}, + { 0x1fe4, {2, {0x03c1, 0x0313}}}, + { 0x1fe6, {2, {0x03c5, 0x0342}}}, + { 0x1fe7, 
{3, {0x03c5, 0x0308, 0x0342}}}, + { 0x1fe8, {1, {0x1fe0}}}, + { 0x1fe9, {1, {0x1fe1}}}, + { 0x1fea, {1, {0x1f7a}}}, + { 0x1feb, {1, {0x1f7b}}}, + { 0x1fec, {1, {0x1fe5}}}, + { 0x1ff2, {2, {0x1f7c, 0x03b9}}}, + { 0x1ff3, {2, {0x03c9, 0x03b9}}}, + { 0x1ff4, {2, {0x03ce, 0x03b9}}}, + { 0x1ff6, {2, {0x03c9, 0x0342}}}, + { 0x1ff7, {3, {0x03c9, 0x0342, 0x03b9}}}, + { 0x1ff8, {1, {0x1f78}}}, + { 0x1ff9, {1, {0x1f79}}}, + { 0x1ffa, {1, {0x1f7c}}}, + { 0x1ffb, {1, {0x1f7d}}}, + { 0x1ffc, {2, {0x03c9, 0x03b9}}}, + { 0x2126, {1, {0x03c9}}}, + { 0x212a, {1, {0x006b}}}, + { 0x212b, {1, {0x00e5}}}, + { 0x2160, {1, {0x2170}}}, + { 0x2161, {1, {0x2171}}}, + { 0x2162, {1, {0x2172}}}, + { 0x2163, {1, {0x2173}}}, + { 0x2164, {1, {0x2174}}}, + { 0x2165, {1, {0x2175}}}, + { 0x2166, {1, {0x2176}}}, + { 0x2167, {1, {0x2177}}}, + { 0x2168, {1, {0x2178}}}, + { 0x2169, {1, {0x2179}}}, + { 0x216a, {1, {0x217a}}}, + { 0x216b, {1, {0x217b}}}, + { 0x216c, {1, {0x217c}}}, + { 0x216d, {1, {0x217d}}}, + { 0x216e, {1, {0x217e}}}, + { 0x216f, {1, {0x217f}}}, + { 0x24b6, {1, {0x24d0}}}, + { 0x24b7, {1, {0x24d1}}}, + { 0x24b8, {1, {0x24d2}}}, + { 0x24b9, {1, {0x24d3}}}, + { 0x24ba, {1, {0x24d4}}}, + { 0x24bb, {1, {0x24d5}}}, + { 0x24bc, {1, {0x24d6}}}, + { 0x24bd, {1, {0x24d7}}}, + { 0x24be, {1, {0x24d8}}}, + { 0x24bf, {1, {0x24d9}}}, + { 0x24c0, {1, {0x24da}}}, + { 0x24c1, {1, {0x24db}}}, + { 0x24c2, {1, {0x24dc}}}, + { 0x24c3, {1, {0x24dd}}}, + { 0x24c4, {1, {0x24de}}}, + { 0x24c5, {1, {0x24df}}}, + { 0x24c6, {1, {0x24e0}}}, + { 0x24c7, {1, {0x24e1}}}, + { 0x24c8, {1, {0x24e2}}}, + { 0x24c9, {1, {0x24e3}}}, + { 0x24ca, {1, {0x24e4}}}, + { 0x24cb, {1, {0x24e5}}}, + { 0x24cc, {1, {0x24e6}}}, + { 0x24cd, {1, {0x24e7}}}, + { 0x24ce, {1, {0x24e8}}}, + { 0x24cf, {1, {0x24e9}}}, + { 0x2c00, {1, {0x2c30}}}, + { 0x2c01, {1, {0x2c31}}}, + { 0x2c02, {1, {0x2c32}}}, + { 0x2c03, {1, {0x2c33}}}, + { 0x2c04, {1, {0x2c34}}}, + { 0x2c05, {1, {0x2c35}}}, + { 0x2c06, {1, {0x2c36}}}, + { 0x2c07, {1, {0x2c37}}}, + { 
0x2c08, {1, {0x2c38}}}, + { 0x2c09, {1, {0x2c39}}}, + { 0x2c0a, {1, {0x2c3a}}}, + { 0x2c0b, {1, {0x2c3b}}}, + { 0x2c0c, {1, {0x2c3c}}}, + { 0x2c0d, {1, {0x2c3d}}}, + { 0x2c0e, {1, {0x2c3e}}}, + { 0x2c0f, {1, {0x2c3f}}}, + { 0x2c10, {1, {0x2c40}}}, + { 0x2c11, {1, {0x2c41}}}, + { 0x2c12, {1, {0x2c42}}}, + { 0x2c13, {1, {0x2c43}}}, + { 0x2c14, {1, {0x2c44}}}, + { 0x2c15, {1, {0x2c45}}}, + { 0x2c16, {1, {0x2c46}}}, + { 0x2c17, {1, {0x2c47}}}, + { 0x2c18, {1, {0x2c48}}}, + { 0x2c19, {1, {0x2c49}}}, + { 0x2c1a, {1, {0x2c4a}}}, + { 0x2c1b, {1, {0x2c4b}}}, + { 0x2c1c, {1, {0x2c4c}}}, + { 0x2c1d, {1, {0x2c4d}}}, + { 0x2c1e, {1, {0x2c4e}}}, + { 0x2c1f, {1, {0x2c4f}}}, + { 0x2c20, {1, {0x2c50}}}, + { 0x2c21, {1, {0x2c51}}}, + { 0x2c22, {1, {0x2c52}}}, + { 0x2c23, {1, {0x2c53}}}, + { 0x2c24, {1, {0x2c54}}}, + { 0x2c25, {1, {0x2c55}}}, + { 0x2c26, {1, {0x2c56}}}, + { 0x2c27, {1, {0x2c57}}}, + { 0x2c28, {1, {0x2c58}}}, + { 0x2c29, {1, {0x2c59}}}, + { 0x2c2a, {1, {0x2c5a}}}, + { 0x2c2b, {1, {0x2c5b}}}, + { 0x2c2c, {1, {0x2c5c}}}, + { 0x2c2d, {1, {0x2c5d}}}, + { 0x2c2e, {1, {0x2c5e}}}, + { 0x2c80, {1, {0x2c81}}}, + { 0x2c82, {1, {0x2c83}}}, + { 0x2c84, {1, {0x2c85}}}, + { 0x2c86, {1, {0x2c87}}}, + { 0x2c88, {1, {0x2c89}}}, + { 0x2c8a, {1, {0x2c8b}}}, + { 0x2c8c, {1, {0x2c8d}}}, + { 0x2c8e, {1, {0x2c8f}}}, + { 0x2c90, {1, {0x2c91}}}, + { 0x2c92, {1, {0x2c93}}}, + { 0x2c94, {1, {0x2c95}}}, + { 0x2c96, {1, {0x2c97}}}, + { 0x2c98, {1, {0x2c99}}}, + { 0x2c9a, {1, {0x2c9b}}}, + { 0x2c9c, {1, {0x2c9d}}}, + { 0x2c9e, {1, {0x2c9f}}}, + { 0x2ca0, {1, {0x2ca1}}}, + { 0x2ca2, {1, {0x2ca3}}}, + { 0x2ca4, {1, {0x2ca5}}}, + { 0x2ca6, {1, {0x2ca7}}}, + { 0x2ca8, {1, {0x2ca9}}}, + { 0x2caa, {1, {0x2cab}}}, + { 0x2cac, {1, {0x2cad}}}, + { 0x2cae, {1, {0x2caf}}}, + { 0x2cb0, {1, {0x2cb1}}}, + { 0x2cb2, {1, {0x2cb3}}}, + { 0x2cb4, {1, {0x2cb5}}}, + { 0x2cb6, {1, {0x2cb7}}}, + { 0x2cb8, {1, {0x2cb9}}}, + { 0x2cba, {1, {0x2cbb}}}, + { 0x2cbc, {1, {0x2cbd}}}, + { 0x2cbe, {1, {0x2cbf}}}, + { 0x2cc0, {1, 
{0x2cc1}}}, + { 0x2cc2, {1, {0x2cc3}}}, + { 0x2cc4, {1, {0x2cc5}}}, + { 0x2cc6, {1, {0x2cc7}}}, + { 0x2cc8, {1, {0x2cc9}}}, + { 0x2cca, {1, {0x2ccb}}}, + { 0x2ccc, {1, {0x2ccd}}}, + { 0x2cce, {1, {0x2ccf}}}, + { 0x2cd0, {1, {0x2cd1}}}, + { 0x2cd2, {1, {0x2cd3}}}, + { 0x2cd4, {1, {0x2cd5}}}, + { 0x2cd6, {1, {0x2cd7}}}, + { 0x2cd8, {1, {0x2cd9}}}, + { 0x2cda, {1, {0x2cdb}}}, + { 0x2cdc, {1, {0x2cdd}}}, + { 0x2cde, {1, {0x2cdf}}}, + { 0x2ce0, {1, {0x2ce1}}}, + { 0x2ce2, {1, {0x2ce3}}}, + { 0xfb00, {2, {0x0066, 0x0066}}}, + { 0xfb01, {2, {0x0066, 0x0069}}}, + { 0xfb02, {2, {0x0066, 0x006c}}}, + { 0xfb03, {3, {0x0066, 0x0066, 0x0069}}}, + { 0xfb04, {3, {0x0066, 0x0066, 0x006c}}}, + { 0xfb05, {2, {0x0073, 0x0074}}}, + { 0xfb06, {2, {0x0073, 0x0074}}}, + { 0xfb13, {2, {0x0574, 0x0576}}}, + { 0xfb14, {2, {0x0574, 0x0565}}}, + { 0xfb15, {2, {0x0574, 0x056b}}}, + { 0xfb16, {2, {0x057e, 0x0576}}}, + { 0xfb17, {2, {0x0574, 0x056d}}}, + { 0xff21, {1, {0xff41}}}, + { 0xff22, {1, {0xff42}}}, + { 0xff23, {1, {0xff43}}}, + { 0xff24, {1, {0xff44}}}, + { 0xff25, {1, {0xff45}}}, + { 0xff26, {1, {0xff46}}}, + { 0xff27, {1, {0xff47}}}, + { 0xff28, {1, {0xff48}}}, + { 0xff29, {1, {0xff49}}}, + { 0xff2a, {1, {0xff4a}}}, + { 0xff2b, {1, {0xff4b}}}, + { 0xff2c, {1, {0xff4c}}}, + { 0xff2d, {1, {0xff4d}}}, + { 0xff2e, {1, {0xff4e}}}, + { 0xff2f, {1, {0xff4f}}}, + { 0xff30, {1, {0xff50}}}, + { 0xff31, {1, {0xff51}}}, + { 0xff32, {1, {0xff52}}}, + { 0xff33, {1, {0xff53}}}, + { 0xff34, {1, {0xff54}}}, + { 0xff35, {1, {0xff55}}}, + { 0xff36, {1, {0xff56}}}, + { 0xff37, {1, {0xff57}}}, + { 0xff38, {1, {0xff58}}}, + { 0xff39, {1, {0xff59}}}, + { 0xff3a, {1, {0xff5a}}}, + { 0x10400, {1, {0x10428}}}, + { 0x10401, {1, {0x10429}}}, + { 0x10402, {1, {0x1042a}}}, + { 0x10403, {1, {0x1042b}}}, + { 0x10404, {1, {0x1042c}}}, + { 0x10405, {1, {0x1042d}}}, + { 0x10406, {1, {0x1042e}}}, + { 0x10407, {1, {0x1042f}}}, + { 0x10408, {1, {0x10430}}}, + { 0x10409, {1, {0x10431}}}, + { 0x1040a, {1, {0x10432}}}, + { 
0x1040b, {1, {0x10433}}}, + { 0x1040c, {1, {0x10434}}}, + { 0x1040d, {1, {0x10435}}}, + { 0x1040e, {1, {0x10436}}}, + { 0x1040f, {1, {0x10437}}}, + { 0x10410, {1, {0x10438}}}, + { 0x10411, {1, {0x10439}}}, + { 0x10412, {1, {0x1043a}}}, + { 0x10413, {1, {0x1043b}}}, + { 0x10414, {1, {0x1043c}}}, + { 0x10415, {1, {0x1043d}}}, + { 0x10416, {1, {0x1043e}}}, + { 0x10417, {1, {0x1043f}}}, + { 0x10418, {1, {0x10440}}}, + { 0x10419, {1, {0x10441}}}, + { 0x1041a, {1, {0x10442}}}, + { 0x1041b, {1, {0x10443}}}, + { 0x1041c, {1, {0x10444}}}, + { 0x1041d, {1, {0x10445}}}, + { 0x1041e, {1, {0x10446}}}, + { 0x1041f, {1, {0x10447}}}, + { 0x10420, {1, {0x10448}}}, + { 0x10421, {1, {0x10449}}}, + { 0x10422, {1, {0x1044a}}}, + { 0x10423, {1, {0x1044b}}}, + { 0x10424, {1, {0x1044c}}}, + { 0x10425, {1, {0x1044d}}}, + { 0x10426, {1, {0x1044e}}}, + { 0x10427, {1, {0x1044f}}} +}; + +static const CaseFold_11_Type CaseFold_Locale[] = { + { 0x0049, {1, {0x0069}}}, + { 0x0130, {2, {0x0069, 0x0307}}} +}; + +static const CaseUnfold_11_Type CaseUnfold_11[] = { + { 0x0061, {1, {0x0041 }}}, + { 0x0062, {1, {0x0042 }}}, + { 0x0063, {1, {0x0043 }}}, + { 0x0064, {1, {0x0044 }}}, + { 0x0065, {1, {0x0045 }}}, + { 0x0066, {1, {0x0046 }}}, + { 0x0067, {1, {0x0047 }}}, + { 0x0068, {1, {0x0048 }}}, + { 0x006a, {1, {0x004a }}}, + { 0x006b, {2, {0x212a, 0x004b }}}, + { 0x006c, {1, {0x004c }}}, + { 0x006d, {1, {0x004d }}}, + { 0x006e, {1, {0x004e }}}, + { 0x006f, {1, {0x004f }}}, + { 0x0070, {1, {0x0050 }}}, + { 0x0071, {1, {0x0051 }}}, + { 0x0072, {1, {0x0052 }}}, + { 0x0073, {2, {0x0053, 0x017f }}}, + { 0x0074, {1, {0x0054 }}}, + { 0x0075, {1, {0x0055 }}}, + { 0x0076, {1, {0x0056 }}}, + { 0x0077, {1, {0x0057 }}}, + { 0x0078, {1, {0x0058 }}}, + { 0x0079, {1, {0x0059 }}}, + { 0x007a, {1, {0x005a }}}, + { 0x00e0, {1, {0x00c0 }}}, + { 0x00e1, {1, {0x00c1 }}}, + { 0x00e2, {1, {0x00c2 }}}, + { 0x00e3, {1, {0x00c3 }}}, + { 0x00e4, {1, {0x00c4 }}}, + { 0x00e5, {2, {0x212b, 0x00c5 }}}, + { 0x00e6, {1, {0x00c6 }}}, + 
{ 0x00e7, {1, {0x00c7 }}}, + { 0x00e8, {1, {0x00c8 }}}, + { 0x00e9, {1, {0x00c9 }}}, + { 0x00ea, {1, {0x00ca }}}, + { 0x00eb, {1, {0x00cb }}}, + { 0x00ec, {1, {0x00cc }}}, + { 0x00ed, {1, {0x00cd }}}, + { 0x00ee, {1, {0x00ce }}}, + { 0x00ef, {1, {0x00cf }}}, + { 0x00f0, {1, {0x00d0 }}}, + { 0x00f1, {1, {0x00d1 }}}, + { 0x00f2, {1, {0x00d2 }}}, + { 0x00f3, {1, {0x00d3 }}}, + { 0x00f4, {1, {0x00d4 }}}, + { 0x00f5, {1, {0x00d5 }}}, + { 0x00f6, {1, {0x00d6 }}}, + { 0x00f8, {1, {0x00d8 }}}, + { 0x00f9, {1, {0x00d9 }}}, + { 0x00fa, {1, {0x00da }}}, + { 0x00fb, {1, {0x00db }}}, + { 0x00fc, {1, {0x00dc }}}, + { 0x00fd, {1, {0x00dd }}}, + { 0x00fe, {1, {0x00de }}}, + { 0x00ff, {1, {0x0178 }}}, + { 0x0101, {1, {0x0100 }}}, + { 0x0103, {1, {0x0102 }}}, + { 0x0105, {1, {0x0104 }}}, + { 0x0107, {1, {0x0106 }}}, + { 0x0109, {1, {0x0108 }}}, + { 0x010b, {1, {0x010a }}}, + { 0x010d, {1, {0x010c }}}, + { 0x010f, {1, {0x010e }}}, + { 0x0111, {1, {0x0110 }}}, + { 0x0113, {1, {0x0112 }}}, + { 0x0115, {1, {0x0114 }}}, + { 0x0117, {1, {0x0116 }}}, + { 0x0119, {1, {0x0118 }}}, + { 0x011b, {1, {0x011a }}}, + { 0x011d, {1, {0x011c }}}, + { 0x011f, {1, {0x011e }}}, + { 0x0121, {1, {0x0120 }}}, + { 0x0123, {1, {0x0122 }}}, + { 0x0125, {1, {0x0124 }}}, + { 0x0127, {1, {0x0126 }}}, + { 0x0129, {1, {0x0128 }}}, + { 0x012b, {1, {0x012a }}}, + { 0x012d, {1, {0x012c }}}, + { 0x012f, {1, {0x012e }}}, + { 0x0133, {1, {0x0132 }}}, + { 0x0135, {1, {0x0134 }}}, + { 0x0137, {1, {0x0136 }}}, + { 0x013a, {1, {0x0139 }}}, + { 0x013c, {1, {0x013b }}}, + { 0x013e, {1, {0x013d }}}, + { 0x0140, {1, {0x013f }}}, + { 0x0142, {1, {0x0141 }}}, + { 0x0144, {1, {0x0143 }}}, + { 0x0146, {1, {0x0145 }}}, + { 0x0148, {1, {0x0147 }}}, + { 0x014b, {1, {0x014a }}}, + { 0x014d, {1, {0x014c }}}, + { 0x014f, {1, {0x014e }}}, + { 0x0151, {1, {0x0150 }}}, + { 0x0153, {1, {0x0152 }}}, + { 0x0155, {1, {0x0154 }}}, + { 0x0157, {1, {0x0156 }}}, + { 0x0159, {1, {0x0158 }}}, + { 0x015b, {1, {0x015a }}}, + { 0x015d, {1, {0x015c }}}, 
+ { 0x015f, {1, {0x015e }}}, + { 0x0161, {1, {0x0160 }}}, + { 0x0163, {1, {0x0162 }}}, + { 0x0165, {1, {0x0164 }}}, + { 0x0167, {1, {0x0166 }}}, + { 0x0169, {1, {0x0168 }}}, + { 0x016b, {1, {0x016a }}}, + { 0x016d, {1, {0x016c }}}, + { 0x016f, {1, {0x016e }}}, + { 0x0171, {1, {0x0170 }}}, + { 0x0173, {1, {0x0172 }}}, + { 0x0175, {1, {0x0174 }}}, + { 0x0177, {1, {0x0176 }}}, + { 0x017a, {1, {0x0179 }}}, + { 0x017c, {1, {0x017b }}}, + { 0x017e, {1, {0x017d }}}, + { 0x0183, {1, {0x0182 }}}, + { 0x0185, {1, {0x0184 }}}, + { 0x0188, {1, {0x0187 }}}, + { 0x018c, {1, {0x018b }}}, + { 0x0192, {1, {0x0191 }}}, + { 0x0195, {1, {0x01f6 }}}, + { 0x0199, {1, {0x0198 }}}, + { 0x019a, {1, {0x023d }}}, + { 0x019e, {1, {0x0220 }}}, + { 0x01a1, {1, {0x01a0 }}}, + { 0x01a3, {1, {0x01a2 }}}, + { 0x01a5, {1, {0x01a4 }}}, + { 0x01a8, {1, {0x01a7 }}}, + { 0x01ad, {1, {0x01ac }}}, + { 0x01b0, {1, {0x01af }}}, + { 0x01b4, {1, {0x01b3 }}}, + { 0x01b6, {1, {0x01b5 }}}, + { 0x01b9, {1, {0x01b8 }}}, + { 0x01bd, {1, {0x01bc }}}, + { 0x01bf, {1, {0x01f7 }}}, + { 0x01c6, {2, {0x01c4, 0x01c5 }}}, + { 0x01c9, {2, {0x01c7, 0x01c8 }}}, + { 0x01cc, {2, {0x01ca, 0x01cb }}}, + { 0x01ce, {1, {0x01cd }}}, + { 0x01d0, {1, {0x01cf }}}, + { 0x01d2, {1, {0x01d1 }}}, + { 0x01d4, {1, {0x01d3 }}}, + { 0x01d6, {1, {0x01d5 }}}, + { 0x01d8, {1, {0x01d7 }}}, + { 0x01da, {1, {0x01d9 }}}, + { 0x01dc, {1, {0x01db }}}, + { 0x01dd, {1, {0x018e }}}, + { 0x01df, {1, {0x01de }}}, + { 0x01e1, {1, {0x01e0 }}}, + { 0x01e3, {1, {0x01e2 }}}, + { 0x01e5, {1, {0x01e4 }}}, + { 0x01e7, {1, {0x01e6 }}}, + { 0x01e9, {1, {0x01e8 }}}, + { 0x01eb, {1, {0x01ea }}}, + { 0x01ed, {1, {0x01ec }}}, + { 0x01ef, {1, {0x01ee }}}, + { 0x01f3, {2, {0x01f1, 0x01f2 }}}, + { 0x01f5, {1, {0x01f4 }}}, + { 0x01f9, {1, {0x01f8 }}}, + { 0x01fb, {1, {0x01fa }}}, + { 0x01fd, {1, {0x01fc }}}, + { 0x01ff, {1, {0x01fe }}}, + { 0x0201, {1, {0x0200 }}}, + { 0x0203, {1, {0x0202 }}}, + { 0x0205, {1, {0x0204 }}}, + { 0x0207, {1, {0x0206 }}}, + { 0x0209, {1, {0x0208 
}}}, + { 0x020b, {1, {0x020a }}}, + { 0x020d, {1, {0x020c }}}, + { 0x020f, {1, {0x020e }}}, + { 0x0211, {1, {0x0210 }}}, + { 0x0213, {1, {0x0212 }}}, + { 0x0215, {1, {0x0214 }}}, + { 0x0217, {1, {0x0216 }}}, + { 0x0219, {1, {0x0218 }}}, + { 0x021b, {1, {0x021a }}}, + { 0x021d, {1, {0x021c }}}, + { 0x021f, {1, {0x021e }}}, + { 0x0223, {1, {0x0222 }}}, + { 0x0225, {1, {0x0224 }}}, + { 0x0227, {1, {0x0226 }}}, + { 0x0229, {1, {0x0228 }}}, + { 0x022b, {1, {0x022a }}}, + { 0x022d, {1, {0x022c }}}, + { 0x022f, {1, {0x022e }}}, + { 0x0231, {1, {0x0230 }}}, + { 0x0233, {1, {0x0232 }}}, + { 0x023c, {1, {0x023b }}}, + { 0x0253, {1, {0x0181 }}}, + { 0x0254, {1, {0x0186 }}}, + { 0x0256, {1, {0x0189 }}}, + { 0x0257, {1, {0x018a }}}, + { 0x0259, {1, {0x018f }}}, + { 0x025b, {1, {0x0190 }}}, + { 0x0260, {1, {0x0193 }}}, + { 0x0263, {1, {0x0194 }}}, + { 0x0268, {1, {0x0197 }}}, + { 0x0269, {1, {0x0196 }}}, + { 0x026f, {1, {0x019c }}}, + { 0x0272, {1, {0x019d }}}, + { 0x0275, {1, {0x019f }}}, + { 0x0280, {1, {0x01a6 }}}, + { 0x0283, {1, {0x01a9 }}}, + { 0x0288, {1, {0x01ae }}}, + { 0x028a, {1, {0x01b1 }}}, + { 0x028b, {1, {0x01b2 }}}, + { 0x0292, {1, {0x01b7 }}}, + { 0x0294, {1, {0x0241 }}}, + { 0x03ac, {1, {0x0386 }}}, + { 0x03ad, {1, {0x0388 }}}, + { 0x03ae, {1, {0x0389 }}}, + { 0x03af, {1, {0x038a }}}, + { 0x03b1, {1, {0x0391 }}}, + { 0x03b2, {2, {0x0392, 0x03d0 }}}, + { 0x03b3, {1, {0x0393 }}}, + { 0x03b4, {1, {0x0394 }}}, + { 0x03b5, {2, {0x03f5, 0x0395 }}}, + { 0x03b6, {1, {0x0396 }}}, + { 0x03b7, {1, {0x0397 }}}, + { 0x03b8, {3, {0x03f4, 0x0398, 0x03d1 }}}, + { 0x03b9, {3, {0x1fbe, 0x0399, 0x0345 }}}, + { 0x03ba, {2, {0x03f0, 0x039a }}}, + { 0x03bb, {1, {0x039b }}}, + { 0x03bc, {2, {0x00b5, 0x039c }}}, + { 0x03bd, {1, {0x039d }}}, + { 0x03be, {1, {0x039e }}}, + { 0x03bf, {1, {0x039f }}}, + { 0x03c0, {2, {0x03a0, 0x03d6 }}}, + { 0x03c1, {2, {0x03f1, 0x03a1 }}}, + { 0x03c3, {2, {0x03a3, 0x03c2 }}}, + { 0x03c4, {1, {0x03a4 }}}, + { 0x03c5, {1, {0x03a5 }}}, + { 0x03c6, {2, 
{0x03a6, 0x03d5 }}}, + { 0x03c7, {1, {0x03a7 }}}, + { 0x03c8, {1, {0x03a8 }}}, + { 0x03c9, {2, {0x03a9, 0x2126 }}}, + { 0x03ca, {1, {0x03aa }}}, + { 0x03cb, {1, {0x03ab }}}, + { 0x03cc, {1, {0x038c }}}, + { 0x03cd, {1, {0x038e }}}, + { 0x03ce, {1, {0x038f }}}, + { 0x03d9, {1, {0x03d8 }}}, + { 0x03db, {1, {0x03da }}}, + { 0x03dd, {1, {0x03dc }}}, + { 0x03df, {1, {0x03de }}}, + { 0x03e1, {1, {0x03e0 }}}, + { 0x03e3, {1, {0x03e2 }}}, + { 0x03e5, {1, {0x03e4 }}}, + { 0x03e7, {1, {0x03e6 }}}, + { 0x03e9, {1, {0x03e8 }}}, + { 0x03eb, {1, {0x03ea }}}, + { 0x03ed, {1, {0x03ec }}}, + { 0x03ef, {1, {0x03ee }}}, + { 0x03f2, {1, {0x03f9 }}}, + { 0x03f8, {1, {0x03f7 }}}, + { 0x03fb, {1, {0x03fa }}}, + { 0x0430, {1, {0x0410 }}}, + { 0x0431, {1, {0x0411 }}}, + { 0x0432, {1, {0x0412 }}}, + { 0x0433, {1, {0x0413 }}}, + { 0x0434, {1, {0x0414 }}}, + { 0x0435, {1, {0x0415 }}}, + { 0x0436, {1, {0x0416 }}}, + { 0x0437, {1, {0x0417 }}}, + { 0x0438, {1, {0x0418 }}}, + { 0x0439, {1, {0x0419 }}}, + { 0x043a, {1, {0x041a }}}, + { 0x043b, {1, {0x041b }}}, + { 0x043c, {1, {0x041c }}}, + { 0x043d, {1, {0x041d }}}, + { 0x043e, {1, {0x041e }}}, + { 0x043f, {1, {0x041f }}}, + { 0x0440, {1, {0x0420 }}}, + { 0x0441, {1, {0x0421 }}}, + { 0x0442, {1, {0x0422 }}}, + { 0x0443, {1, {0x0423 }}}, + { 0x0444, {1, {0x0424 }}}, + { 0x0445, {1, {0x0425 }}}, + { 0x0446, {1, {0x0426 }}}, + { 0x0447, {1, {0x0427 }}}, + { 0x0448, {1, {0x0428 }}}, + { 0x0449, {1, {0x0429 }}}, + { 0x044a, {1, {0x042a }}}, + { 0x044b, {1, {0x042b }}}, + { 0x044c, {1, {0x042c }}}, + { 0x044d, {1, {0x042d }}}, + { 0x044e, {1, {0x042e }}}, + { 0x044f, {1, {0x042f }}}, + { 0x0450, {1, {0x0400 }}}, + { 0x0451, {1, {0x0401 }}}, + { 0x0452, {1, {0x0402 }}}, + { 0x0453, {1, {0x0403 }}}, + { 0x0454, {1, {0x0404 }}}, + { 0x0455, {1, {0x0405 }}}, + { 0x0456, {1, {0x0406 }}}, + { 0x0457, {1, {0x0407 }}}, + { 0x0458, {1, {0x0408 }}}, + { 0x0459, {1, {0x0409 }}}, + { 0x045a, {1, {0x040a }}}, + { 0x045b, {1, {0x040b }}}, + { 0x045c, {1, {0x040c 
}}}, + { 0x045d, {1, {0x040d }}}, + { 0x045e, {1, {0x040e }}}, + { 0x045f, {1, {0x040f }}}, + { 0x0461, {1, {0x0460 }}}, + { 0x0463, {1, {0x0462 }}}, + { 0x0465, {1, {0x0464 }}}, + { 0x0467, {1, {0x0466 }}}, + { 0x0469, {1, {0x0468 }}}, + { 0x046b, {1, {0x046a }}}, + { 0x046d, {1, {0x046c }}}, + { 0x046f, {1, {0x046e }}}, + { 0x0471, {1, {0x0470 }}}, + { 0x0473, {1, {0x0472 }}}, + { 0x0475, {1, {0x0474 }}}, + { 0x0477, {1, {0x0476 }}}, + { 0x0479, {1, {0x0478 }}}, + { 0x047b, {1, {0x047a }}}, + { 0x047d, {1, {0x047c }}}, + { 0x047f, {1, {0x047e }}}, + { 0x0481, {1, {0x0480 }}}, + { 0x048b, {1, {0x048a }}}, + { 0x048d, {1, {0x048c }}}, + { 0x048f, {1, {0x048e }}}, + { 0x0491, {1, {0x0490 }}}, + { 0x0493, {1, {0x0492 }}}, + { 0x0495, {1, {0x0494 }}}, + { 0x0497, {1, {0x0496 }}}, + { 0x0499, {1, {0x0498 }}}, + { 0x049b, {1, {0x049a }}}, + { 0x049d, {1, {0x049c }}}, + { 0x049f, {1, {0x049e }}}, + { 0x04a1, {1, {0x04a0 }}}, + { 0x04a3, {1, {0x04a2 }}}, + { 0x04a5, {1, {0x04a4 }}}, + { 0x04a7, {1, {0x04a6 }}}, + { 0x04a9, {1, {0x04a8 }}}, + { 0x04ab, {1, {0x04aa }}}, + { 0x04ad, {1, {0x04ac }}}, + { 0x04af, {1, {0x04ae }}}, + { 0x04b1, {1, {0x04b0 }}}, + { 0x04b3, {1, {0x04b2 }}}, + { 0x04b5, {1, {0x04b4 }}}, + { 0x04b7, {1, {0x04b6 }}}, + { 0x04b9, {1, {0x04b8 }}}, + { 0x04bb, {1, {0x04ba }}}, + { 0x04bd, {1, {0x04bc }}}, + { 0x04bf, {1, {0x04be }}}, + { 0x04c2, {1, {0x04c1 }}}, + { 0x04c4, {1, {0x04c3 }}}, + { 0x04c6, {1, {0x04c5 }}}, + { 0x04c8, {1, {0x04c7 }}}, + { 0x04ca, {1, {0x04c9 }}}, + { 0x04cc, {1, {0x04cb }}}, + { 0x04ce, {1, {0x04cd }}}, + { 0x04d1, {1, {0x04d0 }}}, + { 0x04d3, {1, {0x04d2 }}}, + { 0x04d5, {1, {0x04d4 }}}, + { 0x04d7, {1, {0x04d6 }}}, + { 0x04d9, {1, {0x04d8 }}}, + { 0x04db, {1, {0x04da }}}, + { 0x04dd, {1, {0x04dc }}}, + { 0x04df, {1, {0x04de }}}, + { 0x04e1, {1, {0x04e0 }}}, + { 0x04e3, {1, {0x04e2 }}}, + { 0x04e5, {1, {0x04e4 }}}, + { 0x04e7, {1, {0x04e6 }}}, + { 0x04e9, {1, {0x04e8 }}}, + { 0x04eb, {1, {0x04ea }}}, + { 0x04ed, {1, 
{0x04ec }}}, + { 0x04ef, {1, {0x04ee }}}, + { 0x04f1, {1, {0x04f0 }}}, + { 0x04f3, {1, {0x04f2 }}}, + { 0x04f5, {1, {0x04f4 }}}, + { 0x04f7, {1, {0x04f6 }}}, + { 0x04f9, {1, {0x04f8 }}}, + { 0x0501, {1, {0x0500 }}}, + { 0x0503, {1, {0x0502 }}}, + { 0x0505, {1, {0x0504 }}}, + { 0x0507, {1, {0x0506 }}}, + { 0x0509, {1, {0x0508 }}}, + { 0x050b, {1, {0x050a }}}, + { 0x050d, {1, {0x050c }}}, + { 0x050f, {1, {0x050e }}}, + { 0x0561, {1, {0x0531 }}}, + { 0x0562, {1, {0x0532 }}}, + { 0x0563, {1, {0x0533 }}}, + { 0x0564, {1, {0x0534 }}}, + { 0x0565, {1, {0x0535 }}}, + { 0x0566, {1, {0x0536 }}}, + { 0x0567, {1, {0x0537 }}}, + { 0x0568, {1, {0x0538 }}}, + { 0x0569, {1, {0x0539 }}}, + { 0x056a, {1, {0x053a }}}, + { 0x056b, {1, {0x053b }}}, + { 0x056c, {1, {0x053c }}}, + { 0x056d, {1, {0x053d }}}, + { 0x056e, {1, {0x053e }}}, + { 0x056f, {1, {0x053f }}}, + { 0x0570, {1, {0x0540 }}}, + { 0x0571, {1, {0x0541 }}}, + { 0x0572, {1, {0x0542 }}}, + { 0x0573, {1, {0x0543 }}}, + { 0x0574, {1, {0x0544 }}}, + { 0x0575, {1, {0x0545 }}}, + { 0x0576, {1, {0x0546 }}}, + { 0x0577, {1, {0x0547 }}}, + { 0x0578, {1, {0x0548 }}}, + { 0x0579, {1, {0x0549 }}}, + { 0x057a, {1, {0x054a }}}, + { 0x057b, {1, {0x054b }}}, + { 0x057c, {1, {0x054c }}}, + { 0x057d, {1, {0x054d }}}, + { 0x057e, {1, {0x054e }}}, + { 0x057f, {1, {0x054f }}}, + { 0x0580, {1, {0x0550 }}}, + { 0x0581, {1, {0x0551 }}}, + { 0x0582, {1, {0x0552 }}}, + { 0x0583, {1, {0x0553 }}}, + { 0x0584, {1, {0x0554 }}}, + { 0x0585, {1, {0x0555 }}}, + { 0x0586, {1, {0x0556 }}}, + { 0x1e01, {1, {0x1e00 }}}, + { 0x1e03, {1, {0x1e02 }}}, + { 0x1e05, {1, {0x1e04 }}}, + { 0x1e07, {1, {0x1e06 }}}, + { 0x1e09, {1, {0x1e08 }}}, + { 0x1e0b, {1, {0x1e0a }}}, + { 0x1e0d, {1, {0x1e0c }}}, + { 0x1e0f, {1, {0x1e0e }}}, + { 0x1e11, {1, {0x1e10 }}}, + { 0x1e13, {1, {0x1e12 }}}, + { 0x1e15, {1, {0x1e14 }}}, + { 0x1e17, {1, {0x1e16 }}}, + { 0x1e19, {1, {0x1e18 }}}, + { 0x1e1b, {1, {0x1e1a }}}, + { 0x1e1d, {1, {0x1e1c }}}, + { 0x1e1f, {1, {0x1e1e }}}, + { 0x1e21, 
{1, {0x1e20 }}}, + { 0x1e23, {1, {0x1e22 }}}, + { 0x1e25, {1, {0x1e24 }}}, + { 0x1e27, {1, {0x1e26 }}}, + { 0x1e29, {1, {0x1e28 }}}, + { 0x1e2b, {1, {0x1e2a }}}, + { 0x1e2d, {1, {0x1e2c }}}, + { 0x1e2f, {1, {0x1e2e }}}, + { 0x1e31, {1, {0x1e30 }}}, + { 0x1e33, {1, {0x1e32 }}}, + { 0x1e35, {1, {0x1e34 }}}, + { 0x1e37, {1, {0x1e36 }}}, + { 0x1e39, {1, {0x1e38 }}}, + { 0x1e3b, {1, {0x1e3a }}}, + { 0x1e3d, {1, {0x1e3c }}}, + { 0x1e3f, {1, {0x1e3e }}}, + { 0x1e41, {1, {0x1e40 }}}, + { 0x1e43, {1, {0x1e42 }}}, + { 0x1e45, {1, {0x1e44 }}}, + { 0x1e47, {1, {0x1e46 }}}, + { 0x1e49, {1, {0x1e48 }}}, + { 0x1e4b, {1, {0x1e4a }}}, + { 0x1e4d, {1, {0x1e4c }}}, + { 0x1e4f, {1, {0x1e4e }}}, + { 0x1e51, {1, {0x1e50 }}}, + { 0x1e53, {1, {0x1e52 }}}, + { 0x1e55, {1, {0x1e54 }}}, + { 0x1e57, {1, {0x1e56 }}}, + { 0x1e59, {1, {0x1e58 }}}, + { 0x1e5b, {1, {0x1e5a }}}, + { 0x1e5d, {1, {0x1e5c }}}, + { 0x1e5f, {1, {0x1e5e }}}, + { 0x1e61, {2, {0x1e9b, 0x1e60 }}}, + { 0x1e63, {1, {0x1e62 }}}, + { 0x1e65, {1, {0x1e64 }}}, + { 0x1e67, {1, {0x1e66 }}}, + { 0x1e69, {1, {0x1e68 }}}, + { 0x1e6b, {1, {0x1e6a }}}, + { 0x1e6d, {1, {0x1e6c }}}, + { 0x1e6f, {1, {0x1e6e }}}, + { 0x1e71, {1, {0x1e70 }}}, + { 0x1e73, {1, {0x1e72 }}}, + { 0x1e75, {1, {0x1e74 }}}, + { 0x1e77, {1, {0x1e76 }}}, + { 0x1e79, {1, {0x1e78 }}}, + { 0x1e7b, {1, {0x1e7a }}}, + { 0x1e7d, {1, {0x1e7c }}}, + { 0x1e7f, {1, {0x1e7e }}}, + { 0x1e81, {1, {0x1e80 }}}, + { 0x1e83, {1, {0x1e82 }}}, + { 0x1e85, {1, {0x1e84 }}}, + { 0x1e87, {1, {0x1e86 }}}, + { 0x1e89, {1, {0x1e88 }}}, + { 0x1e8b, {1, {0x1e8a }}}, + { 0x1e8d, {1, {0x1e8c }}}, + { 0x1e8f, {1, {0x1e8e }}}, + { 0x1e91, {1, {0x1e90 }}}, + { 0x1e93, {1, {0x1e92 }}}, + { 0x1e95, {1, {0x1e94 }}}, + { 0x1ea1, {1, {0x1ea0 }}}, + { 0x1ea3, {1, {0x1ea2 }}}, + { 0x1ea5, {1, {0x1ea4 }}}, + { 0x1ea7, {1, {0x1ea6 }}}, + { 0x1ea9, {1, {0x1ea8 }}}, + { 0x1eab, {1, {0x1eaa }}}, + { 0x1ead, {1, {0x1eac }}}, + { 0x1eaf, {1, {0x1eae }}}, + { 0x1eb1, {1, {0x1eb0 }}}, + { 0x1eb3, {1, {0x1eb2 }}}, + 
{ 0x1eb5, {1, {0x1eb4 }}}, + { 0x1eb7, {1, {0x1eb6 }}}, + { 0x1eb9, {1, {0x1eb8 }}}, + { 0x1ebb, {1, {0x1eba }}}, + { 0x1ebd, {1, {0x1ebc }}}, + { 0x1ebf, {1, {0x1ebe }}}, + { 0x1ec1, {1, {0x1ec0 }}}, + { 0x1ec3, {1, {0x1ec2 }}}, + { 0x1ec5, {1, {0x1ec4 }}}, + { 0x1ec7, {1, {0x1ec6 }}}, + { 0x1ec9, {1, {0x1ec8 }}}, + { 0x1ecb, {1, {0x1eca }}}, + { 0x1ecd, {1, {0x1ecc }}}, + { 0x1ecf, {1, {0x1ece }}}, + { 0x1ed1, {1, {0x1ed0 }}}, + { 0x1ed3, {1, {0x1ed2 }}}, + { 0x1ed5, {1, {0x1ed4 }}}, + { 0x1ed7, {1, {0x1ed6 }}}, + { 0x1ed9, {1, {0x1ed8 }}}, + { 0x1edb, {1, {0x1eda }}}, + { 0x1edd, {1, {0x1edc }}}, + { 0x1edf, {1, {0x1ede }}}, + { 0x1ee1, {1, {0x1ee0 }}}, + { 0x1ee3, {1, {0x1ee2 }}}, + { 0x1ee5, {1, {0x1ee4 }}}, + { 0x1ee7, {1, {0x1ee6 }}}, + { 0x1ee9, {1, {0x1ee8 }}}, + { 0x1eeb, {1, {0x1eea }}}, + { 0x1eed, {1, {0x1eec }}}, + { 0x1eef, {1, {0x1eee }}}, + { 0x1ef1, {1, {0x1ef0 }}}, + { 0x1ef3, {1, {0x1ef2 }}}, + { 0x1ef5, {1, {0x1ef4 }}}, + { 0x1ef7, {1, {0x1ef6 }}}, + { 0x1ef9, {1, {0x1ef8 }}}, + { 0x1f00, {1, {0x1f08 }}}, + { 0x1f01, {1, {0x1f09 }}}, + { 0x1f02, {1, {0x1f0a }}}, + { 0x1f03, {1, {0x1f0b }}}, + { 0x1f04, {1, {0x1f0c }}}, + { 0x1f05, {1, {0x1f0d }}}, + { 0x1f06, {1, {0x1f0e }}}, + { 0x1f07, {1, {0x1f0f }}}, + { 0x1f10, {1, {0x1f18 }}}, + { 0x1f11, {1, {0x1f19 }}}, + { 0x1f12, {1, {0x1f1a }}}, + { 0x1f13, {1, {0x1f1b }}}, + { 0x1f14, {1, {0x1f1c }}}, + { 0x1f15, {1, {0x1f1d }}}, + { 0x1f20, {1, {0x1f28 }}}, + { 0x1f21, {1, {0x1f29 }}}, + { 0x1f22, {1, {0x1f2a }}}, + { 0x1f23, {1, {0x1f2b }}}, + { 0x1f24, {1, {0x1f2c }}}, + { 0x1f25, {1, {0x1f2d }}}, + { 0x1f26, {1, {0x1f2e }}}, + { 0x1f27, {1, {0x1f2f }}}, + { 0x1f30, {1, {0x1f38 }}}, + { 0x1f31, {1, {0x1f39 }}}, + { 0x1f32, {1, {0x1f3a }}}, + { 0x1f33, {1, {0x1f3b }}}, + { 0x1f34, {1, {0x1f3c }}}, + { 0x1f35, {1, {0x1f3d }}}, + { 0x1f36, {1, {0x1f3e }}}, + { 0x1f37, {1, {0x1f3f }}}, + { 0x1f40, {1, {0x1f48 }}}, + { 0x1f41, {1, {0x1f49 }}}, + { 0x1f42, {1, {0x1f4a }}}, + { 0x1f43, {1, {0x1f4b }}}, 
+ { 0x1f44, {1, {0x1f4c }}}, + { 0x1f45, {1, {0x1f4d }}}, + { 0x1f51, {1, {0x1f59 }}}, + { 0x1f53, {1, {0x1f5b }}}, + { 0x1f55, {1, {0x1f5d }}}, + { 0x1f57, {1, {0x1f5f }}}, + { 0x1f60, {1, {0x1f68 }}}, + { 0x1f61, {1, {0x1f69 }}}, + { 0x1f62, {1, {0x1f6a }}}, + { 0x1f63, {1, {0x1f6b }}}, + { 0x1f64, {1, {0x1f6c }}}, + { 0x1f65, {1, {0x1f6d }}}, + { 0x1f66, {1, {0x1f6e }}}, + { 0x1f67, {1, {0x1f6f }}}, + { 0x1f70, {1, {0x1fba }}}, + { 0x1f71, {1, {0x1fbb }}}, + { 0x1f72, {1, {0x1fc8 }}}, + { 0x1f73, {1, {0x1fc9 }}}, + { 0x1f74, {1, {0x1fca }}}, + { 0x1f75, {1, {0x1fcb }}}, + { 0x1f76, {1, {0x1fda }}}, + { 0x1f77, {1, {0x1fdb }}}, + { 0x1f78, {1, {0x1ff8 }}}, + { 0x1f79, {1, {0x1ff9 }}}, + { 0x1f7a, {1, {0x1fea }}}, + { 0x1f7b, {1, {0x1feb }}}, + { 0x1f7c, {1, {0x1ffa }}}, + { 0x1f7d, {1, {0x1ffb }}}, + { 0x1fb0, {1, {0x1fb8 }}}, + { 0x1fb1, {1, {0x1fb9 }}}, + { 0x1fd0, {1, {0x1fd8 }}}, + { 0x1fd1, {1, {0x1fd9 }}}, + { 0x1fe0, {1, {0x1fe8 }}}, + { 0x1fe1, {1, {0x1fe9 }}}, + { 0x1fe5, {1, {0x1fec }}}, + { 0x2170, {1, {0x2160 }}}, + { 0x2171, {1, {0x2161 }}}, + { 0x2172, {1, {0x2162 }}}, + { 0x2173, {1, {0x2163 }}}, + { 0x2174, {1, {0x2164 }}}, + { 0x2175, {1, {0x2165 }}}, + { 0x2176, {1, {0x2166 }}}, + { 0x2177, {1, {0x2167 }}}, + { 0x2178, {1, {0x2168 }}}, + { 0x2179, {1, {0x2169 }}}, + { 0x217a, {1, {0x216a }}}, + { 0x217b, {1, {0x216b }}}, + { 0x217c, {1, {0x216c }}}, + { 0x217d, {1, {0x216d }}}, + { 0x217e, {1, {0x216e }}}, + { 0x217f, {1, {0x216f }}}, + { 0x24d0, {1, {0x24b6 }}}, + { 0x24d1, {1, {0x24b7 }}}, + { 0x24d2, {1, {0x24b8 }}}, + { 0x24d3, {1, {0x24b9 }}}, + { 0x24d4, {1, {0x24ba }}}, + { 0x24d5, {1, {0x24bb }}}, + { 0x24d6, {1, {0x24bc }}}, + { 0x24d7, {1, {0x24bd }}}, + { 0x24d8, {1, {0x24be }}}, + { 0x24d9, {1, {0x24bf }}}, + { 0x24da, {1, {0x24c0 }}}, + { 0x24db, {1, {0x24c1 }}}, + { 0x24dc, {1, {0x24c2 }}}, + { 0x24dd, {1, {0x24c3 }}}, + { 0x24de, {1, {0x24c4 }}}, + { 0x24df, {1, {0x24c5 }}}, + { 0x24e0, {1, {0x24c6 }}}, + { 0x24e1, {1, {0x24c7 
}}}, + { 0x24e2, {1, {0x24c8 }}}, + { 0x24e3, {1, {0x24c9 }}}, + { 0x24e4, {1, {0x24ca }}}, + { 0x24e5, {1, {0x24cb }}}, + { 0x24e6, {1, {0x24cc }}}, + { 0x24e7, {1, {0x24cd }}}, + { 0x24e8, {1, {0x24ce }}}, + { 0x24e9, {1, {0x24cf }}}, + { 0x2c30, {1, {0x2c00 }}}, + { 0x2c31, {1, {0x2c01 }}}, + { 0x2c32, {1, {0x2c02 }}}, + { 0x2c33, {1, {0x2c03 }}}, + { 0x2c34, {1, {0x2c04 }}}, + { 0x2c35, {1, {0x2c05 }}}, + { 0x2c36, {1, {0x2c06 }}}, + { 0x2c37, {1, {0x2c07 }}}, + { 0x2c38, {1, {0x2c08 }}}, + { 0x2c39, {1, {0x2c09 }}}, + { 0x2c3a, {1, {0x2c0a }}}, + { 0x2c3b, {1, {0x2c0b }}}, + { 0x2c3c, {1, {0x2c0c }}}, + { 0x2c3d, {1, {0x2c0d }}}, + { 0x2c3e, {1, {0x2c0e }}}, + { 0x2c3f, {1, {0x2c0f }}}, + { 0x2c40, {1, {0x2c10 }}}, + { 0x2c41, {1, {0x2c11 }}}, + { 0x2c42, {1, {0x2c12 }}}, + { 0x2c43, {1, {0x2c13 }}}, + { 0x2c44, {1, {0x2c14 }}}, + { 0x2c45, {1, {0x2c15 }}}, + { 0x2c46, {1, {0x2c16 }}}, + { 0x2c47, {1, {0x2c17 }}}, + { 0x2c48, {1, {0x2c18 }}}, + { 0x2c49, {1, {0x2c19 }}}, + { 0x2c4a, {1, {0x2c1a }}}, + { 0x2c4b, {1, {0x2c1b }}}, + { 0x2c4c, {1, {0x2c1c }}}, + { 0x2c4d, {1, {0x2c1d }}}, + { 0x2c4e, {1, {0x2c1e }}}, + { 0x2c4f, {1, {0x2c1f }}}, + { 0x2c50, {1, {0x2c20 }}}, + { 0x2c51, {1, {0x2c21 }}}, + { 0x2c52, {1, {0x2c22 }}}, + { 0x2c53, {1, {0x2c23 }}}, + { 0x2c54, {1, {0x2c24 }}}, + { 0x2c55, {1, {0x2c25 }}}, + { 0x2c56, {1, {0x2c26 }}}, + { 0x2c57, {1, {0x2c27 }}}, + { 0x2c58, {1, {0x2c28 }}}, + { 0x2c59, {1, {0x2c29 }}}, + { 0x2c5a, {1, {0x2c2a }}}, + { 0x2c5b, {1, {0x2c2b }}}, + { 0x2c5c, {1, {0x2c2c }}}, + { 0x2c5d, {1, {0x2c2d }}}, + { 0x2c5e, {1, {0x2c2e }}}, + { 0x2c81, {1, {0x2c80 }}}, + { 0x2c83, {1, {0x2c82 }}}, + { 0x2c85, {1, {0x2c84 }}}, + { 0x2c87, {1, {0x2c86 }}}, + { 0x2c89, {1, {0x2c88 }}}, + { 0x2c8b, {1, {0x2c8a }}}, + { 0x2c8d, {1, {0x2c8c }}}, + { 0x2c8f, {1, {0x2c8e }}}, + { 0x2c91, {1, {0x2c90 }}}, + { 0x2c93, {1, {0x2c92 }}}, + { 0x2c95, {1, {0x2c94 }}}, + { 0x2c97, {1, {0x2c96 }}}, + { 0x2c99, {1, {0x2c98 }}}, + { 0x2c9b, {1, 
{0x2c9a }}}, + { 0x2c9d, {1, {0x2c9c }}}, + { 0x2c9f, {1, {0x2c9e }}}, + { 0x2ca1, {1, {0x2ca0 }}}, + { 0x2ca3, {1, {0x2ca2 }}}, + { 0x2ca5, {1, {0x2ca4 }}}, + { 0x2ca7, {1, {0x2ca6 }}}, + { 0x2ca9, {1, {0x2ca8 }}}, + { 0x2cab, {1, {0x2caa }}}, + { 0x2cad, {1, {0x2cac }}}, + { 0x2caf, {1, {0x2cae }}}, + { 0x2cb1, {1, {0x2cb0 }}}, + { 0x2cb3, {1, {0x2cb2 }}}, + { 0x2cb5, {1, {0x2cb4 }}}, + { 0x2cb7, {1, {0x2cb6 }}}, + { 0x2cb9, {1, {0x2cb8 }}}, + { 0x2cbb, {1, {0x2cba }}}, + { 0x2cbd, {1, {0x2cbc }}}, + { 0x2cbf, {1, {0x2cbe }}}, + { 0x2cc1, {1, {0x2cc0 }}}, + { 0x2cc3, {1, {0x2cc2 }}}, + { 0x2cc5, {1, {0x2cc4 }}}, + { 0x2cc7, {1, {0x2cc6 }}}, + { 0x2cc9, {1, {0x2cc8 }}}, + { 0x2ccb, {1, {0x2cca }}}, + { 0x2ccd, {1, {0x2ccc }}}, + { 0x2ccf, {1, {0x2cce }}}, + { 0x2cd1, {1, {0x2cd0 }}}, + { 0x2cd3, {1, {0x2cd2 }}}, + { 0x2cd5, {1, {0x2cd4 }}}, + { 0x2cd7, {1, {0x2cd6 }}}, + { 0x2cd9, {1, {0x2cd8 }}}, + { 0x2cdb, {1, {0x2cda }}}, + { 0x2cdd, {1, {0x2cdc }}}, + { 0x2cdf, {1, {0x2cde }}}, + { 0x2ce1, {1, {0x2ce0 }}}, + { 0x2ce3, {1, {0x2ce2 }}}, + { 0x2d00, {1, {0x10a0 }}}, + { 0x2d01, {1, {0x10a1 }}}, + { 0x2d02, {1, {0x10a2 }}}, + { 0x2d03, {1, {0x10a3 }}}, + { 0x2d04, {1, {0x10a4 }}}, + { 0x2d05, {1, {0x10a5 }}}, + { 0x2d06, {1, {0x10a6 }}}, + { 0x2d07, {1, {0x10a7 }}}, + { 0x2d08, {1, {0x10a8 }}}, + { 0x2d09, {1, {0x10a9 }}}, + { 0x2d0a, {1, {0x10aa }}}, + { 0x2d0b, {1, {0x10ab }}}, + { 0x2d0c, {1, {0x10ac }}}, + { 0x2d0d, {1, {0x10ad }}}, + { 0x2d0e, {1, {0x10ae }}}, + { 0x2d0f, {1, {0x10af }}}, + { 0x2d10, {1, {0x10b0 }}}, + { 0x2d11, {1, {0x10b1 }}}, + { 0x2d12, {1, {0x10b2 }}}, + { 0x2d13, {1, {0x10b3 }}}, + { 0x2d14, {1, {0x10b4 }}}, + { 0x2d15, {1, {0x10b5 }}}, + { 0x2d16, {1, {0x10b6 }}}, + { 0x2d17, {1, {0x10b7 }}}, + { 0x2d18, {1, {0x10b8 }}}, + { 0x2d19, {1, {0x10b9 }}}, + { 0x2d1a, {1, {0x10ba }}}, + { 0x2d1b, {1, {0x10bb }}}, + { 0x2d1c, {1, {0x10bc }}}, + { 0x2d1d, {1, {0x10bd }}}, + { 0x2d1e, {1, {0x10be }}}, + { 0x2d1f, {1, {0x10bf }}}, + { 0x2d20, 
{1, {0x10c0 }}}, + { 0x2d21, {1, {0x10c1 }}}, + { 0x2d22, {1, {0x10c2 }}}, + { 0x2d23, {1, {0x10c3 }}}, + { 0x2d24, {1, {0x10c4 }}}, + { 0x2d25, {1, {0x10c5 }}}, + { 0xff41, {1, {0xff21 }}}, + { 0xff42, {1, {0xff22 }}}, + { 0xff43, {1, {0xff23 }}}, + { 0xff44, {1, {0xff24 }}}, + { 0xff45, {1, {0xff25 }}}, + { 0xff46, {1, {0xff26 }}}, + { 0xff47, {1, {0xff27 }}}, + { 0xff48, {1, {0xff28 }}}, + { 0xff49, {1, {0xff29 }}}, + { 0xff4a, {1, {0xff2a }}}, + { 0xff4b, {1, {0xff2b }}}, + { 0xff4c, {1, {0xff2c }}}, + { 0xff4d, {1, {0xff2d }}}, + { 0xff4e, {1, {0xff2e }}}, + { 0xff4f, {1, {0xff2f }}}, + { 0xff50, {1, {0xff30 }}}, + { 0xff51, {1, {0xff31 }}}, + { 0xff52, {1, {0xff32 }}}, + { 0xff53, {1, {0xff33 }}}, + { 0xff54, {1, {0xff34 }}}, + { 0xff55, {1, {0xff35 }}}, + { 0xff56, {1, {0xff36 }}}, + { 0xff57, {1, {0xff37 }}}, + { 0xff58, {1, {0xff38 }}}, + { 0xff59, {1, {0xff39 }}}, + { 0xff5a, {1, {0xff3a }}}, + { 0x10428, {1, {0x10400 }}}, + { 0x10429, {1, {0x10401 }}}, + { 0x1042a, {1, {0x10402 }}}, + { 0x1042b, {1, {0x10403 }}}, + { 0x1042c, {1, {0x10404 }}}, + { 0x1042d, {1, {0x10405 }}}, + { 0x1042e, {1, {0x10406 }}}, + { 0x1042f, {1, {0x10407 }}}, + { 0x10430, {1, {0x10408 }}}, + { 0x10431, {1, {0x10409 }}}, + { 0x10432, {1, {0x1040a }}}, + { 0x10433, {1, {0x1040b }}}, + { 0x10434, {1, {0x1040c }}}, + { 0x10435, {1, {0x1040d }}}, + { 0x10436, {1, {0x1040e }}}, + { 0x10437, {1, {0x1040f }}}, + { 0x10438, {1, {0x10410 }}}, + { 0x10439, {1, {0x10411 }}}, + { 0x1043a, {1, {0x10412 }}}, + { 0x1043b, {1, {0x10413 }}}, + { 0x1043c, {1, {0x10414 }}}, + { 0x1043d, {1, {0x10415 }}}, + { 0x1043e, {1, {0x10416 }}}, + { 0x1043f, {1, {0x10417 }}}, + { 0x10440, {1, {0x10418 }}}, + { 0x10441, {1, {0x10419 }}}, + { 0x10442, {1, {0x1041a }}}, + { 0x10443, {1, {0x1041b }}}, + { 0x10444, {1, {0x1041c }}}, + { 0x10445, {1, {0x1041d }}}, + { 0x10446, {1, {0x1041e }}}, + { 0x10447, {1, {0x1041f }}}, + { 0x10448, {1, {0x10420 }}}, + { 0x10449, {1, {0x10421 }}}, + { 0x1044a, {1, {0x10422 
}}}, + { 0x1044b, {1, {0x10423 }}}, + { 0x1044c, {1, {0x10424 }}}, + { 0x1044d, {1, {0x10425 }}}, + { 0x1044e, {1, {0x10426 }}}, + { 0x1044f, {1, {0x10427 }}} +}; + +static const CaseUnfold_11_Type CaseUnfold_11_Locale[] = { + { 0x0069, {1, {0x0049 }}} +}; + +static const CaseUnfold_12_Type CaseUnfold_12[] = { + { {0x0061, 0x02be}, {1, {0x1e9a }}}, + { {0x0066, 0x0066}, {1, {0xfb00 }}}, + { {0x0066, 0x0069}, {1, {0xfb01 }}}, + { {0x0066, 0x006c}, {1, {0xfb02 }}}, + { {0x0068, 0x0331}, {1, {0x1e96 }}}, + { {0x006a, 0x030c}, {1, {0x01f0 }}}, + { {0x0073, 0x0073}, {1, {0x00df }}}, + { {0x0073, 0x0074}, {2, {0xfb05, 0xfb06 }}}, + { {0x0074, 0x0308}, {1, {0x1e97 }}}, + { {0x0077, 0x030a}, {1, {0x1e98 }}}, + { {0x0079, 0x030a}, {1, {0x1e99 }}}, + { {0x02bc, 0x006e}, {1, {0x0149 }}}, + { {0x03ac, 0x03b9}, {1, {0x1fb4 }}}, + { {0x03ae, 0x03b9}, {1, {0x1fc4 }}}, + { {0x03b1, 0x0342}, {1, {0x1fb6 }}}, + { {0x03b1, 0x03b9}, {2, {0x1fb3, 0x1fbc }}}, + { {0x03b7, 0x0342}, {1, {0x1fc6 }}}, + { {0x03b7, 0x03b9}, {2, {0x1fc3, 0x1fcc }}}, + { {0x03b9, 0x0342}, {1, {0x1fd6 }}}, + { {0x03c1, 0x0313}, {1, {0x1fe4 }}}, + { {0x03c5, 0x0313}, {1, {0x1f50 }}}, + { {0x03c5, 0x0342}, {1, {0x1fe6 }}}, + { {0x03c9, 0x0342}, {1, {0x1ff6 }}}, + { {0x03c9, 0x03b9}, {2, {0x1ff3, 0x1ffc }}}, + { {0x03ce, 0x03b9}, {1, {0x1ff4 }}}, + { {0x0565, 0x0582}, {1, {0x0587 }}}, + { {0x0574, 0x0565}, {1, {0xfb14 }}}, + { {0x0574, 0x056b}, {1, {0xfb15 }}}, + { {0x0574, 0x056d}, {1, {0xfb17 }}}, + { {0x0574, 0x0576}, {1, {0xfb13 }}}, + { {0x057e, 0x0576}, {1, {0xfb16 }}}, + { {0x1f00, 0x03b9}, {2, {0x1f88, 0x1f80 }}}, + { {0x1f01, 0x03b9}, {2, {0x1f81, 0x1f89 }}}, + { {0x1f02, 0x03b9}, {2, {0x1f82, 0x1f8a }}}, + { {0x1f03, 0x03b9}, {2, {0x1f83, 0x1f8b }}}, + { {0x1f04, 0x03b9}, {2, {0x1f84, 0x1f8c }}}, + { {0x1f05, 0x03b9}, {2, {0x1f85, 0x1f8d }}}, + { {0x1f06, 0x03b9}, {2, {0x1f86, 0x1f8e }}}, + { {0x1f07, 0x03b9}, {2, {0x1f87, 0x1f8f }}}, + { {0x1f20, 0x03b9}, {2, {0x1f90, 0x1f98 }}}, + { {0x1f21, 0x03b9}, 
{2, {0x1f91, 0x1f99 }}}, + { {0x1f22, 0x03b9}, {2, {0x1f92, 0x1f9a }}}, + { {0x1f23, 0x03b9}, {2, {0x1f93, 0x1f9b }}}, + { {0x1f24, 0x03b9}, {2, {0x1f94, 0x1f9c }}}, + { {0x1f25, 0x03b9}, {2, {0x1f95, 0x1f9d }}}, + { {0x1f26, 0x03b9}, {2, {0x1f96, 0x1f9e }}}, + { {0x1f27, 0x03b9}, {2, {0x1f97, 0x1f9f }}}, + { {0x1f60, 0x03b9}, {2, {0x1fa0, 0x1fa8 }}}, + { {0x1f61, 0x03b9}, {2, {0x1fa1, 0x1fa9 }}}, + { {0x1f62, 0x03b9}, {2, {0x1fa2, 0x1faa }}}, + { {0x1f63, 0x03b9}, {2, {0x1fa3, 0x1fab }}}, + { {0x1f64, 0x03b9}, {2, {0x1fa4, 0x1fac }}}, + { {0x1f65, 0x03b9}, {2, {0x1fa5, 0x1fad }}}, + { {0x1f66, 0x03b9}, {2, {0x1fa6, 0x1fae }}}, + { {0x1f67, 0x03b9}, {2, {0x1fa7, 0x1faf }}}, + { {0x1f70, 0x03b9}, {1, {0x1fb2 }}}, + { {0x1f74, 0x03b9}, {1, {0x1fc2 }}}, + { {0x1f7c, 0x03b9}, {1, {0x1ff2 }}} +}; + +static const CaseUnfold_12_Type CaseUnfold_12_Locale[] = { + { {0x0069, 0x0307}, {1, {0x0130 }}} +}; + +static const CaseUnfold_13_Type CaseUnfold_13[] = { + { {0x0066, 0x0066, 0x0069}, {1, {0xfb03 }}}, + { {0x0066, 0x0066, 0x006c}, {1, {0xfb04 }}}, + { {0x03b1, 0x0342, 0x03b9}, {1, {0x1fb7 }}}, + { {0x03b7, 0x0342, 0x03b9}, {1, {0x1fc7 }}}, + { {0x03b9, 0x0308, 0x0300}, {1, {0x1fd2 }}}, + { {0x03b9, 0x0308, 0x0301}, {2, {0x0390, 0x1fd3 }}}, + { {0x03b9, 0x0308, 0x0342}, {1, {0x1fd7 }}}, + { {0x03c5, 0x0308, 0x0300}, {1, {0x1fe2 }}}, + { {0x03c5, 0x0308, 0x0301}, {2, {0x03b0, 0x1fe3 }}}, + { {0x03c5, 0x0308, 0x0342}, {1, {0x1fe7 }}}, + { {0x03c5, 0x0313, 0x0300}, {1, {0x1f52 }}}, + { {0x03c5, 0x0313, 0x0301}, {1, {0x1f54 }}}, + { {0x03c5, 0x0313, 0x0342}, {1, {0x1f56 }}}, + { {0x03c9, 0x0342, 0x03b9}, {1, {0x1ff7 }}} +}; + + +static PosixBracketEntryType HashEntryData[] = { + { (UChar* )"NEWLINE", 0, 7 }, + { (UChar* )"Alpha", 1, 5 }, + { (UChar* )"Blank", 2, 5 }, + { (UChar* )"Cntrl", 3, 5 }, + { (UChar* )"Digit", 4, 5 }, + { (UChar* )"Graph", 5, 5 }, + { (UChar* )"Lower", 6, 5 }, + { (UChar* )"Print", 7, 5 }, + { (UChar* )"Punct", 8, 5 }, + { (UChar* )"Space", 9, 5 }, + { 
(UChar* )"Upper", 10, 5 }, + { (UChar* )"XDigit", 11, 6 }, + { (UChar* )"Word", 12, 4 }, + { (UChar* )"Alnum", 13, 5 }, + { (UChar* )"ASCII", 14, 5 }, + +#ifdef USE_UNICODE_PROPERTIES + { (UChar* )"Any", 15, 3 }, + { (UChar* )"Assigned", 16, 8 }, + { (UChar* )"C", 17, 1 }, + { (UChar* )"Cc", 18, 2 }, + { (UChar* )"Cf", 19, 2 }, + { (UChar* )"Cn", 20, 2 }, + { (UChar* )"Co", 21, 2 }, + { (UChar* )"Cs", 22, 2 }, + { (UChar* )"L", 23, 1 }, + { (UChar* )"Ll", 24, 2 }, + { (UChar* )"Lm", 25, 2 }, + { (UChar* )"Lo", 26, 2 }, + { (UChar* )"Lt", 27, 2 }, + { (UChar* )"Lu", 28, 2 }, + { (UChar* )"M", 29, 1 }, + { (UChar* )"Mc", 30, 2 }, + { (UChar* )"Me", 31, 2 }, + { (UChar* )"Mn", 32, 2 }, + { (UChar* )"N", 33, 1 }, + { (UChar* )"Nd", 34, 2 }, + { (UChar* )"Nl", 35, 2 }, + { (UChar* )"No", 36, 2 }, + { (UChar* )"P", 37, 1 }, + { (UChar* )"Pc", 38, 2 }, + { (UChar* )"Pd", 39, 2 }, + { (UChar* )"Pe", 40, 2 }, + { (UChar* )"Pf", 41, 2 }, + { (UChar* )"Pi", 42, 2 }, + { (UChar* )"Po", 43, 2 }, + { (UChar* )"Ps", 44, 2 }, + { (UChar* )"S", 45, 1 }, + { (UChar* )"Sc", 46, 2 }, + { (UChar* )"Sk", 47, 2 }, + { (UChar* )"Sm", 48, 2 }, + { (UChar* )"So", 49, 2 }, + { (UChar* )"Z", 50, 1 }, + { (UChar* )"Zl", 51, 2 }, + { (UChar* )"Zp", 52, 2 }, + { (UChar* )"Zs", 53, 2 }, + { (UChar* )"Arabic", 54, 6 }, + { (UChar* )"Armenian", 55, 8 }, + { (UChar* )"Bengali", 56, 7 }, + { (UChar* )"Bopomofo", 57, 8 }, + { (UChar* )"Braille", 58, 7 }, + { (UChar* )"Buginese", 59, 8 }, + { (UChar* )"Buhid", 60, 5 }, + { (UChar* )"Canadian_Aboriginal", 61, 19 }, + { (UChar* )"Cherokee", 62, 8 }, + { (UChar* )"Common", 63, 6 }, + { (UChar* )"Coptic", 64, 6 }, + { (UChar* )"Cypriot", 65, 7 }, + { (UChar* )"Cyrillic", 66, 8 }, + { (UChar* )"Deseret", 67, 7 }, + { (UChar* )"Devanagari", 68, 10 }, + { (UChar* )"Ethiopic", 69, 8 }, + { (UChar* )"Georgian", 70, 8 }, + { (UChar* )"Glagolitic", 71, 10 }, + { (UChar* )"Gothic", 72, 6 }, + { (UChar* )"Greek", 73, 5 }, + { (UChar* )"Gujarati", 74, 8 }, + { 
(UChar* )"Gurmukhi", 75, 8 }, + { (UChar* )"Han", 76, 3 }, + { (UChar* )"Hangul", 77, 6 }, + { (UChar* )"Hanunoo", 78, 7 }, + { (UChar* )"Hebrew", 79, 6 }, + { (UChar* )"Hiragana", 80, 8 }, + { (UChar* )"Inherited", 81, 9 }, + { (UChar* )"Kannada", 82, 7 }, + { (UChar* )"Katakana", 83, 8 }, + { (UChar* )"Kharoshthi", 84, 10 }, + { (UChar* )"Khmer", 85, 5 }, + { (UChar* )"Lao", 86, 3 }, + { (UChar* )"Latin", 87, 5 }, + { (UChar* )"Limbu", 88, 5 }, + { (UChar* )"Linear_B", 89, 8 }, + { (UChar* )"Malayalam", 90, 9 }, + { (UChar* )"Mongolian", 91, 9 }, + { (UChar* )"Myanmar", 92, 7 }, + { (UChar* )"New_Tai_Lue", 93, 11 }, + { (UChar* )"Ogham", 94, 5 }, + { (UChar* )"Old_Italic", 95, 10 }, + { (UChar* )"Old_Persian", 96, 11 }, + { (UChar* )"Oriya", 97, 5 }, + { (UChar* )"Osmanya", 98, 7 }, + { (UChar* )"Runic", 99, 5 }, + { (UChar* )"Shavian", 100, 7 }, + { (UChar* )"Sinhala", 101, 7 }, + { (UChar* )"Syloti_Nagri", 102, 12 }, + { (UChar* )"Syriac", 103, 6 }, + { (UChar* )"Tagalog", 104, 7 }, + { (UChar* )"Tagbanwa", 105, 8 }, + { (UChar* )"Tai_Le", 106, 6 }, + { (UChar* )"Tamil", 107, 5 }, + { (UChar* )"Telugu", 108, 6 }, + { (UChar* )"Thaana", 109, 6 }, + { (UChar* )"Thai", 110, 4 }, + { (UChar* )"Tibetan", 111, 7 }, + { (UChar* )"Tifinagh", 112, 8 }, + { (UChar* )"Ugaritic", 113, 8 }, + { (UChar* )"Yi", 114, 2 }, +#endif /* USE_UNICODE_PROPERTIES */ + { (UChar* )NULL, -1, 0 } +}; + +#ifdef USE_UNICODE_PROPERTIES +#define CODE_RANGES_NUM 115 +#else +#define CODE_RANGES_NUM 15 +#endif + +static const OnigCodePoint* CodeRanges[CODE_RANGES_NUM]; +static int CodeRangeTableInited = 0; + +static void init_code_range_array(void) { + THREAD_ATOMIC_START; + + CodeRanges[0] = CR_NEWLINE; + CodeRanges[1] = CR_Alpha; + CodeRanges[2] = CR_Blank; + CodeRanges[3] = CR_Cntrl; + CodeRanges[4] = CR_Digit; + CodeRanges[5] = CR_Graph; + CodeRanges[6] = CR_Lower; + CodeRanges[7] = CR_Print; + CodeRanges[8] = CR_Punct; + CodeRanges[9] = CR_Space; + CodeRanges[10] = CR_Upper; + 
CodeRanges[11] = CR_XDigit; + CodeRanges[12] = CR_Word; + CodeRanges[13] = CR_Alnum; + CodeRanges[14] = CR_ASCII; + +#ifdef USE_UNICODE_PROPERTIES + CodeRanges[15] = CR_Any; + CodeRanges[16] = CR_Assigned; + CodeRanges[17] = CR_C; + CodeRanges[18] = CR_Cc; + CodeRanges[19] = CR_Cf; + CodeRanges[20] = CR_Cn; + CodeRanges[21] = CR_Co; + CodeRanges[22] = CR_Cs; + CodeRanges[23] = CR_L; + CodeRanges[24] = CR_Ll; + CodeRanges[25] = CR_Lm; + CodeRanges[26] = CR_Lo; + CodeRanges[27] = CR_Lt; + CodeRanges[28] = CR_Lu; + CodeRanges[29] = CR_M; + CodeRanges[30] = CR_Mc; + CodeRanges[31] = CR_Me; + CodeRanges[32] = CR_Mn; + CodeRanges[33] = CR_N; + CodeRanges[34] = CR_Nd; + CodeRanges[35] = CR_Nl; + CodeRanges[36] = CR_No; + CodeRanges[37] = CR_P; + CodeRanges[38] = CR_Pc; + CodeRanges[39] = CR_Pd; + CodeRanges[40] = CR_Pe; + CodeRanges[41] = CR_Pf; + CodeRanges[42] = CR_Pi; + CodeRanges[43] = CR_Po; + CodeRanges[44] = CR_Ps; + CodeRanges[45] = CR_S; + CodeRanges[46] = CR_Sc; + CodeRanges[47] = CR_Sk; + CodeRanges[48] = CR_Sm; + CodeRanges[49] = CR_So; + CodeRanges[50] = CR_Z; + CodeRanges[51] = CR_Zl; + CodeRanges[52] = CR_Zp; + CodeRanges[53] = CR_Zs; + CodeRanges[54] = CR_Arabic; + CodeRanges[55] = CR_Armenian; + CodeRanges[56] = CR_Bengali; + CodeRanges[57] = CR_Bopomofo; + CodeRanges[58] = CR_Braille; + CodeRanges[59] = CR_Buginese; + CodeRanges[60] = CR_Buhid; + CodeRanges[61] = CR_Canadian_Aboriginal; + CodeRanges[62] = CR_Cherokee; + CodeRanges[63] = CR_Common; + CodeRanges[64] = CR_Coptic; + CodeRanges[65] = CR_Cypriot; + CodeRanges[66] = CR_Cyrillic; + CodeRanges[67] = CR_Deseret; + CodeRanges[68] = CR_Devanagari; + CodeRanges[69] = CR_Ethiopic; + CodeRanges[70] = CR_Georgian; + CodeRanges[71] = CR_Glagolitic; + CodeRanges[72] = CR_Gothic; + CodeRanges[73] = CR_Greek; + CodeRanges[74] = CR_Gujarati; + CodeRanges[75] = CR_Gurmukhi; + CodeRanges[76] = CR_Han; + CodeRanges[77] = CR_Hangul; + CodeRanges[78] = CR_Hanunoo; + CodeRanges[79] = CR_Hebrew; + CodeRanges[80] = 
CR_Hiragana; + CodeRanges[81] = CR_Inherited; + CodeRanges[82] = CR_Kannada; + CodeRanges[83] = CR_Katakana; + CodeRanges[84] = CR_Kharoshthi; + CodeRanges[85] = CR_Khmer; + CodeRanges[86] = CR_Lao; + CodeRanges[87] = CR_Latin; + CodeRanges[88] = CR_Limbu; + CodeRanges[89] = CR_Linear_B; + CodeRanges[90] = CR_Malayalam; + CodeRanges[91] = CR_Mongolian; + CodeRanges[92] = CR_Myanmar; + CodeRanges[93] = CR_New_Tai_Lue; + CodeRanges[94] = CR_Ogham; + CodeRanges[95] = CR_Old_Italic; + CodeRanges[96] = CR_Old_Persian; + CodeRanges[97] = CR_Oriya; + CodeRanges[98] = CR_Osmanya; + CodeRanges[99] = CR_Runic; + CodeRanges[100] = CR_Shavian; + CodeRanges[101] = CR_Sinhala; + CodeRanges[102] = CR_Syloti_Nagri; + CodeRanges[103] = CR_Syriac; + CodeRanges[104] = CR_Tagalog; + CodeRanges[105] = CR_Tagbanwa; + CodeRanges[106] = CR_Tai_Le; + CodeRanges[107] = CR_Tamil; + CodeRanges[108] = CR_Telugu; + CodeRanges[109] = CR_Thaana; + CodeRanges[110] = CR_Thai; + CodeRanges[111] = CR_Tibetan; + CodeRanges[112] = CR_Tifinagh; + CodeRanges[113] = CR_Ugaritic; + CodeRanges[114] = CR_Yi; +#endif /* USE_UNICODE_PROPERTIES */ + + CodeRangeTableInited = 1; + THREAD_ATOMIC_END; +} + +/* Test whether code belongs to the character type ctype. + * Code points below 256 (restricted to ctype <= ONIGENC_MAX_STD_CTYPE when + * USE_UNICODE_PROPERTIES is defined) are answered by + * ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE; all other cases are looked up in + * CodeRanges, which is filled on first use. + * Returns ONIGERR_TYPE_BUG when ctype is not below CODE_RANGES_NUM. */ +extern int +onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED) +{ + if ( +#ifdef USE_UNICODE_PROPERTIES + ctype <= ONIGENC_MAX_STD_CTYPE && +#endif + code < 256) { + return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype); + } + + if (ctype >= CODE_RANGES_NUM) { + return ONIGERR_TYPE_BUG; + } + + if (CodeRangeTableInited == 0) init_code_range_array(); + + return onig_is_in_code_range((UChar* )CodeRanges[ctype], code); +} + + +/* Store the code range table registered for ctype in *ranges. + * Returns 0 on success, or ONIGERR_TYPE_BUG when ctype is not a valid + * index into CodeRanges. */ +extern int +onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[]) +{ + /* ctype is a signed int here, so negative values must be rejected too; + * checking only the upper bound would let them index before CodeRanges. */ + if (ctype < 0 || ctype >= CODE_RANGES_NUM) { + return ONIGERR_TYPE_BUG; + } + + if (CodeRangeTableInited == 0) init_code_range_array(); + + *ranges = CodeRanges[ctype]; + + return 0; +} + +/* UTF-16/32 variant: sets *sb_out to 0x00 and delegates to + * onigenc_unicode_ctype_code_range(). */ +extern int +onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, 
OnigCodePoint* sb_out, + const OnigCodePoint* ranges[], + struct OnigEncodingTypeST* enc ARG_UNUSED) +{ + *sb_out = 0x00; + return onigenc_unicode_ctype_code_range(ctype, ranges); +} + +#include "ruby/st.h" + +#define PROPERTY_NAME_MAX_SIZE 20 + +static st_table* NameCtypeTable; +static int NameTableInited = 0; + +static int init_name_ctype_table(void) +{ + PosixBracketEntryType *pb; + + THREAD_ATOMIC_START; + + NameCtypeTable = onig_st_init_strend_table_with_size(100); + if (ONIG_IS_NULL(NameCtypeTable)) return ONIGERR_MEMORY; + + for (pb = HashEntryData; ONIG_IS_NOT_NULL(pb->name); pb++) { + onig_st_insert_strend(NameCtypeTable, pb->name, pb->name + pb->len, + (st_data_t )pb->ctype); + } + + NameTableInited = 1; + THREAD_ATOMIC_END; + return 0; +} + +extern int +onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end) +{ + int len; + st_data_t ctype; + UChar buf[PROPERTY_NAME_MAX_SIZE]; + UChar *p; + OnigCodePoint code; + + p = name; + len = 0; + while (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code >= 0x80) + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + + buf[len++] = (UChar )code; + if (len >= PROPERTY_NAME_MAX_SIZE) + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + + p += enclen(enc, p, end); + } + + buf[len] = 0; + + if (NameTableInited == 0) init_name_ctype_table(); + + if (onig_st_lookup_strend(NameCtypeTable, buf, buf + len, + &ctype) == 0) { + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + } + + return ctype; +} + + +static int +code2_cmp(OnigCodePoint* x, OnigCodePoint* y) +{ + if (x[0] == y[0] && x[1] == y[1]) return 0; + return 1; +} + +static int +code2_hash(OnigCodePoint* x) +{ + return (int )(x[0] + x[1]); +} + +static const struct st_hash_type type_code2_hash = { + code2_cmp, + code2_hash, +}; + +static int +code3_cmp(OnigCodePoint* x, OnigCodePoint* y) +{ + if (x[0] == y[0] && x[1] == y[1] && x[2] == y[2]) return 0; + return 1; +} + +static int +code3_hash(OnigCodePoint* x) +{ + return (int )(x[0] + x[1] + 
x[2]); +} + +static const struct st_hash_type type_code3_hash = { + code3_cmp, + code3_hash, +}; + + +static st_table* FoldTable; /* fold-1, fold-2, fold-3 */ +static st_table* Unfold1Table; +static st_table* Unfold2Table; +static st_table* Unfold3Table; +static int CaseFoldInited = 0; + +/* Fill FoldTable and Unfold1Table/Unfold2Table/Unfold3Table from the static + * CaseFold, CaseFold_Locale and CaseUnfold_1x arrays. + * Returns 0 on success or ONIGERR_MEMORY when a table cannot be allocated; + * every exit path closes the THREAD_ATOMIC section it opened. */ +static int init_case_fold_table(void) +{ + const CaseFold_11_Type *p; + const CaseUnfold_11_Type *p1; + const CaseUnfold_12_Type *p2; + const CaseUnfold_13_Type *p3; + int i; + + THREAD_ATOMIC_START; + + FoldTable = st_init_numtable_with_size(1200); + if (ONIG_IS_NULL(FoldTable)) { + THREAD_ATOMIC_END; + return ONIGERR_MEMORY; + } + for (i = 0; i < (int )(sizeof(CaseFold)/sizeof(CaseFold_11_Type)); i++) { + p = &CaseFold[i]; + st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to)); + } + for (i = 0; i < (int )(sizeof(CaseFold_Locale)/sizeof(CaseFold_11_Type)); + i++) { + p = &CaseFold_Locale[i]; + st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to)); + } + + Unfold1Table = st_init_numtable_with_size(1000); + if (ONIG_IS_NULL(Unfold1Table)) { + THREAD_ATOMIC_END; + return ONIGERR_MEMORY; + } + + for (i = 0; i < (int )(sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type)); + i++) { + p1 = &CaseUnfold_11[i]; + st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to)); + } + for (i = 0; + i < (int )(sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type)); + i++) { + p1 = &CaseUnfold_11_Locale[i]; + st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to)); + } + + /* keys of the 2- and 3-code-point tables are pointers to the from[] arrays */ + Unfold2Table = st_init_table_with_size(&type_code2_hash, 200); + if (ONIG_IS_NULL(Unfold2Table)) { + THREAD_ATOMIC_END; + return ONIGERR_MEMORY; + } + + for (i = 0; i < (int )(sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type)); + i++) { + p2 = &CaseUnfold_12[i]; + st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to)); + } + for (i = 0; + i < (int )(sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type)); + i++) { + p2 = &CaseUnfold_12_Locale[i]; + st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to)); + } + + 
Unfold3Table = st_init_table_with_size(&type_code3_hash, 30); + if (ONIG_IS_NULL(Unfold3Table)) { + THREAD_ATOMIC_END; + return ONIGERR_MEMORY; + } + + for (i = 0; i < (int )(sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type)); + i++) { + p3 = &CaseUnfold_13[i]; + st_add_direct(Unfold3Table, (st_data_t )p3->from, (st_data_t )(&p3->to)); + } + + CaseFoldInited = 1; + THREAD_ATOMIC_END; + return 0; +} + +/* Case-fold the character at *pp into fold, advance *pp past that character + * and return the number of bytes written to fold. A character without a + * FoldTable entry is copied unchanged. + * NOTE(review): the result of init_case_fold_table() is not checked here. */ +extern int +onigenc_unicode_mbc_case_fold(OnigEncoding enc, + OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end, + UChar* fold) +{ + CodePointList3 *to; + OnigCodePoint code; + int i, len, rlen; + const UChar *p = *pp; + + if (CaseFoldInited == 0) init_case_fold_table(); + + code = ONIGENC_MBC_TO_CODE(enc, p, end); + len = enclen(enc, p, end); + *pp += len; + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (code == 0x0049) { + return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold); + } + else if (code == 0x0130) { + return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold); + } + } +#endif + + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) { + if (to->n == 1) { + return ONIGENC_CODE_TO_MBC(enc, to->code[0], fold); + } +#if 0 + /* NO NEEDS TO CHECK */ + else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { +#else + else { +#endif + /* multi-code-point fold: emit each code point in turn */ + rlen = 0; + for (i = 0; i < to->n; i++) { + len = ONIGENC_CODE_TO_MBC(enc, to->code[i], fold); + fold += len; + rlen += len; + } + return rlen; + } + } + + for (i = 0; i < len; i++) { + *fold++ = *p++; + } + return len; +} + +/* Call f for every case-fold relation held in the static CaseUnfold tables. + * Stops at, and returns, the first non-zero value f returns; returns 0 once + * all relations have been reported. */ +extern int +onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg, + OnigEncoding enc ARG_UNUSED) +{ + const CaseUnfold_11_Type* p11; + OnigCodePoint code; + int i, j, k, r; + + /* if (CaseFoldInited == 0) init_case_fold_table(); */ + + for (i = 0; i < (int )(sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type)); + i++) { + p11 = &CaseUnfold_11[i]; + for (j = 0; j < p11->to.n; j++) { + code = p11->from; + r = 
(*f)(p11->to.code[j], &code, 1, arg); + if (r != 0) return r; + + code = p11->to.code[j]; + r = (*f)(p11->from, &code, 1, arg); + if (r != 0) return r; + + for (k = 0; k < j; k++) { + r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), 1, arg); + if (r != 0) return r; + + r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), 1, arg); + if (r != 0) return r; + } + } + } + /* With the Turkish/Azeri flag set, report the pairs 0x0049 <-> 0x0131 and 0x0069 <-> 0x0130 in both directions; otherwise walk CaseUnfold_11_Locale the same way as CaseUnfold_11 above. */+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + code = 0x0131; + r = (*f)(0x0049, &code, 1, arg); + if (r != 0) return r; + code = 0x0049; + r = (*f)(0x0131, &code, 1, arg); + if (r != 0) return r; + + code = 0x0130; + r = (*f)(0x0069, &code, 1, arg); + if (r != 0) return r; + code = 0x0069; + r = (*f)(0x0130, &code, 1, arg); + if (r != 0) return r; + } + else { +#endif + for (i = 0; + i < (int )(sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type)); + i++) { + p11 = &CaseUnfold_11_Locale[i]; + for (j = 0; j < p11->to.n; j++) { + code = p11->from; + r = (*f)(p11->to.code[j], &code, 1, arg); + if (r != 0) return r; + + code = p11->to.code[j]; + r = (*f)(p11->from, &code, 1, arg); + if (r != 0) return r; + + for (k = 0; k < j; k++) { + r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), + 1, arg); + if (r != 0) return r; + + r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), + 1, arg); + if (r != 0) return r; + } + } + } +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + } +#endif + /* Multi-code-point relations are reported only when the caller sets INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR: each target code point is paired with the 2-element source sequence and with every other target of the same entry. */+ if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + for (i = 0; i < (int )(sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type)); + i++) { + for (j = 0; j < CaseUnfold_12[i].to.n; j++) { + r = (*f)(CaseUnfold_12[i].to.code[j], + (OnigCodePoint* )CaseUnfold_12[i].from, 2, arg); + if (r != 0) return r; + + for (k = 0; k < CaseUnfold_12[i].to.n; k++) { + if (k == j) continue; + + r = (*f)(CaseUnfold_12[i].to.code[j], + (OnigCodePoint* )(&CaseUnfold_12[i].to.code[k]), 1, arg); + if (r != 0) return r; + } + } + } + +#ifdef 
USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) { +#endif + for (i = 0; + i < (int )(sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type)); + i++) { + for (j = 0; j < CaseUnfold_12_Locale[i].to.n; j++) { + r = (*f)(CaseUnfold_12_Locale[i].to.code[j], + (OnigCodePoint* )CaseUnfold_12_Locale[i].from, 2, arg); + if (r != 0) return r; + + for (k = 0; k < CaseUnfold_12_Locale[i].to.n; k++) { + if (k == j) continue; + + r = (*f)(CaseUnfold_12_Locale[i].to.code[j], + (OnigCodePoint* )(&CaseUnfold_12_Locale[i].to.code[k]), + 1, arg); + if (r != 0) return r; + } + } + } +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + } +#endif + + for (i = 0; i < (int )(sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type)); + i++) { + for (j = 0; j < CaseUnfold_13[i].to.n; j++) { + r = (*f)(CaseUnfold_13[i].to.code[j], + (OnigCodePoint* )CaseUnfold_13[i].from, 3, arg); + if (r != 0) return r; + + for (k = 0; k < CaseUnfold_13[i].to.n; k++) { + if (k == j) continue; + + r = (*f)(CaseUnfold_13[i].to.code[j], + (OnigCodePoint* )(&CaseUnfold_13[i].to.code[k]), 1, arg); + if (r != 0) return r; + } + } + } + } + + return 0; +} + /* Collect the case-fold alternatives for the text starting at p into items[] and return the number of items written. Each item records byte_len (input bytes it covers), code_len and the alternative code point(s). Multi-code-point alternatives and look-ahead matches are produced only when flag contains INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR. NOTE(review): the result of init_case_fold_table() is not checked, and the capacity of items[] is not visible here - confirm against the callers. */+extern int +onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, + OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[]) +{ + int n, i, j, k, len; + OnigCodePoint code, codes[3]; + CodePointList3 *to, *z3; + CodePointList2 *z2; + + if (CaseFoldInited == 0) init_case_fold_table(); + + n = 0; + + code = ONIGENC_MBC_TO_CODE(enc, p, end); + len = enclen(enc, p, end); + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (code == 0x0049) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0131; + return 1; + } + else if (code == 0x0130) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0069; + return 1; + } + else if (code == 0x0131) { + items[0].byte_len = len; + items[0].code_len = 1; 
+ items[0].code[0] = 0x0049; + return 1; + } + else if (code == 0x0069) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0130; + return 1; + } + } +#endif + + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) { + if (to->n == 1) { + OnigCodePoint orig_code = code; + + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = to->code[0]; + n++; + + code = to->code[0]; + if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) { + for (i = 0; i < to->n; i++) { + if (to->code[i] != orig_code) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = to->code[i]; + n++; + } + } + } + } + else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + OnigCodePoint cs[3][4]; + int fn, ncs[3]; + + for (fn = 0; fn < to->n; fn++) { + cs[fn][0] = to->code[fn]; + if (onig_st_lookup(Unfold1Table, (st_data_t )cs[fn][0], + (void* )&z3) != 0) { + for (i = 0; i < z3->n; i++) { + cs[fn][i+1] = z3->code[i]; + } + ncs[fn] = z3->n + 1; + } + else + ncs[fn] = 1; + } + + if (fn == 2) { + for (i = 0; i < ncs[0]; i++) { + for (j = 0; j < ncs[1]; j++) { + items[n].byte_len = len; + items[n].code_len = 2; + items[n].code[0] = cs[0][i]; + items[n].code[1] = cs[1][j]; + n++; + } + } + + if (onig_st_lookup(Unfold2Table, (st_data_t )to->code, + (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + if (z2->code[i] == code) continue; + + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + } + else { + for (i = 0; i < ncs[0]; i++) { + for (j = 0; j < ncs[1]; j++) { + for (k = 0; k < ncs[2]; k++) { + items[n].byte_len = len; + items[n].code_len = 3; + items[n].code[0] = cs[0][i]; + items[n].code[1] = cs[1][j]; + items[n].code[2] = cs[2][k]; + n++; + } + } + } + + if (onig_st_lookup(Unfold3Table, (st_data_t )to->code, + (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + if (z2->code[i] == code) continue; + + items[n].byte_len = len; + items[n].code_len = 
1; + items[n].code[0] = z2->code[i]; + n++; + } + } + } + + /* multi char folded code is not head of another folded multi char */ + flag = 0; /* DISABLE_CASE_FOLD_MULTI_CHAR(flag); */ + } + } + else { + if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) { + for (i = 0; i < to->n; i++) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = to->code[i]; + n++; + } + } + } + + /* Look-ahead: fold the next one and then two characters (single-code-point folds only) and check whether the resulting 2- or 3-element sequence in codes[] is a key of Unfold2Table / Unfold3Table; len grows to cover the extra input bytes. */+ if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + p += len; + if (p < end) { + int clen; + + codes[0] = code; + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0 + && to->n == 1) { + codes[1] = to->code[0]; + } + else + codes[1] = code; + + clen = enclen(enc, p, end); + len += clen; + if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + + p += clen; + if (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0 + && to->n == 1) { + codes[2] = to->code[0]; + } + else + codes[2] = code; + + clen = enclen(enc, p, end); + len += clen; + if (onig_st_lookup(Unfold3Table, (st_data_t )codes, + (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + } + } + } + + return n; +} diff --git a/enc/us_ascii.c b/enc/us_ascii.c new file mode 100644 index 0000000..2e96dd3 --- /dev/null +++ b/enc/us_ascii.c @@ -0,0 +1,31 @@ +#include "regenc.h" + /* Character length callback for US-ASCII: a byte with the high bit set is reported through ONIGENC_CONSTRUCT_MBCLEN_INVALID(), any other byte through ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1). */+static int +us_ascii_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc) +{ + if (*p & 0x80) + return ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1); +} + +OnigEncodingDefine(us_ascii, US_ASCII) = { + us_ascii_mbc_enc_len, + "US-ASCII",/* name */ + 1, /* max byte length */ + 1, /* min byte length */ + 
onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + onigenc_ascii_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("ASCII", "US-ASCII") +ENC_ALIAS("ANSI_X3.4-1968", "US-ASCII") +ENC_ALIAS("646", "US-ASCII") diff --git a/enc/utf_16be.c b/enc/utf_16be.c new file mode 100644 index 0000000..8d7c8e9 --- /dev/null +++ b/enc/utf_16be.c @@ -0,0 +1,258 @@ +/********************************************************************** + utf_16be.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8) +#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc) +#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8) + +static const int EncLen_UTF16[] = { + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +}; + +static int +utf16be_mbc_enc_len(const UChar* p, const OnigUChar* e ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) +{ + int byte = p[0]; + if (!UTF16_IS_SURROGATE(byte)) { + if (2 <= e-p) + return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2); + else + return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1); + } + if (UTF16_IS_SURROGATE_FIRST(byte)) { + switch (e-p) { + case 1: return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(3); + case 2: 
return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(2); + case 3: + if (UTF16_IS_SURROGATE_SECOND(p[2])) + return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1); + break; + default: + if (UTF16_IS_SURROGATE_SECOND(p[2])) + return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4); + break; + } + } + return ONIGENC_CONSTRUCT_MBCLEN_INVALID(); +} + +static int +utf16be_is_mbc_newline(const UChar* p, const UChar* end, + OnigEncoding enc) +{ + if (p + 1 < end) { + if (*(p+1) == 0x0a && *p == 0x00) + return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if (( +#ifndef USE_CRNL_AS_LINE_TERMINATOR + *(p+1) == 0x0d || +#endif + *(p+1) == 0x85) && *p == 0x00) + return 1; + if (*p == 0x20 && (*(p+1) == 0x29 || *(p+1) == 0x28)) + return 1; +#endif + } + return 0; +} + +static OnigCodePoint +utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED, + OnigEncoding enc) +{ + OnigCodePoint code; + + if (UTF16_IS_SURROGATE_FIRST(*p)) { + code = ((((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1) << 16) + + ((((p[1] & 0x3f) << 2) + (p[2] - 0xdc)) << 8) + + p[3]; + } + else { + code = p[0] * 256 + p[1]; + } + return code; +} + +static int +utf16be_code_to_mbclen(OnigCodePoint code, + OnigEncoding enc) +{ + return (code > 0xffff ? 
4 : 2); +} + +static int +utf16be_code_to_mbc(OnigCodePoint code, UChar *buf, + OnigEncoding enc) +{ + UChar* p = buf; + + if (code > 0xffff) { + unsigned int high = (code >> 10) + 0xD7C0; + unsigned int low = (code & 0x3FF) + 0xDC00; + *p++ = (high >> 8) & 0xFF; + *p++ = high & 0xFF; + *p++ = (low >> 8) & 0xFF; + *p++ = low & 0xFF; + return 4; + } + else { + *p++ = (UChar )((code & 0xff00) >> 8); + *p++ = (UChar )(code & 0xff); + return 2; + } +} + +static int +utf16be_mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, UChar* fold, + OnigEncoding enc) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_ASCII_CODE(*(p+1)) && *p == 0) { + p++; +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*p == 0x49) { + *fold++ = 0x01; + *fold = 0x31; + (*pp) += 2; + return 2; + } + } +#endif + + *fold++ = 0; + *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + *pp += 2; + return 2; + } + else + return onigenc_unicode_mbc_case_fold(enc, flag, + pp, end, fold); +} + +#if 0 +static int +utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp) += EncLen_UTF16[*p]; + + if (*p == 0) { + int c, v; + + p++; + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + return TRUE; + } + + c = *p; + v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, + (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; + } + return (v != 0 ? 
TRUE : FALSE); + } + + return FALSE; +} +#endif + +static UChar* +utf16be_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, + OnigEncoding enc ARG_UNUSED) +{ + if (s <= start) return (UChar* )s; + + if ((s - start) % 2 == 1) { + s--; + } + + if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1) + s -= 2; + + return (UChar* )s; +} + +static int +utf16be_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], + OnigEncoding enc) +{ + return onigenc_unicode_get_case_fold_codes_by_str(enc, + flag, p, end, items); +} + +OnigEncodingDefine(utf_16be, UTF_16BE) = { + utf16be_mbc_enc_len, + "UTF-16BE", /* name */ + 4, /* max byte length */ + 2, /* min byte length */ + utf16be_is_mbc_newline, + utf16be_mbc_to_code, + utf16be_code_to_mbclen, + utf16be_code_to_mbc, + utf16be_mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + utf16be_get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + onigenc_utf16_32_get_ctype_code_range, + utf16be_left_adjust_char_head, + onigenc_always_false_is_allowed_reverse_match +}; +ENC_ALIAS("UCS-2BE", "UTF-16BE") diff --git a/enc/utf_16le.c b/enc/utf_16le.c new file mode 100644 index 0000000..c8a1e7a --- /dev/null +++ b/enc/utf_16le.c @@ -0,0 +1,249 @@ +/********************************************************************** + utf_16le.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8) +#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc) +#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8) + +static const int EncLen_UTF16[] = { + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +}; + +static int +utf16le_mbc_enc_len(const UChar* p, const OnigUChar* e, + OnigEncoding enc ARG_UNUSED) +{ + int len = e-p, byte; + if (len < 2) + return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1); + byte = p[1]; + if (!UTF16_IS_SURROGATE(byte)) { + return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2); + } + if (UTF16_IS_SURROGATE_FIRST(byte)) { + if (len < 4) + return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-len); + if (UTF16_IS_SURROGATE_SECOND(p[3])) + return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4); + } + return ONIGENC_CONSTRUCT_MBCLEN_INVALID(); +} + +static int +utf16le_is_mbc_newline(const UChar* p, const UChar* end, + OnigEncoding enc ARG_UNUSED) +{ + if (p + 1 < end) { + if (*p == 0x0a && *(p+1) == 0x00) + return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if (( +#ifndef USE_CRNL_AS_LINE_TERMINATOR + *p == 0x0d || +#endif + *p == 0x85) && *(p+1) == 0x00) + return 1; + if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)) + return 1; +#endif + } + return 0; +} + +static OnigCodePoint +utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) +{ + OnigCodePoint code; + UChar c0 = *p; + UChar c1 = *(p+1); + + if (UTF16_IS_SURROGATE_FIRST(c1)) { + code = ((((c1 - 0xd8) << 2) + ((c0 & 0xc0) >> 6) + 1) << 16) + + ((((c0 & 0x3f) << 2) + (p[3] - 0xdc)) << 8) + + p[2]; + } + else { + code = c1 * 256 + p[0]; + } + return code; +} + +static int +utf16le_code_to_mbclen(OnigCodePoint code, + OnigEncoding enc ARG_UNUSED) +{ + return (code > 0xffff ? 
4 : 2); +} + +static int +utf16le_code_to_mbc(OnigCodePoint code, UChar *buf, + OnigEncoding enc ARG_UNUSED) +{ + UChar* p = buf; + + if (code > 0xffff) { + unsigned int high = (code >> 10) + 0xD7C0; + unsigned int low = (code & 0x3FF) + 0xDC00; + *p++ = high & 0xFF; + *p++ = (high >> 8) & 0xFF; + *p++ = low & 0xFF; + *p++ = (low >> 8) & 0xFF; + return 4; + } + else { + *p++ = (UChar )(code & 0xff); + *p++ = (UChar )((code & 0xff00) >> 8); + return 2; + } +} + +static int +utf16le_mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, UChar* fold, + OnigEncoding enc) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0) { +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*p == 0x49) { + *fold++ = 0x31; + *fold = 0x01; + (*pp) += 2; + return 2; + } + } +#endif + + *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + *fold = 0; + *pp += 2; + return 2; + } + else + return onigenc_unicode_mbc_case_fold(enc, flag, pp, + end, fold); +} + +#if 0 +static int +utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, + const UChar* end) +{ + const UChar* p = *pp; + + (*pp) += EncLen_UTF16[*(p+1)]; + + if (*(p+1) == 0) { + int c, v; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + return TRUE; + } + + c = *p; + v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, + (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; + } + return (v != 0 ? 
TRUE : FALSE); + } + + return FALSE; +} +#endif + +static UChar* +utf16le_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, + OnigEncoding enc ARG_UNUSED) +{ + if (s <= start) return (UChar* )s; + + if ((s - start) % 2 == 1) { + s--; + } + + if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1) + s -= 2; + + return (UChar* )s; +} + +static int +utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], + OnigEncoding enc) +{ + return onigenc_unicode_get_case_fold_codes_by_str(enc, + flag, p, end, items); +} + +OnigEncodingDefine(utf_16le, UTF_16LE) = { + utf16le_mbc_enc_len, + "UTF-16LE", /* name */ + 4, /* max byte length */ + 2, /* min byte length */ + utf16le_is_mbc_newline, + utf16le_mbc_to_code, + utf16le_code_to_mbclen, + utf16le_code_to_mbc, + utf16le_mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + utf16le_get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + onigenc_utf16_32_get_ctype_code_range, + utf16le_left_adjust_char_head, + onigenc_always_false_is_allowed_reverse_match +}; diff --git a/enc/utf_32be.c b/enc/utf_32be.c new file mode 100644 index 0000000..61e7d0f --- /dev/null +++ b/enc/utf_32be.c @@ -0,0 +1,195 @@ +/********************************************************************** + utf_32be.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static int +utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) +{ + return 4; +} + +static int +utf32be_is_mbc_newline(const UChar* p, const UChar* end, + OnigEncoding enc ARG_UNUSED) +{ + if (p + 3 < end) { + if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0) + return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if (( +#ifndef USE_CRNL_AS_LINE_TERMINATOR + *(p+3) == 0x0d || +#endif + *(p+3) == 0x85) + && *(p+2) == 0 && *(p+1) == 0 && *p == 0x00) + return 1; + if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28) + && *(p+1) == 0 && *p == 0) + return 1; +#endif + } + return 0; +} + +static OnigCodePoint +utf32be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) +{ + return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]); +} + +static int +utf32be_code_to_mbclen(OnigCodePoint code ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) +{ + 
return 4; +} + +static int +utf32be_code_to_mbc(OnigCodePoint code, UChar *buf, + OnigEncoding enc ARG_UNUSED) +{ + UChar* p = buf; + + *p++ = (UChar )((code & 0xff000000) >>24); + *p++ = (UChar )((code & 0xff0000) >>16); + *p++ = (UChar )((code & 0xff00) >> 8); + *p++ = (UChar ) (code & 0xff); + return 4; +} + +static int +utf32be_mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, UChar* fold, + OnigEncoding enc) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_ASCII_CODE(*(p+3)) && *(p+2) == 0 && *(p+1) == 0 && *p == 0) { + *fold++ = 0; + *fold++ = 0; + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*(p+3) == 0x49) { + *fold++ = 0x01; + *fold = 0x31; + (*pp) += 4; + return 4; + } + } +#endif + + *fold++ = 0; + *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*(p+3)); + *pp += 4; + return 4; + } + else + return onigenc_unicode_mbc_case_fold(enc, flag, pp, + end, fold); +} + +#if 0 +static int +utf32be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp) += 4; + + if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) { + int c, v; + + p += 3; + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + return TRUE; + } + + c = *p; + v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, + (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; + } + return (v != 0 ? 
TRUE : FALSE); + } + + return FALSE; +} +#endif + +static UChar* +utf32be_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, + OnigEncoding enc ARG_UNUSED) +{ + int rem; + + if (s <= start) return (UChar* )s; + + rem = (s - start) % 4; + return (UChar* )(s - rem); +} + +static int +utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], + OnigEncoding enc) +{ + return onigenc_unicode_get_case_fold_codes_by_str(enc, + flag, p, end, items); +} + +OnigEncodingDefine(utf_32be, UTF_32BE) = { + utf32be_mbc_enc_len, + "UTF-32BE", /* name */ + 4, /* max byte length */ + 4, /* min byte length */ + utf32be_is_mbc_newline, + utf32be_mbc_to_code, + utf32be_code_to_mbclen, + utf32be_code_to_mbc, + utf32be_mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + utf32be_get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + onigenc_utf16_32_get_ctype_code_range, + utf32be_left_adjust_char_head, + onigenc_always_false_is_allowed_reverse_match +}; +ENC_ALIAS("UCS-4BE", "UTF-32BE") + diff --git a/enc/utf_32le.c b/enc/utf_32le.c new file mode 100644 index 0000000..3a0a41b --- /dev/null +++ b/enc/utf_32le.c @@ -0,0 +1,194 @@ +/********************************************************************** + utf_32le.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static int +utf32le_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) +{ + return 4; +} +/* Return 1 if the UTF-32LE unit at p is a line terminator, 0 otherwise. */ +static int +utf32le_is_mbc_newline(const UChar* p, const UChar* end, + OnigEncoding enc ARG_UNUSED) +{ + if (p + 3 < end) { /* a complete 4-byte unit must lie before end */ + if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) + return 1; /* U+000A */ +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if (( +#ifndef USE_CRNL_AS_LINE_TERMINATOR + *p == 0x0d || +#endif + *p == 0x85) /* each of the three upper bytes is dereferenced and must be zero */ + && *(p+1) == 0x00 && *(p+2) == 0x00 && *(p+3) == 0x00) + return 1; /* U+000D (only without USE_CRNL_AS_LINE_TERMINATOR) or U+0085 */ + if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28) + && *(p+2) == 0x00 && *(p+3) == 0x00) + return 1; /* U+2028 or U+2029 */ +#endif + } + return 0; +} + +static OnigCodePoint +utf32le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) +{ + return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]); +} + +static int +utf32le_code_to_mbclen(OnigCodePoint code ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) +{ +
return 4; +} + +static int +utf32le_code_to_mbc(OnigCodePoint code, UChar *buf, + OnigEncoding enc ARG_UNUSED) +{ + UChar* p = buf; + + *p++ = (UChar ) (code & 0xff); + *p++ = (UChar )((code & 0xff00) >> 8); + *p++ = (UChar )((code & 0xff0000) >>16); + *p++ = (UChar )((code & 0xff000000) >>24); + return 4; +} +/* Fold the character at *pp into fold; ASCII is handled here, the rest is delegated. */ +static int +utf32le_mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, UChar* fold, + OnigEncoding enc) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + /* Turkish/Azeri: only 'I' (0x49) folds specially, to U+0131; any other + * ASCII code must still take the lower-case path so fold[0..1] are set. */ + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0 && *p == 0x49) { + *fold++ = 0x31; + *fold++ = 0x01; + } + else { +#endif + *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + *fold++ = 0; +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + } +#endif + + *fold++ = 0; + *fold = 0; + *pp += 4; + return 4; + } + else + return onigenc_unicode_mbc_case_fold(enc, flag, pp, + end, fold); +} + +#if 0 +static int +utf32le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp) += 4; + + if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { + int c, v; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + return TRUE; + } + + c = *p; + v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, + (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; + } + return (v != 0 ?
TRUE : FALSE); + } + + return FALSE; +} +#endif + +static UChar* +utf32le_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, + OnigEncoding enc ARG_UNUSED) +{ + int rem; + + if (s <= start) return (UChar* )s; + + rem = (s - start) % 4; + return (UChar* )(s - rem); +} + +static int +utf32le_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], + OnigEncoding enc) +{ + return onigenc_unicode_get_case_fold_codes_by_str(enc, + flag, p, end, items); +} + +OnigEncodingDefine(utf_32le, UTF_32LE) = { + utf32le_mbc_enc_len, + "UTF-32LE", /* name */ + 4, /* max byte length */ + 4, /* min byte length */ + utf32le_is_mbc_newline, + utf32le_mbc_to_code, + utf32le_code_to_mbclen, + utf32le_code_to_mbc, + utf32le_mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + utf32le_get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + onigenc_utf16_32_get_ctype_code_range, + utf32le_left_adjust_char_head, + onigenc_always_false_is_allowed_reverse_match +}; +ENC_ALIAS("UCS-4LE", "UTF-32LE") diff --git a/enc/utf_7.h b/enc/utf_7.h new file mode 100644 index 0000000..fa9f06b --- /dev/null +++ b/enc/utf_7.h @@ -0,0 +1,5 @@ +#include "regenc.h" +/* dummy for unsupported, statefull encoding */ +ENC_DUMMY("UTF-7"); +ENC_ALIAS("CP65000", "UTF-7"); + diff --git a/enc/utf_8.c b/enc/utf_8.c new file mode 100644 index 0000000..3b9387c --- /dev/null +++ b/enc/utf_8.c @@ -0,0 +1,455 @@ +/********************************************************************** + utf_8.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define USE_INVALID_CODE_SCHEME + +#ifdef USE_INVALID_CODE_SCHEME +/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */ +#define INVALID_CODE_FE 0xfffffffe +#define INVALID_CODE_FF 0xffffffff +#define VALID_CODE_LIMIT 0x7fffffff +#endif + +#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80) + +static const int EncLen_UTF8[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +}; + +typedef enum { + FAILURE = -2, + ACCEPT, + S0, S1, S2, S3, + S4, S5, S6, S7 +} state_t; +#define A ACCEPT +#define F FAILURE +static const signed char trans[][0x100] = { + { /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, F, 
F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* c */ F, F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* e */ 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, + /* f */ 5, 6, 6, 6, 7, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S2 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 
/* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S3 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S4 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* b */ F, F, F, F, F, F, F, F, F, F, F, F, 
F, F, F, F, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S5 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* a */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* b */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S6 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* 9 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* a */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* b */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* c */ F, F, F, F, F, F, F, F, 
F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + }, + { /* S7 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F + }, +}; +#undef A +#undef F + +static int +mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) +{ + int firstbyte = *p++; + state_t s; + s = trans[0][firstbyte]; + if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1) : + ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-1); + s = trans[s][*p++]; + if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2) : + ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-2); + s = trans[s][*p++]; + if (s < 0) return s == ACCEPT ? 
ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(3) : + ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-3); + s = trans[s][*p++]; + return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4) : + ONIGENC_CONSTRUCT_MBCLEN_INVALID(); +} + +static int +is_mbc_newline(const UChar* p, const UChar* end, OnigEncoding enc) +{ + if (p < end) { + if (*p == 0x0a) return 1; + +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS +#ifndef USE_CRNL_AS_LINE_TERMINATOR + if (*p == 0x0d) return 1; +#endif + if (p + 1 < end) { + if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */ + return 1; + if (p + 2 < end) { + if ((*(p+2) == 0xa8 || *(p+2) == 0xa9) + && *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */ + return 1; + } + } +#endif + } + + return 0; +} + +static OnigCodePoint +mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc) +{ + int c, len; + OnigCodePoint n; + + len = enclen(enc, p, end); + c = *p++; + if (len > 1) { + len--; + n = c & ((1 << (6 - len)) - 1); + while (len--) { + c = *p++; + n = (n << 6) | (c & ((1 << 6) - 1)); + } + return n; + } + else { +#ifdef USE_INVALID_CODE_SCHEME + if (c > 0xfd) { + return ((c == 0xfe) ? 
INVALID_CODE_FE : INVALID_CODE_FF); + } +#endif + return (OnigCodePoint )c; + } +} + +static int +code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) +{ + if ((code & 0xffffff80) == 0) return 1; + else if ((code & 0xfffff800) == 0) return 2; + else if ((code & 0xffff0000) == 0) return 3; + else if ((code & 0xffe00000) == 0) return 4; + else if ((code & 0xfc000000) == 0) return 5; + else if ((code & 0x80000000) == 0) return 6; +#ifdef USE_INVALID_CODE_SCHEME + else if (code == INVALID_CODE_FE) return 1; + else if (code == INVALID_CODE_FF) return 1; +#endif + else + return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; +} + +static int +code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED) +{ +#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80) +#define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80) + + if ((code & 0xffffff80) == 0) { + *buf = (UChar )code; + return 1; + } + else { + UChar *p = buf; + + if ((code & 0xfffff800) == 0) { + *p++ = (UChar )(((code>>6)& 0x1f) | 0xc0); + } + else if ((code & 0xffff0000) == 0) { + *p++ = (UChar )(((code>>12) & 0x0f) | 0xe0); + *p++ = UTF8_TRAILS(code, 6); + } + else if ((code & 0xffe00000) == 0) { + *p++ = (UChar )(((code>>18) & 0x07) | 0xf0); + *p++ = UTF8_TRAILS(code, 12); + *p++ = UTF8_TRAILS(code, 6); + } + else if ((code & 0xfc000000) == 0) { + *p++ = (UChar )(((code>>24) & 0x03) | 0xf8); + *p++ = UTF8_TRAILS(code, 18); + *p++ = UTF8_TRAILS(code, 12); + *p++ = UTF8_TRAILS(code, 6); + } + else if ((code & 0x80000000) == 0) { + *p++ = (UChar )(((code>>30) & 0x01) | 0xfc); + *p++ = UTF8_TRAILS(code, 24); + *p++ = UTF8_TRAILS(code, 18); + *p++ = UTF8_TRAILS(code, 12); + *p++ = UTF8_TRAILS(code, 6); + } +#ifdef USE_INVALID_CODE_SCHEME + else if (code == INVALID_CODE_FE) { + *p = 0xfe; + return 1; + } + else if (code == INVALID_CODE_FF) { + *p = 0xff; + return 1; + } +#endif + else { + return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + } + + *p++ = UTF8_TRAIL0(code); + return p - buf; + 
} +} + +static int +mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, + const UChar* end, UChar* fold, OnigEncoding enc) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*p == 0x49) { + *fold++ = 0xc4; + *fold = 0xb1; + (*pp)++; + return 2; + } + } +#endif + + *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + (*pp)++; + return 1; /* return byte length of converted char to lower */ + } + else { + return onigenc_unicode_mbc_case_fold(enc, flag, pp, end, fold); + } +} + + +static int +get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, + const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED) +{ + *sb_out = 0x80; + return onigenc_unicode_ctype_code_range(ctype, ranges); +} + + +static UChar* +left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc ARG_UNUSED) +{ + const UChar *p; + + if (s <= start) return (UChar* )s; + p = s; + + while (!utf8_islead(*p) && p > start) p--; + return (UChar* )p; +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], + OnigEncoding enc) +{ + return onigenc_unicode_get_case_fold_codes_by_str(enc, flag, p, end, items); +} + +OnigEncodingDefine(utf_8, UTF_8) = { + mbc_enc_len, + "UTF-8", /* name */ + 6, /* max byte length */ + 1, /* min byte length */ + is_mbc_newline, + mbc_to_code, + code_to_mbclen, + code_to_mbc, + mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + get_ctype_code_range, + left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +ENC_ALIAS("CP65001", "UTF-8") + +/* + * Name: UTF8-MAC + * Link: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/BPFileSystem.html + * Link: http://developer.apple.com/qa/qa2001/qa1235.html + * 
Link: http://developer.apple.com/jp/qa/qa2001/qa1235.html + */ +ENC_REPLICATE("UTF8-MAC", "UTF-8") +ENC_ALIAS("UTF-8-MAC", "UTF8-MAC") + diff --git a/enc/windows_1251.c b/enc/windows_1251.c new file mode 100644 index 0000000..1cb4da4 --- /dev/null +++ b/enc/windows_1251.c @@ -0,0 +1,208 @@ +/********************************************************************** + cp1251.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2006-2007 Byte + * K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_CP1251_TO_LOWER_CASE(c) EncCP1251_ToLowerCaseTable[c] +#define ENC_IS_CP1251_CTYPE(code,ctype) \ + ((EncCP1251_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncCP1251_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\220', '\203', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\232', '\213', '\234', '\235', '\236', '\237', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\242', '\242', '\274', '\244', '\264', '\246', '\247', + '\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277', + '\260', '\261', '\263', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\276', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', 
'\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncCP1251_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x428c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x34a2, 0x34a2, 0x01a0, 0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, + 0x0000, 0x01a0, 0x34a2, 0x01a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, + 0x0008, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x0280, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0, + 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x34a2, + 0x01a0, 0x01a0, 0x34a2, 0x30e2, 0x30e2, 0x31e2, 0x01a0, 0x01a0, + 0x30e2, 0x0000, 0x30e2, 0x01a0, 0x30e2, 
0x34a2, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +static int +cp1251_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower, OnigEncoding enc ARG_UNUSED) +{ + const UChar* p = *pp; + + *lower = ENC_CP1251_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +static int +cp1251_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED) +{ + if (code < 256) + return ENC_IS_CP1251_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xb8, 0xa8 }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf7, 0xd7 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfd, 0xdd }, + { 0xfe, 0xde }, + { 0xff, 0xdf } +}; + +static int +cp1251_apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc ARG_UNUSED) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} + +static int 
+cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); +} + +OnigEncodingDefine(windows_1251, Windows_1251) = { + onigenc_single_byte_mbc_enc_len, + "Windows-1251", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + cp1251_mbc_case_fold, + cp1251_apply_all_case_fold, + cp1251_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + cp1251_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +/* + * Name: windows-1251 + * MIBenum: 2251 + * Link: http://www.iana.org/assignments/character-sets + * Link: http://www.microsoft.com/globaldev/reference/sbcs/1251.mspx + * Link: http://en.wikipedia.org/wiki/Windows-1251 + */ +ENC_ALIAS("CP1251", "Windows-1251") diff --git a/encoding.c b/encoding.c new file mode 100644 index 0000000..8a2a5e8 --- /dev/null +++ b/encoding.c @@ -0,0 +1,1441 @@ +/********************************************************************** + + encoding.c - + + $Author: yugui $ + created at: Thu May 24 17:23:27 JST 2007 + + Copyright (C) 2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/encoding.h" +#include "regenc.h" +#include +#ifndef NO_LOCALE_CHARMAP +#ifdef __CYGWIN__ +#include +#endif +#ifdef HAVE_LANGINFO_H +#include +#endif +#endif +#include "ruby/util.h" + +static ID id_encoding; +VALUE rb_cEncoding; +static VALUE rb_encoding_list; + +struct rb_encoding_entry { + const char *name; + rb_encoding *enc; + rb_encoding *base; +}; + +static 
struct { + struct rb_encoding_entry *list; + int count; + int size; + st_table *names; +} enc_table; + +void rb_enc_init(void); + +#define ENCODING_COUNT ENCINDEX_BUILTIN_MAX +#define UNSPECIFIED_ENCODING INT_MAX + +#define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc)) + +static int load_encoding(const char *name); + +static void +enc_mark(void *ptr) +{ +} + +static VALUE +enc_new(rb_encoding *encoding) +{ + return Data_Wrap_Struct(rb_cEncoding, enc_mark, 0, encoding); +} + +VALUE +rb_enc_from_encoding(rb_encoding *encoding) +{ + VALUE list, enc; + int idx; + + if (!encoding) return Qnil; + idx = ENC_TO_ENCINDEX(encoding); + if (!(list = rb_encoding_list)) { + rb_bug("rb_enc_from_encoding(%d\"%s\"): no rb_encoding_list", + idx, rb_enc_name(encoding)); + } + enc = rb_ary_entry(list, idx); + if (NIL_P(enc)) { + rb_bug("rb_enc_from_encoding(%d\"%s\"): not created yet", + idx, rb_enc_name(encoding)); + } + return enc; +} + +static int enc_autoload(rb_encoding *); + +static int +check_encoding(rb_encoding *enc) +{ + int index = rb_enc_to_index(enc); + if (rb_enc_from_index(index) != enc) + return -1; + if (enc_autoload_p(enc)) { + index = enc_autoload(enc); + } + return index; +} + +static int +enc_check_encoding(VALUE obj) +{ + if (SPECIAL_CONST_P(obj) || BUILTIN_TYPE(obj) != T_DATA || + RDATA(obj)->dmark != enc_mark) { + return -1; + } + return check_encoding(RDATA(obj)->data); +} + +static int +must_encoding(VALUE enc) +{ + int index = enc_check_encoding(enc); + if (index < 0) { + rb_raise(rb_eTypeError, "wrong argument type %s (expected Encoding)", + rb_obj_classname(enc)); + } + return index; +} + +int +rb_to_encoding_index(VALUE enc) +{ + int idx; + + idx = enc_check_encoding(enc); + if (idx >= 0) { + return idx; + } + else if (NIL_P(enc = rb_check_string_type(enc))) { + return -1; + } + if (!rb_enc_asciicompat(rb_enc_get(enc))) { + return -1; + } + return rb_enc_find_index(StringValueCStr(enc)); +} + +static rb_encoding * +to_encoding(VALUE enc) +{ + int idx; + + 
StringValue(enc); + if (!rb_enc_asciicompat(rb_enc_get(enc))) { + rb_raise(rb_eArgError, "invalid name encoding (non ASCII)"); + } + idx = rb_enc_find_index(StringValueCStr(enc)); + if (idx < 0) { + rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc)); + } + return rb_enc_from_index(idx); +} + +rb_encoding * +rb_to_encoding(VALUE enc) +{ + if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data; + return to_encoding(enc); +} + +void +rb_gc_mark_encodings(void) +{ +} + +static int +enc_table_expand(int newsize) +{ + struct rb_encoding_entry *ent; + int count = newsize; + + if (enc_table.size >= newsize) return newsize; + newsize = (newsize + 7) / 8 * 8; + ent = realloc(enc_table.list, sizeof(*enc_table.list) * newsize); + if (!ent) return -1; + memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size)); + enc_table.list = ent; + enc_table.size = newsize; + return count; +} + +static int +enc_register_at(int index, const char *name, rb_encoding *encoding) +{ + struct rb_encoding_entry *ent = &enc_table.list[index]; + VALUE list; + + if (!ent->name) { + ent->name = name = strdup(name); + } + else if (STRCASECMP(name, ent->name)) { + return -1; + } + if (!ent->enc) { + ent->enc = xmalloc(sizeof(rb_encoding)); + } + if (encoding) { + *ent->enc = *encoding; + } + else { + memset(ent->enc, 0, sizeof(*ent->enc)); + } + encoding = ent->enc; + encoding->name = name; + encoding->ruby_encoding_index = index; + st_insert(enc_table.names, (st_data_t)name, (st_data_t)index); + list = rb_encoding_list; + if (list && NIL_P(rb_ary_entry(list, index))) { + /* initialize encoding data */ + rb_ary_store(list, index, enc_new(encoding)); + } + return index; +} + +static int +enc_register(const char *name, rb_encoding *encoding) +{ + int index = enc_table.count; + + if ((index = enc_table_expand(index + 1)) < 0) return -1; + enc_table.count = index; + return enc_register_at(index - 1, name, encoding); +} + +static void set_encoding_const(const char *, 
rb_encoding *); +int rb_enc_registered(const char *name); + +int +rb_enc_register(const char *name, rb_encoding *encoding) +{ + int index = rb_enc_registered(name); + + if (index >= 0) { + rb_encoding *oldenc = rb_enc_from_index(index); + if (STRCASECMP(name, rb_enc_name(oldenc))) { + index = enc_register(name, encoding); + } + else if (enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) { + enc_register_at(index, name, encoding); + } + else { + rb_raise(rb_eArgError, "encoding %s is already registered", name); + } + } + else { + index = enc_register(name, encoding); + set_encoding_const(name, rb_enc_from_index(index)); + } + return index; +} + +void +rb_encdb_declare(const char *name) +{ + int idx = rb_enc_registered(name); + if (idx < 0) { + idx = enc_register(name, 0); + } + set_encoding_const(name, rb_enc_from_index(idx)); +} + +static void +enc_check_duplication(const char *name) +{ + if (rb_enc_registered(name) >= 0) { + rb_raise(rb_eArgError, "encoding %s is already registered", name); + } +} + +static rb_encoding* +set_base_encoding(int index, rb_encoding *base) +{ + rb_encoding *enc = enc_table.list[index].enc; + + enc_table.list[index].base = base; + if (rb_enc_dummy_p(base)) ENC_SET_DUMMY(enc); + return enc; +} + +int +rb_enc_replicate(const char *name, rb_encoding *encoding) +{ + int idx; + + enc_check_duplication(name); + idx = enc_register(name, encoding); + set_base_encoding(idx, encoding); + set_encoding_const(name, rb_enc_from_index(idx)); + return idx; +} + +static int +enc_replicate(int idx, const char *name, rb_encoding *origenc) +{ + if (idx < 0) { + idx = enc_register(name, origenc); + } + else { + idx = enc_register_at(idx, name, origenc); + } + if (idx >= 0) { + set_base_encoding(idx, origenc); + set_encoding_const(name, rb_enc_from_index(idx)); + } + return idx; +} + +int +rb_encdb_replicate(const char *name, const char *orig) +{ + int origidx = rb_enc_registered(orig); + int idx = rb_enc_registered(name); + + if (origidx < 0) { + origidx = 
enc_register(orig, 0); + } + return enc_replicate(idx, name, rb_enc_from_index(origidx)); +} + +int +rb_define_dummy_encoding(const char *name) +{ + int index = rb_enc_replicate(name, rb_ascii8bit_encoding()); + rb_encoding *enc = enc_table.list[index].enc; + + ENC_SET_DUMMY(enc); + return index; +} + +int +rb_encdb_dummy(const char *name) +{ + int index = enc_replicate(rb_enc_registered(name), name, + rb_ascii8bit_encoding()); + rb_encoding *enc = enc_table.list[index].enc; + + ENC_SET_DUMMY(enc); + return index; +} + +/* + * call-seq: + * enc.dummy? => true or false + * + * Returns true for dummy encodings. + * A dummy encoding is an encoding for which character handling is not properly + * implemented. + * It is used for stateful encodings. + * + * Encoding::ISO_2022_JP.dummy? #=> true + * Encoding::UTF_8.dummy? #=> false + * + */ +static VALUE +enc_dummy_p(VALUE enc) +{ + return ENC_DUMMY_P(enc_table.list[must_encoding(enc)].enc) ? Qtrue : Qfalse; +} + +static const char * +enc_alias_internal(const char *alias, int idx) +{ + alias = strdup(alias); + st_insert(enc_table.names, (st_data_t)alias, (st_data_t)idx); + return alias; +} + +static int +enc_alias(const char *alias, int idx) +{ + alias = enc_alias_internal(alias, idx); + set_encoding_const(alias, rb_enc_from_index(idx)); + return idx; +} + +int +rb_enc_alias(const char *alias, const char *orig) +{ + int idx; + + enc_check_duplication(alias); + if (!enc_table.list) { + rb_enc_init(); + } + if ((idx = rb_enc_find_index(orig)) < 0) { + return -1; + } + return enc_alias(alias, idx); +} + +int +rb_encdb_alias(const char *alias, const char *orig) +{ + int idx = rb_enc_registered(orig); + + if (idx < 0) { + idx = enc_register(orig, 0); + } + return enc_alias(alias, idx); +} + +enum { + ENCINDEX_ASCII, + ENCINDEX_UTF_8, + ENCINDEX_US_ASCII, + ENCINDEX_BUILTIN_MAX +}; + +extern rb_encoding OnigEncodingUTF_8; +extern rb_encoding OnigEncodingUS_ASCII; + +void +rb_enc_init(void) +{ + enc_table_expand(ENCODING_COUNT + 
1); + if (!enc_table.names) { + enc_table.names = st_init_strcasetable(); + } +#define ENC_REGISTER(enc) enc_register_at(ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc) + ENC_REGISTER(ASCII); + ENC_REGISTER(UTF_8); + ENC_REGISTER(US_ASCII); +#undef ENC_REGISTER + enc_table.count = ENCINDEX_BUILTIN_MAX; +} + +rb_encoding * +rb_enc_from_index(int index) +{ + if (!enc_table.list) { + rb_enc_init(); + } + if (index < 0 || enc_table.count <= index) { + return 0; + } + return enc_table.list[index].enc; +} + +int +rb_enc_registered(const char *name) +{ + st_data_t idx = 0; + + if (!name) return -1; + if (!enc_table.list) return -1; + if (st_lookup(enc_table.names, (st_data_t)name, &idx)) { + return (int)idx; + } + return -1; +} + +static VALUE +require_enc(VALUE enclib) +{ + return rb_require_safe(enclib, rb_safe_level()); +} + +static int +load_encoding(const char *name) +{ + VALUE enclib = rb_sprintf("enc/%s.so", name); + VALUE verbose = ruby_verbose; + VALUE debug = ruby_debug; + VALUE loaded; + char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) - 3; + int idx; + + while (s < e) { + if (!ISALNUM(*s)) *s = '_'; + else if (ISUPPER(*s)) *s = TOLOWER(*s); + ++s; + } + OBJ_FREEZE(enclib); + ruby_verbose = Qfalse; + ruby_debug = Qfalse; + loaded = rb_protect(require_enc, enclib, 0); + ruby_verbose = verbose; + ruby_debug = debug; + rb_set_errinfo(Qnil); + if (NIL_P(loaded)) return -1; + if ((idx = rb_enc_registered(name)) < 0) return -1; + if (enc_autoload_p(enc_table.list[idx].enc)) return -1; + return idx; +} + +static int +enc_autoload(rb_encoding *enc) +{ + int i; + rb_encoding *base = enc_table.list[ENC_TO_ENCINDEX(enc)].base; + + if (base) { + i = 0; + do { + if (i >= enc_table.count) return -1; + } while (enc_table.list[i].enc != base && (++i, 1)); + if (enc_autoload_p(base)) { + if (enc_autoload(base) < 0) return -1; + } + i = ENC_TO_ENCINDEX(enc); + enc_register_at(i, rb_enc_name(enc), base); + } + else { + i = 
load_encoding(rb_enc_name(enc)); + } + return i; +} + +int +rb_enc_find_index(const char *name) +{ + int i = rb_enc_registered(name); + rb_encoding *enc; + + if (i < 0) { + i = load_encoding(name); + } + else if (!(enc = rb_enc_from_index(i))) { + if (i != UNSPECIFIED_ENCODING) { + rb_raise(rb_eArgError, "encoding %s is not registered", name); + } + } + else if (enc_autoload_p(enc)) { + if (enc_autoload(enc) < 0) { + rb_warn("failed to load encoding (%s); use ASCII-8BIT instead", + name); + return 0; + } + } + return i; +} + +rb_encoding * +rb_enc_find(const char *name) +{ + int idx = rb_enc_find_index(name); + if (idx < 0) idx = 0; + return rb_enc_from_index(idx); +} + +static inline int +enc_capable(VALUE obj) +{ + if (SPECIAL_CONST_P(obj)) return SYMBOL_P(obj); + switch (BUILTIN_TYPE(obj)) { + case T_STRING: + case T_REGEXP: + case T_FILE: + return Qtrue; + case T_DATA: + if (RDATA(obj)->dmark == enc_mark) return Qtrue; + default: + return Qfalse; + } +} + +ID +rb_id_encoding(void) +{ + CONST_ID(id_encoding, "encoding"); + return id_encoding; +} + +int +rb_enc_get_index(VALUE obj) +{ + int i = -1; + VALUE tmp; + + if (SPECIAL_CONST_P(obj)) { + if (!SYMBOL_P(obj)) return -1; + obj = rb_id2str(SYM2ID(obj)); + } + switch (BUILTIN_TYPE(obj)) { + as_default: + default: + case T_STRING: + case T_REGEXP: + i = ENCODING_GET_INLINED(obj); + if (i == ENCODING_INLINE_MAX) { + VALUE iv; + + iv = rb_ivar_get(obj, rb_id_encoding()); + i = NUM2INT(iv); + } + break; + case T_FILE: + tmp = rb_funcall(obj, rb_intern("internal_encoding"), 0, 0); + if (NIL_P(tmp)) obj = rb_funcall(obj, rb_intern("external_encoding"), 0, 0); + else obj = tmp; + if (NIL_P(obj)) break; + case T_DATA: + if (RDATA(obj)->dmark == enc_mark) { + i = enc_check_encoding(obj); + } + else { + goto as_default; + } + break; + } + return i; +} + +void +rb_enc_set_index(VALUE obj, int idx) +{ + if (idx < ENCODING_INLINE_MAX) { + ENCODING_SET_INLINED(obj, idx); + return; + } + ENCODING_SET_INLINED(obj, 
ENCODING_INLINE_MAX); + rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx)); + return; +} + +VALUE +rb_enc_associate_index(VALUE obj, int idx) +{ +/* enc_check_capable(obj);*/ + if (rb_enc_get_index(obj) == idx) + return obj; + if (SPECIAL_CONST_P(obj)) { + rb_raise(rb_eArgError, "cannot set encoding"); + } + if (!ENC_CODERANGE_ASCIIONLY(obj) || + !rb_enc_asciicompat(rb_enc_from_index(idx))) { + ENC_CODERANGE_CLEAR(obj); + } + rb_enc_set_index(obj, idx); + return obj; +} + +VALUE +rb_enc_associate(VALUE obj, rb_encoding *enc) +{ + return rb_enc_associate_index(obj, rb_enc_to_index(enc)); +} + +rb_encoding* +rb_enc_get(VALUE obj) +{ + return rb_enc_from_index(rb_enc_get_index(obj)); +} + +rb_encoding* +rb_enc_check(VALUE str1, VALUE str2) +{ + rb_encoding *enc = rb_enc_compatible(str1, str2); + if (!enc) + rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s", + rb_enc_name(rb_enc_get(str1)), + rb_enc_name(rb_enc_get(str2))); + return enc; +} + +rb_encoding* +rb_enc_compatible(VALUE str1, VALUE str2) +{ + int idx1, idx2; + rb_encoding *enc1, *enc2; + + idx1 = rb_enc_get_index(str1); + idx2 = rb_enc_get_index(str2); + + if (idx1 < 0 || idx2 < 0) + return 0; + + if (idx1 == idx2) { + return rb_enc_from_index(idx1); + } + enc1 = rb_enc_from_index(idx1); + enc2 = rb_enc_from_index(idx2); + + if (TYPE(str2) == T_STRING && RSTRING_LEN(str2) == 0) + return enc1; + if (TYPE(str1) == T_STRING && RSTRING_LEN(str1) == 0) + return enc2; + if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) { + return 0; + } + + /* objects whose encoding is the same of contents */ + if (BUILTIN_TYPE(str2) != T_STRING && idx2 == ENCINDEX_US_ASCII) + return enc1; + if (BUILTIN_TYPE(str1) != T_STRING && idx1 == ENCINDEX_US_ASCII) + return enc2; + + if (BUILTIN_TYPE(str1) != T_STRING) { + VALUE tmp = str1; + int idx0 = idx1; + str1 = str2; + str2 = tmp; + idx1 = idx2; + idx2 = idx0; + } + if (BUILTIN_TYPE(str1) == T_STRING) { + int cr1, cr2; + + cr1 = 
rb_enc_str_coderange(str1); + if (BUILTIN_TYPE(str2) == T_STRING) { + cr2 = rb_enc_str_coderange(str2); + if (cr1 != cr2) { + /* may need to handle ENC_CODERANGE_BROKEN */ + if (cr1 == ENC_CODERANGE_7BIT) return enc2; + if (cr2 == ENC_CODERANGE_7BIT) return enc1; + } + if (cr2 == ENC_CODERANGE_7BIT) { + if (idx1 == ENCINDEX_ASCII) return enc2; + return enc1; + } + } + if (cr1 == ENC_CODERANGE_7BIT) + return enc2; + } + return 0; +} + +void +rb_enc_copy(VALUE obj1, VALUE obj2) +{ + rb_enc_associate_index(obj1, rb_enc_get_index(obj2)); +} + + +/* + * call-seq: + * obj.encoding => encoding + * + * Returns the Encoding object that represents the encoding of obj. + */ + +VALUE +rb_obj_encoding(VALUE obj) +{ + rb_encoding *enc = rb_enc_get(obj); + if (!enc) { + rb_raise(rb_eTypeError, "unknown encoding"); + } + return rb_enc_from_encoding(enc); +} + +int +rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc) +{ + int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e); + if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p) + return MBCLEN_CHARFOUND_LEN(n); + else { + int min = rb_enc_mbminlen(enc); + return min <= e-p ? 
min : e-p; + } +} + +int +rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc) +{ + int n; + if (e <= p) + return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1); + n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e); + if (e-p < n) + return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n-(e-p)); + return n; +} + +int +rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc) +{ + unsigned int c, l; + if (e <= p) + return -1; + if (rb_enc_asciicompat(enc)) { + c = (unsigned char)*p; + if (!ISASCII(c)) + return -1; + if (len) *len = 1; + return c; + } + l = rb_enc_precise_mbclen(p, e, enc); + if (!MBCLEN_CHARFOUND_P(l)) + return -1; + c = rb_enc_mbc_to_codepoint(p, e, enc); + if (!rb_enc_isascii(c, enc)) + return -1; + if (len) *len = l; + return c; +} + +unsigned int +rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc) +{ + int r; + if (e <= p) + rb_raise(rb_eArgError, "empty string"); + r = rb_enc_precise_mbclen(p, e, enc); + if (MBCLEN_CHARFOUND_P(r)) + return rb_enc_mbc_to_codepoint(p, e, enc); + else + rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc)); +} + +int +rb_enc_codelen(int c, rb_encoding *enc) +{ + int n = ONIGENC_CODE_TO_MBCLEN(enc,c); + if (n == 0) { + rb_raise(rb_eArgError, "invalid codepoint 0x%x in %s", c, rb_enc_name(enc)); + } + return n; +} + +int +rb_enc_toupper(int c, rb_encoding *enc) +{ + return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_UPPER_CASE(c):(c)); +} + +int +rb_enc_tolower(int c, rb_encoding *enc) +{ + return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_LOWER_CASE(c):(c)); +} + +/* + * call-seq: + * enc.inspect => string + * + * Returns a string which represents the encoding for programmers. + * + * Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>" + * Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>" + */ +static VALUE +enc_inspect(VALUE self) +{ + VALUE str = rb_sprintf("#<%s:%s%s>", rb_obj_classname(self), + rb_enc_name((rb_encoding*)DATA_PTR(self)), + (enc_dummy_p(self) ? 
" (dummy)" : "")); + ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT); + return str; +} + +/* + * call-seq: + * enc.name => string + * + * Returns the name of the encoding. + * + * Encoding::UTF_8.name => "UTF-8" + */ +static VALUE +enc_name(VALUE self) +{ + return rb_usascii_str_new2(rb_enc_name((rb_encoding*)DATA_PTR(self))); +} + +static int +enc_names_i(st_data_t name, st_data_t idx, st_data_t args) +{ + VALUE *arg = (VALUE *)args; + + if ((int)idx == (int)arg[0]) { + VALUE str = rb_usascii_str_new2((char *)name); + OBJ_FREEZE(str); + rb_ary_push(arg[1], str); + } + return ST_CONTINUE; +} + +/* + * call-seq: + * enc.names => array + * + * Returns the list of name and aliases of the encoding. + * + * Encoding::WINDOWS_31J.names => ["Windows-31J", "CP932", "csWindows31J"] + */ +static VALUE +enc_names(VALUE self) +{ + VALUE args[2]; + + args[0] = (VALUE)rb_to_encoding_index(self); + args[1] = rb_ary_new2(0); + st_foreach(enc_table.names, enc_names_i, (st_data_t)args); + return args[1]; +} + +/* + * call-seq: + * Encoding.list => [enc1, enc2, ...] + * + * Returns the list of loaded encodings. + * + * Encoding.list + * => [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>, + * #<Encoding:ISO-2022-JP (dummy)>] + * + * Encoding.find("US-ASCII") + * => #<Encoding:US-ASCII> + * + * Encoding.list + * => [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>, + * #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>] + * + */ +static VALUE +enc_list(VALUE klass) +{ + VALUE ary = rb_ary_new2(0); + rb_ary_replace(ary, rb_encoding_list); + return ary; +} + +/* + * call-seq: + * Encoding.find(string) => enc + * Encoding.find(symbol) => enc + * + * Searches for the encoding with the specified name. + * name should be a string or symbol. + * + * Encoding.find("US-ASCII") => #<Encoding:US-ASCII> + * Encoding.find(:Shift_JIS) => #<Encoding:Shift_JIS> + * + * An ArgumentError is raised when there is no encoding with name. + * Only +Encoding.find("internal")+ however returns nil when no encoding named "internal", + * in other words, when Ruby has no default internal encoding. 
+ */ +static VALUE +enc_find(VALUE klass, VALUE enc) +{ + return rb_enc_from_encoding(to_encoding(enc)); +} + +/* + * call-seq: + * Encoding.compatible?(str1, str2) => enc or nil + * + * Checks the compatibility of two strings. + * If they are compatible, meaning they can be concatenated, + * returns the encoding that the concatenated string will have. + * If they are not compatible, nil is returned. + * + * Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b") + * => #<Encoding:ISO-8859-1> + * + * Encoding.compatible?( + * "\xa1".force_encoding("iso-8859-1"), + * "\xa1\xa1".force_encoding("euc-jp")) + * => nil + * + */ +static VALUE +enc_compatible_p(VALUE klass, VALUE str1, VALUE str2) +{ + rb_encoding *enc; + + if (!enc_capable(str1)) return Qnil; + if (!enc_capable(str2)) return Qnil; + enc = rb_enc_compatible(str1, str2); + if (!enc) return Qnil; + return rb_enc_from_encoding(enc); +} + +/* :nodoc: */ +static VALUE +enc_dump(int argc, VALUE *argv, VALUE self) +{ + rb_scan_args(argc, argv, "01", 0); + return enc_name(self); +} + +/* :nodoc: */ +static VALUE +enc_load(VALUE klass, VALUE str) +{ + return enc_find(klass, str); +} + +rb_encoding * +rb_ascii8bit_encoding(void) +{ + if (!enc_table.list) { + rb_enc_init(); + } + return enc_table.list[ENCINDEX_ASCII].enc; +} + +int +rb_ascii8bit_encindex(void) +{ + return ENCINDEX_ASCII; +} + +rb_encoding * +rb_utf8_encoding(void) +{ + if (!enc_table.list) { + rb_enc_init(); + } + return enc_table.list[ENCINDEX_UTF_8].enc; +} + +int +rb_utf8_encindex(void) +{ + return ENCINDEX_UTF_8; +} + +rb_encoding * +rb_usascii_encoding(void) +{ + if (!enc_table.list) { + rb_enc_init(); + } + return enc_table.list[ENCINDEX_US_ASCII].enc; +} + +int +rb_usascii_encindex(void) +{ + return ENCINDEX_US_ASCII; +} + +rb_encoding * +rb_locale_encoding(void) +{ + VALUE charmap = rb_locale_charmap(rb_cEncoding); + int idx; + + if (NIL_P(charmap)) + idx = rb_usascii_encindex(); + else if ((idx = rb_enc_find_index(StringValueCStr(charmap))) < 0) + idx = 
rb_ascii8bit_encindex(); + + if (rb_enc_registered("locale") < 0) enc_alias_internal("locale", idx); + + return rb_enc_from_index(idx); +} + +rb_encoding * +rb_filesystem_encoding(void) +{ + rb_encoding *enc; +#if defined NO_LOCALE_CHARMAP + enc = rb_default_external_encoding(); +#elif defined _WIN32 || defined __CYGWIN__ + char cp[sizeof(int) * 8 / 3 + 4]; + snprintf(cp, sizeof cp, "CP%d", AreFileApisANSI() ? GetACP() : GetOEMCP()); + enc = rb_enc_find(cp); +#elif defined __APPLE__ + enc = rb_enc_find("UTF8-MAC"); +#else + enc = rb_default_external_encoding(); +#endif + return enc; +} + +struct default_encoding { + int index; /* -2 => not yet set, -1 => nil */ + rb_encoding *enc; +}; + +static int +enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name) +{ + int overridden = Qfalse; + if (def->index != -2) + /* Already set */ + overridden = Qtrue; + + if (NIL_P(encoding)) { + def->index = -1; + def->enc = 0; + st_insert(enc_table.names, (st_data_t)strdup(name), + (st_data_t)UNSPECIFIED_ENCODING); + } + else { + def->index = rb_enc_to_index(rb_to_encoding(encoding)); + def->enc = 0; + enc_alias_internal(name, def->index); + } + + return overridden; +} + +static struct default_encoding default_external = {0}; + +rb_encoding * +rb_default_external_encoding(void) +{ + if (default_external.enc) return default_external.enc; + + if (default_external.index >= 0) { + default_external.enc = rb_enc_from_index(default_external.index); + return default_external.enc; + } + else { + return rb_locale_encoding(); + } +} + +VALUE +rb_enc_default_external(void) +{ + return rb_enc_from_encoding(rb_default_external_encoding()); +} + +/* + * call-seq: + * Encoding.default_external => enc + * + * Returns default external encoding. + * + * It is initialized by the locale or -E option. 
+ */ +static VALUE +get_default_external(VALUE klass) +{ + return rb_enc_default_external(); +} + +void +rb_enc_set_default_external(VALUE encoding) +{ + if (NIL_P(encoding)) { + rb_raise(rb_eArgError, "default external can not be nil"); + } + enc_set_default_encoding(&default_external, encoding, + "external"); +} + +/* + * call-seq: + * Encoding.default_external = enc + * + * Sets default external encoding. + */ +static VALUE +set_default_external(VALUE klass, VALUE encoding) +{ + rb_warning("setting Encoding.default_external"); + rb_enc_set_default_external(encoding); + return encoding; +} + +static struct default_encoding default_internal = {-2}; + +rb_encoding * +rb_default_internal_encoding(void) +{ + if (!default_internal.enc && default_internal.index >= 0) { + default_internal.enc = rb_enc_from_index(default_internal.index); + } + return default_internal.enc; /* can be NULL */ +} + +VALUE +rb_enc_default_internal(void) +{ + /* Note: These functions cope with default_internal not being set */ + return rb_enc_from_encoding(rb_default_internal_encoding()); +} + +/* + * call-seq: + * Encoding.default_internal => enc + * + * Returns default internal encoding. + * + * It is initialized by the source internal_encoding or -E option. + */ +static VALUE +get_default_internal(VALUE klass) +{ + return rb_enc_default_internal(); +} + +void +rb_enc_set_default_internal(VALUE encoding) +{ + enc_set_default_encoding(&default_internal, encoding, + "internal"); +} + +/* + * call-seq: + * Encoding.default_internal = enc or nil + * + * Sets default internal encoding. + * Or removes default internal encoding when passed nil. + */ +static VALUE +set_default_internal(VALUE klass, VALUE encoding) +{ + rb_warning("setting Encoding.default_internal"); + rb_enc_set_default_internal(encoding); + return encoding; +} + +/* + * call-seq: + * Encoding.locale_charmap => string + * + * Returns the locale charmap name. 
+ * + * Debian GNU/Linux + * LANG=C + * Encoding.locale_charmap => "ANSI_X3.4-1968" + * LANG=ja_JP.EUC-JP + * Encoding.locale_charmap => "EUC-JP" + * + * SunOS 5 + * LANG=C + * Encoding.locale_charmap => "646" + * LANG=ja + * Encoding.locale_charmap => "eucJP" + * + * The result is highly platform dependent. + * So Encoding.find(Encoding.locale_charmap) may cause an error. + * If you need some encoding object even for an unknown locale, + * Encoding.find("locale") can be used. + * + */ +VALUE +rb_locale_charmap(VALUE klass) +{ +#if defined NO_LOCALE_CHARMAP + return rb_usascii_str_new2("ASCII-8BIT"); +#elif defined _WIN32 || defined __CYGWIN__ + const char *nl_langinfo_codeset(void); + const char *codeset = nl_langinfo_codeset(); + char cp[sizeof(int) * 3 + 4]; + if (!codeset) { + snprintf(cp, sizeof(cp), "CP%d", GetConsoleCP()); + codeset = cp; + } + return rb_usascii_str_new2(codeset); +#elif defined HAVE_LANGINFO_H + char *codeset; + codeset = nl_langinfo(CODESET); + return rb_usascii_str_new2(codeset); +#else + return Qnil; +#endif +} + +static void +set_encoding_const(const char *name, rb_encoding *enc) +{ + VALUE encoding = rb_enc_from_encoding(enc); + char *s = (char *)name; + int haslower = 0, hasupper = 0, valid = 0; + + if (ISDIGIT(*s)) return; + if (ISUPPER(*s)) { + hasupper = 1; + while (*++s && (ISALNUM(*s) || *s == '_')) { + if (ISLOWER(*s)) haslower = 1; + } + } + if (!*s) { + valid = 1; + rb_define_const(rb_cEncoding, name, encoding); + } + if (!valid || haslower) { + int len = strlen(name) + 1; + if (!haslower || !hasupper) { + do { + if (ISLOWER(*s)) haslower = 1; + if (ISUPPER(*s)) hasupper = 1; + } while (*++s && (!haslower || !hasupper)); + } + MEMCPY(s = ALLOCA_N(char, len), name, char, len); + name = s; + if (!valid) { + if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s); + for (; *s; ++s) { + if (!ISALNUM(*s)) *s = '_'; + } + if (hasupper) { + rb_define_const(rb_cEncoding, name, encoding); + } + } + if (haslower) { + for (s = (char 
*)name; *s; ++s) { + if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s); + } + rb_define_const(rb_cEncoding, name, encoding); + } + } +} + +static int +rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg) +{ + VALUE ary = (VALUE)arg; + VALUE str = rb_usascii_str_new2((char *)name); + OBJ_FREEZE(str); + rb_ary_push(ary, str); + return ST_CONTINUE; +} + +/* + * call-seq: + * Encoding.name_list => ["enc1", "enc2", ...] + * + * Returns the list of available encoding names. + * + * Encoding.name_list + * => ["US-ASCII", "ASCII-8BIT", "UTF-8", + * "ISO-8859-1", "Shift_JIS", "EUC-JP", + * "Windows-31J", + * "BINARY", "CP932", "eucJP"] + * + */ + +static VALUE +rb_enc_name_list(VALUE klass) +{ + VALUE ary = rb_ary_new2(enc_table.names->num_entries); + st_foreach(enc_table.names, rb_enc_name_list_i, (st_data_t)ary); + return ary; +} + +static int +rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg) +{ + VALUE *p = (VALUE *)arg; + VALUE aliases = p[0], ary = p[1]; + int idx = (int)orig; + VALUE key, str = rb_ary_entry(ary, idx); + + if (NIL_P(str)) { + rb_encoding *enc = rb_enc_from_index(idx); + + if (!enc) return ST_CONTINUE; + if (STRCASECMP((char*)name, rb_enc_name(enc)) == 0) { + return ST_CONTINUE; + } + str = rb_usascii_str_new2(rb_enc_name(enc)); + OBJ_FREEZE(str); + rb_ary_store(ary, idx, str); + } + key = rb_usascii_str_new2((char *)name); + OBJ_FREEZE(key); + rb_hash_aset(aliases, key, str); + return ST_CONTINUE; +} + +/* + * call-seq: + * Encoding.aliases => {"alias1" => "orig1", "alias2" => "orig2", ...} + * + * Returns the hash of available encoding alias and original encoding name. 
+ * + * Encoding.aliases + * => {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1986"=>"US-ASCII", + * "SJIS"=>"Shift_JIS", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"} + * + */ + +static VALUE +rb_enc_aliases(VALUE klass) +{ + VALUE aliases[2]; + aliases[0] = rb_hash_new(); + aliases[1] = rb_ary_new(); + st_foreach(enc_table.names, rb_enc_aliases_enc_i, (st_data_t)aliases); + return aliases[0]; +} + +void +Init_Encoding(void) +{ +#undef rb_intern +#define rb_intern(str) rb_intern_const(str) + VALUE list; + int i; + + rb_cEncoding = rb_define_class("Encoding", rb_cObject); + rb_undef_alloc_func(rb_cEncoding); + rb_define_method(rb_cEncoding, "to_s", enc_name, 0); + rb_define_method(rb_cEncoding, "inspect", enc_inspect, 0); + rb_define_method(rb_cEncoding, "name", enc_name, 0); + rb_define_method(rb_cEncoding, "names", enc_names, 0); + rb_define_method(rb_cEncoding, "dummy?", enc_dummy_p, 0); + rb_define_singleton_method(rb_cEncoding, "list", enc_list, 0); + rb_define_singleton_method(rb_cEncoding, "name_list", rb_enc_name_list, 0); + rb_define_singleton_method(rb_cEncoding, "aliases", rb_enc_aliases, 0); + rb_define_singleton_method(rb_cEncoding, "find", enc_find, 1); + rb_define_singleton_method(rb_cEncoding, "compatible?", enc_compatible_p, 2); + + rb_define_method(rb_cEncoding, "_dump", enc_dump, -1); + rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1); + + rb_define_singleton_method(rb_cEncoding, "default_external", get_default_external, 0); + rb_define_singleton_method(rb_cEncoding, "default_external=", set_default_external, 1); + rb_define_singleton_method(rb_cEncoding, "default_internal", get_default_internal, 0); + rb_define_singleton_method(rb_cEncoding, "default_internal=", set_default_internal, 1); + rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0); + + list = rb_ary_new2(enc_table.count); + RBASIC(list)->klass = 0; + rb_encoding_list = list; + rb_gc_register_mark_object(list); + + for (i = 0; i < 
enc_table.count; ++i) { + rb_ary_push(list, enc_new(enc_table.list[i].enc)); + } +} + +/* locale insensitive functions */ + +#define ctype_test(c, ctype) \ + (rb_isascii(c) && ONIGENC_IS_ASCII_CODE_CTYPE((c), ctype)) + +int rb_isalnum(int c) { return ctype_test(c, ONIGENC_CTYPE_ALNUM); } +int rb_isalpha(int c) { return ctype_test(c, ONIGENC_CTYPE_ALPHA); } +int rb_isblank(int c) { return ctype_test(c, ONIGENC_CTYPE_BLANK); } +int rb_iscntrl(int c) { return ctype_test(c, ONIGENC_CTYPE_CNTRL); } +int rb_isdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_DIGIT); } +int rb_isgraph(int c) { return ctype_test(c, ONIGENC_CTYPE_GRAPH); } +int rb_islower(int c) { return ctype_test(c, ONIGENC_CTYPE_LOWER); } +int rb_isprint(int c) { return ctype_test(c, ONIGENC_CTYPE_PRINT); } +int rb_ispunct(int c) { return ctype_test(c, ONIGENC_CTYPE_PUNCT); } +int rb_isspace(int c) { return ctype_test(c, ONIGENC_CTYPE_SPACE); } +int rb_isupper(int c) { return ctype_test(c, ONIGENC_CTYPE_UPPER); } +int rb_isxdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_XDIGIT); } + +int +rb_tolower(int c) +{ + return rb_isascii(c) ? ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) : c; +} + +int +rb_toupper(int c) +{ + return rb_isascii(c) ? 
ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) : c; +} + diff --git a/enum.c b/enum.c new file mode 100644 index 0000000..1021777 --- /dev/null +++ b/enum.c @@ -0,0 +1,1870 @@ +/********************************************************************** + + enum.c - + + $Author: yugui $ + created at: Fri Oct 1 15:15:19 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/util.h" +#include "node.h" + +VALUE rb_mEnumerable; +static ID id_each, id_eqq, id_cmp, id_next, id_size; + +static VALUE +enum_values_pack(int argc, VALUE *argv) +{ + if (argc == 0) return Qnil; + if (argc == 1) return argv[0]; + return rb_ary_new4(argc, argv); +} + +#define ENUM_WANT_SVALUE() do { \ + i = enum_values_pack(argc, argv); \ +} while (0) + +#define enum_yield rb_yield_values2 + +static VALUE +grep_i(VALUE i, VALUE *arg, int argc, VALUE *argv) +{ + ENUM_WANT_SVALUE(); + + if (RTEST(rb_funcall(arg[0], id_eqq, 1, i))) { + rb_ary_push(arg[1], i); + } + return Qnil; +} + +static VALUE +grep_iter_i(VALUE i, VALUE *arg, int argc, VALUE *argv) +{ + ENUM_WANT_SVALUE(); + + if (RTEST(rb_funcall(arg[0], id_eqq, 1, i))) { + rb_ary_push(arg[1], rb_yield(i)); + } + return Qnil; +} + +/* + * call-seq: + * enum.grep(pattern) => array + * enum.grep(pattern) {| obj | block } => array + * + * Returns an array of every element in enum for which + * Pattern === element. If the optional block is + * supplied, each matching element is passed to it, and the block's + * result is stored in the output array. + * + * (1..100).grep 38..44 #=> [38, 39, 40, 41, 42, 43, 44] + * c = IO.constants + * c.grep(/SEEK/) #=> [:SEEK_SET, :SEEK_CUR, :SEEK_END] + * res = c.grep(/SEEK/) {|v| IO.const_get(v) } + * res #=> [0, 1, 2] + * + */ + +static VALUE +enum_grep(VALUE obj, VALUE pat) +{ + VALUE ary = rb_ary_new(); + VALUE arg[2]; + + arg[0] = pat; + arg[1] = ary; + + rb_block_call(obj, id_each, 0, 0, rb_block_given_p() ? 
grep_iter_i : grep_i, (VALUE)arg); + + return ary; +} + +static VALUE +count_i(VALUE i, VALUE memop, int argc, VALUE *argv) +{ + VALUE *memo = (VALUE*)memop; + + ENUM_WANT_SVALUE(); + + if (rb_equal(i, memo[1])) { + memo[0]++; + } + return Qnil; +} + +static VALUE +count_iter_i(VALUE i, VALUE memop, int argc, VALUE *argv) +{ + VALUE *memo = (VALUE*)memop; + + if (RTEST(enum_yield(argc, argv))) { + memo[0]++; + } + return Qnil; +} + +static VALUE +count_all_i(VALUE i, VALUE memop, int argc, VALUE *argv) +{ + VALUE *memo = (VALUE*)memop; + + memo[0]++; + return Qnil; +} + +/* + * call-seq: + * enum.count => int + * enum.count(item) => int + * enum.count {| obj | block } => int + * + * Returns the number of items in enum, where #size is called + * if it responds to it, otherwise the items are counted through + * enumeration. If an argument is given, counts the number of items + * in enum that are equal to item. If a block is + * given, counts the number of elements yielding a true value. 
+ * + * ary = [1, 2, 4, 2] + * ary.count # => 4 + * ary.count(2) # => 2 + * ary.count{|x|x%2==0} # => 3 + * + */ + +static VALUE +enum_count(int argc, VALUE *argv, VALUE obj) +{ + VALUE memo[2]; /* [count, condition value] */ + rb_block_call_func *func; + + if (argc == 0) { + if (rb_block_given_p()) { + func = count_iter_i; + } + else { + if (rb_respond_to(obj, id_size)) { + return rb_funcall(obj, id_size, 0, 0); + } + func = count_all_i; + } + } + else { + rb_scan_args(argc, argv, "1", &memo[1]); + if (rb_block_given_p()) { + rb_warn("given block not used"); + } + func = count_i; + } + + memo[0] = 0; + rb_block_call(obj, id_each, 0, 0, func, (VALUE)&memo); + return INT2NUM(memo[0]); +} + +static VALUE +find_i(VALUE i, VALUE *memo, int argc, VALUE *argv) +{ + ENUM_WANT_SVALUE(); + + if (RTEST(rb_yield(i))) { + *memo = i; + rb_iter_break(); + } + return Qnil; +} + +/* + * call-seq: + * enum.detect(ifnone = nil) {| obj | block } => obj or nil + * enum.find(ifnone = nil) {| obj | block } => obj or nil + * + * Passes each entry in enum to block. Returns the + * first for which block is not false. 
If no + * object matches, calls ifnone and returns its result when it + * is specified, or returns nil + * + * (1..10).detect {|i| i % 5 == 0 and i % 7 == 0 } #=> nil + * (1..100).detect {|i| i % 5 == 0 and i % 7 == 0 } #=> 35 + * + */ + +static VALUE +enum_find(int argc, VALUE *argv, VALUE obj) +{ + VALUE memo = Qundef; + VALUE if_none; + + rb_scan_args(argc, argv, "01", &if_none); + RETURN_ENUMERATOR(obj, argc, argv); + rb_block_call(obj, id_each, 0, 0, find_i, (VALUE)&memo); + if (memo != Qundef) { + return memo; + } + if (!NIL_P(if_none)) { + return rb_funcall(if_none, rb_intern("call"), 0, 0); + } + return Qnil; +} + +static VALUE +find_index_i(VALUE i, VALUE memop, int argc, VALUE *argv) +{ + VALUE *memo = (VALUE*)memop; + + ENUM_WANT_SVALUE(); + + if (rb_equal(i, memo[2])) { + memo[0] = UINT2NUM(memo[1]); + rb_iter_break(); + } + memo[1]++; + return Qnil; +} + +static VALUE +find_index_iter_i(VALUE i, VALUE memop, int argc, VALUE *argv) +{ + VALUE *memo = (VALUE*)memop; + + if (RTEST(enum_yield(argc, argv))) { + memo[0] = UINT2NUM(memo[1]); + rb_iter_break(); + } + memo[1]++; + return Qnil; +} + +/* + * call-seq: + * enum.find_index(value) => int or nil + * enum.find_index {| obj | block } => int or nil + * + * Compares each entry in enum with value or passes + * to block. Returns the index for the first for which the + * evaluated value is non-false. 
If no object matches, returns + * nil + * + * (1..10).find_index {|i| i % 5 == 0 and i % 7 == 0 } #=> nil + * (1..100).find_index {|i| i % 5 == 0 and i % 7 == 0 } #=> 34 + * (1..100).find_index(50) #=> 49 + * + */ + +static VALUE +enum_find_index(int argc, VALUE *argv, VALUE obj) +{ + VALUE memo[3]; /* [return value, current index, condition value] */ + rb_block_call_func *func; + + if (argc == 0) { + RETURN_ENUMERATOR(obj, 0, 0); + func = find_index_iter_i; + } + else { + rb_scan_args(argc, argv, "1", &memo[2]); + if (rb_block_given_p()) { + rb_warn("given block not used"); + } + func = find_index_i; + } + + memo[0] = Qnil; + memo[1] = 0; + rb_block_call(obj, id_each, 0, 0, func, (VALUE)memo); + return memo[0]; +} + +static VALUE +find_all_i(VALUE i, VALUE ary, int argc, VALUE *argv) +{ + ENUM_WANT_SVALUE(); + + if (RTEST(rb_yield(i))) { + rb_ary_push(ary, i); + } + return Qnil; +} + +/* + * call-seq: + * enum.find_all {| obj | block } => array + * enum.select {| obj | block } => array + * + * Returns an array containing all elements of enum for which + * block is not false (see also + * Enumerable#reject). + * + * (1..10).find_all {|i| i % 3 == 0 } #=> [3, 6, 9] + * + */ + +static VALUE +enum_find_all(VALUE obj) +{ + VALUE ary; + + RETURN_ENUMERATOR(obj, 0, 0); + + ary = rb_ary_new(); + rb_block_call(obj, id_each, 0, 0, find_all_i, ary); + + return ary; +} + +static VALUE +reject_i(VALUE i, VALUE ary, int argc, VALUE *argv) +{ + ENUM_WANT_SVALUE(); + + if (!RTEST(rb_yield(i))) { + rb_ary_push(ary, i); + } + return Qnil; +} + +/* + * call-seq: + * enum.reject {| obj | block } => array + * + * Returns an array for all elements of enum for which + * block is false (see also Enumerable#find_all). 
+ * + * (1..10).reject {|i| i % 3 == 0 } #=> [1, 2, 4, 5, 7, 8, 10] + * + */ + +static VALUE +enum_reject(VALUE obj) +{ + VALUE ary; + + RETURN_ENUMERATOR(obj, 0, 0); + + ary = rb_ary_new(); + rb_block_call(obj, id_each, 0, 0, reject_i, ary); + + return ary; +} + +static VALUE +collect_i(VALUE i, VALUE ary, int argc, VALUE *argv) +{ + rb_ary_push(ary, enum_yield(argc, argv)); + + return Qnil; +} + +static VALUE +collect_all(VALUE i, VALUE ary, int argc, VALUE *argv) +{ + rb_ary_push(ary, enum_values_pack(argc, argv)); + + return Qnil; +} + +/* + * call-seq: + * enum.collect {| obj | block } => array + * enum.map {| obj | block } => array + * + * Returns a new array with the results of running block once + * for every element in enum. + * + * (1..4).collect {|i| i*i } #=> [1, 4, 9, 16] + * (1..4).collect { "cat" } #=> ["cat", "cat", "cat", "cat"] + * + */ + +static VALUE +enum_collect(VALUE obj) +{ + VALUE ary; + + RETURN_ENUMERATOR(obj, 0, 0); + + ary = rb_ary_new(); + rb_block_call(obj, id_each, 0, 0, collect_i, ary); + + return ary; +} + +/* + * call-seq: + * enum.to_a => array + * enum.entries => array + * + * Returns an array containing the items in enum. 
+ * + * (1..7).to_a #=> [1, 2, 3, 4, 5, 6, 7] + * { 'a'=>1, 'b'=>2, 'c'=>3 }.to_a #=> [["a", 1], ["b", 2], ["c", 3]] + */ +static VALUE +enum_to_a(int argc, VALUE *argv, VALUE obj) +{ + VALUE ary = rb_ary_new(); + + rb_block_call(obj, id_each, argc, argv, collect_all, ary); + OBJ_INFECT(ary, obj); + + return ary; +} + +static VALUE +inject_i(VALUE i, VALUE p, int argc, VALUE *argv) +{ + VALUE *memo = (VALUE *)p; + + ENUM_WANT_SVALUE(); + + if (memo[0] == Qundef) { + memo[0] = i; + } + else { + memo[0] = rb_yield_values(2, memo[0], i); + } + return Qnil; +} + +static VALUE +inject_op_i(VALUE i, VALUE p, int argc, VALUE *argv) +{ + VALUE *memo = (VALUE *)p; + + ENUM_WANT_SVALUE(); + + if (memo[0] == Qundef) { + memo[0] = i; + } + else { + memo[0] = rb_funcall(memo[0], (ID)memo[1], 1, i); + } + return Qnil; +} + +/* + * call-seq: + * enum.inject(initial, sym) => obj + * enum.inject(sym) => obj + * enum.inject(initial) {| memo, obj | block } => obj + * enum.inject {| memo, obj | block } => obj + * + * enum.reduce(initial, sym) => obj + * enum.reduce(sym) => obj + * enum.reduce(initial) {| memo, obj | block } => obj + * enum.reduce {| memo, obj | block } => obj + * + * Combines all elements of enum by applying a binary + * operation, specified by a block or a symbol that names a + * method or operator. + * + * If you specify a block, then for each element in enum + * the block is passed an accumulator value (memo) and the element. + * If you specify a symbol instead, then each element in the collection + * will be passed to the named method of memo. + * In either case, the result becomes the new value for memo. + * At the end of the iteration, the final value of memo is the + * return value of the method. + * + * If you do not explicitly specify an initial value for memo, + * then the first element of the collection is used as the initial value + * of memo. 
+ * + * Examples: + * + * # Sum some numbers + * (5..10).reduce(:+) #=> 45 + * # Same using a block and inject + * (5..10).inject {|sum, n| sum + n } #=> 45 + * # Multiply some numbers + * (5..10).reduce(1, :*) #=> 151200 + * # Same using a block + * (5..10).inject(1) {|product, n| product * n } #=> 151200 + * # find the longest word + * longest = %w{ cat sheep bear }.inject do |memo,word| + * memo.length > word.length ? memo : word + * end + * longest #=> "sheep" + * + */ +static VALUE +enum_inject(int argc, VALUE *argv, VALUE obj) +{ + VALUE memo[2]; + VALUE (*iter)(VALUE, VALUE, int, VALUE*) = inject_i; + + switch (rb_scan_args(argc, argv, "02", &memo[0], &memo[1])) { + case 0: + memo[0] = Qundef; + break; + case 1: + if (rb_block_given_p()) { + break; + } + memo[1] = (VALUE)rb_to_id(memo[0]); + memo[0] = Qundef; + iter = inject_op_i; + break; + case 2: + if (rb_block_given_p()) { + rb_warning("given block not used"); + } + memo[1] = (VALUE)rb_to_id(memo[1]); + iter = inject_op_i; + break; + } + rb_block_call(obj, id_each, 0, 0, iter, (VALUE)memo); + if (memo[0] == Qundef) return Qnil; + return memo[0]; +} + +static VALUE +partition_i(VALUE i, VALUE *ary, int argc, VALUE *argv) +{ + ENUM_WANT_SVALUE(); + + if (RTEST(rb_yield(i))) { + rb_ary_push(ary[0], i); + } + else { + rb_ary_push(ary[1], i); + } + return Qnil; +} + +/* + * call-seq: + * enum.partition {| obj | block } => [ true_array, false_array ] + * + * Returns two arrays, the first containing the elements of + * enum for which the block evaluates to true, the second + * containing the rest. 
+ * + * (1..6).partition {|i| (i&1).zero?} #=> [[2, 4, 6], [1, 3, 5]] + * + */ + +static VALUE +enum_partition(VALUE obj) +{ + VALUE ary[2]; + + RETURN_ENUMERATOR(obj, 0, 0); + + ary[0] = rb_ary_new(); + ary[1] = rb_ary_new(); + rb_block_call(obj, id_each, 0, 0, partition_i, (VALUE)ary); + + return rb_assoc_new(ary[0], ary[1]); +} + +static VALUE +group_by_i(VALUE i, VALUE hash, int argc, VALUE *argv) +{ + VALUE group; + VALUE values; + + ENUM_WANT_SVALUE(); + + group = rb_yield(i); + values = rb_hash_aref(hash, group); + if (NIL_P(values)) { + values = rb_ary_new3(1, i); + rb_hash_aset(hash, group, values); + } + else { + rb_ary_push(values, i); + } + return Qnil; +} + +/* + * call-seq: + * enum.group_by {| obj | block } => a_hash + * + * Returns a hash, which keys are evaluated result from the + * block, and values are arrays of elements in enum + * corresponding to the key. + * + * (1..6).group_by {|i| i%3} #=> {0=>[3, 6], 1=>[1, 4], 2=>[2, 5]} + * + */ + +static VALUE +enum_group_by(VALUE obj) +{ + VALUE hash; + + RETURN_ENUMERATOR(obj, 0, 0); + + hash = rb_hash_new(); + rb_block_call(obj, id_each, 0, 0, group_by_i, hash); + OBJ_INFECT(hash, obj); + + return hash; +} + +static VALUE +first_i(VALUE i, VALUE *params, int argc, VALUE *argv) +{ + ENUM_WANT_SVALUE(); + + if (NIL_P(params[1])) { + params[1] = i; + rb_iter_break(); + } + else { + long n = params[0]; + + rb_ary_push(params[1], i); + n--; + if (n <= 0) { + rb_iter_break(); + } + params[0] = n; + } + return Qnil; +} + +/* + * call-seq: + * enum.first -> obj or nil + * enum.first(n) -> an_array + * + * Returns the first element, or the first +n+ elements, of the enumerable. + * If the enumerable is empty, the first form returns nil, and the + * second form returns an empty array. 
+ * + */ + +static VALUE +enum_first(int argc, VALUE *argv, VALUE obj) +{ + VALUE n, params[2]; + + if (argc == 0) { + params[0] = params[1] = Qnil; + } + else { + long len; + + rb_scan_args(argc, argv, "01", &n); + len = NUM2LONG(n); + if (len == 0) return rb_ary_new2(0); + params[0] = len; + params[1] = rb_ary_new2(len); + } + rb_block_call(obj, id_each, 0, 0, first_i, (VALUE)params); + + return params[1]; +} + + +/* + * call-seq: + * enum.sort => array + * enum.sort {| a, b | block } => array + * + * Returns an array containing the items in enum sorted, + * either according to their own <=> method, or by using + * the results of the supplied block. The block should return -1, 0, or + * +1 depending on the comparison between a and b. As of + * Ruby 1.8, the method Enumerable#sort_by implements a + * built-in Schwartzian Transform, useful when key computation or + * comparison is expensive.. + * + * %w(rhea kea flea).sort #=> ["flea", "kea", "rhea"] + * (1..10).sort {|a,b| b <=> a} #=> [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] + */ + +static VALUE +enum_sort(VALUE obj) +{ + return rb_ary_sort(enum_to_a(0, 0, obj)); +} + +static VALUE +sort_by_i(VALUE i, VALUE ary, int argc, VALUE *argv) +{ + NODE *memo; + + ENUM_WANT_SVALUE(); + + if (RBASIC(ary)->klass) { + rb_raise(rb_eRuntimeError, "sort_by reentered"); + } + /* use NODE_DOT2 as memo(v, v, -) */ + memo = rb_node_newnode(NODE_DOT2, rb_yield(i), i, 0); + rb_ary_push(ary, (VALUE)memo); + return Qnil; +} + +static int +sort_by_cmp(const void *ap, const void *bp, void *data) +{ + VALUE a = (*(NODE *const *)ap)->u1.value; + VALUE b = (*(NODE *const *)bp)->u1.value; + VALUE ary = (VALUE)data; + + if (RBASIC(ary)->klass) { + rb_raise(rb_eRuntimeError, "sort_by reentered"); + } + return rb_cmpint(rb_funcall(a, id_cmp, 1, b), a, b); +} + +/* + * call-seq: + * enum.sort_by {| obj | block } => array + * + * Sorts enum using a set of keys generated by mapping the + * values in enum through the given block. 
+ *
+ *     %w{ apple pear fig }.sort_by {|word| word.length}
+ *                  #=> ["fig", "pear", "apple"]
+ *
+ *  The current implementation of sort_by generates an
+ *  array of tuples containing the original collection element and the
+ *  mapped value. This makes sort_by fairly expensive when
+ *  the keysets are simple.
+ *
+ *     require 'benchmark'
+ *     include Benchmark
+ *
+ *     a = (1..100000).map {rand(100000)}
+ *
+ *     bm(10) do |b|
+ *       b.report("Sort")    { a.sort }
+ *       b.report("Sort by") { a.sort_by {|a| a} }
+ *     end
+ *
+ *  produces:
+ *
+ *     user     system      total        real
+ *     Sort        0.180000   0.000000   0.180000 (  0.175469)
+ *     Sort by     1.980000   0.040000   2.020000 (  2.013586)
+ *
+ *  However, consider the case where comparing the keys is a non-trivial
+ *  operation. The following code sorts some files on modification time
+ *  using the basic sort method.
+ *
+ *     files = Dir["*"]
+ *     sorted = files.sort {|a,b| File.new(a).mtime <=> File.new(b).mtime}
+ *     sorted   #=> ["mon", "tues", "wed", "thurs"]
+ *
+ *  This sort is inefficient: it generates two new File
+ *  objects during every comparison. A slightly better technique is to
+ *  use the Kernel#test method to generate the modification
+ *  times directly.
+ *
+ *     files = Dir["*"]
+ *     sorted = files.sort { |a,b|
+ *       test(?M, a) <=> test(?M, b)
+ *     }
+ *     sorted   #=> ["mon", "tues", "wed", "thurs"]
+ *
+ *  This still generates many unnecessary Time objects. A
+ *  more efficient technique is to cache the sort keys (modification
+ *  times in this case) before the sort. Perl users often call this
+ *  approach a Schwartzian Transform, after Randal Schwartz. We
+ *  construct a temporary array, where each element is an array
+ *  containing our sort key along with the filename. We sort this array,
+ *  and then extract the filename from the result.
+ *
+ *     sorted = Dir["*"].collect { |f|
+ *        [test(?M, f), f]
+ *     }.sort.collect { |f| f[1] }
+ *     sorted   #=> ["mon", "tues", "wed", "thurs"]
+ *
+ *  This is exactly what sort_by does internally.
+ * + * sorted = Dir["*"].sort_by {|f| test(?M, f)} + * sorted #=> ["mon", "tues", "wed", "thurs"] + */ + +static VALUE +enum_sort_by(VALUE obj) +{ + VALUE ary; + long i; + + RETURN_ENUMERATOR(obj, 0, 0); + + if (TYPE(obj) == T_ARRAY) { + ary = rb_ary_new2(RARRAY_LEN(obj)); + } + else { + ary = rb_ary_new(); + } + RBASIC(ary)->klass = 0; + rb_block_call(obj, id_each, 0, 0, sort_by_i, ary); + if (RARRAY_LEN(ary) > 1) { + ruby_qsort(RARRAY_PTR(ary), RARRAY_LEN(ary), sizeof(VALUE), + sort_by_cmp, (void *)ary); + } + if (RBASIC(ary)->klass) { + rb_raise(rb_eRuntimeError, "sort_by reentered"); + } + for (i=0; iu2.value; + } + RBASIC(ary)->klass = rb_cArray; + OBJ_INFECT(ary, obj); + + return ary; +} + +#define DEFINE_ENUMFUNCS(name) \ +static VALUE \ +name##_i(VALUE i, VALUE *memo, int argc, VALUE *argv) \ +{ \ + return enum_##name##_func(enum_values_pack(argc, argv), memo); \ +} \ +\ +static VALUE \ +name##_iter_i(VALUE i, VALUE *memo, int argc, VALUE *argv) \ +{ \ + return enum_##name##_func(enum_yield(argc, argv), memo); \ +} + +static VALUE +enum_all_func(VALUE result, VALUE *memo) +{ + if (!RTEST(result)) { + *memo = Qfalse; + rb_iter_break(); + } + return Qnil; +} + +DEFINE_ENUMFUNCS(all) + +/* + * call-seq: + * enum.all? [{|obj| block } ] => true or false + * + * Passes each element of the collection to the given block. The method + * returns true if the block never returns + * false or nil. If the block is not given, + * Ruby adds an implicit block of {|obj| obj} (that is + * all? will return true only if none of the + * collection members are false or nil.) + * + * %w{ant bear cat}.all? {|word| word.length >= 3} #=> true + * %w{ant bear cat}.all? {|word| word.length >= 4} #=> false + * [ nil, true, 99 ].all? #=> false + * + */ + +static VALUE +enum_all(VALUE obj) +{ + VALUE result = Qtrue; + + rb_block_call(obj, id_each, 0, 0, rb_block_given_p() ? 
all_iter_i : all_i, (VALUE)&result); + return result; +} + +static VALUE +enum_any_func(VALUE result, VALUE *memo) +{ + if (RTEST(result)) { + *memo = Qtrue; + rb_iter_break(); + } + return Qnil; +} + +DEFINE_ENUMFUNCS(any) + +/* + * call-seq: + * enum.any? [{|obj| block } ] => true or false + * + * Passes each element of the collection to the given block. The method + * returns true if the block ever returns a value other + * than false or nil. If the block is not + * given, Ruby adds an implicit block of {|obj| obj} (that + * is any? will return true if at least one + * of the collection members is not false or + * nil. + * + * %w{ant bear cat}.any? {|word| word.length >= 3} #=> true + * %w{ant bear cat}.any? {|word| word.length >= 4} #=> true + * [ nil, true, 99 ].any? #=> true + * + */ + +static VALUE +enum_any(VALUE obj) +{ + VALUE result = Qfalse; + + rb_block_call(obj, id_each, 0, 0, rb_block_given_p() ? any_iter_i : any_i, (VALUE)&result); + return result; +} + +static VALUE +enum_one_func(VALUE result, VALUE *memo) +{ + if (RTEST(result)) { + if (*memo == Qundef) { + *memo = Qtrue; + } + else if (*memo == Qtrue) { + *memo = Qfalse; + rb_iter_break(); + } + } + return Qnil; +} + +DEFINE_ENUMFUNCS(one) + +/* + * call-seq: + * enum.one? [{|obj| block }] => true or false + * + * Passes each element of the collection to the given block. The method + * returns true if the block returns true + * exactly once. If the block is not given, one? will return + * true only if exactly one of the collection members is + * true. + * + * %w{ant bear cat}.one? {|word| word.length == 4} #=> true + * %w{ant bear cat}.one? {|word| word.length > 4} #=> false + * %w{ant bear cat}.one? {|word| word.length < 4} #=> false + * [ nil, true, 99 ].one? #=> false + * [ nil, true, false ].one? #=> true + * + */ + +static VALUE +enum_one(VALUE obj) +{ + VALUE result = Qundef; + + rb_block_call(obj, id_each, 0, 0, rb_block_given_p() ? 
one_iter_i : one_i, (VALUE)&result); + if (result == Qundef) return Qfalse; + return result; +} + +static VALUE +enum_none_func(VALUE result, VALUE *memo) +{ + if (RTEST(result)) { + *memo = Qfalse; + rb_iter_break(); + } + return Qnil; +} + +DEFINE_ENUMFUNCS(none) + +/* + * call-seq: + * enum.none? [{|obj| block }] => true or false + * + * Passes each element of the collection to the given block. The method + * returns true if the block never returns true + * for all elements. If the block is not given, none? will return + * true only if none of the collection members is true. + * + * %w{ant bear cat}.none? {|word| word.length == 5} #=> true + * %w{ant bear cat}.none? {|word| word.length >= 4} #=> false + * [].none? #=> true + * [nil].none? #=> true + * [nil,false].none? #=> true + */ +static VALUE +enum_none(VALUE obj) +{ + VALUE result = Qtrue; + + rb_block_call(obj, id_each, 0, 0, rb_block_given_p() ? none_iter_i : none_i, (VALUE)&result); + return result; +} + +static VALUE +min_i(VALUE i, VALUE *memo, int argc, VALUE *argv) +{ + VALUE cmp; + + ENUM_WANT_SVALUE(); + + if (*memo == Qundef) { + *memo = i; + } + else { + cmp = rb_funcall(i, id_cmp, 1, *memo); + if (rb_cmpint(cmp, i, *memo) < 0) { + *memo = i; + } + } + return Qnil; +} + +static VALUE +min_ii(VALUE i, VALUE *memo, int argc, VALUE *argv) +{ + VALUE cmp; + + ENUM_WANT_SVALUE(); + + if (*memo == Qundef) { + *memo = i; + } + else { + VALUE ary = memo[1]; + RARRAY_PTR(ary)[0] = i; + RARRAY_PTR(ary)[1] = *memo; + cmp = rb_yield(ary); + if (rb_cmpint(cmp, i, *memo) < 0) { + *memo = i; + } + } + return Qnil; +} + + +/* + * call-seq: + * enum.min => obj + * enum.min {| a,b | block } => obj + * + * Returns the object in enum with the minimum value. The + * first form assumes all objects implement Comparable; + * the second uses the block to return a <=> b. 
+ * + * a = %w(albatross dog horse) + * a.min #=> "albatross" + * a.min {|a,b| a.length <=> b.length } #=> "dog" + */ + +static VALUE +enum_min(VALUE obj) +{ + VALUE result[2]; + + result[0] = Qundef; + if (rb_block_given_p()) { + result[1] = rb_ary_new3(2, Qnil, Qnil); + rb_block_call(obj, id_each, 0, 0, min_ii, (VALUE)result); + } + else { + rb_block_call(obj, id_each, 0, 0, min_i, (VALUE)result); + } + if (result[0] == Qundef) return Qnil; + return result[0]; +} + +static VALUE +max_i(VALUE i, VALUE *memo, int argc, VALUE *argv) +{ + VALUE cmp; + + ENUM_WANT_SVALUE(); + + if (*memo == Qundef) { + *memo = i; + } + else { + cmp = rb_funcall(i, id_cmp, 1, *memo); + if (rb_cmpint(cmp, i, *memo) > 0) { + *memo = i; + } + } + return Qnil; +} + +static VALUE +max_ii(VALUE i, VALUE *memo, int argc, VALUE *argv) +{ + VALUE cmp; + + ENUM_WANT_SVALUE(); + + if (*memo == Qundef) { + *memo = i; + } + else { + VALUE ary = memo[1]; + RARRAY_PTR(ary)[0] = i; + RARRAY_PTR(ary)[1] = *memo; + cmp = rb_yield(ary); + if (rb_cmpint(cmp, i, *memo) > 0) { + *memo = i; + } + } + return Qnil; +} + +/* + * call-seq: + * enum.max => obj + * enum.max {|a,b| block } => obj + * + * Returns the object in _enum_ with the maximum value. The + * first form assumes all objects implement Comparable; + * the second uses the block to return a <=> b. 
+ * + * a = %w(albatross dog horse) + * a.max #=> "horse" + * a.max {|a,b| a.length <=> b.length } #=> "albatross" + */ + +static VALUE +enum_max(VALUE obj) +{ + VALUE result[2]; + + result[0] = Qundef; + if (rb_block_given_p()) { + result[1] = rb_ary_new3(2, Qnil, Qnil); + rb_block_call(obj, id_each, 0, 0, max_ii, (VALUE)result); + } + else { + rb_block_call(obj, id_each, 0, 0, max_i, (VALUE)result); + } + if (result[0] == Qundef) return Qnil; + return result[0]; +} + +static VALUE +minmax_i(VALUE i, VALUE *memo, int argc, VALUE *argv) +{ + int n; + + ENUM_WANT_SVALUE(); + + if (memo[0] == Qundef) { + memo[0] = i; + memo[1] = i; + } + else { + n = rb_cmpint(rb_funcall(i, id_cmp, 1, memo[0]), i, memo[0]); + if (n < 0) { + memo[0] = i; + } + n = rb_cmpint(rb_funcall(i, id_cmp, 1, memo[1]), i, memo[1]); + if (n > 0) { + memo[1] = i; + } + } + return Qnil; +} + +static VALUE +minmax_ii(VALUE i, VALUE *memo, int argc, VALUE *argv) +{ + int n; + + ENUM_WANT_SVALUE(); + + if (memo[0] == Qundef) { + memo[0] = i; + memo[1] = i; + } + else { + VALUE ary = memo[2]; + + RARRAY_PTR(ary)[0] = i; + RARRAY_PTR(ary)[1] = memo[0]; + n = rb_cmpint(rb_yield(ary), i, memo[0]); + if (n < 0) { + memo[0] = i; + } + RARRAY_PTR(ary)[0] = i; + RARRAY_PTR(ary)[1] = memo[1]; + n = rb_cmpint(rb_yield(ary), i, memo[1]); + if (n > 0) { + memo[1] = i; + } + } + return Qnil; +} + +/* + * call-seq: + * enum.minmax => [min,max] + * enum.minmax {|a,b| block } => [min,max] + * + * Returns two elements array which contains the minimum and the + * maximum value in the enumerable. The first form assumes all + * objects implement Comparable; the second uses the + * block to return a <=> b. 
+ * + * a = %w(albatross dog horse) + * a.minmax #=> ["albatross", "horse"] + * a.minmax {|a,b| a.length <=> b.length } #=> ["dog", "albatross"] + */ + +static VALUE +enum_minmax(VALUE obj) +{ + VALUE result[3]; + VALUE ary = rb_ary_new3(2, Qnil, Qnil); + + result[0] = Qundef; + if (rb_block_given_p()) { + result[2] = ary; + rb_block_call(obj, id_each, 0, 0, minmax_ii, (VALUE)result); + } + else { + rb_block_call(obj, id_each, 0, 0, minmax_i, (VALUE)result); + } + if (result[0] != Qundef) { + RARRAY_PTR(ary)[0] = result[0]; + RARRAY_PTR(ary)[1] = result[1]; + } + return ary; +} + +static VALUE +min_by_i(VALUE i, VALUE *memo, int argc, VALUE *argv) +{ + VALUE v; + + ENUM_WANT_SVALUE(); + + v = rb_yield(i); + if (memo[0] == Qundef) { + memo[0] = v; + memo[1] = i; + } + else if (rb_cmpint(rb_funcall(v, id_cmp, 1, memo[0]), v, memo[0]) < 0) { + memo[0] = v; + memo[1] = i; + } + return Qnil; +} + +/* + * call-seq: + * enum.min_by {| obj| block } => obj + * + * Returns the object in enum that gives the minimum + * value from the given block. + * + * a = %w(albatross dog horse) + * a.min_by {|x| x.length } #=> "dog" + */ + +static VALUE +enum_min_by(VALUE obj) +{ + VALUE memo[2]; + + RETURN_ENUMERATOR(obj, 0, 0); + + memo[0] = Qundef; + memo[1] = Qnil; + rb_block_call(obj, id_each, 0, 0, min_by_i, (VALUE)memo); + return memo[1]; +} + +static VALUE +max_by_i(VALUE i, VALUE *memo, int argc, VALUE *argv) +{ + VALUE v; + + ENUM_WANT_SVALUE(); + + v = rb_yield(i); + if (memo[0] == Qundef) { + memo[0] = v; + memo[1] = i; + } + else if (rb_cmpint(rb_funcall(v, id_cmp, 1, memo[0]), v, memo[0]) > 0) { + memo[0] = v; + memo[1] = i; + } + return Qnil; +} + +/* + * call-seq: + * enum.max_by {| obj| block } => obj + * + * Returns the object in enum that gives the maximum + * value from the given block. 
+ * + * a = %w(albatross dog horse) + * a.max_by {|x| x.length } #=> "albatross" + */ + +static VALUE +enum_max_by(VALUE obj) +{ + VALUE memo[2]; + + RETURN_ENUMERATOR(obj, 0, 0); + + memo[0] = Qundef; + memo[1] = Qnil; + rb_block_call(obj, id_each, 0, 0, max_by_i, (VALUE)memo); + return memo[1]; +} + +static VALUE +minmax_by_i(VALUE i, VALUE *memo, int argc, VALUE *argv) +{ + VALUE v; + + ENUM_WANT_SVALUE(); + + v = rb_yield(i); + if (memo[0] == Qundef) { + memo[0] = v; + memo[1] = v; + memo[2] = i; + memo[3] = i; + } + else { + if (rb_cmpint(rb_funcall(v, id_cmp, 1, memo[0]), v, memo[0]) < 0) { + memo[0] = v; + memo[2] = i; + } + if (rb_cmpint(rb_funcall(v, id_cmp, 1, memo[1]), v, memo[1]) > 0) { + memo[1] = v; + memo[3] = i; + } + } + return Qnil; +} + +/* + * call-seq: + * enum.minmax_by {| obj| block } => [min, max] + * + * Returns two elements array array containing the objects in + * enum that gives the minimum and maximum values respectively + * from the given block. + * + * a = %w(albatross dog horse) + * a.minmax_by {|x| x.length } #=> ["dog", "albatross"] + */ + +static VALUE +enum_minmax_by(VALUE obj) +{ + VALUE memo[4]; + + RETURN_ENUMERATOR(obj, 0, 0); + + memo[0] = Qundef; + memo[1] = Qundef; + memo[2] = Qnil; + memo[3] = Qnil; + rb_block_call(obj, id_each, 0, 0, minmax_by_i, (VALUE)memo); + return rb_assoc_new(memo[2], memo[3]); +} + +static VALUE +member_i(VALUE iter, VALUE *memo, int argc, VALUE *argv) +{ + if (rb_equal(enum_values_pack(argc, argv), memo[0])) { + memo[1] = Qtrue; + rb_iter_break(); + } + return Qnil; +} + +/* + * call-seq: + * enum.include?(obj) => true or false + * enum.member?(obj) => true or false + * + * Returns true if any member of enum equals + * obj. Equality is tested using ==. + * + * IO.constants.include? :SEEK_SET #=> true + * IO.constants.include? 
:SEEK_NO_FURTHER #=> false + * + */ + +static VALUE +enum_member(VALUE obj, VALUE val) +{ + VALUE memo[2]; + + memo[0] = val; + memo[1] = Qfalse; + rb_block_call(obj, id_each, 0, 0, member_i, (VALUE)memo); + return memo[1]; +} + +static VALUE +each_with_index_i(VALUE i, VALUE memo, int argc, VALUE *argv) +{ + long n = (*(VALUE *)memo)++; + + return rb_yield_values(2, enum_values_pack(argc, argv), INT2NUM(n)); +} + +/* + * call-seq: + * enum.each_with_index {|obj, i| block } -> enum + * + * Calls block with two arguments, the item and its index, + * for each item in enum. Given arguments are passed through + * to #each(). + * + * hash = Hash.new + * %w(cat dog wombat).each_with_index {|item, index| + * hash[item] = index + * } + * hash #=> {"cat"=>0, "dog"=>1, "wombat"=>2} + * + */ + +static VALUE +enum_each_with_index(int argc, VALUE *argv, VALUE obj) +{ + long memo; + + RETURN_ENUMERATOR(obj, argc, argv); + + memo = 0; + rb_block_call(obj, id_each, argc, argv, each_with_index_i, (VALUE)&memo); + return obj; +} + + +/* + * call-seq: + * enum.reverse_each {|item| block } + * + * Traverses enum in reverse order. + */ + +static VALUE +enum_reverse_each(int argc, VALUE *argv, VALUE obj) +{ + VALUE ary; + long i; + + RETURN_ENUMERATOR(obj, argc, argv); + + ary = enum_to_a(argc, argv, obj); + + for (i = RARRAY_LEN(ary); --i >= 0; ) { + rb_yield(RARRAY_PTR(ary)[i]); + } + + return obj; +} + + +static VALUE +zip_ary(VALUE val, NODE *memo, int argc, VALUE *argv) +{ + volatile VALUE result = memo->u1.value; + volatile VALUE args = memo->u2.value; + int n = memo->u3.cnt++; + volatile VALUE tmp; + int i; + + tmp = rb_ary_new2(RARRAY_LEN(args) + 1); + rb_ary_store(tmp, 0, enum_values_pack(argc, argv)); + for (i=0; iu1.value; + volatile VALUE args = memo->u2.value; + volatile VALUE tmp; + int i; + + tmp = rb_ary_new2(RARRAY_LEN(args) + 1); + rb_ary_store(tmp, 0, enum_values_pack(argc, argv)); + for (i=0; i enumerator + * enum.zip(arg, ...) 
{|arr| block } => nil + * + * Takes one element from enum and merges corresponding + * elements from each args. This generates a sequence of + * n-element arrays, where n is one more than the + * count of arguments. The length of the resulting sequence will be + * enum#sizeenum#size, nil values are supplied. If + * a block is given, it is invoked for each output array, otherwise + * an array of arrays is returned. + * + * a = [ 4, 5, 6 ] + * b = [ 7, 8, 9 ] + * + * [1,2,3].zip(a, b) #=> [[1, 4, 7], [2, 5, 8], [3, 6, 9]] + * [1,2].zip(a,b) #=> [[1, 4, 7], [2, 5, 8]] + * a.zip([1,2],[8]) #=> [[4, 1, 8], [5, 2, nil], [6, nil, nil]] + * + */ + +static VALUE +enum_zip(int argc, VALUE *argv, VALUE obj) +{ + int i; + ID conv; + NODE *memo; + VALUE result = Qnil; + VALUE args = rb_ary_new4(argc, argv); + int allary = Qtrue; + + argv = RARRAY_PTR(args); + for (i=0; i array + * + * Returns first n elements from enum. + * + * a = [1, 2, 3, 4, 5, 0] + * a.take(3) # => [1, 2, 3] + * + */ + +static VALUE +enum_take(VALUE obj, VALUE n) +{ + VALUE args[2]; + long len = NUM2LONG(n); + + if (len < 0) { + rb_raise(rb_eArgError, "attempt to take negative size"); + } + + if (len == 0) return rb_ary_new2(0); + args[0] = rb_ary_new(); + args[1] = len; + rb_block_call(obj, id_each, 0, 0, take_i, (VALUE)args); + return args[0]; +} + + +static VALUE +take_while_i(VALUE i, VALUE *ary, int argc, VALUE *argv) +{ + if (!RTEST(enum_yield(argc, argv))) rb_iter_break(); + rb_ary_push(*ary, enum_values_pack(argc, argv)); + return Qnil; +} + +/* + * call-seq: + * enum.take_while {|arr| block } => array + * + * Passes elements to the block until the block returns nil or false, + * then stops iterating and returns an array of all prior elements. 
+ * + * a = [1, 2, 3, 4, 5, 0] + * a.take_while {|i| i < 3 } # => [1, 2] + * + */ + +static VALUE +enum_take_while(VALUE obj) +{ + VALUE ary; + + RETURN_ENUMERATOR(obj, 0, 0); + ary = rb_ary_new(); + rb_block_call(obj, id_each, 0, 0, take_while_i, (VALUE)&ary); + return ary; +} + +static VALUE +drop_i(VALUE i, VALUE *arg, int argc, VALUE *argv) +{ + if (arg[1] == 0) { + rb_ary_push(arg[0], enum_values_pack(argc, argv)); + } + else { + arg[1]--; + } + return Qnil; +} + +/* + * call-seq: + * enum.drop(n) => array + * + * Drops first n elements from enum, and returns rest elements + * in an array. + * + * a = [1, 2, 3, 4, 5, 0] + * a.drop(3) # => [4, 5, 0] + * + */ + +static VALUE +enum_drop(VALUE obj, VALUE n) +{ + VALUE args[2]; + long len = NUM2LONG(n); + + if (len < 0) { + rb_raise(rb_eArgError, "attempt to drop negative size"); + } + + args[1] = len; + args[0] = rb_ary_new(); + rb_block_call(obj, id_each, 0, 0, drop_i, (VALUE)args); + return args[0]; +} + + +static VALUE +drop_while_i(VALUE i, VALUE *args, int argc, VALUE *argv) +{ + ENUM_WANT_SVALUE(); + + if (!args[1] && !RTEST(rb_yield(i))) { + args[1] = Qtrue; + } + if (args[1]) { + rb_ary_push(args[0], i); + } + return Qnil; +} + +/* + * call-seq: + * enum.drop_while {|arr| block } => array + * + * Drops elements up to, but not including, the first element for + * which the block returns nil or false and returns an array + * containing the remaining elements. 
+ * + * a = [1, 2, 3, 4, 5, 0] + * a.drop_while {|i| i < 3 } # => [3, 4, 5, 0] + * + */ + +static VALUE +enum_drop_while(VALUE obj) +{ + VALUE args[2]; + + RETURN_ENUMERATOR(obj, 0, 0); + args[0] = rb_ary_new(); + args[1] = Qfalse; + rb_block_call(obj, id_each, 0, 0, drop_while_i, (VALUE)args); + return args[0]; +} + +static VALUE +cycle_i(VALUE i, VALUE ary, int argc, VALUE *argv) +{ + ENUM_WANT_SVALUE(); + + rb_ary_push(ary, i); + rb_yield(i); + return Qnil; +} + +/* + * call-seq: + * enum.cycle {|obj| block } + * enum.cycle(n) {|obj| block } + * + * Calls block for each element of enum repeatedly _n_ + * times or forever if none or nil is given. If a non-positive + * number is given or the collection is empty, does nothing. Returns + * nil if the loop has finished without getting interrupted. + * + * Enumerable#cycle saves elements in an internal array so changes + * to enum after the first pass have no effect. + * + * a = ["a", "b", "c"] + * a.cycle {|x| puts x } # print, a, b, c, a, b, c,.. forever. + * a.cycle(2) {|x| puts x } # print, a, b, c, a, b, c. + * + */ + +static VALUE +enum_cycle(int argc, VALUE *argv, VALUE obj) +{ + VALUE ary; + VALUE nv = Qnil; + long n, i, len; + + rb_scan_args(argc, argv, "01", &nv); + + RETURN_ENUMERATOR(obj, argc, argv); + if (NIL_P(nv)) { + n = -1; + } + else { + n = NUM2LONG(nv); + if (n <= 0) return Qnil; + } + ary = rb_ary_new(); + RBASIC(ary)->klass = 0; + rb_block_call(obj, id_each, 0, 0, cycle_i, ary); + len = RARRAY_LEN(ary); + if (len == 0) return Qnil; + while (n < 0 || 0 < --n) { + for (i=0; iEnumerable mixin provides collection classes with + * several traversal and searching methods, and with the ability to + * sort. The class must provide a method each, which + * yields successive members of the collection. If + * Enumerable#max, #min, or + * #sort is used, the objects in the collection must also + * implement a meaningful <=> operator, as these methods + * rely on an ordering between members of the collection. 
+ */
+
+/*
+ * Defines the Enumerable module, registers its methods and interns the
+ * method ids used by the iteration helpers in this file.
+ */
+void
+Init_Enumerable(void)
+{
+/* From here on rb_intern() on a string literal expands to rb_intern_const(). */
+#undef rb_intern
+#define rb_intern(str) rb_intern_const(str)
+
+    rb_mEnumerable = rb_define_module("Enumerable");
+
+    /* The last argument is the arity; -1 registers a C function that
+     * takes (argc, argv, self). */
+    rb_define_method(rb_mEnumerable, "to_a", enum_to_a, -1);
+    rb_define_method(rb_mEnumerable, "entries", enum_to_a, -1);
+
+    rb_define_method(rb_mEnumerable, "sort", enum_sort, 0);
+    rb_define_method(rb_mEnumerable, "sort_by", enum_sort_by, 0);
+    rb_define_method(rb_mEnumerable, "grep", enum_grep, 1);
+    rb_define_method(rb_mEnumerable, "count", enum_count, -1);
+    rb_define_method(rb_mEnumerable, "find", enum_find, -1);
+    rb_define_method(rb_mEnumerable, "detect", enum_find, -1);
+    rb_define_method(rb_mEnumerable, "find_index", enum_find_index, -1);
+    rb_define_method(rb_mEnumerable, "find_all", enum_find_all, 0);
+    rb_define_method(rb_mEnumerable, "select", enum_find_all, 0);
+    rb_define_method(rb_mEnumerable, "reject", enum_reject, 0);
+    rb_define_method(rb_mEnumerable, "collect", enum_collect, 0);
+    rb_define_method(rb_mEnumerable, "map", enum_collect, 0);
+    rb_define_method(rb_mEnumerable, "inject", enum_inject, -1);
+    rb_define_method(rb_mEnumerable, "reduce", enum_inject, -1);
+    rb_define_method(rb_mEnumerable, "partition", enum_partition, 0);
+    rb_define_method(rb_mEnumerable, "group_by", enum_group_by, 0);
+    rb_define_method(rb_mEnumerable, "first", enum_first, -1);
+    rb_define_method(rb_mEnumerable, "all?", enum_all, 0);
+    rb_define_method(rb_mEnumerable, "any?", enum_any, 0);
+    rb_define_method(rb_mEnumerable, "one?", enum_one, 0);
+    rb_define_method(rb_mEnumerable, "none?", enum_none, 0);
+    rb_define_method(rb_mEnumerable, "min", enum_min, 0);
+    rb_define_method(rb_mEnumerable, "max", enum_max, 0);
+    rb_define_method(rb_mEnumerable, "minmax", enum_minmax, 0);
+    rb_define_method(rb_mEnumerable, "min_by", enum_min_by, 0);
+    rb_define_method(rb_mEnumerable, "max_by", enum_max_by, 0);
+    rb_define_method(rb_mEnumerable, "minmax_by", enum_minmax_by, 0);
+    rb_define_method(rb_mEnumerable, "member?", enum_member, 1);
+    rb_define_method(rb_mEnumerable, "include?", enum_member, 1);
+    rb_define_method(rb_mEnumerable, "each_with_index", enum_each_with_index, -1);
+    rb_define_method(rb_mEnumerable, "reverse_each", enum_reverse_each, -1);
+    rb_define_method(rb_mEnumerable, "zip", enum_zip, -1);
+    rb_define_method(rb_mEnumerable, "take", enum_take, 1);
+    rb_define_method(rb_mEnumerable, "take_while", enum_take_while, 0);
+    rb_define_method(rb_mEnumerable, "drop", enum_drop, 1);
+    rb_define_method(rb_mEnumerable, "drop_while", enum_drop_while, 0);
+    rb_define_method(rb_mEnumerable, "cycle", enum_cycle, -1);
+
+    id_eqq  = rb_intern("===");
+    id_each = rb_intern("each");
+    id_cmp  = rb_intern("<=>");
+    id_next = rb_intern("next");
+    id_size = rb_intern("size");
+}
+
diff --git a/enumerator.c b/enumerator.c
new file mode 100644
index 0000000..cbfe262
--- /dev/null
+++ b/enumerator.c
@@ -0,0 +1,813 @@
+/************************************************
+
+  enumerator.c - provides Enumerator class
+
+  $Author: yugui $
+
+  Copyright (C) 2001-2003 Akinori MUSHA
+
+  $Idaemons: /home/cvs/rb/enumerator/enumerator.c,v 1.1.1.1 2001/07/15 10:12:48 knu Exp $
+  $RoughId: enumerator.c,v 1.6 2003/07/27 11:03:24 nobu Exp $
+  $Id: enumerator.c 24122 2009-07-15 12:00:57Z yugui $
+
+************************************************/
+
+#include "ruby/ruby.h"
+
+/*
+ * Document-class: Enumerator
+ *
+ * A class which provides a method `each' to be used as an Enumerable
+ * object.
+ */ +VALUE rb_cEnumerator; +static VALUE sym_each; +static ID id_rewind; + +VALUE rb_eStopIteration; + +struct enumerator { + VALUE obj; + ID meth; + VALUE args; + VALUE fib; + VALUE dst; + VALUE no_next; +}; + +static VALUE rb_cGenerator, rb_cYielder; + +struct generator { + VALUE proc; +}; + +struct yielder { + VALUE proc; +}; + +static VALUE generator_allocate(VALUE klass); +static VALUE generator_init(VALUE obj, VALUE proc); + +/* + * Enumerator + */ +static void +enumerator_mark(void *p) +{ + struct enumerator *ptr = p; + rb_gc_mark(ptr->obj); + rb_gc_mark(ptr->args); + rb_gc_mark(ptr->fib); + rb_gc_mark(ptr->dst); +} + +static struct enumerator * +enumerator_ptr(VALUE obj) +{ + struct enumerator *ptr; + + Data_Get_Struct(obj, struct enumerator, ptr); + if (RDATA(obj)->dmark != enumerator_mark) { + rb_raise(rb_eTypeError, + "wrong argument type %s (expected %s)", + rb_obj_classname(obj), rb_class2name(rb_cEnumerator)); + } + if (!ptr || ptr->obj == Qundef) { + rb_raise(rb_eArgError, "uninitialized enumerator"); + } + return ptr; +} + +/* + * call-seq: + * obj.to_enum(method = :each, *args) + * obj.enum_for(method = :each, *args) + * + * Returns Enumerator.new(self, method, *args). 
+ * + * e.g.: + * + * str = "xyz" + * + * enum = str.enum_for(:each_byte) + * a = enum.map {|b| '%02x' % b } #=> ["78", "79", "7a"] + * + * # protects an array from being modified + * a = [1, 2, 3] + * some_method(a.to_enum) + * + */ +static VALUE +obj_to_enum(int argc, VALUE *argv, VALUE obj) +{ + VALUE meth = sym_each; + + if (argc > 0) { + --argc; + meth = *argv++; + } + return rb_enumeratorize(obj, meth, argc, argv); +} + +static VALUE +each_slice_i(VALUE val, VALUE *memo) +{ + VALUE ary = memo[0]; + VALUE v = Qnil; + long size = (long)memo[1]; + + rb_ary_push(ary, val); + + if (RARRAY_LEN(ary) == size) { + v = rb_yield(ary); + memo[0] = rb_ary_new2(size); + } + + return v; +} + +/* + * call-seq: + * e.each_slice(n) {...} + * e.each_slice(n) + * + * Iterates the given block for each slice of elements. If no + * block is given, returns an enumerator. + * + * e.g.: + * (1..10).each_slice(3) {|a| p a} + * # outputs below + * [1, 2, 3] + * [4, 5, 6] + * [7, 8, 9] + * [10] + * + */ +static VALUE +enum_each_slice(VALUE obj, VALUE n) +{ + long size = NUM2LONG(n); + VALUE args[2], ary; + + if (size <= 0) rb_raise(rb_eArgError, "invalid slice size"); + RETURN_ENUMERATOR(obj, 1, &n); + args[0] = rb_ary_new2(size); + args[1] = (VALUE)size; + + rb_block_call(obj, SYM2ID(sym_each), 0, 0, each_slice_i, (VALUE)args); + + ary = args[0]; + if (RARRAY_LEN(ary) > 0) rb_yield(ary); + + return Qnil; +} + +static VALUE +each_cons_i(VALUE val, VALUE *memo) +{ + VALUE ary = memo[0]; + VALUE v = Qnil; + long size = (long)memo[1]; + + if (RARRAY_LEN(ary) == size) { + rb_ary_shift(ary); + } + rb_ary_push(ary, val); + if (RARRAY_LEN(ary) == size) { + v = rb_yield(rb_ary_dup(ary)); + } + return v; +} + +/* + * call-seq: + * each_cons(n) {...} + * each_cons(n) + * + * Iterates the given block for each array of consecutive + * elements. If no block is given, returns an enumerator. 
+ * + * e.g.: + * (1..10).each_cons(3) {|a| p a} + * # outputs below + * [1, 2, 3] + * [2, 3, 4] + * [3, 4, 5] + * [4, 5, 6] + * [5, 6, 7] + * [6, 7, 8] + * [7, 8, 9] + * [8, 9, 10] + * + */ +static VALUE +enum_each_cons(VALUE obj, VALUE n) +{ + long size = NUM2LONG(n); + VALUE args[2]; + + if (size <= 0) rb_raise(rb_eArgError, "invalid size"); + RETURN_ENUMERATOR(obj, 1, &n); + args[0] = rb_ary_new2(size); + args[1] = (VALUE)size; + + rb_block_call(obj, SYM2ID(sym_each), 0, 0, each_cons_i, (VALUE)args); + + return Qnil; +} + +static VALUE +each_with_object_i(VALUE val, VALUE memo) +{ + return rb_yield_values(2, val, memo); +} + +/* + * call-seq: + * each_with_object(obj) {|(*args), memo_obj| ... } + * each_with_object(obj) + * + * Iterates the given block for each element with an arbitrary + * object given, and returns the initially given object. + + * If no block is given, returns an enumerator. + * + * e.g.: + * evens = (1..10).each_with_object([]) {|i, a| a << i*2 } + * # => [2, 4, 6, 8, 10, 12, 14, 16, 18, 20] + * + */ +static VALUE +enum_each_with_object(VALUE obj, VALUE memo) +{ + RETURN_ENUMERATOR(obj, 1, &memo); + + rb_block_call(obj, SYM2ID(sym_each), 0, 0, each_with_object_i, memo); + + return memo; +} + +static VALUE +enumerator_allocate(VALUE klass) +{ + struct enumerator *ptr; + VALUE enum_obj; + + enum_obj = Data_Make_Struct(klass, struct enumerator, enumerator_mark, -1, ptr); + ptr->obj = Qundef; + + return enum_obj; +} + +static VALUE +enumerator_each_i(VALUE v, VALUE enum_obj, int argc, VALUE *argv) +{ + return rb_yield_values2(argc, argv); +} + +static VALUE +enumerator_init(VALUE enum_obj, VALUE obj, VALUE meth, int argc, VALUE *argv) +{ + struct enumerator *ptr; + + Data_Get_Struct(enum_obj, struct enumerator, ptr); + + if (!ptr) { + rb_raise(rb_eArgError, "unallocated enumerator"); + } + + ptr->obj = obj; + ptr->meth = rb_to_id(meth); + if (argc) ptr->args = rb_ary_new4(argc, argv); + ptr->fib = 0; + ptr->dst = Qnil; + ptr->no_next = Qfalse; + 
+ return enum_obj; +} + +/* + * call-seq: + * Enumerator.new(obj, method = :each, *args) + * Enumerator.new { |y| ... } + * + * Creates a new Enumerator object, which is to be used as an + * Enumerable object iterating in a given way. + * + * In the first form, a generated Enumerator iterates over the given + * object using the given method with the given arguments passed. + * Use of this form is discouraged. Use Kernel#enum_for(), alias + * to_enum, instead. + * + * e = Enumerator.new(ObjectSpace, :each_object) + * #-> ObjectSpace.enum_for(:each_object) + * + * e.select { |obj| obj.is_a?(Class) } #=> array of all classes + * + * In the second form, iteration is defined by the given block, in + * which a "yielder" object given as block parameter can be used to + * yield a value by calling the +yield+ method, alias +<<+. + * + * fib = Enumerator.new { |y| + * a = b = 1 + * loop { + * y << a + * a, b = b, a + b + * } + * } + * + * p fib.take(10) #=> [1, 1, 2, 3, 5, 8, 13, 21, 34, 55] + */ +static VALUE +enumerator_initialize(int argc, VALUE *argv, VALUE obj) +{ + VALUE recv, meth = sym_each; + + if (argc == 0) { + if (!rb_block_given_p()) + rb_raise(rb_eArgError, "wrong number of argument (0 for 1+)"); + + recv = generator_init(generator_allocate(rb_cGenerator), rb_block_proc()); + } else { + recv = *argv++; + if (--argc) { + meth = *argv++; + --argc; + } + } + + return enumerator_init(obj, recv, meth, argc, argv); +} + +/* :nodoc: */ +static VALUE +enumerator_init_copy(VALUE obj, VALUE orig) +{ + struct enumerator *ptr0, *ptr1; + + ptr0 = enumerator_ptr(orig); + if (ptr0->fib) { + /* Fibers cannot be copied */ + rb_raise(rb_eTypeError, "can't copy execution context"); + } + + Data_Get_Struct(obj, struct enumerator, ptr1); + + if (!ptr1) { + rb_raise(rb_eArgError, "unallocated enumerator"); + } + + ptr1->obj = ptr0->obj; + ptr1->meth = ptr0->meth; + ptr1->args = ptr0->args; + ptr1->fib = 0; + + return obj; +} + +VALUE +rb_enumeratorize(VALUE obj, VALUE meth, int 
argc, VALUE *argv) +{ + return enumerator_init(enumerator_allocate(rb_cEnumerator), obj, meth, argc, argv); +} + +/* + * call-seq: + * enum.each {...} + * + * Iterates the given block using the object and the method specified + * in the first place. If no block is given, returns self. + * + */ +static VALUE +enumerator_each(VALUE obj) +{ + struct enumerator *e; + int argc = 0; + VALUE *argv = 0; + + if (!rb_block_given_p()) return obj; + e = enumerator_ptr(obj); + if (e->args) { + argc = RARRAY_LEN(e->args); + argv = RARRAY_PTR(e->args); + } + return rb_block_call(e->obj, e->meth, argc, argv, + enumerator_each_i, (VALUE)e); +} + +static VALUE +enumerator_with_index_i(VALUE val, VALUE *memo, int argc, VALUE *argv) +{ + VALUE idx; + + idx = INT2FIX(*memo); + ++*memo; + + if (argc <= 1) + return rb_yield_values(2, val, idx); + + return rb_yield_values(2, rb_ary_new4(argc, argv), idx); +} + +/* + * call-seq: + * e.with_index {|(*args), idx| ... } + * e.with_index + * + * Iterates the given block for each element with an index, which + * start from 0. If no block is given, returns an enumerator. + * + */ +static VALUE +enumerator_with_index(VALUE obj) +{ + struct enumerator *e; + VALUE memo = 0; + int argc = 0; + VALUE *argv = 0; + + RETURN_ENUMERATOR(obj, 0, 0); + e = enumerator_ptr(obj); + if (e->args) { + argc = RARRAY_LEN(e->args); + argv = RARRAY_PTR(e->args); + } + return rb_block_call(e->obj, e->meth, argc, argv, + enumerator_with_index_i, (VALUE)&memo); +} + +static VALUE +enumerator_with_object_i(VALUE val, VALUE memo, int argc, VALUE *argv) +{ + if (argc <= 1) + return rb_yield_values(2, val, memo); + + return rb_yield_values(2, rb_ary_new4(argc, argv), memo); +} + +/* + * call-seq: + * e.with_object(obj) {|(*args), memo_obj| ... } + * e.with_object(obj) + * + * Iterates the given block for each element with an arbitrary + * object given, and returns the initially given object. + * + * If no block is given, returns an enumerator. 
+ * + */ +static VALUE +enumerator_with_object(VALUE obj, VALUE memo) +{ + struct enumerator *e; + int argc = 0; + VALUE *argv = 0; + + RETURN_ENUMERATOR(obj, 1, &memo); + e = enumerator_ptr(obj); + if (e->args) { + argc = RARRAY_LEN(e->args); + argv = RARRAY_PTR(e->args); + } + rb_block_call(e->obj, e->meth, argc, argv, + enumerator_with_object_i, memo); + + return memo; +} + +static VALUE +next_ii(VALUE i, VALUE obj, int argc, VALUE *argv) +{ + rb_fiber_yield(argc, argv); + return Qnil; +} + +static VALUE +next_i(VALUE curr, VALUE obj) +{ + struct enumerator *e = enumerator_ptr(obj); + VALUE nil = Qnil; + + rb_block_call(obj, rb_intern("each"), 0, 0, next_ii, obj); + e->no_next = Qtrue; + return rb_fiber_yield(1, &nil); +} + +static void +next_init(VALUE obj, struct enumerator *e) +{ + VALUE curr = rb_fiber_current(); + e->dst = curr; + e->fib = rb_fiber_new(next_i, obj); +} + +/* + * call-seq: + * e.next => object + * + * Returns the next object in the enumerator, and move the internal + * position forward. When the position reached at the end, internal + * position is rewound then StopIteration is raised. + * + * Note that enumeration sequence by next method does not affect other + * non-external enumeration methods, unless underlying iteration + * methods itself has side-effect, e.g. IO#each_line. + * + */ + +static VALUE +enumerator_next(VALUE obj) +{ + struct enumerator *e = enumerator_ptr(obj); + VALUE curr, v; + curr = rb_fiber_current(); + + if (!e->fib || !rb_fiber_alive_p(e->fib)) { + next_init(obj, e); + } + + v = rb_fiber_resume(e->fib, 1, &curr); + if (e->no_next) { + e->fib = 0; + e->dst = Qnil; + e->no_next = Qfalse; + rb_raise(rb_eStopIteration, "iteration reached at end"); + } + return v; +} + +/* + * call-seq: + * e.rewind => e + * + * Rewinds the enumeration sequence by the next method. + * + * If the enclosed object responds to a "rewind" method, it is called. 
+ */ + +static VALUE +enumerator_rewind(VALUE obj) +{ + struct enumerator *e = enumerator_ptr(obj); + + if (rb_respond_to(e->obj, id_rewind)) + rb_funcall(e->obj, id_rewind, 0); + + e->fib = 0; + e->dst = Qnil; + e->no_next = Qfalse; + return obj; +} + +/* + * Yielder + */ +static void +yielder_mark(void *p) +{ + struct yielder *ptr = p; + rb_gc_mark(ptr->proc); +} + +static struct yielder * +yielder_ptr(VALUE obj) +{ + struct yielder *ptr; + + Data_Get_Struct(obj, struct yielder, ptr); + if (RDATA(obj)->dmark != yielder_mark) { + rb_raise(rb_eTypeError, + "wrong argument type %s (expected %s)", + rb_obj_classname(obj), rb_class2name(rb_cYielder)); + } + if (!ptr || ptr->proc == Qundef) { + rb_raise(rb_eArgError, "uninitialized yielder"); + } + return ptr; +} + +/* :nodoc: */ +static VALUE +yielder_allocate(VALUE klass) +{ + struct yielder *ptr; + VALUE obj; + + obj = Data_Make_Struct(klass, struct yielder, yielder_mark, -1, ptr); + ptr->proc = Qundef; + + return obj; +} + +static VALUE +yielder_init(VALUE obj, VALUE proc) +{ + struct yielder *ptr; + + Data_Get_Struct(obj, struct yielder, ptr); + + if (!ptr) { + rb_raise(rb_eArgError, "unallocated yielder"); + } + + ptr->proc = proc; + + return obj; +} + +/* :nodoc: */ +static VALUE +yielder_initialize(VALUE obj) +{ + rb_need_block(); + + return yielder_init(obj, rb_block_proc()); +} + +/* :nodoc: */ +static VALUE +yielder_yield(VALUE obj, VALUE args) +{ + struct yielder *ptr = yielder_ptr(obj); + + rb_proc_call(ptr->proc, args); + + return obj; +} + +static VALUE +yielder_yield_i(VALUE obj, VALUE memo, int argc, VALUE *argv) +{ + return rb_yield_values2(argc, argv); +} + +static VALUE +yielder_new(void) +{ + return yielder_init(yielder_allocate(rb_cYielder), rb_proc_new(yielder_yield_i, 0)); +} + +/* + * Generator + */ +static void +generator_mark(void *p) +{ + struct generator *ptr = p; + rb_gc_mark(ptr->proc); +} + +static struct generator * +generator_ptr(VALUE obj) +{ + struct generator *ptr; + + 
Data_Get_Struct(obj, struct generator, ptr); + if (RDATA(obj)->dmark != generator_mark) { + rb_raise(rb_eTypeError, + "wrong argument type %s (expected %s)", + rb_obj_classname(obj), rb_class2name(rb_cGenerator)); + } + if (!ptr || ptr->proc == Qundef) { + rb_raise(rb_eArgError, "uninitialized generator"); + } + return ptr; +} + +/* :nodoc: */ +static VALUE +generator_allocate(VALUE klass) +{ + struct generator *ptr; + VALUE obj; + + obj = Data_Make_Struct(klass, struct generator, generator_mark, -1, ptr); + ptr->proc = Qundef; + + return obj; +} + +static VALUE +generator_init(VALUE obj, VALUE proc) +{ + struct generator *ptr; + + Data_Get_Struct(obj, struct generator, ptr); + + if (!ptr) { + rb_raise(rb_eArgError, "unallocated generator"); + } + + ptr->proc = proc; + + return obj; +} + +VALUE rb_obj_is_proc(VALUE proc); + +/* :nodoc: */ +static VALUE +generator_initialize(int argc, VALUE *argv, VALUE obj) +{ + VALUE proc; + + if (argc == 0) { + rb_need_block(); + + proc = rb_block_proc(); + } else { + rb_scan_args(argc, argv, "1", &proc); + + if (!rb_obj_is_proc(proc)) + rb_raise(rb_eTypeError, + "wrong argument type %s (expected Proc)", + rb_obj_classname(proc)); + + if (rb_block_given_p()) { + rb_warn("given block not used"); + } + } + + return generator_init(obj, proc); +} + +/* :nodoc: */ +static VALUE +generator_init_copy(VALUE obj, VALUE orig) +{ + struct generator *ptr0, *ptr1; + + ptr0 = generator_ptr(orig); + + Data_Get_Struct(obj, struct generator, ptr1); + + if (!ptr1) { + rb_raise(rb_eArgError, "unallocated generator"); + } + + ptr1->proc = ptr0->proc; + + return obj; +} + +/* :nodoc: */ +static VALUE +generator_each(VALUE obj) +{ + struct generator *ptr = generator_ptr(obj); + VALUE yielder; + + yielder = yielder_new(); + + rb_proc_call(ptr->proc, rb_ary_new3(1, yielder)); + + return obj; +} + +void +Init_Enumerator(void) +{ + rb_define_method(rb_mKernel, "to_enum", obj_to_enum, -1); + rb_define_method(rb_mKernel, "enum_for", obj_to_enum, -1); + + 
rb_define_method(rb_mEnumerable, "each_slice", enum_each_slice, 1); + rb_define_method(rb_mEnumerable, "each_cons", enum_each_cons, 1); + rb_define_method(rb_mEnumerable, "each_with_object", enum_each_with_object, 1); + + rb_cEnumerator = rb_define_class("Enumerator", rb_cObject); + rb_include_module(rb_cEnumerator, rb_mEnumerable); + + rb_define_alloc_func(rb_cEnumerator, enumerator_allocate); + rb_define_method(rb_cEnumerator, "initialize", enumerator_initialize, -1); + rb_define_method(rb_cEnumerator, "initialize_copy", enumerator_init_copy, 1); + rb_define_method(rb_cEnumerator, "each", enumerator_each, 0); + rb_define_method(rb_cEnumerator, "each_with_index", enumerator_with_index, 0); + rb_define_method(rb_cEnumerator, "each_with_object", enumerator_with_object, 1); + rb_define_method(rb_cEnumerator, "with_index", enumerator_with_index, 0); + rb_define_method(rb_cEnumerator, "with_object", enumerator_with_object, 1); + rb_define_method(rb_cEnumerator, "next", enumerator_next, 0); + rb_define_method(rb_cEnumerator, "rewind", enumerator_rewind, 0); + + rb_eStopIteration = rb_define_class("StopIteration", rb_eIndexError); + + /* Generator */ + rb_cGenerator = rb_define_class_under(rb_cEnumerator, "Generator", rb_cObject); + rb_include_module(rb_cGenerator, rb_mEnumerable); + rb_define_alloc_func(rb_cGenerator, generator_allocate); + rb_define_method(rb_cGenerator, "initialize", generator_initialize, -1); + rb_define_method(rb_cGenerator, "initialize_copy", generator_init_copy, 1); + rb_define_method(rb_cGenerator, "each", generator_each, 0); + + /* Yielder */ + rb_cYielder = rb_define_class_under(rb_cEnumerator, "Yielder", rb_cObject); + rb_define_alloc_func(rb_cYielder, yielder_allocate); + rb_define_method(rb_cYielder, "initialize", yielder_initialize, 0); + rb_define_method(rb_cYielder, "yield", yielder_yield, -2); + rb_define_method(rb_cYielder, "<<", yielder_yield, -2); + + sym_each = ID2SYM(rb_intern("each")); + id_rewind = rb_intern("rewind"); + + 
rb_provide("enumerator.so"); /* for backward compatibility */ +} diff --git a/error.c b/error.c new file mode 100644 index 0000000..99c27d2 --- /dev/null +++ b/error.c @@ -0,0 +1,1218 @@ +/********************************************************************** + + error.c - + + $Author: yugui $ + created at: Mon Aug 9 16:11:34 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/st.h" +#include "vm_core.h" + +#include +#include +#ifdef HAVE_STDLIB_H +#include +#endif +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#endif + +extern const char ruby_description[]; + +static int +err_position_0(char *buf, long len, const char *file, int line) +{ + if (!file) { + return 0; + } + else if (line == 0) { + return snprintf(buf, len, "%s: ", file); + } + else { + return snprintf(buf, len, "%s:%d: ", file, line); + } +} + +static int +err_position(char *buf, long len) +{ + return err_position_0(buf, len, rb_sourcefile(), rb_sourceline()); +} + +static void +err_snprintf(char *buf, long len, const char *fmt, va_list args) +{ + long n; + + n = err_position(buf, len); + if (len > n) { + vsnprintf((char*)buf+n, len-n, fmt, args); + } +} + +static void +compile_snprintf(char *buf, long len, const char *file, int line, const char *fmt, va_list args) +{ + long n; + + n = err_position_0(buf, len, file, line); + if (len > n) { + vsnprintf((char*)buf+n, len-n, fmt, args); + } +} + +static void err_append(const char*); + +void +rb_compile_error(const char *file, int line, const char *fmt, ...) +{ + va_list args; + char buf[BUFSIZ]; + + va_start(args, fmt); + compile_snprintf(buf, BUFSIZ, file, line, fmt, args); + va_end(args); + err_append(buf); +} + +void +rb_compile_error_append(const char *fmt, ...) 
+{ + va_list args; + char buf[BUFSIZ]; + + va_start(args, fmt); + vsnprintf(buf, BUFSIZ, fmt, args); + va_end(args); + err_append(buf); +} + +static void +compile_warn_print(const char *file, int line, const char *fmt, va_list args) +{ + char buf[BUFSIZ]; + int len; + + compile_snprintf(buf, BUFSIZ, file, line, fmt, args); + len = strlen(buf); + buf[len++] = '\n'; + rb_write_error2(buf, len); +} + +void +rb_compile_warn(const char *file, int line, const char *fmt, ...) +{ + char buf[BUFSIZ]; + va_list args; + + if (NIL_P(ruby_verbose)) return; + + snprintf(buf, BUFSIZ, "warning: %s", fmt); + + va_start(args, fmt); + compile_warn_print(file, line, buf, args); + va_end(args); +} + +/* rb_compile_warning() reports only in verbose mode */ +void +rb_compile_warning(const char *file, int line, const char *fmt, ...) +{ + char buf[BUFSIZ]; + va_list args; + + if (!RTEST(ruby_verbose)) return; + + snprintf(buf, BUFSIZ, "warning: %s", fmt); + + va_start(args, fmt); + compile_warn_print(file, line, buf, args); + va_end(args); +} + +static void +warn_print(const char *fmt, va_list args) +{ + char buf[BUFSIZ]; + int len; + + err_snprintf(buf, BUFSIZ, fmt, args); + len = strlen(buf); + buf[len++] = '\n'; + rb_write_error2(buf, len); +} + +void +rb_warn(const char *fmt, ...) +{ + char buf[BUFSIZ]; + va_list args; + + if (NIL_P(ruby_verbose)) return; + + snprintf(buf, BUFSIZ, "warning: %s", fmt); + + va_start(args, fmt); + warn_print(buf, args); + va_end(args); +} + +/* rb_warning() reports only in verbose mode */ +void +rb_warning(const char *fmt, ...) +{ + char buf[BUFSIZ]; + va_list args; + + if (!RTEST(ruby_verbose)) return; + + snprintf(buf, BUFSIZ, "warning: %s", fmt); + + va_start(args, fmt); + warn_print(buf, args); + va_end(args); +} + +/* + * call-seq: + * warn(msg) => nil + * + * Display the given message (followed by a newline) on STDERR unless + * warnings are disabled (for example with the -W0 flag). 
+ */ + +static VALUE +rb_warn_m(VALUE self, VALUE mesg) +{ + if (!NIL_P(ruby_verbose)) { + rb_io_write(rb_stderr, mesg); + rb_io_write(rb_stderr, rb_default_rs); + } + return Qnil; +} + +void rb_vm_bugreport(void); + +static void +report_bug(const char *file, int line, const char *fmt, va_list args) +{ + char buf[BUFSIZ]; + FILE *out = stderr; + int len = err_position_0(buf, BUFSIZ, file, line); + + if (fwrite(buf, 1, len, out) == len || + fwrite(buf, 1, len, (out = stdout)) == len) { + + fputs("[BUG] ", out); + vfprintf(out, fmt, args); + fprintf(out, "\n%s\n\n", ruby_description); + + rb_vm_bugreport(); + + fprintf(out, + "[NOTE]\n" + "You may encounter a bug of Ruby interpreter. Bug reports are welcome.\n" + "For details: http://www.ruby-lang.org/bugreport.html\n\n"); + } +} + +void +rb_bug(const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + report_bug(rb_sourcefile(), rb_sourceline(), fmt, args); + va_end(args); + + abort(); +} + +void +rb_compile_bug(const char *file, int line, const char *fmt, ...) 
+{ + va_list args; + + va_start(args, fmt); + report_bug(file, line, fmt, args); + va_end(args); + + abort(); +} + +static const struct types { + int type; + const char *name; +} builtin_types[] = { + {T_NIL, "nil"}, + {T_OBJECT, "Object"}, + {T_CLASS, "Class"}, + {T_ICLASS, "iClass"}, /* internal use: mixed-in module holder */ + {T_MODULE, "Module"}, + {T_FLOAT, "Float"}, + {T_STRING, "String"}, + {T_REGEXP, "Regexp"}, + {T_ARRAY, "Array"}, + {T_FIXNUM, "Fixnum"}, + {T_HASH, "Hash"}, + {T_STRUCT, "Struct"}, + {T_BIGNUM, "Bignum"}, + {T_FILE, "File"}, + {T_RATIONAL,"Rational"}, + {T_COMPLEX, "Complex"}, + {T_TRUE, "true"}, + {T_FALSE, "false"}, + {T_SYMBOL, "Symbol"}, /* :symbol */ + {T_DATA, "Data"}, /* internal use: wrapped C pointers */ + {T_MATCH, "MatchData"}, /* data of $~ */ + {T_NODE, "Node"}, /* internal use: syntax tree node */ + {T_UNDEF, "undef"}, /* internal use: #undef; should not happen */ +}; + +void +rb_check_type(VALUE x, int t) +{ + const struct types *type = builtin_types; + const struct types *const typeend = builtin_types + + sizeof(builtin_types) / sizeof(builtin_types[0]); + + if (x == Qundef) { + rb_bug("undef leaked to the Ruby space"); + } + + if (TYPE(x) != t) { + while (type < typeend) { + if (type->type == t) { + const char *etype; + + if (NIL_P(x)) { + etype = "nil"; + } + else if (FIXNUM_P(x)) { + etype = "Fixnum"; + } + else if (SYMBOL_P(x)) { + etype = "Symbol"; + } + else if (rb_special_const_p(x)) { + etype = RSTRING_PTR(rb_obj_as_string(x)); + } + else { + etype = rb_obj_classname(x); + } + rb_raise(rb_eTypeError, "wrong argument type %s (expected %s)", + etype, type->name); + } + type++; + } + rb_bug("unknown type 0x%x (0x%x given)", t, TYPE(x)); + } +} + +/* exception classes */ +#include + +VALUE rb_eException; +VALUE rb_eSystemExit; +VALUE rb_eInterrupt; +VALUE rb_eSignal; +VALUE rb_eFatal; +VALUE rb_eStandardError; +VALUE rb_eRuntimeError; +VALUE rb_eTypeError; +VALUE rb_eArgError; +VALUE rb_eIndexError; +VALUE 
rb_eKeyError; +VALUE rb_eRangeError; +VALUE rb_eNameError; +VALUE rb_eEncodingError; +VALUE rb_eEncCompatError; +VALUE rb_eNoMethodError; +VALUE rb_eSecurityError; +VALUE rb_eNotImpError; +VALUE rb_eNoMemError; +VALUE rb_cNameErrorMesg; + +VALUE rb_eScriptError; +VALUE rb_eSyntaxError; +VALUE rb_eLoadError; + +VALUE rb_eSystemCallError; +VALUE rb_mErrno; +static VALUE rb_eNOERROR; + +VALUE +rb_exc_new(VALUE etype, const char *ptr, long len) +{ + return rb_funcall(etype, rb_intern("new"), 1, rb_str_new(ptr, len)); +} + +VALUE +rb_exc_new2(VALUE etype, const char *s) +{ + return rb_exc_new(etype, s, strlen(s)); +} + +VALUE +rb_exc_new3(VALUE etype, VALUE str) +{ + StringValue(str); + return rb_funcall(etype, rb_intern("new"), 1, str); +} + +/* + * call-seq: + * Exception.new(msg = nil) => exception + * + * Construct a new Exception object, optionally passing in + * a message. + */ + +static VALUE +exc_initialize(int argc, VALUE *argv, VALUE exc) +{ + VALUE arg; + + rb_scan_args(argc, argv, "01", &arg); + rb_iv_set(exc, "mesg", arg); + rb_iv_set(exc, "bt", Qnil); + + return exc; +} + +/* + * Document-method: exception + * + * call-seq: + * exc.exception(string) -> an_exception or exc + * + * With no argument, or if the argument is the same as the receiver, + * return the receiver. Otherwise, create a new + * exception object of the same class as the receiver, but with a + * message equal to string.to_str. + * + */ + +static VALUE +exc_exception(int argc, VALUE *argv, VALUE self) +{ + VALUE exc; + + if (argc == 0) return self; + if (argc == 1 && self == argv[0]) return self; + exc = rb_obj_clone(self); + exc_initialize(argc, argv, exc); + + return exc; +} + +/* + * call-seq: + * exception.to_s => string + * + * Returns exception's message (or the name of the exception if + * no message is set). 
+ */ + +static VALUE +exc_to_s(VALUE exc) +{ + VALUE mesg = rb_attr_get(exc, rb_intern("mesg")); + + if (NIL_P(mesg)) return rb_class_name(CLASS_OF(exc)); + if (OBJ_TAINTED(exc)) OBJ_TAINT(mesg); + return mesg; +} + +/* + * call-seq: + * exception.message => string + * + * Returns the result of invoking exception.to_s. + * Normally this returns the exception's message or name. By + * supplying a to_str method, exceptions are agreeing to + * be used where Strings are expected. + */ + +static VALUE +exc_message(VALUE exc) +{ + return rb_funcall(exc, rb_intern("to_s"), 0, 0); +} + +/* + * call-seq: + * exception.inspect => string + * + * Return this exception's class name an message + */ + +static VALUE +exc_inspect(VALUE exc) +{ + VALUE str, klass; + + klass = CLASS_OF(exc); + exc = rb_obj_as_string(exc); + if (RSTRING_LEN(exc) == 0) { + return rb_str_dup(rb_class_name(klass)); + } + + str = rb_str_buf_new2("#<"); + klass = rb_class_name(klass); + rb_str_buf_append(str, klass); + rb_str_buf_cat(str, ": ", 2); + rb_str_buf_append(str, exc); + rb_str_buf_cat(str, ">", 1); + + return str; +} + +/* + * call-seq: + * exception.backtrace => array + * + * Returns any backtrace associated with the exception. 
The backtrace + * is an array of strings, each containing either ``filename:lineNo: in + * `method''' or ``filename:lineNo.'' + * + * def a + * raise "boom" + * end + * + * def b + * a() + * end + * + * begin + * b() + * rescue => detail + * print detail.backtrace.join("\n") + * end + * + * produces: + * + * prog.rb:2:in `a' + * prog.rb:6:in `b' + * prog.rb:10 +*/ + +static VALUE +exc_backtrace(VALUE exc) +{ + ID bt; + + CONST_ID(bt, "bt"); + return rb_attr_get(exc, bt); +} + +VALUE +rb_check_backtrace(VALUE bt) +{ + long i; + static const char err[] = "backtrace must be Array of String"; + + if (!NIL_P(bt)) { + int t = TYPE(bt); + + if (t == T_STRING) return rb_ary_new3(1, bt); + if (t != T_ARRAY) { + rb_raise(rb_eTypeError, err); + } + for (i=0;i array + * + * Sets the backtrace information associated with exc. The + * argument must be an array of String objects in the + * format described in Exception#backtrace. + * + */ + +static VALUE +exc_set_backtrace(VALUE exc, VALUE bt) +{ + return rb_iv_set(exc, "bt", rb_check_backtrace(bt)); +} + +/* + * call-seq: + * exc == obj => true or false + * + * Equality---If obj is not an Exception, returns + * false. Otherwise, returns true if exc and + * obj share same class, messages, and backtrace. 
+ */ + +static VALUE +exc_equal(VALUE exc, VALUE obj) +{ + VALUE mesg, backtrace; + ID id_mesg; + + if (exc == obj) return Qtrue; + CONST_ID(id_mesg, "mesg"); + + if (rb_obj_class(exc) != rb_obj_class(obj)) { + ID id_message, id_backtrace; + CONST_ID(id_message, "message"); + CONST_ID(id_backtrace, "backtrace"); + + if (rb_respond_to(obj, id_message) && rb_respond_to(obj, id_backtrace)) { + mesg = rb_funcall(obj, id_message, 0, 0); + backtrace = rb_funcall(obj, id_backtrace, 0, 0); + } + else { + return Qfalse; + } + } + else { + mesg = rb_attr_get(obj, id_mesg); + backtrace = exc_backtrace(obj); + } + + if (!rb_equal(rb_attr_get(exc, id_mesg), mesg)) + return Qfalse; + if (!rb_equal(exc_backtrace(exc), backtrace)) + return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * SystemExit.new(status=0) => system_exit + * + * Create a new +SystemExit+ exception with the given status. + */ + +static VALUE +exit_initialize(int argc, VALUE *argv, VALUE exc) +{ + VALUE status = INT2FIX(EXIT_SUCCESS); + if (argc > 0 && FIXNUM_P(argv[0])) { + status = *argv++; + --argc; + } + rb_call_super(argc, argv); + rb_iv_set(exc, "status", status); + return exc; +} + + +/* + * call-seq: + * system_exit.status => fixnum + * + * Return the status value associated with this system exit. + */ + +static VALUE +exit_status(VALUE exc) +{ + return rb_attr_get(exc, rb_intern("status")); +} + + +/* + * call-seq: + * system_exit.success? => true or false + * + * Returns +true+ if exiting successful, +false+ if not. + */ + +static VALUE +exit_success_p(VALUE exc) +{ + VALUE status = rb_attr_get(exc, rb_intern("status")); + if (NIL_P(status)) return Qtrue; + if (status == INT2FIX(EXIT_SUCCESS)) return Qtrue; + return Qfalse; +} + +void +rb_name_error(ID id, const char *fmt, ...) 
+{ + VALUE exc, argv[2]; + va_list args; + + va_start(args, fmt); + argv[0] = rb_vsprintf(fmt, args); + va_end(args); + + argv[1] = ID2SYM(id); + exc = rb_class_new_instance(2, argv, rb_eNameError); + rb_exc_raise(exc); +} + +/* + * call-seq: + * NameError.new(msg [, name]) => name_error + * + * Construct a new NameError exception. If given the name + * parameter may subsequently be examined using the NameError.name + * method. + */ + +static VALUE +name_err_initialize(int argc, VALUE *argv, VALUE self) +{ + VALUE name; + + name = (argc > 1) ? argv[--argc] : Qnil; + rb_call_super(argc, argv); + rb_iv_set(self, "name", name); + return self; +} + +/* + * call-seq: + * name_error.name => string or nil + * + * Return the name associated with this NameError exception. + */ + +static VALUE +name_err_name(VALUE self) +{ + return rb_attr_get(self, rb_intern("name")); +} + +/* + * call-seq: + * name_error.to_s => string + * + * Produce a nicely-formatted string representing the +NameError+. + */ + +static VALUE +name_err_to_s(VALUE exc) +{ + VALUE mesg = rb_attr_get(exc, rb_intern("mesg")); + VALUE str = mesg; + + if (NIL_P(mesg)) return rb_class_name(CLASS_OF(exc)); + StringValue(str); + if (str != mesg) { + rb_iv_set(exc, "mesg", mesg = str); + } + if (OBJ_TAINTED(exc)) OBJ_TAINT(mesg); + return mesg; +} + +/* + * call-seq: + * NoMethodError.new(msg, name [, args]) => no_method_error + * + * Construct a NoMethodError exception for a method of the given name + * called with the given arguments. The name may be accessed using + * the #name method on the resulting object, and the + * arguments using the #args method. + */ + +static VALUE +nometh_err_initialize(int argc, VALUE *argv, VALUE self) +{ + VALUE args = (argc > 2) ? 
argv[--argc] : Qnil; + name_err_initialize(argc, argv, self); + rb_iv_set(self, "args", args); + return self; +} + +/* :nodoc: */ +static void +name_err_mesg_mark(VALUE *ptr) +{ + rb_gc_mark_locations(ptr, ptr+3); +} + +/* :nodoc: */ +static VALUE +name_err_mesg_new(VALUE obj, VALUE mesg, VALUE recv, VALUE method) +{ + VALUE *ptr = ALLOC_N(VALUE, 3); + + ptr[0] = mesg; + ptr[1] = recv; + ptr[2] = method; + return Data_Wrap_Struct(rb_cNameErrorMesg, name_err_mesg_mark, -1, ptr); +} + +/* :nodoc: */ +static VALUE +name_err_mesg_equal(VALUE obj1, VALUE obj2) +{ + VALUE *ptr1, *ptr2; + int i; + + if (obj1 == obj2) return Qtrue; + if (rb_obj_class(obj2) != rb_cNameErrorMesg) + return Qfalse; + + Data_Get_Struct(obj1, VALUE, ptr1); + Data_Get_Struct(obj2, VALUE, ptr2); + for (i=0; i<3; i++) { + if (!rb_equal(ptr1[i], ptr2[i])) + return Qfalse; + } + return Qtrue; +} + +/* :nodoc: */ +static VALUE +name_err_mesg_to_str(VALUE obj) +{ + VALUE *ptr, mesg; + Data_Get_Struct(obj, VALUE, ptr); + + mesg = ptr[0]; + if (NIL_P(mesg)) return Qnil; + else { + const char *desc = 0; + VALUE d = 0, args[3]; + + obj = ptr[1]; + switch (TYPE(obj)) { + case T_NIL: + desc = "nil"; + break; + case T_TRUE: + desc = "true"; + break; + case T_FALSE: + desc = "false"; + break; + default: + d = rb_protect(rb_inspect, obj, 0); + if (NIL_P(d) || RSTRING_LEN(d) > 65) { + d = rb_any_to_s(obj); + } + desc = RSTRING_PTR(d); + break; + } + if (desc && desc[0] != '#') { + d = rb_str_new2(desc); + rb_str_cat2(d, ":"); + rb_str_cat2(d, rb_obj_classname(obj)); + } + args[0] = mesg; + args[1] = ptr[2]; + args[2] = d; + mesg = rb_f_sprintf(3, args); + } + if (OBJ_TAINTED(obj)) OBJ_TAINT(mesg); + return mesg; +} + +/* :nodoc: */ +static VALUE +name_err_mesg_load(VALUE klass, VALUE str) +{ + return str; +} + +/* + * call-seq: + * no_method_error.args => obj + * + * Return the arguments passed in as the third parameter to + * the constructor. 
+ */ + +static VALUE +nometh_err_args(VALUE self) +{ + return rb_attr_get(self, rb_intern("args")); +} + +void +rb_invalid_str(const char *str, const char *type) +{ + VALUE s = rb_str_inspect(rb_str_new2(str)); + + rb_raise(rb_eArgError, "invalid value for %s: %s", type, RSTRING_PTR(s)); +} + +/* + * Document-module: Errno + * + * Ruby exception objects are subclasses of Exception. + * However, operating systems typically report errors using plain + * integers. Module Errno is created dynamically to map + * these operating system errors to Ruby classes, with each error + * number generating its own subclass of SystemCallError. + * As the subclass is created in module Errno, its name + * will start Errno::. + * + * The names of the Errno:: classes depend on + * the environment in which Ruby runs. On a typical Unix or Windows + * platform, there are Errno classes such as + * Errno::EACCES, Errno::EAGAIN, + * Errno::EINTR, and so on. + * + * The integer operating system error number corresponding to a + * particular error is available as the class constant + * Errno::error::Errno. + * + * Errno::EACCES::Errno #=> 13 + * Errno::EAGAIN::Errno #=> 11 + * Errno::EINTR::Errno #=> 4 + * + * The full list of operating system errors on your particular platform + * are available as the constants of Errno. + * + * Errno.constants #=> :E2BIG, :EACCES, :EADDRINUSE, :EADDRNOTAVAIL, ... + */ + +static st_table *syserr_tbl; + +static VALUE +set_syserr(int n, const char *name) +{ + VALUE error; + + if (!st_lookup(syserr_tbl, n, &error)) { + error = rb_define_class_under(rb_mErrno, name, rb_eSystemCallError); + rb_define_const(error, "Errno", INT2NUM(n)); + st_add_direct(syserr_tbl, n, error); + } + else { + rb_define_const(rb_mErrno, name, error); + } + return error; +} + +static VALUE +get_syserr(int n) +{ + VALUE error; + + if (!st_lookup(syserr_tbl, n, &error)) { + char name[8]; /* some Windows' errno have 5 digits. 
*/ + + snprintf(name, sizeof(name), "E%03d", n); + error = set_syserr(n, name); + } + return error; +} + +/* + * call-seq: + * SystemCallError.new(msg, errno) => system_call_error_subclass + * + * If _errno_ corresponds to a known system error code, constructs + * the appropriate Errno class for that error, otherwise + * constructs a generic SystemCallError object. The + * error number is subsequently available via the errno + * method. + */ + +static VALUE +syserr_initialize(int argc, VALUE *argv, VALUE self) +{ +#if !defined(_WIN32) + char *strerror(); +#endif + const char *err; + VALUE mesg, error; + VALUE klass = rb_obj_class(self); + + if (klass == rb_eSystemCallError) { + rb_scan_args(argc, argv, "11", &mesg, &error); + if (argc == 1 && FIXNUM_P(mesg)) { + error = mesg; mesg = Qnil; + } + if (!NIL_P(error) && st_lookup(syserr_tbl, NUM2LONG(error), &klass)) { + /* change class */ + if (TYPE(self) != T_OBJECT) { /* insurance to avoid type crash */ + rb_raise(rb_eTypeError, "invalid instance type"); + } + RBASIC(self)->klass = klass; + } + } + else { + rb_scan_args(argc, argv, "01", &mesg); + error = rb_const_get(klass, rb_intern("Errno")); + } + if (!NIL_P(error)) err = strerror(NUM2INT(error)); + else err = "unknown error"; + if (!NIL_P(mesg)) { + VALUE str = mesg; + + StringValue(str); + mesg = rb_sprintf("%s - %.*s", err, + (int)RSTRING_LEN(str), RSTRING_PTR(str)); + } + else { + mesg = rb_str_new2(err); + } + rb_call_super(1, &mesg); + rb_iv_set(self, "errno", error); + return self; +} + +/* + * call-seq: + * system_call_error.errno => fixnum + * + * Return this SystemCallError's error number. + */ + +static VALUE +syserr_errno(VALUE self) +{ + return rb_attr_get(self, rb_intern("errno")); +} + +/* + * call-seq: + * system_call_error === other => true or false + * + * Return +true+ if the receiver is a generic +SystemCallError+, or + * if the error numbers _self_ and _other_ are the same. 
+ */
+
+static VALUE
+syserr_eqq(VALUE self, VALUE exc)
+{
+    VALUE num, e;
+    ID en;
+
+    CONST_ID(en, "errno");
+
+    /* a non-SystemCallError can only match if it at least responds to #errno */
+    if (!rb_obj_is_kind_of(exc, rb_eSystemCallError)) {
+        if (!rb_respond_to(exc, en)) return Qfalse;
+    }
+    /* the generic SystemCallError class matches every SystemCallError */
+    else if (self == rb_eSystemCallError) return Qtrue;
+
+    /* read the "errno" attribute first; call #errno only when it is nil */
+    num = rb_attr_get(exc, rb_intern("errno"));
+    if (NIL_P(num)) {
+        num = rb_funcall(exc, en, 0, 0);
+    }
+    /* compare with the Errno constant looked up on the receiver */
+    e = rb_const_get(self, rb_intern("Errno"));
+    if (FIXNUM_P(num) ? num == e : rb_equal(num, e))
+        return Qtrue;
+    return Qfalse;
+}
+
+/*
+ * Descendants of class Exception are used to communicate
+ * between raise methods and rescue
+ * statements in begin/end blocks. Exception
+ * objects carry information about the exception---its type (the
+ * exception's class name), an optional descriptive string, and
+ * optional traceback information. Programs may subclass
+ * Exception to add additional information.
+ */
+
+/*
+ * Defines the built-in exception classes and their methods, creates the
+ * numeric table used for system-call errors (syserr_tbl), defines the
+ * Errno module and registers the global function "warn".
+ */
+void
+Init_Exception(void)
+{
+    rb_eException = rb_define_class("Exception", rb_cObject);
+    rb_define_singleton_method(rb_eException, "exception", rb_class_new_instance, -1);
+    rb_define_method(rb_eException, "exception", exc_exception, -1);
+    rb_define_method(rb_eException, "initialize", exc_initialize, -1);
+    rb_define_method(rb_eException, "==", exc_equal, 1);
+    rb_define_method(rb_eException, "to_s", exc_to_s, 0);
+    rb_define_method(rb_eException, "message", exc_message, 0);
+    rb_define_method(rb_eException, "inspect", exc_inspect, 0);
+    rb_define_method(rb_eException, "backtrace", exc_backtrace, 0);
+    rb_define_method(rb_eException, "set_backtrace", exc_set_backtrace, 1);
+
+    rb_eSystemExit = rb_define_class("SystemExit", rb_eException);
+    rb_define_method(rb_eSystemExit, "initialize", exit_initialize, -1);
+    rb_define_method(rb_eSystemExit, "status", exit_status, 0);
+    rb_define_method(rb_eSystemExit, "success?", exit_success_p, 0);
+
+    rb_eFatal = rb_define_class("fatal", rb_eException);
+    rb_eSignal = rb_define_class("SignalException", rb_eException);
+    rb_eInterrupt = rb_define_class("Interrupt", rb_eSignal);
+
+    rb_eStandardError = rb_define_class("StandardError", rb_eException);
+    rb_eTypeError = rb_define_class("TypeError", rb_eStandardError);
+    rb_eArgError = rb_define_class("ArgumentError", rb_eStandardError);
+    rb_eIndexError = rb_define_class("IndexError", rb_eStandardError);
+    rb_eKeyError = rb_define_class("KeyError", rb_eIndexError);
+    rb_eRangeError = rb_define_class("RangeError", rb_eStandardError);
+
+    rb_eScriptError = rb_define_class("ScriptError", rb_eException);
+    rb_eSyntaxError = rb_define_class("SyntaxError", rb_eScriptError);
+    rb_eLoadError = rb_define_class("LoadError", rb_eScriptError);
+    rb_eNotImpError = rb_define_class("NotImplementedError", rb_eScriptError);
+
+    rb_eNameError = rb_define_class("NameError", rb_eStandardError);
+    rb_define_method(rb_eNameError, "initialize", name_err_initialize, -1);
+    rb_define_method(rb_eNameError, "name", name_err_name, 0);
+    rb_define_method(rb_eNameError, "to_s", name_err_to_s, 0);
+    rb_cNameErrorMesg = rb_define_class_under(rb_eNameError, "message", rb_cData);
+    rb_define_singleton_method(rb_cNameErrorMesg, "!", name_err_mesg_new, 3);
+    rb_define_method(rb_cNameErrorMesg, "==", name_err_mesg_equal, 1);
+    rb_define_method(rb_cNameErrorMesg, "to_str", name_err_mesg_to_str, 0);
+    rb_define_method(rb_cNameErrorMesg, "_dump", name_err_mesg_to_str, 1);
+    rb_define_singleton_method(rb_cNameErrorMesg, "_load", name_err_mesg_load, 1);
+    rb_eNoMethodError = rb_define_class("NoMethodError", rb_eNameError);
+    rb_define_method(rb_eNoMethodError, "initialize", nometh_err_initialize, -1);
+    rb_define_method(rb_eNoMethodError, "args", nometh_err_args, 0);
+
+    rb_eRuntimeError = rb_define_class("RuntimeError", rb_eStandardError);
+    rb_eSecurityError = rb_define_class("SecurityError", rb_eException);
+    rb_eNoMemError = rb_define_class("NoMemoryError", rb_eException);
+    rb_eEncodingError = rb_define_class("EncodingError", rb_eStandardError);
+    rb_eEncCompatError = rb_define_class_under(rb_cEncoding, "CompatibilityError", rb_eEncodingError);
+
+    /* the table must exist before any set_syserr()/get_syserr() call */
+    syserr_tbl = st_init_numtable();
+    rb_eSystemCallError = rb_define_class("SystemCallError", rb_eStandardError);
+    rb_define_method(rb_eSystemCallError, "initialize", syserr_initialize, -1);
+    rb_define_method(rb_eSystemCallError, "errno", syserr_errno, 0);
+    rb_define_singleton_method(rb_eSystemCallError, "===", syserr_eqq, 1);
+
+    rb_mErrno = rb_define_module("Errno");
+
+    rb_define_global_function("warn", rb_warn_m, 1);
+}
+
+/*
+ * Builds a message from the printf-style +fmt+ and raises a new
+ * instance of +exc+ carrying it.
+ */
+void
+rb_raise(VALUE exc, const char *fmt, ...)
+{
+    va_list args;
+    VALUE mesg;
+
+    va_start(args, fmt);
+    mesg = rb_vsprintf(fmt, args);
+    va_end(args);
+    rb_exc_raise(rb_exc_new3(exc, mesg));
+}
+
+/* Same as rb_raise(), with the class fixed to rb_eLoadError. */
+void
+rb_loaderror(const char *fmt, ...)
+{
+    va_list args;
+    VALUE mesg;
+
+    va_start(args, fmt);
+    mesg = rb_vsprintf(fmt, args);
+    va_end(args);
+    rb_exc_raise(rb_exc_new3(rb_eLoadError, mesg));
+}
+
+/* Raises rb_eNotImpError naming the function of the current frame. */
+void
+rb_notimplement(void)
+{
+    rb_raise(rb_eNotImpError,
+             "%s() function is unimplemented on this machine",
+             rb_id2name(rb_frame_this_func()));
+}
+
+/* Formats a message and hands a new rb_eFatal instance to rb_exc_fatal(). */
+void
+rb_fatal(const char *fmt, ...)
+{
+    va_list args;
+    VALUE mesg;
+
+    va_start(args, fmt);
+    mesg = rb_vsprintf(fmt, args);
+    va_end(args);
+
+    rb_exc_fatal(rb_exc_new3(rb_eFatal, mesg));
+}
+
+/*
+ * Raises the exception class that get_syserr() returns for the current
+ * errno, with +mesg+ (or nil when +mesg+ is NULL) as its only argument.
+ * errno is reset to 0 first; calling this with errno == 0 is reported
+ * through rb_bug().
+ */
+void
+rb_sys_fail(const char *mesg)
+{
+    int n = errno;
+    VALUE arg;
+
+    errno = 0;
+    if (n == 0) {
+        rb_bug("rb_sys_fail(%s) - errno == 0", mesg ? mesg : "");
+    }
+
+    arg = mesg ? rb_str_new2(mesg) : Qnil;
+    rb_exc_raise(rb_class_new_instance(1, &arg, get_syserr(n)));
+}
+
+/*
+ * When ruby_verbose is true, prints "warning: <fmt>: <strerror(errno)>"
+ * with the variadic arguments applied to the combined format.
+ * errno is restored only on the path that actually prints.
+ */
+void
+rb_sys_warning(const char *fmt, ...)
+{
+    char buf[BUFSIZ];
+    va_list args;
+    int errno_save;
+
+    errno_save = errno;
+
+    if (!RTEST(ruby_verbose)) return;
+
+    /*
+     * NOTE(review): the strerror() text becomes part of the format string
+     * passed to warn_print(); a '%' in that text would be interpreted as a
+     * conversion -- confirm this cannot happen on supported platforms.
+     */
+    snprintf(buf, BUFSIZ, "warning: %s", fmt);
+    snprintf(buf+strlen(buf), BUFSIZ-strlen(buf), ": %s", strerror(errno_save));
+
+    va_start(args, fmt);
+    warn_print(buf, args);
+    va_end(args);
+    errno = errno_save;
+}
+
+/* Raises a LoadError of the form "<strerror(errno)> -- <path>". */
+void
+rb_load_fail(const char *path)
+{
+    rb_loaderror("%s -- %s", strerror(errno), path);
+}
+
+/* Raises RuntimeError "can't modify frozen <what>". */
+void
+rb_error_frozen(const char *what)
+{
+    rb_raise(rb_eRuntimeError, "can't modify frozen %s", what);
+}
+
+/* Raises through rb_error_frozen() when +obj+ is frozen; otherwise no-op. */
+void
+rb_check_frozen(VALUE obj)
+{
+    if (OBJ_FROZEN(obj)) rb_error_frozen(rb_obj_classname(obj));
+}
+
+/* Registers errno 0 as NOERROR, then whatever known_errors.inc lists. */
+void Init_syserr(void)
+{
+    rb_eNOERROR = set_syserr(0, "NOERROR");
+#include "known_errors.inc"
+}
+
+/*
+ * Records the compile error text +s+.  With th->mild_compile_error set,
+ * the text is accumulated in a SyntaxError stored in th->errinfo;
+ * otherwise th->errinfo gets a generic "compile error" SyntaxError (if
+ * it had none) and the text is written out via rb_write_error().
+ */
+static void
+err_append(const char *s)
+{
+    rb_thread_t *th = GET_THREAD();
+    VALUE err = th->errinfo;
+
+    if (th->mild_compile_error) {
+        if (!RTEST(err)) {
+            err = rb_exc_new2(rb_eSyntaxError, s);
+            th->errinfo = err;
+        }
+        else {
+            VALUE str = rb_obj_as_string(err);
+
+            rb_str_cat2(str, "\n");
+            rb_str_cat2(str, s);
+            th->errinfo = rb_exc_new3(rb_eSyntaxError, str);
+        }
+    }
+    else {
+        if (!RTEST(err)) {
+            err = rb_exc_new2(rb_eSyntaxError, "compile error");
+            th->errinfo = err;
+        }
+        rb_write_error(s);
+        rb_write_error("\n");
+    }
+}
diff --git a/eval.c b/eval.c
new file mode 100644
index 0000000..52867a3
--- /dev/null
+++ b/eval.c
@@ -0,0 +1,1077 @@
+/**********************************************************************
+
+ eval.c -
+
+ $Author: yugui $
+ created at: Thu Jun 10 14:22:17 JST 1993
+
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
+ Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan
+
+**********************************************************************/
+
+#include "eval_intern.h"
+#include "iseq.h"
+#include "gc.h"
+
+VALUE proc_invoke(VALUE, VALUE, VALUE, VALUE);
+VALUE rb_binding_new(void);
+NORETURN(void rb_raise_jump(VALUE));
+
+ID rb_frame_callee(void);
+VALUE rb_eLocalJumpError;
+VALUE rb_eSysStackError;
+
+/* the pre-built exception stored in the VM's special_exceptions table */
+#define exception_error GET_VM()->special_exceptions[ruby_error_reenter]
+
+#include "eval_error.c"
+#include "eval_jump.c"
+
+/* initialize ruby */
+
+#if defined(__APPLE__)
+#define environ (*_NSGetEnviron())
+#elif !defined(_WIN32)
+extern char **environ;
+#endif
+char **rb_origenviron;
+
+void rb_clear_trace_func(void);
+void rb_thread_stop_timer_thread(void);
+
+void rb_call_inits(void);
+void Init_heap(void);
+void Init_ext(void);
+void Init_BareVM(void);
+
+/*
+ * One-time interpreter set-up.  Repeated calls return immediately.
+ * If initialization ends with a non-zero tag state, the error is
+ * printed and the process exits with EXIT_FAILURE.
+ */
+void
+ruby_init(void)
+{
+    static int initialized = 0;
+    int state;
+
+    if (initialized)
+        return;
+    initialized = 1;
+
+    rb_origenviron = environ;
+
+    /* the address of a local is handed to Init_stack() */
+    Init_stack((void *)&state);
+    Init_BareVM();
+    Init_heap();
+
+    PUSH_TAG();
+    if ((state = EXEC_TAG()) == 0) {
+        rb_call_inits();
+        ruby_prog_init();
+    }
+    POP_TAG();
+
+    if (state) {
+        error_print();
+        exit(EXIT_FAILURE);
+    }
+    GET_VM()->running = 1;
+}
+
+extern void rb_clear_trace_func(void);
+
+/*
+ * Runs ruby_process_options() on the command line and returns its
+ * result.  If that ends with a non-zero tag state, trace functions are
+ * cleared and the value returned is the error_handle() status wrapped
+ * with INT2FIX() instead.
+ */
+void *
+ruby_options(int argc, char **argv)
+{
+    int state;
+    void *tree = 0;
+
+    Init_stack((void *)&tree);
+    PUSH_TAG();
+    if ((state = EXEC_TAG()) == 0) {
+        SAVE_ROOT_JMPBUF(GET_THREAD(), tree = ruby_process_options(argc, argv));
+    }
+    else {
+        rb_clear_trace_func();
+        state = error_handle(state);
+        tree = (void *)INT2FIX(state);
+    }
+    POP_TAG();
+    return tree;
+}
+
+/*
+ * First finalization step: rb_trap_exit() under its own tag (its tag
+ * state is ignored), then rb_exec_end_proc() and rb_clear_trace_func().
+ */
+static void
+ruby_finalize_0(void)
+{
+    PUSH_TAG();
+    if (EXEC_TAG() == 0) {
+        rb_trap_exit();
+    }
+    POP_TAG();
+    rb_exec_end_proc();
+    rb_clear_trace_func();
+}
+
+/*
+ * Second finalization step: ruby_sig_finalize(), reset the current
+ * thread's errinfo to nil, then rb_gc_call_finalizer_at_exit().
+ */
+static void
+ruby_finalize_1(void)
+{
+    ruby_sig_finalize();
+    GET_THREAD()->errinfo = Qnil;
+    rb_gc_call_finalizer_at_exit();
+}
+
+/* Runs both finalization steps in order. */
+void
+ruby_finalize(void)
+{
+    ruby_finalize_0();
+    ruby_finalize_1();
+}
+
+void rb_thread_stop_timer_thread(void);
+
+/*
+ * Shuts the interpreter down and computes the process exit status.
+ *
+ * +ex+ is the incoming status/tag state; it is passed through
+ * error_handle().  The errinfo values seen before finalization
+ * (errs[1]) and after ruby_finalize_0() (errs[0]) are then inspected:
+ * a SystemExit returns its status, a SignalException is forwarded to
+ * ruby_default_signal(), any other non-NODE value turns a 0 status
+ * into 1.
+ */
+int
+ruby_cleanup(int ex)
+{
+    int state;
+    volatile VALUE errs[2];
+    rb_thread_t *th = GET_THREAD();
+    int nerr;
+
+    errs[1] = th->errinfo;
+    th->safe_level = 0;
+    Init_stack(&errs[STACK_UPPER(errs, 0, 1)]);
+
+    PUSH_TAG();
+    if ((state = EXEC_TAG()) == 0) {
+        SAVE_ROOT_JMPBUF(th, ruby_finalize_0());
+    }
+    POP_TAG();
+
+    errs[0] = th->errinfo;
+    PUSH_TAG();
+    if ((state = EXEC_TAG()) == 0) {
+        SAVE_ROOT_JMPBUF(th, rb_thread_terminate_all());
+    }
+    else if (ex == 0) {
+        ex = state;
+    }
+    /* report against the errinfo that was current on entry */
+    th->errinfo = errs[1];
+    ex = error_handle(ex);
+    ruby_finalize_1();
+    POP_TAG();
+    rb_thread_stop_timer_thread();
+
+    /* NOTE(review): nerr is int while the bound is size_t (signed/unsigned compare) */
+    for (nerr = 0; nerr < sizeof(errs) / sizeof(errs[0]); ++nerr) {
+        VALUE err = errs[nerr];
+
+        if (!RTEST(err)) continue;
+
+        /* th->errinfo contains a NODE while break'ing */
+        if (TYPE(err) == T_NODE) continue;
+
+        if (rb_obj_is_kind_of(err, rb_eSystemExit)) {
+            return sysexit_status(err);
+        }
+        else if (rb_obj_is_kind_of(err, rb_eSignal)) {
+            VALUE sig = rb_iv_get(err, "signo");
+            ruby_default_signal(NUM2INT(sig));
+        }
+        else if (ex == 0) {
+            ex = 1;
+        }
+    }
+
+    /* map 0/1 onto the platform's EXIT_SUCCESS/EXIT_FAILURE when they differ */
+#if EXIT_SUCCESS != 0 || EXIT_FAILURE != 1
+    switch (ex) {
+#if EXIT_SUCCESS != 0
+      case 0: return EXIT_SUCCESS;
+#endif
+#if EXIT_FAILURE != 1
+      case 1: return EXIT_FAILURE;
+#endif
+    }
+#endif
+
+    return ex;
+}
+
+/*
+ * Evaluates +n+ (cast to a VALUE and passed to rb_iseq_eval_main())
+ * under a tag and returns the resulting tag state; a null +n+ returns 0.
+ * +file+ is not used by this body.
+ */
+int
+ruby_exec_node(void *n, const char *file)
+{
+    int state;
+    VALUE iseq = (VALUE)n;
+    rb_thread_t *th = GET_THREAD();
+
+    if (!n) return 0;
+
+    PUSH_TAG();
+    if ((state = EXEC_TAG()) == 0) {
+        SAVE_ROOT_JMPBUF(th, {
+            th->base_block = 0;
+            rb_iseq_eval_main(iseq);
+        });
+    }
+    POP_TAG();
+    return state;
+}
+
+/* Cleans up via ruby_cleanup() and exits the process with its result. */
+void
+ruby_stop(int ex)
+{
+    exit(ruby_cleanup(ex));
+}
+
+/*
+ * Interprets +n+ as returned by ruby_options(): Qtrue and Qfalse map to
+ * EXIT_SUCCESS and EXIT_FAILURE, a Fixnum is returned as an int, and
+ * anything else is executed and then cleaned up.
+ */
+int
+ruby_run_node(void *n)
+{
+    VALUE v = (VALUE)n;
+
+    switch (v) {
+      case Qtrue: return EXIT_SUCCESS;
+      case Qfalse: return EXIT_FAILURE;
+    }
+    if (FIXNUM_P(v)) {
+        return FIX2INT(v);
+    }
+    Init_stack((void *)&n);
+    return ruby_cleanup(ruby_exec_node(n, 0));
+}
+
+/*
+ * call-seq:
+ *    Module.nesting => array
+ *
+ * Returns the list of +Modules+ nested at the point of call.
+ *
+ *    module M1
+ *      module M2
+ *        $a = Module.nesting
+ *      end
+ *    end
+ *    $a #=> [M1::M2, M1]
+ *    $a[0].name #=> "M1::M2"
+ */
+
+static VALUE
+rb_mod_nesting(void)
+{
+    VALUE ary = rb_ary_new();
+    const NODE *cref = rb_vm_cref();
+
+    /* the last cref (the one without nd_next) is not included */
+    while (cref && cref->nd_next) {
+        VALUE klass = cref->nd_clss;
+        if (!NIL_P(klass)) {
+            rb_ary_push(ary, klass);
+        }
+        cref = cref->nd_next;
+    }
+    return ary;
+}
+
+/*
+ * call-seq:
+ *    Module.constants => array
+ *
+ * Returns an array of the names of all constants defined in the
+ * system. This list includes the names of all modules and classes.
+ *
+ *    p Module.constants.sort[1..5]
+ *
+ * produces:
+ *
+ *    ["ARGV", "ArgumentError", "Array", "Bignum", "Binding"]
+ */
+
+static VALUE
+rb_mod_s_constants(int argc, VALUE *argv, VALUE mod)
+{
+    const NODE *cref = rb_vm_cref();
+    VALUE klass;
+    VALUE cbase = 0;
+    void *data = 0;
+
+    /* with arguments, defer to rb_mod_constants() on rb_cModule */
+    if (argc > 0) {
+        return rb_mod_constants(argc, argv, rb_cModule);
+    }
+
+    /* collect constants of every non-nil cref class; remember the first one */
+    while (cref) {
+        klass = cref->nd_clss;
+        if (!NIL_P(klass)) {
+            data = rb_mod_const_at(cref->nd_clss, data);
+            if (!cbase) {
+                cbase = klass;
+            }
+        }
+        cref = cref->nd_next;
+    }
+
+    if (cbase) {
+        data = rb_mod_const_of(cbase, data);
+    }
+    return rb_const_list(data);
+}
+
+/*
+ * Raises through rb_error_frozen() when +klass+ is frozen, describing
+ * it as "object" (singleton), "module" or "class"; no-op otherwise.
+ */
+void
+rb_frozen_class_p(VALUE klass)
+{
+    const char *desc = "something(?!)";
+
+    if (OBJ_FROZEN(klass)) {
+        if (FL_TEST(klass, FL_SINGLETON))
+            desc = "object";
+        else {
+            switch (TYPE(klass)) {
+              case T_MODULE:
+              case T_ICLASS:
+                desc = "module";
+                break;
+              case T_CLASS:
+                desc = "class";
+                break;
+            }
+        }
+        rb_error_frozen(desc);
+    }
+}
+
+NORETURN(static void rb_longjmp(int, VALUE));
+VALUE rb_make_backtrace(void);
+
+/*
+ * Common tail of raising: stores +mesg+ (or the current errinfo, or a
+ * fresh RuntimeError) in th->errinfo, attaches a backtrace when the
+ * exception has none, optionally prints a debug line when ruby_debug is
+ * set, fires the RAISE event hook (except for TAG_FATAL) and jumps to
+ * +tag+.  If the thread was already marked as raising, it jumps with
+ * TAG_FATAL and the pre-built exception_error instead.
+ */
+static void
+rb_longjmp(int tag, VALUE mesg)
+{
+    VALUE at;
+    VALUE e;
+    rb_thread_t *th = GET_THREAD();
+    const char *file;
+    int line = 0;
+
+    if (rb_thread_set_raised(th)) {
+        th->errinfo = exception_error;
+        rb_thread_reset_raised(th);
+        JUMP_TAG(TAG_FATAL);
+    }
+
+    if (NIL_P(mesg))
+        mesg = th->errinfo;
+    if (NIL_P(mesg)) {
+        mesg = rb_exc_new(rb_eRuntimeError, 0, 0);
+    }
+
+    file = rb_sourcefile();
+    if (file) line = rb_sourceline();
+    if (file && !NIL_P(mesg)) {
+        at = get_backtrace(mesg);
+        if (NIL_P(at)) {
+            at = rb_make_backtrace();
+            /* a frozen exception is duplicated before set_backtrace() */
+            if (OBJ_FROZEN(mesg)) {
+                mesg = rb_obj_dup(mesg);
+            }
+            set_backtrace(mesg, at);
+        }
+    }
+    if (!NIL_P(mesg)) {
+        th->errinfo = mesg;
+    }
+
+    if (RTEST(ruby_debug) && !NIL_P(e = th->errinfo) &&
+        !rb_obj_is_kind_of(e, rb_eSystemExit)) {
+        int status;
+
+        PUSH_TAG();
+        if ((status = EXEC_TAG()) == 0) {
+            RB_GC_GUARD(e) = rb_obj_as_string(e);
+            if (file) {
+                warn_printf("Exception `%s' at %s:%d - %s\n",
+                            rb_obj_classname(th->errinfo),
+                            file, line, RSTRING_PTR(e));
+            }
+            else {
+                warn_printf("Exception `%s' - %s\n",
+                            rb_obj_classname(th->errinfo),
+                            RSTRING_PTR(e));
+            }
+        }
+        POP_TAG();
+        /* a re-entrant raise while printing: put the original exception back */
+        if (status == TAG_FATAL && th->errinfo == exception_error) {
+            th->errinfo = mesg;
+        }
+        else if (status) {
+            rb_thread_reset_raised(th);
+            JUMP_TAG(status);
+        }
+    }
+
+    rb_trap_restore_mask();
+
+    if (tag != TAG_FATAL) {
+        EXEC_EVENT_HOOK(th, RUBY_EVENT_RAISE, th->cfp->self,
+                        0 /* TODO: id */, 0 /* TODO: klass */);
+    }
+
+    rb_thread_raised_clear(th);
+    JUMP_TAG(tag);
+}
+
+/* Raises +mesg+ with TAG_RAISE; non-nil values go through rb_make_exception(). */
+void
+rb_exc_raise(VALUE mesg)
+{
+    if (!NIL_P(mesg)) {
+        mesg = rb_make_exception(1, &mesg);
+    }
+    rb_longjmp(TAG_RAISE, mesg);
+}
+
+/* Same as rb_exc_raise(), but jumps with TAG_FATAL. */
+void
+rb_exc_fatal(VALUE mesg)
+{
+    if (!NIL_P(mesg)) {
+        mesg = rb_make_exception(1, &mesg);
+    }
+    rb_longjmp(TAG_FATAL, mesg);
+}
+
+/* Raises rb_eInterrupt with an empty message. */
+void
+rb_interrupt(void)
+{
+    rb_raise(rb_eInterrupt, "%s", "");
+}
+
+static VALUE get_errinfo(void);
+
+/*
+ * call-seq:
+ *    raise
+ *    raise(string)
+ *    raise(exception [, string [, array]])
+ *    fail
+ *    fail(string)
+ *    fail(exception [, string [, array]])
+ *
+ * With no arguments, raises the exception in $! or raises
+ * a RuntimeError if $! is +nil+.
+ * With a single +String+ argument, raises a + * +RuntimeError+ with the string as a message. Otherwise, + * the first parameter should be the name of an +Exception+ + * class (or an object that returns an +Exception+ object when sent + * an +exception+ message). The optional second parameter sets the + * message associated with the exception, and the third parameter is an + * array of callback information. Exceptions are caught by the + * +rescue+ clause of begin...end blocks. + * + * raise "Failed to create socket" + * raise ArgumentError, "No parameters", caller + */ + +static VALUE +rb_f_raise(int argc, VALUE *argv) +{ + VALUE err; + if (argc == 0) { + err = get_errinfo(); + if (!NIL_P(err)) { + argc = 1; + argv = &err; + } + } + rb_raise_jump(rb_make_exception(argc, argv)); + return Qnil; /* not reached */ +} + +VALUE +rb_make_exception(int argc, VALUE *argv) +{ + VALUE mesg; + ID exception; + int n; + + mesg = Qnil; + switch (argc) { + case 0: + break; + case 1: + if (NIL_P(argv[0])) + break; + mesg = rb_check_string_type(argv[0]); + if (!NIL_P(mesg)) { + mesg = rb_exc_new3(rb_eRuntimeError, mesg); + break; + } + n = 0; + goto exception_call; + + case 2: + case 3: + n = 1; + exception_call: + CONST_ID(exception, "exception"); + if (!rb_respond_to(argv[0], exception)) { + rb_raise(rb_eTypeError, "exception class/object expected"); + } + mesg = rb_funcall(argv[0], exception, n, argv[1]); + break; + default: + rb_raise(rb_eArgError, "wrong number of arguments"); + break; + } + if (argc > 0) { + if (!rb_obj_is_kind_of(mesg, rb_eException)) + rb_raise(rb_eTypeError, "exception object expected"); + if (argc > 2) + set_backtrace(mesg, argv[2]); + } + + return mesg; +} + +void +rb_raise_jump(VALUE mesg) +{ + rb_thread_t *th = GET_THREAD(); + th->cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(th->cfp); + /* TODO: fix me */ + rb_longjmp(TAG_RAISE, mesg); +} + +void +rb_jump_tag(int tag) +{ + JUMP_TAG(tag); +} + +int +rb_block_given_p(void) +{ + rb_thread_t *th = GET_THREAD(); + 
+ if ((th->cfp->lfp[0] & 0x02) == 0 && + GC_GUARDED_PTR_REF(th->cfp->lfp[0])) { + return Qtrue; + } + else { + return Qfalse; + } +} + +int +rb_iterator_p() +{ + return rb_block_given_p(); +} + +VALUE rb_eThreadError; + +void +rb_need_block() +{ + if (!rb_block_given_p()) { + rb_vm_localjump_error("no block given", Qnil, 0); + } +} + +VALUE +rb_rescue2(VALUE (* b_proc) (ANYARGS), VALUE data1, + VALUE (* r_proc) (ANYARGS), VALUE data2, ...) +{ + int state; + rb_thread_t *th = GET_THREAD(); + rb_control_frame_t *cfp = th->cfp; + volatile VALUE result; + volatile VALUE e_info = th->errinfo; + va_list args; + + PUSH_TAG(); + if ((state = EXEC_TAG()) == 0) { + retry_entry: + result = (*b_proc) (data1); + } + else { + th->cfp = cfp; /* restore */ + + if (state == TAG_RAISE) { + int handle = Qfalse; + VALUE eclass; + + va_init_list(args, data2); + while ((eclass = va_arg(args, VALUE)) != 0) { + if (rb_obj_is_kind_of(th->errinfo, eclass)) { + handle = Qtrue; + break; + } + } + va_end(args); + + if (handle) { + if (r_proc) { + PUSH_TAG(); + if ((state = EXEC_TAG()) == 0) { + result = (*r_proc) (data2, th->errinfo); + } + POP_TAG(); + if (state == TAG_RETRY) { + state = 0; + th->errinfo = Qnil; + goto retry_entry; + } + } + else { + result = Qnil; + state = 0; + } + if (state == 0) { + th->errinfo = e_info; + } + } + } + } + POP_TAG(); + if (state) + JUMP_TAG(state); + + return result; +} + +VALUE +rb_rescue(VALUE (* b_proc)(ANYARGS), VALUE data1, + VALUE (* r_proc)(ANYARGS), VALUE data2) +{ + return rb_rescue2(b_proc, data1, r_proc, data2, rb_eStandardError, + (VALUE)0); +} + +VALUE +rb_protect(VALUE (* proc) (VALUE), VALUE data, int * state) +{ + volatile VALUE result = Qnil; + int status; + rb_thread_t *th = GET_THREAD(); + rb_control_frame_t *cfp = th->cfp; + struct rb_vm_trap_tag trap_tag; + rb_jmpbuf_t org_jmpbuf; + + trap_tag.prev = th->trap_tag; + + PUSH_TAG(); + th->trap_tag = &trap_tag; + MEMCPY(&org_jmpbuf, &(th)->root_jmpbuf, rb_jmpbuf_t, 1); + if ((status = 
EXEC_TAG()) == 0) { + SAVE_ROOT_JMPBUF(th, result = (*proc) (data)); + } + MEMCPY(&(th)->root_jmpbuf, &org_jmpbuf, rb_jmpbuf_t, 1); + th->trap_tag = trap_tag.prev; + POP_TAG(); + + if (state) { + *state = status; + } + if (status != 0) { + th->cfp = cfp; + return Qnil; + } + + return result; +} + +VALUE +rb_ensure(VALUE (*b_proc)(ANYARGS), VALUE data1, VALUE (*e_proc)(ANYARGS), VALUE data2) +{ + int state; + volatile VALUE result = Qnil; + + PUSH_TAG(); + if ((state = EXEC_TAG()) == 0) { + result = (*b_proc) (data1); + } + POP_TAG(); + /* TODO: fix me */ + /* retval = prot_tag ? prot_tag->retval : Qnil; */ /* save retval */ + (*e_proc) (data2); + if (state) + JUMP_TAG(state); + return result; +} + +static ID +frame_func_id(rb_control_frame_t *cfp) +{ + rb_iseq_t *iseq = cfp->iseq; + if (!iseq) { + return cfp->method_id; + } + while (iseq) { + if (RUBY_VM_IFUNC_P(iseq)) { + return rb_intern(""); + } + if (iseq->defined_method_id) { + return iseq->defined_method_id; + } + if (iseq->local_iseq == iseq) { + break; + } + iseq = iseq->parent_iseq; + } + return 0; +} + +ID +rb_frame_this_func(void) +{ + return frame_func_id(GET_THREAD()->cfp); +} + +ID +rb_frame_callee(void) +{ + return frame_func_id(GET_THREAD()->cfp); +} + +static ID +rb_frame_caller(void) +{ + rb_thread_t *th = GET_THREAD(); + rb_control_frame_t *prev_cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(th->cfp); + /* check if prev_cfp can be accessible */ + if ((void *)(th->stack + th->stack_size) == (void *)(prev_cfp)) { + return 0; + } + return frame_func_id(prev_cfp); +} + +void +rb_frame_pop(void) +{ + rb_thread_t *th = GET_THREAD(); + th->cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(th->cfp); +} + +/* + * call-seq: + * append_features(mod) => mod + * + * When this module is included in another, Ruby calls + * append_features in this module, passing it the + * receiving module in _mod_. 
Ruby's default implementation is + * to add the constants, methods, and module variables of this module + * to _mod_ if this module has not already been added to + * _mod_ or one of its ancestors. See also Module#include. + */ + +static VALUE +rb_mod_append_features(VALUE module, VALUE include) +{ + switch (TYPE(include)) { + case T_CLASS: + case T_MODULE: + break; + default: + Check_Type(include, T_CLASS); + break; + } + rb_include_module(include, module); + + return module; +} + +/* + * call-seq: + * include(module, ...) => self + * + * Invokes Module.append_features on each parameter in turn. + */ + +static VALUE +rb_mod_include(int argc, VALUE *argv, VALUE module) +{ + int i; + + for (i = 0; i < argc; i++) + Check_Type(argv[i], T_MODULE); + while (argc--) { + rb_funcall(argv[argc], rb_intern("append_features"), 1, module); + rb_funcall(argv[argc], rb_intern("included"), 1, module); + } + return module; +} + +void +rb_obj_call_init(VALUE obj, int argc, VALUE *argv) +{ + PASS_PASSED_BLOCK(); + rb_funcall2(obj, idInitialize, argc, argv); +} + +void +rb_extend_object(VALUE obj, VALUE module) +{ + rb_include_module(rb_singleton_class(obj), module); +} + +/* + * call-seq: + * extend_object(obj) => obj + * + * Extends the specified object by adding this module's constants and + * methods (which are added as singleton methods). This is the callback + * method used by Object#extend. + * + * module Picky + * def Picky.extend_object(o) + * if String === o + * puts "Can't add Picky to a String" + * else + * puts "Picky added to #{o.class}" + * super + * end + * end + * end + * (s = Array.new).extend Picky # Call Object.extend + * (s = "quick brown fox").extend Picky + * + * produces: + * + * Picky added to Array + * Can't add Picky to a String + */ + +static VALUE +rb_mod_extend_object(VALUE mod, VALUE obj) +{ + rb_extend_object(obj, mod); + return obj; +} + +/* + * call-seq: + * obj.extend(module, ...) 
=> obj + * + * Adds to _obj_ the instance methods from each module given as a + * parameter. + * + * module Mod + * def hello + * "Hello from Mod.\n" + * end + * end + * + * class Klass + * def hello + * "Hello from Klass.\n" + * end + * end + * + * k = Klass.new + * k.hello #=> "Hello from Klass.\n" + * k.extend(Mod) #=> # + * k.hello #=> "Hello from Mod.\n" + */ + +static VALUE +rb_obj_extend(int argc, VALUE *argv, VALUE obj) +{ + int i; + + if (argc == 0) { + rb_raise(rb_eArgError, "wrong number of arguments (0 for 1)"); + } + for (i = 0; i < argc; i++) + Check_Type(argv[i], T_MODULE); + while (argc--) { + rb_funcall(argv[argc], rb_intern("extend_object"), 1, obj); + rb_funcall(argv[argc], rb_intern("extended"), 1, obj); + } + return obj; +} + +/* + * call-seq: + * include(module, ...) => self + * + * Invokes Module.append_features + * on each parameter in turn. Effectively adds the methods and constants + * in each module to the receiver. + */ + +static VALUE +top_include(int argc, VALUE *argv, VALUE self) +{ + rb_thread_t *th = GET_THREAD(); + + rb_secure(4); + if (th->top_wrapper) { + rb_warning + ("main#include in the wrapped load is effective only in wrapper module"); + return rb_mod_include(argc, argv, th->top_wrapper); + } + return rb_mod_include(argc, argv, rb_cObject); +} + +VALUE rb_f_trace_var(); +VALUE rb_f_untrace_var(); + +static VALUE * +errinfo_place(void) +{ + rb_thread_t *th = GET_THREAD(); + rb_control_frame_t *cfp = th->cfp; + rb_control_frame_t *end_cfp = RUBY_VM_END_CONTROL_FRAME(th); + + while (RUBY_VM_VALID_CONTROL_FRAME_P(cfp, end_cfp)) { + if (RUBY_VM_NORMAL_ISEQ_P(cfp->iseq)) { + if (cfp->iseq->type == ISEQ_TYPE_RESCUE) { + return &cfp->dfp[-2]; + } + else if (cfp->iseq->type == ISEQ_TYPE_ENSURE && + TYPE(cfp->dfp[-2]) != T_NODE && + !FIXNUM_P(cfp->dfp[-2])) { + return &cfp->dfp[-2]; + } + } + cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp); + } + return 0; +} + +static VALUE +get_errinfo(void) +{ + VALUE *ptr = errinfo_place(); + if (ptr) { 
+ return *ptr; + } + else { + rb_thread_t *th = GET_THREAD(); + return th->errinfo; + } +} + +static VALUE +errinfo_getter(ID id) +{ + return get_errinfo(); +} + +#if 0 +static void +errinfo_setter(VALUE val, ID id, VALUE *var) +{ + if (!NIL_P(val) && !rb_obj_is_kind_of(val, rb_eException)) { + rb_raise(rb_eTypeError, "assigning non-exception to $!"); + } + else { + VALUE *ptr = errinfo_place(); + if (ptr) { + *ptr = val; + } + else { + rb_raise(rb_eRuntimeError, "errinfo_setter: not in rescue clause."); + } + } +} +#endif + +VALUE +rb_errinfo(void) +{ + rb_thread_t *th = GET_THREAD(); + return th->errinfo; +} + +void +rb_set_errinfo(VALUE err) +{ + if (!NIL_P(err) && !rb_obj_is_kind_of(err, rb_eException)) { + rb_raise(rb_eTypeError, "assigning non-exception to $!"); + } + GET_THREAD()->errinfo = err; +} + +VALUE +rb_rubylevel_errinfo(void) +{ + return get_errinfo(); +} + +static VALUE +errat_getter(ID id) +{ + VALUE err = get_errinfo(); + if (!NIL_P(err)) { + return get_backtrace(err); + } + else { + return Qnil; + } +} + +static void +errat_setter(VALUE val, ID id, VALUE *var) +{ + VALUE err = get_errinfo(); + if (NIL_P(err)) { + rb_raise(rb_eArgError, "$! not set"); + } + set_backtrace(err, val); +} + +/* + * call-seq: + * __method__ => symbol + * __callee__ => symbol + * + * Returns the name of the current method as a Symbol. + * If called outside of a method, it returns nil. 
+ * + */ + +static VALUE +rb_f_method_name(void) +{ + ID fname = rb_frame_caller(); /* need *caller* ID */ + + if (fname) { + return ID2SYM(fname); + } + else { + return Qnil; + } +} + +void +Init_eval(void) +{ + rb_define_virtual_variable("$@", errat_getter, errat_setter); + rb_define_virtual_variable("$!", errinfo_getter, 0); + + rb_define_global_function("raise", rb_f_raise, -1); + rb_define_global_function("fail", rb_f_raise, -1); + + rb_define_global_function("global_variables", rb_f_global_variables, 0); /* in variable.c */ + + rb_define_global_function("__method__", rb_f_method_name, 0); + rb_define_global_function("__callee__", rb_f_method_name, 0); + + rb_define_private_method(rb_cModule, "append_features", rb_mod_append_features, 1); + rb_define_private_method(rb_cModule, "extend_object", rb_mod_extend_object, 1); + rb_define_private_method(rb_cModule, "include", rb_mod_include, -1); + + rb_undef_method(rb_cClass, "module_function"); + + { + extern void Init_vm_eval(void); + extern void Init_eval_method(void); + Init_vm_eval(); + Init_eval_method(); + } + + rb_define_singleton_method(rb_cModule, "nesting", rb_mod_nesting, 0); + rb_define_singleton_method(rb_cModule, "constants", rb_mod_s_constants, -1); + + rb_define_singleton_method(rb_vm_top_self(), "include", top_include, -1); + + rb_define_method(rb_mKernel, "extend", rb_obj_extend, -1); + + rb_define_global_function("trace_var", rb_f_trace_var, -1); /* in variable.c */ + rb_define_global_function("untrace_var", rb_f_untrace_var, -1); /* in variable.c */ + + exception_error = rb_exc_new3(rb_eFatal, + rb_obj_freeze(rb_str_new2("exception reentered"))); + OBJ_TAINT(exception_error); + OBJ_FREEZE(exception_error); +} diff --git a/eval_error.c b/eval_error.c new file mode 100644 index 0000000..65f1f4a --- /dev/null +++ b/eval_error.c @@ -0,0 +1,274 @@ +/* -*-c-*- */ +/* + * included by eval.c + */ + +static void +warn_printf(const char *fmt, ...) 
+{ + char buf[BUFSIZ]; + va_list args; + + va_init_list(args, fmt); + vsnprintf(buf, BUFSIZ, fmt, args); + va_end(args); + rb_write_error(buf); +} + +#define warn_print(x) rb_write_error(x) +#define warn_print2(x,l) rb_write_error2(x,l) + +static void +error_pos(void) +{ + const char *sourcefile = rb_sourcefile(); + int sourceline = rb_sourceline(); + + if (sourcefile) { + if (sourceline == 0) { + warn_printf("%s", sourcefile); + } + else if (rb_frame_callee()) { + warn_printf("%s:%d:in `%s'", sourcefile, sourceline, + rb_id2name(rb_frame_callee())); + } + else { + warn_printf("%s:%d", sourcefile, sourceline); + } + } +} + +VALUE rb_check_backtrace(VALUE); + +static VALUE +get_backtrace(VALUE info) +{ + if (NIL_P(info)) + return Qnil; + info = rb_funcall(info, rb_intern("backtrace"), 0); + if (NIL_P(info)) + return Qnil; + return rb_check_backtrace(info); +} + +VALUE +rb_get_backtrace(VALUE info) +{ + return get_backtrace(info); +} + +static void +set_backtrace(VALUE info, VALUE bt) +{ + rb_funcall(info, rb_intern("set_backtrace"), 1, bt); +} + +static void +error_print(void) +{ + VALUE errat = Qnil; /* OK */ + VALUE errinfo = GET_THREAD()->errinfo; + volatile VALUE eclass, e; + const char *einfo; + long elen; + + if (NIL_P(errinfo)) + return; + + PUSH_TAG(); + if (EXEC_TAG() == 0) { + errat = get_backtrace(errinfo); + } + else { + errat = Qnil; + } + if (EXEC_TAG()) + goto error; + if (NIL_P(errat)) { + const char *file = rb_sourcefile(); + int line = rb_sourceline(); + if (!file) + warn_printf("%d", line); + else if (!line) + warn_printf("%s", file); + else + warn_printf("%s:%d", file, line); + } + else if (RARRAY_LEN(errat) == 0) { + error_pos(); + } + else { + VALUE mesg = RARRAY_PTR(errat)[0]; + + if (NIL_P(mesg)) + error_pos(); + else { + warn_print2(RSTRING_PTR(mesg), RSTRING_LEN(mesg)); + } + } + + eclass = CLASS_OF(errinfo); + if (EXEC_TAG() == 0) { + e = rb_funcall(errinfo, rb_intern("message"), 0, 0); + StringValue(e); + einfo = RSTRING_PTR(e); + elen = 
RSTRING_LEN(e); + } + else { + einfo = ""; + elen = 0; + } + if (EXEC_TAG()) + goto error; + if (eclass == rb_eRuntimeError && elen == 0) { + warn_print(": unhandled exception\n"); + } + else { + VALUE epath; + + epath = rb_class_name(eclass); + if (elen == 0) { + warn_print(": "); + warn_print2(RSTRING_PTR(epath), RSTRING_LEN(epath)); + warn_print("\n"); + } + else { + char *tail = 0; + long len = elen; + + if (RSTRING_PTR(epath)[0] == '#') + epath = 0; + if ((tail = memchr(einfo, '\n', elen)) != 0) { + len = tail - einfo; + tail++; /* skip newline */ + } + warn_print(": "); + warn_print2(einfo, len); + if (epath) { + warn_print(" ("); + warn_print2(RSTRING_PTR(epath), RSTRING_LEN(epath)); + warn_print(")\n"); + } + if (tail) { + warn_print2(tail, elen - len - 1); + if (einfo[elen-1] != '\n') warn_print2("\n", 1); + } + } + } + + if (!NIL_P(errat)) { + long i; + long len = RARRAY_LEN(errat); + VALUE *ptr = RARRAY_PTR(errat); + int skip = eclass == rb_eSysStackError; + +#define TRACE_MAX (TRACE_HEAD+TRACE_TAIL+5) +#define TRACE_HEAD 8 +#define TRACE_TAIL 5 + + for (i = 1; i < len; i++) { + if (TYPE(ptr[i]) == T_STRING) { + warn_printf("\tfrom %s\n", RSTRING_PTR(ptr[i])); + } + if (skip && i == TRACE_HEAD && len > TRACE_MAX) { + warn_printf("\t ... %ld levels...\n", + len - TRACE_HEAD - TRACE_TAIL); + i = len - TRACE_TAIL; + } + } + } + error: + POP_TAG(); +} + +void +ruby_error_print(void) +{ + error_print(); +} + +void +rb_print_undef(VALUE klass, ID id, int scope) +{ + const char *v; + + switch (scope) { + default: + case NOEX_PUBLIC: v = ""; break; + case NOEX_PRIVATE: v = " private"; break; + case NOEX_PROTECTED: v = " protected"; break; + } + rb_name_error(id, "undefined%s method `%s' for %s `%s'", v, + rb_id2name(id), + (TYPE(klass) == T_MODULE) ? 
"module" : "class", + rb_class2name(klass)); +} + +static int +sysexit_status(VALUE err) +{ + VALUE st = rb_iv_get(err, "status"); + return NUM2INT(st); +} + +static int +error_handle(int ex) +{ + int status = EXIT_FAILURE; + rb_thread_t *th = GET_THREAD(); + + if (rb_thread_set_raised(th)) + return EXIT_FAILURE; + switch (ex & TAG_MASK) { + case 0: + status = EXIT_SUCCESS; + break; + + case TAG_RETURN: + error_pos(); + warn_print(": unexpected return\n"); + break; + case TAG_NEXT: + error_pos(); + warn_print(": unexpected next\n"); + break; + case TAG_BREAK: + error_pos(); + warn_print(": unexpected break\n"); + break; + case TAG_REDO: + error_pos(); + warn_print(": unexpected redo\n"); + break; + case TAG_RETRY: + error_pos(); + warn_print(": retry outside of rescue clause\n"); + break; + case TAG_THROW: + /* TODO: fix me */ + error_pos(); + warn_printf(": unexpected throw\n"); + break; + case TAG_RAISE: { + VALUE errinfo = GET_THREAD()->errinfo; + if (rb_obj_is_kind_of(errinfo, rb_eSystemExit)) { + status = sysexit_status(errinfo); + } + else if (rb_obj_is_instance_of(errinfo, rb_eSignal)) { + /* no message when exiting by signal */ + } + else { + error_print(); + } + break; + } + case TAG_FATAL: + error_print(); + break; + default: + rb_bug("Unknown longjmp status %d", ex); + break; + } + rb_thread_reset_raised(th); + return status; +} diff --git a/eval_intern.h b/eval_intern.h new file mode 100644 index 0000000..7d0cf32 --- /dev/null +++ b/eval_intern.h @@ -0,0 +1,215 @@ +#ifndef RUBY_EVAL_INTERN_H +#define RUBY_EVAL_INTERN_H + +#include "ruby/ruby.h" +#include "vm_core.h" + +#define PASS_PASSED_BLOCK_TH(th) do { \ + (th)->passed_block = GC_GUARDED_PTR_REF((rb_block_t *)(th)->cfp->lfp[0]); \ + (th)->cfp->flag |= VM_FRAME_FLAG_PASSED; \ +} while (0) + +#define PASS_PASSED_BLOCK() do { \ + rb_thread_t * const __th__ = GET_THREAD(); \ + PASS_PASSED_BLOCK_TH(__th__); \ +} while (0) + +#ifdef HAVE_STDLIB_H +#include +#endif +#ifndef EXIT_SUCCESS +#define 
EXIT_SUCCESS 0 +#endif +#ifndef EXIT_FAILURE +#define EXIT_FAILURE 1 +#endif + +#include +#include + +#ifdef __APPLE__ +#include +#endif + +/* Make alloca work the best possible way. */ +#ifdef __GNUC__ +# ifndef atarist +# ifndef alloca +# define alloca __builtin_alloca +# endif +# endif /* atarist */ +#else +# ifdef HAVE_ALLOCA_H +# include +# else +# ifdef _AIX +#pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +void *alloca(); +# endif +# endif /* AIX */ +# endif /* HAVE_ALLOCA_H */ +#endif /* __GNUC__ */ + +#ifndef HAVE_STRING_H +char *strrchr(const char *, const char); +#endif + +#ifdef HAVE_UNISTD_H +#include +#endif + +#ifdef HAVE_NET_SOCKET_H +#include +#endif + +#define ruby_setjmp(env) RUBY_SETJMP(env) +#define ruby_longjmp(env,val) RUBY_LONGJMP(env,val) +#ifdef __CYGWIN__ +# ifndef _setjmp +int _setjmp(jmp_buf); +# endif +# ifndef _longjmp +NORETURN(void _longjmp(jmp_buf, int)); +# endif +#endif + +#include +#include +#include + +#ifdef HAVE_SYS_SELECT_H +#include +#endif + +/* + Solaris sys/select.h switches select to select_large_fdset to support larger + file descriptors if FD_SETSIZE is larger than 1024 on 32bit environment. + But Ruby doesn't change FD_SETSIZE because fd_set is allocated dynamically. + So following definition is required to use select_large_fdset. 
+*/ +#ifdef HAVE_SELECT_LARGE_FDSET +#define select(n, r, w, e, t) select_large_fdset(n, r, w, e, t) +#endif + +#ifdef HAVE_SYS_PARAM_H +#include +#endif + +#include + +#define SAVE_ROOT_JMPBUF(th, stmt) do \ + if (ruby_setjmp((th)->root_jmpbuf) == 0) { \ + stmt; \ + } \ + else { \ + rb_fiber_start(); \ + } while (0) + +#define TH_PUSH_TAG(th) do { \ + rb_thread_t * const _th = th; \ + struct rb_vm_tag _tag; \ + _tag.tag = 0; \ + _tag.prev = _th->tag; \ + _th->tag = &_tag; + +#define TH_POP_TAG() \ + _th->tag = _tag.prev; \ +} while (0) + +#define TH_POP_TAG2() \ + _th->tag = _tag.prev + +#define PUSH_TAG() TH_PUSH_TAG(GET_THREAD()) +#define POP_TAG() TH_POP_TAG() + +#define TH_EXEC_TAG() ruby_setjmp(_th->tag->buf) + +#define EXEC_TAG() \ + TH_EXEC_TAG() + +#define TH_JUMP_TAG(th, st) do { \ + ruby_longjmp(th->tag->buf,(st)); \ +} while (0) + +#define JUMP_TAG(st) TH_JUMP_TAG(GET_THREAD(), st) + +enum ruby_tag_type { + RUBY_TAG_RETURN = 0x1, + RUBY_TAG_BREAK = 0x2, + RUBY_TAG_NEXT = 0x3, + RUBY_TAG_RETRY = 0x4, + RUBY_TAG_REDO = 0x5, + RUBY_TAG_RAISE = 0x6, + RUBY_TAG_THROW = 0x7, + RUBY_TAG_FATAL = 0x8, + RUBY_TAG_MASK = 0xf +}; +#define TAG_RETURN RUBY_TAG_RETURN +#define TAG_BREAK RUBY_TAG_BREAK +#define TAG_NEXT RUBY_TAG_NEXT +#define TAG_RETRY RUBY_TAG_RETRY +#define TAG_REDO RUBY_TAG_REDO +#define TAG_RAISE RUBY_TAG_RAISE +#define TAG_THROW RUBY_TAG_THROW +#define TAG_FATAL RUBY_TAG_FATAL +#define TAG_MASK RUBY_TAG_MASK + +#define NEW_THROW_OBJECT(val, pt, st) \ + ((VALUE)NEW_NODE(NODE_LIT, (val), (pt), (st))) +#define SET_THROWOBJ_CATCH_POINT(obj, val) \ + (RNODE((obj))->u2.value = (val)) +#define SET_THROWOBJ_STATE(obj, val) \ + (RNODE((obj))->u3.value = (val)) + +#define GET_THROWOBJ_VAL(obj) ((VALUE)RNODE((obj))->u1.value) +#define GET_THROWOBJ_CATCH_POINT(obj) ((VALUE*)RNODE((obj))->u2.value) +#define GET_THROWOBJ_STATE(obj) ((int)RNODE((obj))->u3.value) + +#define SCOPE_TEST(f) (rb_vm_cref()->nd_visi & (f)) +#define SCOPE_CHECK(f) (rb_vm_cref()->nd_visi 
== (f)) +#define SCOPE_SET(f) (rb_vm_cref()->nd_visi = (f)) + +#define CHECK_STACK_OVERFLOW(cfp, margin) do \ + if (((VALUE *)(cfp)->sp) + (margin) + sizeof(rb_control_frame_t) >= ((VALUE *)cfp)) { \ + rb_exc_raise(sysstack_error); \ + } \ +while (0) + +void rb_thread_cleanup(void); +void rb_thread_wait_other_threads(void); + +enum { + RAISED_EXCEPTION = 1, + RAISED_STACKOVERFLOW = 2, + RAISED_NOMEMORY = 4 +}; +int rb_thread_set_raised(rb_thread_t *th); +int rb_thread_reset_raised(rb_thread_t *th); +#define rb_thread_raised_set(th, f) ((th)->raised_flag |= (f)) +#define rb_thread_raised_reset(th, f) ((th)->raised_flag &= ~(f)) +#define rb_thread_raised_p(th, f) (((th)->raised_flag & (f)) != 0) +#define rb_thread_raised_clear(th) ((th)->raised_flag = 0) + +VALUE rb_f_eval(int argc, VALUE *argv, VALUE self); +VALUE rb_make_exception(int argc, VALUE *argv); + +NORETURN(void rb_fiber_start(void)); + +NORETURN(void rb_print_undef(VALUE, ID, int)); +NORETURN(void rb_vm_localjump_error(const char *,VALUE, int)); +NORETURN(void rb_vm_jump_tag_but_local_jump(int, VALUE)); +NORETURN(void rb_raise_method_missing(rb_thread_t *th, int argc, VALUE *argv, + VALUE obj, int call_status)); + +VALUE rb_vm_make_jump_tag_but_local_jump(int state, VALUE val); +NODE *rb_vm_cref(void); +VALUE rb_obj_is_proc(VALUE); +VALUE rb_vm_call_cfunc(VALUE recv, VALUE (*func)(VALUE), VALUE arg, const rb_block_t *blockptr, VALUE filename); +void rb_thread_terminate_all(void); +VALUE rb_vm_top_self(); +VALUE rb_vm_cbase(void); +void rb_trap_restore_mask(void); + +#endif /* RUBY_EVAL_INTERN_H */ diff --git a/eval_jump.c b/eval_jump.c new file mode 100644 index 0000000..f474844 --- /dev/null +++ b/eval_jump.c @@ -0,0 +1,151 @@ +/* -*-c-*- */ +/* + * from eval.c + */ + +#include "eval_intern.h" + +/* exit */ + +void +rb_call_end_proc(VALUE data) +{ + rb_proc_call(data, rb_ary_new()); +} + +/* + * call-seq: + * at_exit { block } -> proc + * + * Converts _block_ to a +Proc+ object (and therefore + * binds 
it at the point of call) and registers it for execution when + * the program exits. If multiple handlers are registered, they are + * executed in reverse order of registration. + * + * def do_at_exit(str1) + * at_exit { print str1 } + * end + * at_exit { puts "cruel world" } + * do_at_exit("goodbye ") + * exit + * + * produces: + * + * goodbye cruel world + */ + +static VALUE +rb_f_at_exit(void) +{ + VALUE proc; + + if (!rb_block_given_p()) { + rb_raise(rb_eArgError, "called without a block"); + } + proc = rb_block_proc(); + rb_set_end_proc(rb_call_end_proc, proc); + return proc; +} + +struct end_proc_data { + void (*func) (); + VALUE data; + int safe; + struct end_proc_data *next; +}; + +static struct end_proc_data *end_procs, *ephemeral_end_procs, *tmp_end_procs; + +void +rb_set_end_proc(void (*func)(VALUE), VALUE data) +{ + struct end_proc_data *link = ALLOC(struct end_proc_data); + struct end_proc_data **list; + rb_thread_t *th = GET_THREAD(); + + if (th->top_wrapper) { + list = &ephemeral_end_procs; + } + else { + list = &end_procs; + } + link->next = *list; + link->func = func; + link->data = data; + link->safe = rb_safe_level(); + *list = link; +} + +void +rb_mark_end_proc(void) +{ + struct end_proc_data *link; + + link = end_procs; + while (link) { + rb_gc_mark(link->data); + link = link->next; + } + link = ephemeral_end_procs; + while (link) { + rb_gc_mark(link->data); + link = link->next; + } + link = tmp_end_procs; + while (link) { + rb_gc_mark(link->data); + link = link->next; + } +} + +void +rb_exec_end_proc(void) +{ + struct end_proc_data *link, *tmp; + int status; + volatile int safe = rb_safe_level(); + + while (ephemeral_end_procs) { + tmp_end_procs = link = ephemeral_end_procs; + ephemeral_end_procs = 0; + while (link) { + PUSH_TAG(); + if ((status = EXEC_TAG()) == 0) { + rb_set_safe_level_force(link->safe); + (*link->func) (link->data); + } + POP_TAG(); + if (status) { + error_handle(status); + } + tmp = link; + tmp_end_procs = link = link->next; 
+ xfree(tmp); + } + } + while (end_procs) { + tmp_end_procs = link = end_procs; + end_procs = 0; + while (link) { + PUSH_TAG(); + if ((status = EXEC_TAG()) == 0) { + rb_set_safe_level_force(link->safe); + (*link->func) (link->data); + } + POP_TAG(); + if (status) { + error_handle(status); + } + tmp = link; + tmp_end_procs = link = link->next; + xfree(tmp); + } + } + rb_set_safe_level_force(safe); +} + +void +Init_jump(void) +{ + rb_define_global_function("at_exit", rb_f_at_exit, 0); +} diff --git a/ext/Setup b/ext/Setup new file mode 100644 index 0000000..b0f29b9 --- /dev/null +++ b/ext/Setup @@ -0,0 +1,32 @@ +#option nodynamic + +#Win32API +#bigdecimal +#curses +#dbm +#digest +#digest/md5 +#digest/rmd160 +#digest/sha1 +#digest/sha2 +#dl +#etc +#fcntl +#gdbm +#iconv +#io/wait +#nkf +#openssl +#pty +#racc/cparse +#readline +#ripper +#sdbm +#socket +#stringio +#strscan +#syck +#syslog +#tk +#win32ole +#zlib diff --git a/ext/Setup.atheos b/ext/Setup.atheos new file mode 100644 index 0000000..6e0b8ae --- /dev/null +++ b/ext/Setup.atheos @@ -0,0 +1,33 @@ +option nodynamic + +#Win32API +bigdecimal +curses +dbm +digest +digest/md5 +digest/rmd160 +digest/sha1 +digest/sha2 +dl +enumerator +etc +fcntl +gdbm +iconv +io/wait +nkf +#openssl +pty +racc/parse +readline +ripper +sdbm +socket +stringio +strscan +syck +syslog +#tk +#win32ole +zlib diff --git a/ext/Setup.emx b/ext/Setup.emx new file mode 100644 index 0000000..fade917 --- /dev/null +++ b/ext/Setup.emx @@ -0,0 +1,33 @@ +option nodynamic + +#Win32API +bigdecimal +curses +#dbm +digest +digest/md5 +digest/rmd160 +digest/sha1 +digest/sha2 +#dl +enumerator +etc +fcntl +#gdbm +#iconv +#io/wait +nkf +#openssl +#pty +racc/cparse +#readline +#ripper +#sdbm +socket +stringio +strscan +#syck +#syslog +#tk +#win32ole +#zlib diff --git a/ext/Setup.nt b/ext/Setup.nt new file mode 100644 index 0000000..c8574ba --- /dev/null +++ b/ext/Setup.nt @@ -0,0 +1,33 @@ +#option nodynamic + +Win32API +bigdecimal +#curses +#dbm +digest 
+digest/md5 +digest/rmd160 +digest/sha1 +digest/sha2 +dl +enumerator +etc +fcntl +#gdbm +#iconv +#io/wait +nkf +#openssl +#pty +racc/cparse +#readline +#ripper +sdbm +socket +stringio +strscan +syck +#syslog +#tk +win32ole +#zlib diff --git a/ext/continuation/continuation.c b/ext/continuation/continuation.c new file mode 100644 index 0000000..ae69b66 --- /dev/null +++ b/ext/continuation/continuation.c @@ -0,0 +1,8 @@ + +void ruby_Init_Continuation_body(void); + +void +Init_continuation(void) +{ + ruby_Init_Continuation_body(); +} diff --git a/ext/continuation/extconf.rb b/ext/continuation/extconf.rb new file mode 100644 index 0000000..17e2d05 --- /dev/null +++ b/ext/continuation/extconf.rb @@ -0,0 +1,3 @@ +require 'mkmf' +create_makefile('continuation') + diff --git a/ext/coverage/coverage.c b/ext/coverage/coverage.c new file mode 100644 index 0000000..145be18 --- /dev/null +++ b/ext/coverage/coverage.c @@ -0,0 +1,101 @@ +/************************************************ + + coverage.c - + + $Author: $ + + Copyright (c) 2008 Yusuke Endoh + +************************************************/ + +#include "ruby.h" + +extern VALUE rb_get_coverages(void); +extern void rb_set_coverages(VALUE); +extern void rb_reset_coverages(void); + +/* + * call-seq: + * Coverage.start => nil + * + * Enables coverage measurement. + */ +static VALUE +rb_coverage_start(VALUE klass) +{ + if (!RTEST(rb_get_coverages())) { + VALUE coverages = rb_hash_new(); + RBASIC(coverages)->klass = 0; + rb_set_coverages(coverages); + } + return Qnil; +} + +static int +coverage_result_i(st_data_t key, st_data_t val, st_data_t dummy) +{ + VALUE coverage = (VALUE)val; + RBASIC(coverage)->klass = rb_cArray; + rb_ary_freeze(coverage); + return ST_CONTINUE; +} + +/* + * call-seq: + * Coverage.result => hash + * + * Returns a hash that contains filename as key and coverage array as value + * and disables coverage measurement. 
+ */ +static VALUE +rb_coverage_result(VALUE klass) +{ + VALUE coverages = rb_get_coverages(); + if (!RTEST(coverages)) { + rb_raise(rb_eRuntimeError, "coverage measurement is not enabled"); + } + RBASIC(coverages)->klass = rb_cHash; + st_foreach(RHASH_TBL(coverages), coverage_result_i, 0); + rb_hash_freeze(coverages); + rb_reset_coverages(); + return coverages; +} + +/* Coverage provides coverage measurement feature for Ruby. + * This feature is experimental, so these APIs may be changed in future. + * + * = Usage + * + * (1) require "coverage.so" + * (2) do Coverage.start + * (3) require or load Ruby source file + * (4) Coverage.result will return a hash that contains filename as key and + * coverage array as value. + * + * = Example + * + * [foo.rb] + * s = 0 + * 10.times do |x| + * s += x + * end + * + * if s == 45 + * p :ok + * else + * p :ng + * end + * [EOF] + * + * require "coverage.so" + * Coverage.start + * require "foo.rb" + * p Coverage.result #=> {"foo.rb"=>[1, 1, 10, nil, nil, 1, 1, nil, 0, nil]} + */ +void +Init_coverage(void) +{ + VALUE rb_mCoverage = rb_define_module("Coverage"); + rb_define_module_function(rb_mCoverage, "start", rb_coverage_start, 0); + rb_define_module_function(rb_mCoverage, "result", rb_coverage_result, 0); +} diff --git a/ext/coverage/extconf.rb b/ext/coverage/extconf.rb new file mode 100644 index 0000000..3ebced5 --- /dev/null +++ b/ext/coverage/extconf.rb @@ -0,0 +1,2 @@ +require 'mkmf' +create_makefile('coverage') diff --git a/ext/curses/curses.c b/ext/curses/curses.c new file mode 100644 index 0000000..9fbe125 --- /dev/null +++ b/ext/curses/curses.c @@ -0,0 +1,2064 @@ +/* -*- C -*- + * $Id: curses.c 21399 2009-01-09 02:18:20Z yugui $ + * + * ext/curses/curses.c + * + * by MAEDA Shugo (ender@pic-internet.or.jp) + * modified by Yukihiro Matsumoto (matz@netlab.co.jp), + * Toki Yoshinori, + * Hitoshi Takahashi, + * and Takaaki Tateishi (ttate@kt.jaist.ac.jp) + * + * maintainers: + * - Takaaki Tateishi (ttate@kt.jaist.ac.jp) + 
*/ + +#include "ruby.h" +#include "ruby/io.h" + +#if defined(HAVE_NCURSES_H) +# include +#elif defined(HAVE_NCURSES_CURSES_H) +# include +#elif defined(HAVE_CURSES_COLR_CURSES_H) +# ifdef HAVE_STDARG_PROTOTYPES +# include +# else +# include +# endif +# include +#else +# include +# if defined(__bsdi__) || defined(__NetBSD__) || defined(__APPLE__) +# if !defined(_maxx) +# define _maxx maxx +# endif +# if !defined(_maxy) +# define _maxy maxy +# endif +# if !defined(_begx) +# define _begx begx +# endif +# if !defined(_begy) +# define _begy begy +# endif +# endif +#endif + +#ifdef HAVE_INIT_COLOR +# define USE_COLOR 1 +#endif + +/* supports only ncurses mouse routines */ +#ifdef NCURSES_MOUSE_VERSION +# define USE_MOUSE 1 +#endif + +#define NUM2CH NUM2CHR +#define CH2FIX CHR2FIX + +static VALUE mCurses; +static VALUE mKey; +static VALUE cWindow; +#ifdef USE_MOUSE +static VALUE cMouseEvent; +#endif + +static VALUE rb_stdscr; + +struct windata { + WINDOW *window; +}; + +#define CHECK(c) c + +static VALUE window_attroff(VALUE obj, VALUE attrs); +static VALUE window_attron(VALUE obj, VALUE attrs); +static VALUE window_attrset(VALUE obj, VALUE attrs); + +static void +no_window(void) +{ + rb_raise(rb_eRuntimeError, "already closed window"); +} + +#define GetWINDOW(obj, winp) do {\ + if (!OBJ_TAINTED(obj) && rb_safe_level() >= 4)\ + rb_raise(rb_eSecurityError, "Insecure: operation on untainted window");\ + Data_Get_Struct(obj, struct windata, winp);\ + if (winp->window == 0) no_window();\ +} while (0) + +static void +free_window(struct windata *winp) +{ + if (winp->window && winp->window != stdscr) delwin(winp->window); + winp->window = 0; + xfree(winp); +} + +static VALUE +prep_window(VALUE class, WINDOW *window) +{ + VALUE obj; + struct windata *winp; + + if (window == NULL) { + rb_raise(rb_eRuntimeError, "failed to create window"); + } + + obj = rb_obj_alloc(class); + Data_Get_Struct(obj, struct windata, winp); + winp->window = window; + + return obj; +} + 
+/*-------------------------- module Curses --------------------------*/ + +/* def init_screen */ +static VALUE +curses_init_screen(void) +{ + rb_secure(4); + if (rb_stdscr) return rb_stdscr; + initscr(); + if (stdscr == 0) { + rb_raise(rb_eRuntimeError, "can't initialize curses"); + } + clear(); + rb_stdscr = prep_window(cWindow, stdscr); + return rb_stdscr; +} + +/* def stdscr */ +#define curses_stdscr curses_init_screen + +/* def close_screen */ +static VALUE +curses_close_screen(void) +{ +#ifdef HAVE_ISENDWIN + if (!isendwin()) +#endif + endwin(); + rb_stdscr = 0; + return Qnil; +} + +static void +curses_finalize(VALUE dummy) +{ + if (stdscr +#ifdef HAVE_ISENDWIN + && !isendwin() +#endif + ) + endwin(); + rb_stdscr = 0; + rb_gc_unregister_address(&rb_stdscr); +} + +/* def closed? */ +static VALUE +curses_closed(void) +{ +#ifdef HAVE_ISENDWIN + if (isendwin()) { + return Qtrue; + } + return Qfalse; +#else + rb_notimplement(); +#endif +} + +/* def clear */ +static VALUE +curses_clear(VALUE obj) +{ + curses_stdscr(); + wclear(stdscr); + return Qnil; +} + +/* def clrtoeol */ +static VALUE +curses_clrtoeol(void) +{ + curses_stdscr(); + clrtoeol(); + return Qnil; +} + +/* def refresh */ +static VALUE +curses_refresh(VALUE obj) +{ + curses_stdscr(); + refresh(); + return Qnil; +} + +/* def doupdate */ +static VALUE +curses_doupdate(VALUE obj) +{ + curses_stdscr(); +#ifdef HAVE_DOUPDATE + doupdate(); +#else + refresh(); +#endif + return Qnil; +} + +/* def echo */ +static VALUE +curses_echo(VALUE obj) +{ + curses_stdscr(); + echo(); + return Qnil; +} + +/* def noecho */ +static VALUE +curses_noecho(VALUE obj) +{ + curses_stdscr(); + noecho(); + return Qnil; +} + +/* def raw */ +static VALUE +curses_raw(VALUE obj) +{ + curses_stdscr(); + raw(); + return Qnil; +} + +/* def noraw */ +static VALUE +curses_noraw(VALUE obj) +{ + curses_stdscr(); + noraw(); + return Qnil; +} + +/* def cbreak */ +static VALUE +curses_cbreak(VALUE obj) +{ + curses_stdscr(); + cbreak(); + return 
Qnil; +} + +/* def nocbreak */ +static VALUE +curses_nocbreak(VALUE obj) +{ + curses_stdscr(); + nocbreak(); + return Qnil; +} + +/* def nl */ +static VALUE +curses_nl(VALUE obj) +{ + curses_stdscr(); + nl(); + return Qnil; +} + +/* def nonl */ +static VALUE +curses_nonl(VALUE obj) +{ + curses_stdscr(); + nonl(); + return Qnil; +} + +/* def beep */ +static VALUE +curses_beep(VALUE obj) +{ +#ifdef HAVE_BEEP + curses_stdscr(); + beep(); +#endif + return Qnil; +} + +/* def flash */ +static VALUE +curses_flash(VALUE obj) +{ +#ifdef HAVE_FLASH + curses_stdscr(); + flash(); +#endif + return Qnil; +} + +static int +curses_char(VALUE c) +{ + if (FIXNUM_P(c)) { + return NUM2INT(c); + } + else { + int cc; + + StringValue(c); + if (RSTRING_LEN(c) == 0 || RSTRING_LEN(c) > 1) { + rb_raise(rb_eArgError, "string not corresponding a character"); + } + cc = RSTRING_PTR(c)[0]; + if (cc > 0x7f) { + rb_raise(rb_eArgError, "no multibyte string supported (yet)"); + } + return cc; + } +} + +/* def ungetch */ +static VALUE +curses_ungetch(VALUE obj, VALUE ch) +{ +#ifdef HAVE_UNGETCH + int c = curses_char(ch); + curses_stdscr(); + ungetch(c); +#else + rb_notimplement(); +#endif + return Qnil; +} + +/* def setpos(y, x) */ +static VALUE +curses_setpos(VALUE obj, VALUE y, VALUE x) +{ + curses_stdscr(); + move(NUM2INT(y), NUM2INT(x)); + return Qnil; +} + +/* def standout */ +static VALUE +curses_standout(VALUE obj) +{ + standout(); + return Qnil; +} + +/* def standend */ +static VALUE +curses_standend(VALUE obj) +{ + standend(); + return Qnil; +} + +/* def inch */ +static VALUE +curses_inch(VALUE obj) +{ + curses_stdscr(); + return CH2FIX(inch()); +} + +/* def addch(ch) */ +static VALUE +curses_addch(VALUE obj, VALUE ch) +{ + curses_stdscr(); + addch(NUM2CH(ch)); + return Qnil; +} + +/* def insch(ch) */ +static VALUE +curses_insch(VALUE obj, VALUE ch) +{ + curses_stdscr(); + insch(NUM2CH(ch)); + return Qnil; +} + +/* def addstr(str) */ +static VALUE +curses_addstr(VALUE obj, VALUE str) +{ + 
StringValue(str); + str = rb_str_export_locale(str); + curses_stdscr(); + if (!NIL_P(str)) { + addstr(StringValueCStr(str)); + } + return Qnil; +} + +/* def getch */ +static VALUE +curses_getch(VALUE obj) +{ + int c; + + rb_read_check(stdin); + curses_stdscr(); + c = getch(); + if (c == EOF) return Qnil; + if (rb_isprint(c)) { + char ch = (char)c; + + return rb_locale_str_new(&ch, 1); + } + return UINT2NUM(c); +} + +/* def getstr */ +static VALUE +curses_getstr(VALUE obj) +{ + char rtn[1024]; /* This should be big enough.. I hope */ + + rb_read_check(stdin); +#if defined(HAVE_GETNSTR) + getnstr(rtn,1023); +#else + getstr(rtn); +#endif + return rb_locale_str_new_cstr(rtn); +} + +/* def delch */ +static VALUE +curses_delch(VALUE obj) +{ + delch(); + return Qnil; +} + +/* def delelteln */ +static VALUE +curses_deleteln(VALUE obj) +{ +#if defined(HAVE_DELETELN) || defined(deleteln) + deleteln(); +#endif + return Qnil; +} + +/* def insertln */ +static VALUE +curses_insertln(VALUE obj) +{ +#if defined(HAVE_INSERTLN) || defined(insertln) + insertln(); +#endif + return Qnil; +} + +/* def keyname */ +static VALUE +curses_keyname(VALUE obj, VALUE c) +{ +#ifdef HAVE_KEYNAME + int cc = curses_char(c); + const char *name; + + name = keyname(cc); + if (name) { + return rb_str_new_cstr(name); + } + else { + return Qnil; + } +#else + return Qnil; +#endif +} + +static VALUE +curses_lines(void) +{ + return INT2FIX(LINES); +} + +static VALUE +curses_cols(void) +{ + return INT2FIX(COLS); +} + +static VALUE +curses_curs_set(VALUE obj, VALUE visibility) +{ +#ifdef HAVE_CURS_SET + int n; + return (n = curs_set(NUM2INT(visibility)) != ERR) ? INT2FIX(n) : Qnil; +#else + return Qnil; +#endif +} + +static VALUE +curses_scrl(VALUE obj, VALUE n) +{ + /* may have to raise exception on ERR */ +#ifdef HAVE_SCRL + return (scrl(NUM2INT(n)) == OK) ? 
Qtrue : Qfalse; +#else + return Qfalse; +#endif +} + +static VALUE +curses_setscrreg(VALUE obj, VALUE top, VALUE bottom) +{ + /* may have to raise exception on ERR */ +#ifdef HAVE_SETSCRREG + return (setscrreg(NUM2INT(top), NUM2INT(bottom)) == OK) ? Qtrue : Qfalse; +#else + return Qfalse; +#endif +} + +static VALUE +curses_attroff(VALUE obj, VALUE attrs) +{ + return window_attroff(rb_stdscr,attrs); + /* return INT2FIX(attroff(NUM2INT(attrs))); */ +} + +static VALUE +curses_attron(VALUE obj, VALUE attrs) +{ + return window_attron(rb_stdscr,attrs); + /* return INT2FIX(attroff(NUM2INT(attrs))); */ +} + +static VALUE +curses_attrset(VALUE obj, VALUE attrs) +{ + return window_attrset(rb_stdscr,attrs); + /* return INT2FIX(attroff(NUM2INT(attrs))); */ +} + +static VALUE +curses_bkgdset(VALUE obj, VALUE ch) +{ +#ifdef HAVE_BKGDSET + bkgdset(NUM2CH(ch)); +#endif + return Qnil; +} + +static VALUE +curses_bkgd(VALUE obj, VALUE ch) +{ +#ifdef HAVE_BKGD + return (bkgd(NUM2CH(ch)) == OK) ? Qtrue : Qfalse; +#else + return Qfalse; +#endif +} + +static VALUE +curses_resizeterm(VALUE obj, VALUE lin, VALUE col) +{ +#if defined(HAVE_RESIZETERM) + return (resizeterm(NUM2INT(lin),NUM2INT(col)) == OK) ? Qtrue : Qfalse; +#else + return Qnil; +#endif +} + +#ifdef USE_COLOR +static VALUE +curses_start_color(VALUE obj) +{ + /* may have to raise exception on ERR */ + return (start_color() == OK) ? Qtrue : Qfalse; +} + +static VALUE +curses_init_pair(VALUE obj, VALUE pair, VALUE f, VALUE b) +{ + /* may have to raise exception on ERR */ + return (init_pair(NUM2INT(pair),NUM2INT(f),NUM2INT(b)) == OK) ? Qtrue : Qfalse; +} + +static VALUE +curses_init_color(VALUE obj, VALUE color, VALUE r, VALUE g, VALUE b) +{ + /* may have to raise exception on ERR */ + return (init_color(NUM2INT(color),NUM2INT(r), + NUM2INT(g),NUM2INT(b)) == OK) ? Qtrue : Qfalse; +} + +static VALUE +curses_has_colors(VALUE obj) +{ + return has_colors() ? 
Qtrue : Qfalse; +} + +static VALUE +curses_can_change_color(VALUE obj) +{ + return can_change_color() ? Qtrue : Qfalse; +} + +static VALUE +curses_color_content(VALUE obj, VALUE color) +{ + short r,g,b; + + color_content(NUM2INT(color),&r,&g,&b); + return rb_ary_new3(3,INT2FIX(r),INT2FIX(g),INT2FIX(b)); +} + +static VALUE +curses_pair_content(VALUE obj, VALUE pair) +{ + short f,b; + + pair_content(NUM2INT(pair),&f,&b); + return rb_ary_new3(2,INT2FIX(f),INT2FIX(b)); +} + +static VALUE +curses_color_pair(VALUE obj, VALUE attrs) +{ + return INT2FIX(COLOR_PAIR(NUM2INT(attrs))); +} + +static VALUE +curses_pair_number(VALUE obj, VALUE attrs) +{ + return INT2FIX(PAIR_NUMBER(NUM2INT(attrs))); +} +#endif /* USE_COLOR */ + +#ifdef USE_MOUSE +struct mousedata { + MEVENT *mevent; +}; + +static void +no_mevent(void) +{ + rb_raise(rb_eRuntimeError, "no such mouse event"); +} + +#define GetMOUSE(obj, data) do {\ + if (!OBJ_TAINTED(obj) && rb_safe_level() >= 4)\ + rb_raise(rb_eSecurityError, "Insecure: operation on untainted mouse");\ + Data_Get_Struct(obj, struct mousedata, data);\ + if (data->mevent == 0) no_mevent();\ +} while (0) + +static void +curses_mousedata_free(struct mousedata *mdata) +{ + if (mdata->mevent) + xfree(mdata->mevent); +} + +static VALUE +curses_getmouse(VALUE obj) +{ + struct mousedata *mdata; + VALUE val; + + val = Data_Make_Struct(cMouseEvent,struct mousedata, + 0,curses_mousedata_free,mdata); + mdata->mevent = (MEVENT*)xmalloc(sizeof(MEVENT)); + return (getmouse(mdata->mevent) == OK) ? val : Qnil; +} + +static VALUE +curses_ungetmouse(VALUE obj, VALUE mevent) +{ + struct mousedata *mdata; + + GetMOUSE(mevent,mdata); + return (ungetmouse(mdata->mevent) == OK) ? Qtrue : Qfalse; +} + +static VALUE +curses_mouseinterval(VALUE obj, VALUE interval) +{ + return mouseinterval(NUM2INT(interval)) ? 
Qtrue : Qfalse; +} + +static VALUE +curses_mousemask(VALUE obj, VALUE mask) +{ + return INT2NUM(mousemask(NUM2UINT(mask),NULL)); +} + +#define DEFINE_MOUSE_GET_MEMBER(func_name,mem) \ +static VALUE func_name (VALUE mouse) \ +{ \ + struct mousedata *mdata; \ + GetMOUSE(mouse, mdata); \ + return (UINT2NUM(mdata->mevent -> mem)); \ +} + +DEFINE_MOUSE_GET_MEMBER(curs_mouse_id, id) +DEFINE_MOUSE_GET_MEMBER(curs_mouse_x, x) +DEFINE_MOUSE_GET_MEMBER(curs_mouse_y, y) +DEFINE_MOUSE_GET_MEMBER(curs_mouse_z, z) +DEFINE_MOUSE_GET_MEMBER(curs_mouse_bstate, bstate) +#undef define_curs_mouse_member +#endif /* USE_MOUSE */ + +static VALUE +curses_timeout(VALUE obj, VALUE delay) +{ +#ifdef HAVE_TIMEOUT + timeout(NUM2INT(delay)); + return Qnil; +#else + rb_notimplement(); +#endif +} + +static VALUE +curses_def_prog_mode(VALUE obj) +{ +#ifdef HAVE_DEF_PROG_MODE + return def_prog_mode() == OK ? Qtrue : Qfalse; +#else + rb_notimplement(); +#endif +} + +static VALUE +curses_reset_prog_mode(VALUE obj) +{ +#ifdef HAVE_RESET_PROG_MODE + return reset_prog_mode() == OK ? 
Qtrue : Qfalse; +#else + rb_notimplement(); +#endif +} + +/*-------------------------- class Window --------------------------*/ + +/* def self.allocate */ +static VALUE +window_s_allocate(VALUE class) +{ + struct windata *winp; + + return Data_Make_Struct(class, struct windata, 0, free_window, winp); +} + +/* def initialize(h, w, top, left) */ +static VALUE +window_initialize(VALUE obj, VALUE h, VALUE w, VALUE top, VALUE left) +{ + struct windata *winp; + WINDOW *window; + + rb_secure(4); + curses_init_screen(); + Data_Get_Struct(obj, struct windata, winp); + if (winp->window) delwin(winp->window); + window = newwin(NUM2INT(h), NUM2INT(w), NUM2INT(top), NUM2INT(left)); + wclear(window); + winp->window = window; + + return obj; +} + +/* def subwin(height, width, top, left) */ +static VALUE +window_subwin(VALUE obj, VALUE height, VALUE width, VALUE top, VALUE left) +{ + struct windata *winp; + WINDOW *window; + VALUE win; + int h, w, t, l; + + h = NUM2INT(height); + w = NUM2INT(width); + t = NUM2INT(top); + l = NUM2INT(left); + GetWINDOW(obj, winp); + window = subwin(winp->window, h, w, t, l); + win = prep_window(rb_obj_class(obj), window); + + return win; +} + +/* def close */ +static VALUE +window_close(VALUE obj) +{ + struct windata *winp; + + GetWINDOW(obj, winp); + delwin(winp->window); + winp->window = 0; + + return Qnil; +} + +/* def clear */ +static VALUE +window_clear(VALUE obj) +{ + struct windata *winp; + + GetWINDOW(obj, winp); + wclear(winp->window); + + return Qnil; +} + +/* def clrtoeol */ +static VALUE +window_clrtoeol(VALUE obj) +{ + struct windata *winp; + + GetWINDOW(obj, winp); + wclrtoeol(winp->window); + + return Qnil; +} + +/* def refresh */ +static VALUE +window_refresh(VALUE obj) +{ + struct windata *winp; + + GetWINDOW(obj, winp); + wrefresh(winp->window); + + return Qnil; +} + +/* def noutrefresh */ +static VALUE +window_noutrefresh(VALUE obj) +{ + struct windata *winp; + + GetWINDOW(obj, winp); +#ifdef HAVE_DOUPDATE + 
wnoutrefresh(winp->window); +#else + wrefresh(winp->window); +#endif + + return Qnil; +} + +/* def move(y, x) */ +static VALUE +window_move(VALUE obj, VALUE y, VALUE x) +{ + struct windata *winp; + + GetWINDOW(obj, winp); + mvwin(winp->window, NUM2INT(y), NUM2INT(x)); + + return Qnil; +} + +/* def setpos(y, x) */ +static VALUE +window_setpos(VALUE obj, VALUE y, VALUE x) +{ + struct windata *winp; + + GetWINDOW(obj, winp); + wmove(winp->window, NUM2INT(y), NUM2INT(x)); + return Qnil; +} + +/* def cury */ +static VALUE +window_cury(VALUE obj) +{ + struct windata *winp; + int x, y; + + GetWINDOW(obj, winp); + getyx(winp->window, y, x); + return INT2FIX(y); +} + +/* def curx */ +static VALUE +window_curx(VALUE obj) +{ + struct windata *winp; + int x, y; + + GetWINDOW(obj, winp); + getyx(winp->window, y, x); + return INT2FIX(x); +} + +/* def maxy */ +static VALUE +window_maxy(VALUE obj) +{ + struct windata *winp; + + GetWINDOW(obj, winp); +#if defined(getmaxy) + return INT2FIX(getmaxy(winp->window)); +#elif defined(getmaxyx) + { + int x, y; + getmaxyx(winp->window, y, x); + return INT2FIX(y); + } +#else + return INT2FIX(winp->window->_maxy+1); +#endif +} + +/* def maxx */ +static VALUE +window_maxx(VALUE obj) +{ + struct windata *winp; + + GetWINDOW(obj, winp); +#if defined(getmaxx) + return INT2FIX(getmaxx(winp->window)); +#elif defined(getmaxyx) + { + int x, y; + getmaxyx(winp->window, y, x); + return INT2FIX(x); + } +#else + return INT2FIX(winp->window->_maxx+1); +#endif +} + +/* def begy */ +static VALUE +window_begy(VALUE obj) +{ + struct windata *winp; + int x, y; + + GetWINDOW(obj, winp); +#ifdef getbegyx + getbegyx(winp->window, y, x); + return INT2FIX(y); +#else + return INT2FIX(winp->window->_begy); +#endif +} + +/* def begx */ +static VALUE +window_begx(VALUE obj) +{ + struct windata *winp; + int x, y; + + GetWINDOW(obj, winp); +#ifdef getbegyx + getbegyx(winp->window, y, x); + return INT2FIX(x); +#else + return INT2FIX(winp->window->_begx); +#endif +} + +/* 
def box(vert, hor) */ +static VALUE +window_box(int argc, VALUE *argv, VALUE self) +{ + struct windata *winp; + VALUE vert, hor, corn; + + rb_scan_args(argc, argv, "21", &vert, &hor, &corn); + + GetWINDOW(self, winp); + box(winp->window, NUM2CH(vert), NUM2CH(hor)); + + if (!NIL_P(corn)) { + int cur_x, cur_y, x, y; + chtype c; + + c = NUM2CH(corn); + getyx(winp->window, cur_y, cur_x); + x = NUM2INT(window_maxx(self)) - 1; + y = NUM2INT(window_maxy(self)) - 1; + wmove(winp->window, 0, 0); + waddch(winp->window, c); + wmove(winp->window, y, 0); + waddch(winp->window, c); + wmove(winp->window, y, x); + waddch(winp->window, c); + wmove(winp->window, 0, x); + waddch(winp->window, c); + wmove(winp->window, cur_y, cur_x); + } + + return Qnil; +} + +/* def standout */ +static VALUE +window_standout(VALUE obj) +{ + struct windata *winp; + + GetWINDOW(obj, winp); + wstandout(winp->window); + return Qnil; +} + +/* def standend */ +static VALUE +window_standend(VALUE obj) +{ + struct windata *winp; + + GetWINDOW(obj, winp); + wstandend(winp->window); + return Qnil; +} + +/* def inch */ +static VALUE +window_inch(VALUE obj) +{ + struct windata *winp; + + GetWINDOW(obj, winp); + return CH2FIX(winch(winp->window)); +} + +/* def addch(ch) */ +static VALUE +window_addch(VALUE obj, VALUE ch) +{ + struct windata *winp; + + GetWINDOW(obj, winp); + waddch(winp->window, NUM2CH(ch)); + + return Qnil; +} + +/* def insch(ch) */ +static VALUE +window_insch(VALUE obj, VALUE ch) +{ + struct windata *winp; + + GetWINDOW(obj, winp); + winsch(winp->window, NUM2CH(ch)); + + return Qnil; +} + +/* def addstr(str) */ +static VALUE +window_addstr(VALUE obj, VALUE str) +{ + if (!NIL_P(str)) { + struct windata *winp; + + StringValue(str); + str = rb_str_export_locale(str); + GetWINDOW(obj, winp); + waddstr(winp->window, StringValueCStr(str)); + } + return Qnil; +} + +/* def <<(str) */ +static VALUE +window_addstr2(VALUE obj, VALUE str) +{ + window_addstr(obj, str); + return obj; +} + +/* def getch */ 
+static VALUE +window_getch(VALUE obj) +{ + struct windata *winp; + int c; + + /* result: nil when wgetch() yields EOF, a one-character locale string for a printable code, otherwise the numeric code */ + rb_read_check(stdin); + GetWINDOW(obj, winp); + c = wgetch(winp->window); + if (c == EOF) return Qnil; + if (rb_isprint(c)) { + char ch = (char)c; + + return rb_locale_str_new(&ch, 1); + } + return UINT2NUM(c); +} + +/* def getstr */ +static VALUE +window_getstr(VALUE obj) +{ + struct windata *winp; + char rtn[1024] = ""; /* pre-terminated: the curses return value is not checked, so a read that stores nothing must yield "" rather than uninitialized stack bytes */ + + GetWINDOW(obj, winp); + rb_read_check(stdin); +#if defined(HAVE_WGETNSTR) + wgetnstr(winp->window, rtn, 1023); /* 1023 characters plus the terminating NUL fit rtn */ +#else + wgetstr(winp->window, rtn); /* NOTE(review): no length limit on this path -- input longer than 1023 bytes overruns rtn */ +#endif + return rb_locale_str_new_cstr(rtn); +} + +/* def delch */ +static VALUE +window_delch(VALUE obj) +{ + struct windata *winp; + + GetWINDOW(obj, winp); + wdelch(winp->window); + return Qnil; +} + +/* def deleteln */ +static VALUE +window_deleteln(VALUE obj) +{ +#if defined(HAVE_WDELETELN) || defined(wdeleteln) + struct windata *winp; + + GetWINDOW(obj, winp); + wdeleteln(winp->window); +#endif + return Qnil; +} + +/* def insertln */ +static VALUE +window_insertln(VALUE obj) +{ +#if defined(HAVE_WINSERTLN) || defined(winsertln) + struct windata *winp; + + GetWINDOW(obj, winp); + winsertln(winp->window); +#endif + return Qnil; +} + +/* def scrollok(bf) -- passes TRUE to scrollok() when bf is truthy, FALSE otherwise; returns nil */ +static VALUE +window_scrollok(VALUE obj, VALUE bf) +{ + struct windata *winp; + + GetWINDOW(obj, winp); + scrollok(winp->window, RTEST(bf) ? TRUE : FALSE); + return Qnil; +} + +/* def idlok(bf) -- passes TRUE to idlok() when bf is truthy, FALSE otherwise; returns nil */ +static VALUE +window_idlok(VALUE obj, VALUE bf) +{ + struct windata *winp; + + GetWINDOW(obj, winp); + idlok(winp->window, RTEST(bf) ? TRUE : FALSE); + return Qnil; +} + +/* def setscrreg(top, bottom) -- true when wsetscrreg() returns OK, false otherwise (always false without HAVE_WSETSCRREG) */ +static VALUE +window_setscrreg(VALUE obj, VALUE top, VALUE bottom) +{ +#ifdef HAVE_WSETSCRREG + struct windata *winp; + int res; + + GetWINDOW(obj, winp); + res = wsetscrreg(winp->window, NUM2INT(top), NUM2INT(bottom)); + /* may have to raise exception on ERR */ + return (res == OK) ? 
Qtrue : Qfalse; +#else + return Qfalse; +#endif +} + +#if defined(USE_COLOR) && defined(HAVE_WCOLOR_SET) +static VALUE +window_color_set(VALUE obj, VALUE col) +{ + struct windata *winp; + int res; + + GetWINDOW(obj, winp); + res = wcolor_set(winp->window, NUM2INT(col), NULL); + return (res == OK) ? Qtrue : Qfalse; +} +#endif /* defined(USE_COLOR) && defined(HAVE_WCOLOR_SET) */ + +static VALUE +window_scroll(VALUE obj) +{ + struct windata *winp; + + GetWINDOW(obj, winp); + /* may have to raise exception on ERR */ + return (scroll(winp->window) == OK) ? Qtrue : Qfalse; +} + +static VALUE +window_scrl(VALUE obj, VALUE n) +{ +#ifdef HAVE_WSCRL + struct windata *winp; + + GetWINDOW(obj, winp); + /* may have to raise exception on ERR */ + return (wscrl(winp->window,NUM2INT(n)) == OK) ? Qtrue : Qfalse; +#else + return Qfalse; +#endif +} + +static VALUE +window_attroff(VALUE obj, VALUE attrs) +{ +#ifdef HAVE_WATTROFF + struct windata *winp; + + GetWINDOW(obj,winp); + return INT2FIX(wattroff(winp->window,NUM2INT(attrs))); +#else + return Qtrue; +#endif +} + +static VALUE +window_attron(VALUE obj, VALUE attrs) +{ +#ifdef HAVE_WATTRON + struct windata *winp; + VALUE val; + + GetWINDOW(obj,winp); + val = INT2FIX(wattron(winp->window,NUM2INT(attrs))); + if( rb_block_given_p() ){ + rb_yield(val); + wattroff(winp->window,NUM2INT(attrs)); + return val; + } + else{ + return val; + } +#else + return Qtrue; +#endif +} + +static VALUE +window_attrset(VALUE obj, VALUE attrs) +{ +#ifdef HAVE_WATTRSET + struct windata *winp; + + GetWINDOW(obj,winp); + return INT2FIX(wattrset(winp->window,NUM2INT(attrs))); +#else + return Qtrue; +#endif +} + +static VALUE +window_bkgdset(VALUE obj, VALUE ch) +{ +#ifdef HAVE_WBKGDSET + struct windata *winp; + + GetWINDOW(obj,winp); + wbkgdset(winp->window, NUM2CH(ch)); +#endif + return Qnil; +} + +static VALUE +window_bkgd(VALUE obj, VALUE ch) +{ +#ifdef HAVE_WBKGD + struct windata *winp; + + GetWINDOW(obj,winp); + return (wbkgd(winp->window, NUM2CH(ch)) 
== OK) ? Qtrue : Qfalse; +#else + return Qfalse; +#endif +} + +/* def getbkgd -- the value returned by getbkgd() converted with CH2FIX, or nil when that value equals ERR (always nil without HAVE_WGETBKGD) */ +static VALUE +window_getbkgd(VALUE obj) +{ +#ifdef HAVE_WGETBKGD + chtype c; + struct windata *winp; + + GetWINDOW(obj,winp); + /* the assignment carries its own parentheses: `c = x != ERR` would store the comparison result (0 or 1) in c */ + return ((c = getbkgd(winp->window)) != ERR) ? CH2FIX(c) : Qnil; +#else + return Qnil; +#endif +} + +/* def resize(lin, col) -- true when wresize() returns OK, false otherwise; nil without HAVE_WRESIZE */ +static VALUE +window_resize(VALUE obj, VALUE lin, VALUE col) +{ +#if defined(HAVE_WRESIZE) + struct windata *winp; + + GetWINDOW(obj,winp); + return wresize(winp->window, NUM2INT(lin), NUM2INT(col)) == OK ? Qtrue : Qfalse; +#else + return Qnil; +#endif +} + + +/* def keypad(val) -- passes TRUE to keypad() when val is truthy, FALSE otherwise; true/false reflects an OK result, nil on the NetBSD libcurses path; not implemented without HAVE_KEYPAD */ +static VALUE +window_keypad(VALUE obj, VALUE val) +{ +#ifdef HAVE_KEYPAD + struct windata *winp; + + GetWINDOW(obj,winp); + /* keypad() of NetBSD's libcurses returns no value */ +#if defined(__NetBSD__) && !defined(NCURSES_VERSION) + keypad(winp->window,(RTEST(val) ? TRUE : FALSE)); + return Qnil; +#else + /* may have to raise exception on ERR */ + return (keypad(winp->window,RTEST(val) ? TRUE : FALSE)) == OK ? + Qtrue : Qfalse; +#endif +#else + rb_notimplement(); +#endif /* HAVE_KEYPAD */ +} + +/* def nodelay=(val) -- passes TRUE to nodelay() when val is truthy, FALSE otherwise; true/false reflects an OK result, nil on the NetBSD libcurses path; not implemented without HAVE_NODELAY */ +static VALUE +window_nodelay(VALUE obj, VALUE val) +{ +#ifdef HAVE_NODELAY + struct windata *winp; + GetWINDOW(obj,winp); + + /* nodelay() of NetBSD's libcurses returns no value */ +#if defined(__NetBSD__) && !defined(NCURSES_VERSION) + nodelay(winp->window, RTEST(val) ? TRUE : FALSE); + return Qnil; +#else + return nodelay(winp->window,RTEST(val) ? TRUE : FALSE) == OK ? 
Qtrue : Qfalse; +#endif +#else + rb_notimplement(); +#endif +} + +static VALUE +window_timeout(VALUE obj, VALUE delay) +{ +#ifdef HAVE_WTIMEOUT + struct windata *winp; + GetWINDOW(obj,winp); + + wtimeout(winp->window,NUM2INT(delay)); + return Qnil; +#else + rb_notimplement(); +#endif +} + +/*------------------------- Initialization -------------------------*/ +void +Init_curses(void) +{ + mCurses = rb_define_module("Curses"); + mKey = rb_define_module_under(mCurses, "Key"); + + rb_gc_register_address(&rb_stdscr); + +#ifdef USE_MOUSE + cMouseEvent = rb_define_class_under(mCurses,"MouseEvent",rb_cObject); + rb_undef_method(CLASS_OF(cMouseEvent),"new"); + rb_define_method(cMouseEvent, "eid", curs_mouse_id, 0); + rb_define_method(cMouseEvent, "x", curs_mouse_x, 0); + rb_define_method(cMouseEvent, "y", curs_mouse_y, 0); + rb_define_method(cMouseEvent, "z", curs_mouse_z, 0); + rb_define_method(cMouseEvent, "bstate", curs_mouse_bstate, 0); +#endif /* USE_MOUSE */ + + rb_define_module_function(mCurses, "init_screen", curses_init_screen, 0); + rb_define_module_function(mCurses, "close_screen", curses_close_screen, 0); + rb_define_module_function(mCurses, "closed?", curses_closed, 0); + rb_define_module_function(mCurses, "stdscr", curses_stdscr, 0); + rb_define_module_function(mCurses, "refresh", curses_refresh, 0); + rb_define_module_function(mCurses, "doupdate", curses_doupdate, 0); + rb_define_module_function(mCurses, "clear", curses_clear, 0); + rb_define_module_function(mCurses, "clrtoeol", curses_clrtoeol, 0); + rb_define_module_function(mCurses, "echo", curses_echo, 0); + rb_define_module_function(mCurses, "noecho", curses_noecho, 0); + rb_define_module_function(mCurses, "raw", curses_raw, 0); + rb_define_module_function(mCurses, "noraw", curses_noraw, 0); + rb_define_module_function(mCurses, "cbreak", curses_cbreak, 0); + rb_define_module_function(mCurses, "nocbreak", curses_nocbreak, 0); + rb_define_alias(mCurses, "crmode", "cbreak"); + rb_define_alias(mCurses, 
"nocrmode", "nocbreak"); + rb_define_module_function(mCurses, "nl", curses_nl, 0); + rb_define_module_function(mCurses, "nonl", curses_nonl, 0); + rb_define_module_function(mCurses, "beep", curses_beep, 0); + rb_define_module_function(mCurses, "flash", curses_flash, 0); + rb_define_module_function(mCurses, "ungetch", curses_ungetch, 1); + rb_define_module_function(mCurses, "setpos", curses_setpos, 2); + rb_define_module_function(mCurses, "standout", curses_standout, 0); + rb_define_module_function(mCurses, "standend", curses_standend, 0); + rb_define_module_function(mCurses, "inch", curses_inch, 0); + rb_define_module_function(mCurses, "addch", curses_addch, 1); + rb_define_module_function(mCurses, "insch", curses_insch, 1); + rb_define_module_function(mCurses, "addstr", curses_addstr, 1); + rb_define_module_function(mCurses, "getch", curses_getch, 0); + rb_define_module_function(mCurses, "getstr", curses_getstr, 0); + rb_define_module_function(mCurses, "delch", curses_delch, 0); + rb_define_module_function(mCurses, "deleteln", curses_deleteln, 0); + rb_define_module_function(mCurses, "insertln", curses_insertln, 0); + rb_define_module_function(mCurses, "keyname", curses_keyname, 1); + rb_define_module_function(mCurses, "lines", curses_lines, 0); + rb_define_module_function(mCurses, "cols", curses_cols, 0); + rb_define_module_function(mCurses, "curs_set", curses_curs_set, 1); + rb_define_module_function(mCurses, "scrl", curses_scrl, 1); + rb_define_module_function(mCurses, "setscrreg", curses_setscrreg, 2); + rb_define_module_function(mCurses, "attroff", curses_attroff, 1); + rb_define_module_function(mCurses, "attron", curses_attron, 1); + rb_define_module_function(mCurses, "attrset", curses_attrset, 1); + rb_define_module_function(mCurses, "bkgdset", curses_bkgdset, 1); + rb_define_module_function(mCurses, "bkgd", curses_bkgd, 1); + rb_define_module_function(mCurses, "resizeterm", curses_resizeterm, 2); + rb_define_module_function(mCurses, "resize", 
curses_resizeterm, 2); +#ifdef USE_COLOR + rb_define_module_function(mCurses, "start_color", curses_start_color, 0); + rb_define_module_function(mCurses, "init_pair", curses_init_pair, 3); + rb_define_module_function(mCurses, "init_color", curses_init_color, 4); + rb_define_module_function(mCurses, "has_colors?", curses_has_colors, 0); + rb_define_module_function(mCurses, "can_change_color?", + curses_can_change_color, 0); + rb_define_module_function(mCurses, "color_content", curses_color_content, 1); + rb_define_module_function(mCurses, "pair_content", curses_pair_content, 1); + rb_define_module_function(mCurses, "color_pair", curses_color_pair, 1); + rb_define_module_function(mCurses, "pair_number", curses_pair_number, 1); +#endif /* USE_COLOR */ +#ifdef USE_MOUSE + rb_define_module_function(mCurses, "getmouse", curses_getmouse, 0); + rb_define_module_function(mCurses, "ungetmouse", curses_ungetmouse, 1); + rb_define_module_function(mCurses, "mouseinterval", curses_mouseinterval, 1); + rb_define_module_function(mCurses, "mousemask", curses_mousemask, 1); +#endif /* USE_MOUSE */ + + rb_define_module_function(mCurses, "timeout=", curses_timeout, 1); + rb_define_module_function(mCurses, "def_prog_mode", curses_def_prog_mode, 0); + rb_define_module_function(mCurses, "reset_prog_mode", curses_reset_prog_mode, 0); + + cWindow = rb_define_class_under(mCurses, "Window", rb_cData); + rb_define_alloc_func(cWindow, window_s_allocate); + rb_define_method(cWindow, "initialize", window_initialize, 4); + rb_define_method(cWindow, "subwin", window_subwin, 4); + rb_define_method(cWindow, "close", window_close, 0); + rb_define_method(cWindow, "clear", window_clear, 0); + rb_define_method(cWindow, "clrtoeol", window_clrtoeol, 0); + rb_define_method(cWindow, "refresh", window_refresh, 0); + rb_define_method(cWindow, "noutrefresh", window_noutrefresh, 0); + rb_define_method(cWindow, "box", window_box, -1); + rb_define_method(cWindow, "move", window_move, 2); + 
rb_define_method(cWindow, "setpos", window_setpos, 2); +#if defined(USE_COLOR) && defined(HAVE_WCOLOR_SET) + rb_define_method(cWindow, "color_set", window_color_set, 1); +#endif /* USE_COLOR && HAVE_WCOLOR_SET */ + rb_define_method(cWindow, "cury", window_cury, 0); + rb_define_method(cWindow, "curx", window_curx, 0); + rb_define_method(cWindow, "maxy", window_maxy, 0); + rb_define_method(cWindow, "maxx", window_maxx, 0); + rb_define_method(cWindow, "begy", window_begy, 0); + rb_define_method(cWindow, "begx", window_begx, 0); + rb_define_method(cWindow, "standout", window_standout, 0); + rb_define_method(cWindow, "standend", window_standend, 0); + rb_define_method(cWindow, "inch", window_inch, 0); + rb_define_method(cWindow, "addch", window_addch, 1); + rb_define_method(cWindow, "insch", window_insch, 1); + rb_define_method(cWindow, "addstr", window_addstr, 1); + rb_define_method(cWindow, "<<", window_addstr2, 1); + rb_define_method(cWindow, "getch", window_getch, 0); + rb_define_method(cWindow, "getstr", window_getstr, 0); + rb_define_method(cWindow, "delch", window_delch, 0); + rb_define_method(cWindow, "deleteln", window_deleteln, 0); + rb_define_method(cWindow, "insertln", window_insertln, 0); + rb_define_method(cWindow, "scroll", window_scroll, 0); + rb_define_method(cWindow, "scrollok", window_scrollok, 1); + rb_define_method(cWindow, "idlok", window_idlok, 1); + rb_define_method(cWindow, "setscrreg", window_setscrreg, 2); + rb_define_method(cWindow, "scrl", window_scrl, 1); + rb_define_method(cWindow, "resize", window_resize, 2); + rb_define_method(cWindow, "keypad", window_keypad, 1); + rb_define_method(cWindow, "keypad=", window_keypad, 1); + + rb_define_method(cWindow, "attroff", window_attroff, 1); + rb_define_method(cWindow, "attron", window_attron, 1); + rb_define_method(cWindow, "attrset", window_attrset, 1); + rb_define_method(cWindow, "bkgdset", window_bkgdset, 1); + rb_define_method(cWindow, "bkgd", window_bkgd, 1); + rb_define_method(cWindow, 
"getbkgd", window_getbkgd, 0); + + rb_define_method(cWindow, "nodelay=", window_nodelay, 1); + rb_define_method(cWindow, "timeout=", window_timeout, 1); + +#define rb_curses_define_const(c) rb_define_const(mCurses,#c,UINT2NUM(c)) + +#ifdef USE_COLOR + rb_curses_define_const(A_ATTRIBUTES); +#ifdef A_NORMAL + rb_curses_define_const(A_NORMAL); +#endif + rb_curses_define_const(A_STANDOUT); + rb_curses_define_const(A_UNDERLINE); + rb_curses_define_const(A_REVERSE); + rb_curses_define_const(A_BLINK); + rb_curses_define_const(A_DIM); + rb_curses_define_const(A_BOLD); + rb_curses_define_const(A_PROTECT); +#ifdef A_INVIS /* for NetBSD */ + rb_curses_define_const(A_INVIS); +#endif + rb_curses_define_const(A_ALTCHARSET); + rb_curses_define_const(A_CHARTEXT); +#ifdef A_HORIZONTAL + rb_curses_define_const(A_HORIZONTAL); +#endif +#ifdef A_LEFT + rb_curses_define_const(A_LEFT); +#endif +#ifdef A_LOW + rb_curses_define_const(A_LOW); +#endif +#ifdef A_RIGHT + rb_curses_define_const(A_RIGHT); +#endif +#ifdef A_TOP + rb_curses_define_const(A_TOP); +#endif +#ifdef A_VERTICAL + rb_curses_define_const(A_VERTICAL); +#endif + rb_curses_define_const(A_COLOR); + +#ifdef COLORS + rb_curses_define_const(COLORS); +#endif + rb_curses_define_const(COLOR_BLACK); + rb_curses_define_const(COLOR_RED); + rb_curses_define_const(COLOR_GREEN); + rb_curses_define_const(COLOR_YELLOW); + rb_curses_define_const(COLOR_BLUE); + rb_curses_define_const(COLOR_MAGENTA); + rb_curses_define_const(COLOR_CYAN); + rb_curses_define_const(COLOR_WHITE); +#endif /* USE_COLOR */ +#ifdef USE_MOUSE +#ifdef BUTTON1_PRESSED + rb_curses_define_const(BUTTON1_PRESSED); +#endif +#ifdef BUTTON1_RELEASED + rb_curses_define_const(BUTTON1_RELEASED); +#endif +#ifdef BUTTON1_CLICKED + rb_curses_define_const(BUTTON1_CLICKED); +#endif +#ifdef BUTTON1_DOUBLE_CLICKED + rb_curses_define_const(BUTTON1_DOUBLE_CLICKED); +#endif +#ifdef BUTTON1_TRIPLE_CLICKED + rb_curses_define_const(BUTTON1_TRIPLE_CLICKED); +#endif +#ifdef BUTTON2_PRESSED + 
rb_curses_define_const(BUTTON2_PRESSED); +#endif +#ifdef BUTTON2_RELEASED + rb_curses_define_const(BUTTON2_RELEASED); +#endif +#ifdef BUTTON2_CLICKED + rb_curses_define_const(BUTTON2_CLICKED); +#endif +#ifdef BUTTON2_DOUBLE_CLICKED + rb_curses_define_const(BUTTON2_DOUBLE_CLICKED); +#endif +#ifdef BUTTON2_TRIPLE_CLICKED + rb_curses_define_const(BUTTON2_TRIPLE_CLICKED); +#endif +#ifdef BUTTON3_PRESSED + rb_curses_define_const(BUTTON3_PRESSED); +#endif +#ifdef BUTTON3_RELEASED + rb_curses_define_const(BUTTON3_RELEASED); +#endif +#ifdef BUTTON3_CLICKED + rb_curses_define_const(BUTTON3_CLICKED); +#endif +#ifdef BUTTON3_DOUBLE_CLICKED + rb_curses_define_const(BUTTON3_DOUBLE_CLICKED); +#endif +#ifdef BUTTON3_TRIPLE_CLICKED + rb_curses_define_const(BUTTON3_TRIPLE_CLICKED); +#endif +#ifdef BUTTON4_PRESSED + rb_curses_define_const(BUTTON4_PRESSED); +#endif +#ifdef BUTTON4_RELEASED + rb_curses_define_const(BUTTON4_RELEASED); +#endif +#ifdef BUTTON4_CLICKED + rb_curses_define_const(BUTTON4_CLICKED); +#endif +#ifdef BUTTON4_DOUBLE_CLICKED + rb_curses_define_const(BUTTON4_DOUBLE_CLICKED); +#endif +#ifdef BUTTON4_TRIPLE_CLICKED + rb_curses_define_const(BUTTON4_TRIPLE_CLICKED); +#endif +#ifdef BUTTON_SHIFT + rb_curses_define_const(BUTTON_SHIFT); +#endif +#ifdef BUTTON_CTRL + rb_curses_define_const(BUTTON_CTRL); +#endif +#ifdef BUTTON_ALT + rb_curses_define_const(BUTTON_ALT); +#endif +#ifdef ALL_MOUSE_EVENTS + rb_curses_define_const(ALL_MOUSE_EVENTS); +#endif +#ifdef REPORT_MOUSE_POSITION + rb_curses_define_const(REPORT_MOUSE_POSITION); +#endif +#endif /* USE_MOUSE */ + +#if defined(KEY_MOUSE) && defined(USE_MOUSE) + rb_curses_define_const(KEY_MOUSE); + rb_define_const(mKey, "MOUSE", INT2NUM(KEY_MOUSE)); +#endif +#ifdef KEY_MIN + rb_curses_define_const(KEY_MIN); + rb_define_const(mKey, "MIN", INT2NUM(KEY_MIN)); +#endif +#ifdef KEY_BREAK + rb_curses_define_const(KEY_BREAK); + rb_define_const(mKey, "BREAK", INT2NUM(KEY_BREAK)); +#endif +#ifdef KEY_DOWN + 
rb_curses_define_const(KEY_DOWN); + rb_define_const(mKey, "DOWN", INT2NUM(KEY_DOWN)); +#endif +#ifdef KEY_UP + rb_curses_define_const(KEY_UP); + rb_define_const(mKey, "UP", INT2NUM(KEY_UP)); +#endif +#ifdef KEY_LEFT + rb_curses_define_const(KEY_LEFT); + rb_define_const(mKey, "LEFT", INT2NUM(KEY_LEFT)); +#endif +#ifdef KEY_RIGHT + rb_curses_define_const(KEY_RIGHT); + rb_define_const(mKey, "RIGHT", INT2NUM(KEY_RIGHT)); +#endif +#ifdef KEY_HOME + rb_curses_define_const(KEY_HOME); + rb_define_const(mKey, "HOME", INT2NUM(KEY_HOME)); +#endif +#ifdef KEY_BACKSPACE + rb_curses_define_const(KEY_BACKSPACE); + rb_define_const(mKey, "BACKSPACE", INT2NUM(KEY_BACKSPACE)); +#endif +#ifdef KEY_F + /* KEY_F(n) : 0 <= n <= 63 */ + { + int i; + char c[8]; + for( i=0; i<64; i++ ){ + sprintf(c, "KEY_F%d", i); + rb_define_const(mCurses, c, INT2NUM(KEY_F(i))); + sprintf(c, "F%d", i); + rb_define_const(mKey, c, INT2NUM(KEY_F(i))); + } + } +#endif +#ifdef KEY_DL + rb_curses_define_const(KEY_DL); + rb_define_const(mKey, "DL", INT2NUM(KEY_DL)); +#endif +#ifdef KEY_IL + rb_curses_define_const(KEY_IL); + rb_define_const(mKey, "IL", INT2NUM(KEY_IL)); +#endif +#ifdef KEY_DC + rb_curses_define_const(KEY_DC); + rb_define_const(mKey, "DC", INT2NUM(KEY_DC)); +#endif +#ifdef KEY_IC + rb_curses_define_const(KEY_IC); + rb_define_const(mKey, "IC", INT2NUM(KEY_IC)); +#endif +#ifdef KEY_EIC + rb_curses_define_const(KEY_EIC); + rb_define_const(mKey, "EIC", INT2NUM(KEY_EIC)); +#endif +#ifdef KEY_CLEAR + rb_curses_define_const(KEY_CLEAR); + rb_define_const(mKey, "CLEAR", INT2NUM(KEY_CLEAR)); +#endif +#ifdef KEY_EOS + rb_curses_define_const(KEY_EOS); + rb_define_const(mKey, "EOS", INT2NUM(KEY_EOS)); +#endif +#ifdef KEY_EOL + rb_curses_define_const(KEY_EOL); + rb_define_const(mKey, "EOL", INT2NUM(KEY_EOL)); +#endif +#ifdef KEY_SF + rb_curses_define_const(KEY_SF); + rb_define_const(mKey, "SF", INT2NUM(KEY_SF)); +#endif +#ifdef KEY_SR + rb_curses_define_const(KEY_SR); + rb_define_const(mKey, "SR", 
INT2NUM(KEY_SR)); +#endif +#ifdef KEY_NPAGE + rb_curses_define_const(KEY_NPAGE); + rb_define_const(mKey, "NPAGE", INT2NUM(KEY_NPAGE)); +#endif +#ifdef KEY_PPAGE + rb_curses_define_const(KEY_PPAGE); + rb_define_const(mKey, "PPAGE", INT2NUM(KEY_PPAGE)); +#endif +#ifdef KEY_STAB + rb_curses_define_const(KEY_STAB); + rb_define_const(mKey, "STAB", INT2NUM(KEY_STAB)); +#endif +#ifdef KEY_CTAB + rb_curses_define_const(KEY_CTAB); + rb_define_const(mKey, "CTAB", INT2NUM(KEY_CTAB)); +#endif +#ifdef KEY_CATAB + rb_curses_define_const(KEY_CATAB); + rb_define_const(mKey, "CATAB", INT2NUM(KEY_CATAB)); +#endif +#ifdef KEY_ENTER + rb_curses_define_const(KEY_ENTER); + rb_define_const(mKey, "ENTER", INT2NUM(KEY_ENTER)); +#endif +#ifdef KEY_SRESET + rb_curses_define_const(KEY_SRESET); + rb_define_const(mKey, "SRESET", INT2NUM(KEY_SRESET)); +#endif +#ifdef KEY_RESET + rb_curses_define_const(KEY_RESET); + rb_define_const(mKey, "RESET", INT2NUM(KEY_RESET)); +#endif +#ifdef KEY_PRINT + rb_curses_define_const(KEY_PRINT); + rb_define_const(mKey, "PRINT", INT2NUM(KEY_PRINT)); +#endif +#ifdef KEY_LL + rb_curses_define_const(KEY_LL); + rb_define_const(mKey, "LL", INT2NUM(KEY_LL)); +#endif +#ifdef KEY_A1 + rb_curses_define_const(KEY_A1); + rb_define_const(mKey, "A1", INT2NUM(KEY_A1)); +#endif +#ifdef KEY_A3 + rb_curses_define_const(KEY_A3); + rb_define_const(mKey, "A3", INT2NUM(KEY_A3)); +#endif +#ifdef KEY_B2 + rb_curses_define_const(KEY_B2); + rb_define_const(mKey, "B2", INT2NUM(KEY_B2)); +#endif +#ifdef KEY_C1 + rb_curses_define_const(KEY_C1); + rb_define_const(mKey, "C1", INT2NUM(KEY_C1)); +#endif +#ifdef KEY_C3 + rb_curses_define_const(KEY_C3); + rb_define_const(mKey, "C3", INT2NUM(KEY_C3)); +#endif +#ifdef KEY_BTAB + rb_curses_define_const(KEY_BTAB); + rb_define_const(mKey, "BTAB", INT2NUM(KEY_BTAB)); +#endif +#ifdef KEY_BEG + rb_curses_define_const(KEY_BEG); + rb_define_const(mKey, "BEG", INT2NUM(KEY_BEG)); +#endif +#ifdef KEY_CANCEL + rb_curses_define_const(KEY_CANCEL); + 
rb_define_const(mKey, "CANCEL", INT2NUM(KEY_CANCEL)); +#endif +#ifdef KEY_CLOSE + rb_curses_define_const(KEY_CLOSE); + rb_define_const(mKey, "CLOSE", INT2NUM(KEY_CLOSE)); +#endif +#ifdef KEY_COMMAND + rb_curses_define_const(KEY_COMMAND); + rb_define_const(mKey, "COMMAND", INT2NUM(KEY_COMMAND)); +#endif +#ifdef KEY_COPY + rb_curses_define_const(KEY_COPY); + rb_define_const(mKey, "COPY", INT2NUM(KEY_COPY)); +#endif +#ifdef KEY_CREATE + rb_curses_define_const(KEY_CREATE); + rb_define_const(mKey, "CREATE", INT2NUM(KEY_CREATE)); +#endif +#ifdef KEY_END + rb_curses_define_const(KEY_END); + rb_define_const(mKey, "END", INT2NUM(KEY_END)); +#endif +#ifdef KEY_EXIT + rb_curses_define_const(KEY_EXIT); + rb_define_const(mKey, "EXIT", INT2NUM(KEY_EXIT)); +#endif +#ifdef KEY_FIND + rb_curses_define_const(KEY_FIND); + rb_define_const(mKey, "FIND", INT2NUM(KEY_FIND)); +#endif +#ifdef KEY_HELP + rb_curses_define_const(KEY_HELP); + rb_define_const(mKey, "HELP", INT2NUM(KEY_HELP)); +#endif +#ifdef KEY_MARK + rb_curses_define_const(KEY_MARK); + rb_define_const(mKey, "MARK", INT2NUM(KEY_MARK)); +#endif +#ifdef KEY_MESSAGE + rb_curses_define_const(KEY_MESSAGE); + rb_define_const(mKey, "MESSAGE", INT2NUM(KEY_MESSAGE)); +#endif +#ifdef KEY_MOVE + rb_curses_define_const(KEY_MOVE); + rb_define_const(mKey, "MOVE", INT2NUM(KEY_MOVE)); +#endif +#ifdef KEY_NEXT + rb_curses_define_const(KEY_NEXT); + rb_define_const(mKey, "NEXT", INT2NUM(KEY_NEXT)); +#endif +#ifdef KEY_OPEN + rb_curses_define_const(KEY_OPEN); + rb_define_const(mKey, "OPEN", INT2NUM(KEY_OPEN)); +#endif +#ifdef KEY_OPTIONS + rb_curses_define_const(KEY_OPTIONS); + rb_define_const(mKey, "OPTIONS", INT2NUM(KEY_OPTIONS)); +#endif +#ifdef KEY_PREVIOUS + rb_curses_define_const(KEY_PREVIOUS); + rb_define_const(mKey, "PREVIOUS", INT2NUM(KEY_PREVIOUS)); +#endif +#ifdef KEY_REDO + rb_curses_define_const(KEY_REDO); + rb_define_const(mKey, "REDO", INT2NUM(KEY_REDO)); +#endif +#ifdef KEY_REFERENCE + rb_curses_define_const(KEY_REFERENCE); + 
rb_define_const(mKey, "REFERENCE", INT2NUM(KEY_REFERENCE)); +#endif +#ifdef KEY_REFRESH + rb_curses_define_const(KEY_REFRESH); + rb_define_const(mKey, "REFRESH", INT2NUM(KEY_REFRESH)); +#endif +#ifdef KEY_REPLACE + rb_curses_define_const(KEY_REPLACE); + rb_define_const(mKey, "REPLACE", INT2NUM(KEY_REPLACE)); +#endif +#ifdef KEY_RESTART + rb_curses_define_const(KEY_RESTART); + rb_define_const(mKey, "RESTART", INT2NUM(KEY_RESTART)); +#endif +#ifdef KEY_RESUME + rb_curses_define_const(KEY_RESUME); + rb_define_const(mKey, "RESUME", INT2NUM(KEY_RESUME)); +#endif +#ifdef KEY_SAVE + rb_curses_define_const(KEY_SAVE); + rb_define_const(mKey, "SAVE", INT2NUM(KEY_SAVE)); +#endif +#ifdef KEY_SBEG + rb_curses_define_const(KEY_SBEG); + rb_define_const(mKey, "SBEG", INT2NUM(KEY_SBEG)); +#endif +#ifdef KEY_SCANCEL + rb_curses_define_const(KEY_SCANCEL); + rb_define_const(mKey, "SCANCEL", INT2NUM(KEY_SCANCEL)); +#endif +#ifdef KEY_SCOMMAND + rb_curses_define_const(KEY_SCOMMAND); + rb_define_const(mKey, "SCOMMAND", INT2NUM(KEY_SCOMMAND)); +#endif +#ifdef KEY_SCOPY + rb_curses_define_const(KEY_SCOPY); + rb_define_const(mKey, "SCOPY", INT2NUM(KEY_SCOPY)); +#endif +#ifdef KEY_SCREATE + rb_curses_define_const(KEY_SCREATE); + rb_define_const(mKey, "SCREATE", INT2NUM(KEY_SCREATE)); +#endif +#ifdef KEY_SDC + rb_curses_define_const(KEY_SDC); + rb_define_const(mKey, "SDC", INT2NUM(KEY_SDC)); +#endif +#ifdef KEY_SDL + rb_curses_define_const(KEY_SDL); + rb_define_const(mKey, "SDL", INT2NUM(KEY_SDL)); +#endif +#ifdef KEY_SELECT + rb_curses_define_const(KEY_SELECT); + rb_define_const(mKey, "SELECT", INT2NUM(KEY_SELECT)); +#endif +#ifdef KEY_SEND + rb_curses_define_const(KEY_SEND); + rb_define_const(mKey, "SEND", INT2NUM(KEY_SEND)); +#endif +#ifdef KEY_SEOL + rb_curses_define_const(KEY_SEOL); + rb_define_const(mKey, "SEOL", INT2NUM(KEY_SEOL)); +#endif +#ifdef KEY_SEXIT + rb_curses_define_const(KEY_SEXIT); + rb_define_const(mKey, "SEXIT", INT2NUM(KEY_SEXIT)); +#endif +#ifdef KEY_SFIND + 
rb_curses_define_const(KEY_SFIND); + rb_define_const(mKey, "SFIND", INT2NUM(KEY_SFIND)); +#endif +#ifdef KEY_SHELP + rb_curses_define_const(KEY_SHELP); + rb_define_const(mKey, "SHELP", INT2NUM(KEY_SHELP)); +#endif +#ifdef KEY_SHOME + rb_curses_define_const(KEY_SHOME); + rb_define_const(mKey, "SHOME", INT2NUM(KEY_SHOME)); +#endif +#ifdef KEY_SIC + rb_curses_define_const(KEY_SIC); + rb_define_const(mKey, "SIC", INT2NUM(KEY_SIC)); +#endif +#ifdef KEY_SLEFT + rb_curses_define_const(KEY_SLEFT); + rb_define_const(mKey, "SLEFT", INT2NUM(KEY_SLEFT)); +#endif +#ifdef KEY_SMESSAGE + rb_curses_define_const(KEY_SMESSAGE); + rb_define_const(mKey, "SMESSAGE", INT2NUM(KEY_SMESSAGE)); +#endif +#ifdef KEY_SMOVE + rb_curses_define_const(KEY_SMOVE); + rb_define_const(mKey, "SMOVE", INT2NUM(KEY_SMOVE)); +#endif +#ifdef KEY_SNEXT + rb_curses_define_const(KEY_SNEXT); + rb_define_const(mKey, "SNEXT", INT2NUM(KEY_SNEXT)); +#endif +#ifdef KEY_SOPTIONS + rb_curses_define_const(KEY_SOPTIONS); + rb_define_const(mKey, "SOPTIONS", INT2NUM(KEY_SOPTIONS)); +#endif +#ifdef KEY_SPREVIOUS + rb_curses_define_const(KEY_SPREVIOUS); + rb_define_const(mKey, "SPREVIOUS", INT2NUM(KEY_SPREVIOUS)); +#endif +#ifdef KEY_SPRINT + rb_curses_define_const(KEY_SPRINT); + rb_define_const(mKey, "SPRINT", INT2NUM(KEY_SPRINT)); +#endif +#ifdef KEY_SREDO + rb_curses_define_const(KEY_SREDO); + rb_define_const(mKey, "SREDO", INT2NUM(KEY_SREDO)); +#endif +#ifdef KEY_SREPLACE + rb_curses_define_const(KEY_SREPLACE); + rb_define_const(mKey, "SREPLACE", INT2NUM(KEY_SREPLACE)); +#endif +#ifdef KEY_SRIGHT + rb_curses_define_const(KEY_SRIGHT); + rb_define_const(mKey, "SRIGHT", INT2NUM(KEY_SRIGHT)); +#endif +#ifdef KEY_SRSUME + rb_curses_define_const(KEY_SRSUME); + rb_define_const(mKey, "SRSUME", INT2NUM(KEY_SRSUME)); +#endif +#ifdef KEY_SSAVE + rb_curses_define_const(KEY_SSAVE); + rb_define_const(mKey, "SSAVE", INT2NUM(KEY_SSAVE)); +#endif +#ifdef KEY_SSUSPEND + rb_curses_define_const(KEY_SSUSPEND); + rb_define_const(mKey, 
"SSUSPEND", INT2NUM(KEY_SSUSPEND)); +#endif +#ifdef KEY_SUNDO + rb_curses_define_const(KEY_SUNDO); + rb_define_const(mKey, "SUNDO", INT2NUM(KEY_SUNDO)); +#endif +#ifdef KEY_SUSPEND + rb_curses_define_const(KEY_SUSPEND); + rb_define_const(mKey, "SUSPEND", INT2NUM(KEY_SUSPEND)); +#endif +#ifdef KEY_UNDO + rb_curses_define_const(KEY_UNDO); + rb_define_const(mKey, "UNDO", INT2NUM(KEY_UNDO)); +#endif +#ifdef KEY_RESIZE + rb_curses_define_const(KEY_RESIZE); + rb_define_const(mKey, "RESIZE", INT2NUM(KEY_RESIZE)); +#endif +#ifdef KEY_MAX + rb_curses_define_const(KEY_MAX); + rb_define_const(mKey, "MAX", INT2NUM(KEY_MAX)); +#endif + { + int c; + char name[] = "KEY_CTRL_x"; + for( c = 'A'; c <= 'Z'; c++ ){ + sprintf(name, "KEY_CTRL_%c", c); + rb_define_const(mCurses, name, INT2FIX(c - 'A' + 1)); + } + } +#undef rb_curses_define_const + + rb_set_end_proc(curses_finalize, 0); +} diff --git a/ext/curses/depend b/ext/curses/depend new file mode 100644 index 0000000..ecb79e5 --- /dev/null +++ b/ext/curses/depend @@ -0,0 +1 @@ +curses.o: curses.c $(hdrdir)/ruby.h $(topdir)/config.h $(hdrdir)/defines.h diff --git a/ext/curses/extconf.rb b/ext/curses/extconf.rb new file mode 100644 index 0000000..a8374a8 --- /dev/null +++ b/ext/curses/extconf.rb @@ -0,0 +1,31 @@ +require 'mkmf' + +dir_config('curses') +dir_config('ncurses') +dir_config('termcap') + +make=false +headers = [] +have_library("mytinfo", "tgetent") if /bow/ =~ RUBY_PLATFORM +have_library("tinfo", "tgetent") or have_library("termcap", "tgetent") +if have_header(*curses=%w"ncurses.h") and (have_library("ncursesw", "initscr") or have_library("ncurses", "initscr")) + make=true +elsif have_header(*curses=%w"ncurses/curses.h") and have_library("ncurses", "initscr") + make=true +elsif have_header(*curses=%w"curses_colr/curses.h") and have_library("cur_colr", "initscr") + curses.unshift("varargs.h") + make=true +elsif have_header(*curses=%w"curses.h") and have_library("curses", "initscr") + make=true +end + +if make + for f in 
%w(beep bkgd bkgdset curs_set deleteln doupdate flash getbkgd getnstr init isendwin keyname keypad resizeterm scrl set setscrreg ungetch wattroff wattron wattrset wbkgd wbkgdset wdeleteln wgetnstr wresize wscrl wsetscrreg def_prog_mode reset_prog_mode timeout wtimeout nodelay init_color wcolor_set) + have_func(f) || (have_macro(f, curses) && $defs.push(format("-DHAVE_%s", f.upcase))) + end + flag = "-D_XOPEN_SOURCE_EXTENDED" + if try_static_assert("sizeof(char*)>sizeof(int)", %w[stdio.h stdlib.h]+curses , flag) + $defs << flag + end + create_makefile("curses") +end diff --git a/ext/curses/hello.rb b/ext/curses/hello.rb new file mode 100644 index 0000000..7f57d80 --- /dev/null +++ b/ext/curses/hello.rb @@ -0,0 +1,30 @@ +#!/usr/local/bin/ruby + +require "curses" +include Curses + +def show_message(message) + width = message.length + 6 + win = Window.new(5, width, + (lines - 5) / 2, (cols - width) / 2) + win.box(?|, ?-) + win.setpos(2, 3) + win.addstr(message) + win.refresh + win.getch + win.close +end + +init_screen +begin + crmode +# show_message("Hit any key") + setpos((lines - 5) / 2, (cols - 10) / 2) + addstr("Hit any key") + refresh + getch + show_message("Hello, World!") + refresh +ensure + close_screen +end diff --git a/ext/curses/mouse.rb b/ext/curses/mouse.rb new file mode 100644 index 0000000..c42bc31 --- /dev/null +++ b/ext/curses/mouse.rb @@ -0,0 +1,53 @@ +#!/usr/local/bin/ruby + +require "curses" +include Curses + +def show_message(*msgs) + message = msgs.join + width = message.length + 6 + win = Window.new(5, width, + (lines - 5) / 2, (cols - width) / 2) + win.keypad = true + win.attron(color_pair(COLOR_RED)){ + win.box(?|, ?-, ?+) + } + win.setpos(2, 3) + win.addstr(message) + win.refresh + win.getch + win.close +end + +init_screen +start_color +init_pair(COLOR_BLUE,COLOR_BLUE,COLOR_WHITE) +init_pair(COLOR_RED,COLOR_RED,COLOR_WHITE) +crmode +noecho +stdscr.keypad(true) + +begin + 
mousemask(BUTTON1_CLICKED|BUTTON2_CLICKED|BUTTON3_CLICKED|BUTTON4_CLICKED) + setpos((lines - 5) / 2, (cols - 10) / 2) + attron(color_pair(COLOR_BLUE)|A_BOLD){ + addstr("click") + } + refresh + while( true ) + c = getch + case c + when KEY_MOUSE + m = getmouse + if( m ) + show_message("getch = #{c.inspect}, ", + "mouse event = #{'0x%x' % m.bstate}, ", + "axis = (#{m.x},#{m.y},#{m.z})") + end + break + end + end + refresh +ensure + close_screen +end diff --git a/ext/curses/rain.rb b/ext/curses/rain.rb new file mode 100644 index 0000000..36f0f84 --- /dev/null +++ b/ext/curses/rain.rb @@ -0,0 +1,76 @@ +#!/usr/local/bin/ruby +# rain for a curses test + +require "curses" +include Curses + +def onsig(sig) + close_screen + exit sig +end + +def ranf + rand(32767).to_f / 32767 +end + +# main # +for i in 1 .. 15 # SIGHUP .. SIGTERM + if trap(i, "SIG_IGN") != 0 then # 0 for SIG_IGN + trap(i) {|sig| onsig(sig) } + end +end + +init_screen +nl +noecho +srand + +xpos = {} +ypos = {} +r = lines - 4 +c = cols - 4 +for i in 0 .. 
4 + xpos[i] = (c * ranf).to_i + 2 + ypos[i] = (r * ranf).to_i + 2 +end + +i = 0 +while TRUE + x = (c * ranf).to_i + 2 + y = (r * ranf).to_i + 2 + + + setpos(y, x); addstr(".") + + setpos(ypos[i], xpos[i]); addstr("o") + + i = if i == 0 then 4 else i - 1 end + setpos(ypos[i], xpos[i]); addstr("O") + + i = if i == 0 then 4 else i - 1 end + setpos(ypos[i] - 1, xpos[i]); addstr("-") + setpos(ypos[i], xpos[i] - 1); addstr("|.|") + setpos(ypos[i] + 1, xpos[i]); addstr("-") + + i = if i == 0 then 4 else i - 1 end + setpos(ypos[i] - 2, xpos[i]); addstr("-") + setpos(ypos[i] - 1, xpos[i] - 1); addstr("/ \\") + setpos(ypos[i], xpos[i] - 2); addstr("| O |") + setpos(ypos[i] + 1, xpos[i] - 1); addstr("\\ /") + setpos(ypos[i] + 2, xpos[i]); addstr("-") + + i = if i == 0 then 4 else i - 1 end + setpos(ypos[i] - 2, xpos[i]); addstr(" ") + setpos(ypos[i] - 1, xpos[i] - 1); addstr(" ") + setpos(ypos[i], xpos[i] - 2); addstr(" ") + setpos(ypos[i] + 1, xpos[i] - 1); addstr(" ") + setpos(ypos[i] + 2, xpos[i]); addstr(" ") + + + xpos[i] = x + ypos[i] = y + refresh + sleep(0.5) +end + +# end of main diff --git a/ext/curses/view.rb b/ext/curses/view.rb new file mode 100644 index 0000000..239f414 --- /dev/null +++ b/ext/curses/view.rb @@ -0,0 +1,91 @@ +#!/usr/local/bin/ruby + +require "curses" +include Curses + +# +# main +# + +if ARGV.size != 1 then + printf("usage: view file\n"); + exit +end +begin + fp = open(ARGV[0], "r") +rescue + raise "cannot open file: #{ARGV[1]}" +end + +# signal(SIGINT, finish) + +init_screen +#keypad(stdscr, TRUE) +nonl +cbreak +noecho +#scrollok(stdscr, TRUE) + +# slurp the file +data_lines = [] +fp.each_line { |l| + data_lines.push(l) +} +fp.close + + +lptr = 0 +while TRUE + i = 0 + while i < lines + setpos(i, 0) + #clrtoeol + addstr(data_lines[lptr + i]) #if data_lines[lptr + i] + i += 1 + end + refresh + + explicit = FALSE + n = 0 + while TRUE + c = getch + if c =~ /[0-9]/ + n = 10 * n + c.to_i + else + break + end + end + + n = 1 if !explicit && n == 0 + + 
case c + when "n" #when KEY_DOWN + i = 0 + while i < n + if lptr + lines < data_lines.size then + lptr += 1 + else + break + end + i += 1 + end + #wscrl(i) + + when "p" #when KEY_UP + i = 0 + while i < n + if lptr > 0 then + lptr -= 1 + else + break + end + i += 1 + end + #wscrl(-i) + + when "q" + break + end + +end +close_screen diff --git a/ext/curses/view2.rb b/ext/curses/view2.rb new file mode 100644 index 0000000..18d9619 --- /dev/null +++ b/ext/curses/view2.rb @@ -0,0 +1,115 @@ +#!/usr/local/bin/ruby + +require "curses" + +if ARGV.size != 1 then + printf("usage: view file\n"); + exit +end +begin + fp = open(ARGV[0], "r") +rescue + raise "cannot open file: #{ARGV[1]}" +end + +# signal(SIGINT, finish) + +Curses.init_screen +Curses.nonl +Curses.cbreak +Curses.noecho + +$screen = Curses.stdscr + +$screen.scrollok(true) +#$screen.keypad(true) + +# slurp the file +$data_lines = [] +fp.each_line { |l| + $data_lines.push(l.chop) +} +fp.close + +$top = 0 +$data_lines[0..$screen.maxy-1].each_with_index{|line, idx| + $screen.setpos(idx, 0) + $screen.addstr(line) +} +$screen.setpos(0,0) +$screen.refresh + +def scroll_up + if( $top > 0 ) + $screen.scrl(-1) + $top -= 1 + str = $data_lines[$top] + if( str ) + $screen.setpos(0, 0) + $screen.addstr(str) + end + return true + else + return false + end +end + +def scroll_down + if( $top + $screen.maxy < $data_lines.length ) + $screen.scrl(1) + $top += 1 + str = $data_lines[$top + $screen.maxy - 1] + if( str ) + $screen.setpos($screen.maxy - 1, 0) + $screen.addstr(str) + end + return true + else + return false + end +end + +while true + result = true + c = Curses.getch + case c + when Curses::KEY_DOWN, Curses::KEY_CTRL_N + result = scroll_down + when Curses::KEY_UP, Curses::KEY_CTRL_P + result = scroll_up + when Curses::KEY_NPAGE, ?\s # white space + for i in 0..($screen.maxy - 2) + if( ! scroll_down ) + if( i == 0 ) + result = false + end + break + end + end + when Curses::KEY_PPAGE + for i in 0..($screen.maxy - 2) + if( ! 
scroll_up ) + if( i == 0 ) + result = false + end + break + end + end + when Curses::KEY_LEFT, Curses::KEY_CTRL_T + while( scroll_up ) + end + when Curses::KEY_RIGHT, Curses::KEY_CTRL_B + while( scroll_down ) + end + when ?q + break + else + $screen.setpos(0,0) + $screen.addstr("[unknown key `#{Curses.keyname(c)}'=#{c}] ") + end + if( !result ) + Curses.beep + end + $screen.setpos(0,0) +end +Curses.close_screen diff --git a/ext/dbm/dbm.c b/ext/dbm/dbm.c new file mode 100644 index 0000000..556674a --- /dev/null +++ b/ext/dbm/dbm.c @@ -0,0 +1,746 @@ +/************************************************ + + dbm.c - + + $Author: yugui $ + created at: Mon Jan 24 15:59:52 JST 1994 + + Copyright (C) 1995-2001 Yukihiro Matsumoto + +************************************************/ + +#include "ruby.h" + +#ifdef HAVE_CDEFS_H +# include +#endif +#ifdef HAVE_SYS_CDEFS_H +# include +#endif +#include DBM_HDR +#include +#include + +static VALUE rb_cDBM, rb_eDBMError; + +#define RUBY_DBM_RW_BIT 0x20000000 + +struct dbmdata { + int di_size; + DBM *di_dbm; +}; + +static void +closed_dbm(void) +{ + rb_raise(rb_eDBMError, "closed DBM file"); +} + +#define GetDBM(obj, dbmp) {\ + Data_Get_Struct(obj, struct dbmdata, dbmp);\ + if (dbmp == 0) closed_dbm();\ + if (dbmp->di_dbm == 0) closed_dbm();\ +} + +#define GetDBM2(obj, data, dbm) {\ + GetDBM(obj, data);\ + (dbm) = dbmp->di_dbm;\ +} + +static void +free_dbm(struct dbmdata *dbmp) +{ + if (dbmp) { + if (dbmp->di_dbm) dbm_close(dbmp->di_dbm); + xfree(dbmp); + } +} + +static VALUE +fdbm_close(VALUE obj) +{ + struct dbmdata *dbmp; + + GetDBM(obj, dbmp); + dbm_close(dbmp->di_dbm); + dbmp->di_dbm = 0; + + return Qnil; +} + +static VALUE +fdbm_closed(VALUE obj) +{ + struct dbmdata *dbmp; + + Data_Get_Struct(obj, struct dbmdata, dbmp); + if (dbmp == 0) + return Qtrue; + if (dbmp->di_dbm == 0) + return Qtrue; + + return Qfalse; +} + +static VALUE +fdbm_alloc(VALUE klass) +{ + return Data_Wrap_Struct(klass, 0, free_dbm, 0); +} + +static VALUE 
+fdbm_initialize(int argc, VALUE *argv, VALUE obj) +{ + volatile VALUE file; + VALUE vmode, vflags; + DBM *dbm; + struct dbmdata *dbmp; + int mode, flags = 0; + + if (rb_scan_args(argc, argv, "12", &file, &vmode, &vflags) == 1) { + mode = 0666; /* default value */ + } + else if (NIL_P(vmode)) { + mode = -1; /* return nil if DB not exist */ + } + else { + mode = NUM2INT(vmode); + } + + if (!NIL_P(vflags)) + flags = NUM2INT(vflags); + + FilePathValue(file); + + if (flags & RUBY_DBM_RW_BIT) { + flags &= ~RUBY_DBM_RW_BIT; + dbm = dbm_open(RSTRING_PTR(file), flags, mode); + } + else { + dbm = 0; + if (mode >= 0) { + dbm = dbm_open(RSTRING_PTR(file), O_RDWR|O_CREAT, mode); + } + if (!dbm) { + dbm = dbm_open(RSTRING_PTR(file), O_RDWR, 0); + } + if (!dbm) { + dbm = dbm_open(RSTRING_PTR(file), O_RDONLY, 0); + } + } + + if (!dbm) { + if (mode == -1) return Qnil; + rb_sys_fail(RSTRING_PTR(file)); + } + + dbmp = ALLOC(struct dbmdata); + DATA_PTR(obj) = dbmp; + dbmp->di_dbm = dbm; + dbmp->di_size = -1; + + return obj; +} + +static VALUE +fdbm_s_open(int argc, VALUE *argv, VALUE klass) +{ + VALUE obj = Data_Wrap_Struct(klass, 0, free_dbm, 0); + + if (NIL_P(fdbm_initialize(argc, argv, obj))) { + return Qnil; + } + + if (rb_block_given_p()) { + return rb_ensure(rb_yield, obj, fdbm_close, obj); + } + + return obj; +} + +static VALUE +fdbm_fetch(VALUE obj, VALUE keystr, VALUE ifnone) +{ + datum key, value; + struct dbmdata *dbmp; + DBM *dbm; + + ExportStringValue(keystr); + key.dptr = RSTRING_PTR(keystr); + key.dsize = RSTRING_LEN(keystr); + + GetDBM2(obj, dbmp, dbm); + value = dbm_fetch(dbm, key); + if (value.dptr == 0) { + if (ifnone == Qnil && rb_block_given_p()) + return rb_yield(rb_tainted_str_new(key.dptr, key.dsize)); + return ifnone; + } + return rb_tainted_str_new(value.dptr, value.dsize); +} + +static VALUE +fdbm_aref(VALUE obj, VALUE keystr) +{ + return fdbm_fetch(obj, keystr, Qnil); +} + +static VALUE +fdbm_fetch_m(int argc, VALUE *argv, VALUE obj) +{ + VALUE keystr, 
valstr, ifnone; + + rb_scan_args(argc, argv, "11", &keystr, &ifnone); + valstr = fdbm_fetch(obj, keystr, ifnone); + if (argc == 1 && !rb_block_given_p() && NIL_P(valstr)) + rb_raise(rb_eIndexError, "key not found"); + + return valstr; +} + +static VALUE +fdbm_key(VALUE obj, VALUE valstr) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + + ExportStringValue(valstr); + val.dptr = RSTRING_PTR(valstr); + val.dsize = RSTRING_LEN(valstr); + + GetDBM2(obj, dbmp, dbm); + for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) { + val = dbm_fetch(dbm, key); + if (val.dsize == RSTRING_LEN(valstr) && + memcmp(val.dptr, RSTRING_PTR(valstr), val.dsize) == 0) { + return rb_tainted_str_new(key.dptr, key.dsize); + } + } + return Qnil; +} + +static VALUE +fdbm_index(VALUE hash, VALUE value) +{ + rb_warn("DBM#index is deprecated; use DBM#key"); + return fdbm_key(hash, value); +} + +static VALUE +fdbm_select(VALUE obj) +{ + VALUE new = rb_ary_new(); + datum key, val; + DBM *dbm; + struct dbmdata *dbmp; + + GetDBM2(obj, dbmp, dbm); + for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) { + VALUE assoc, v; + val = dbm_fetch(dbm, key); + assoc = rb_assoc_new(rb_tainted_str_new(key.dptr, key.dsize), + rb_tainted_str_new(val.dptr, val.dsize)); + v = rb_yield(assoc); + if (RTEST(v)) { + rb_ary_push(new, assoc); + } + GetDBM2(obj, dbmp, dbm); + } + + return new; +} + +static VALUE +fdbm_values_at(int argc, VALUE *argv, VALUE obj) +{ + VALUE new = rb_ary_new2(argc); + int i; + + for (i=0; idi_size = -1; + rb_raise(rb_eDBMError, "dbm_delete failed"); + } + else if (dbmp->di_size >= 0) { + dbmp->di_size--; + } + return valstr; +} + +static VALUE +fdbm_shift(VALUE obj) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + VALUE keystr, valstr; + + fdbm_modify(obj); + GetDBM2(obj, dbmp, dbm); + dbmp->di_size = -1; + + key = dbm_firstkey(dbm); + if (!key.dptr) return Qnil; + val = dbm_fetch(dbm, key); + keystr = rb_tainted_str_new(key.dptr, key.dsize); + valstr = 
rb_tainted_str_new(val.dptr, val.dsize); + dbm_delete(dbm, key); + + return rb_assoc_new(keystr, valstr); +} + +static VALUE +fdbm_delete_if(VALUE obj) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + VALUE keystr, valstr; + VALUE ret, ary = rb_ary_new(); + int i, status = 0, n; + + fdbm_modify(obj); + GetDBM2(obj, dbmp, dbm); + n = dbmp->di_size; + dbmp->di_size = -1; + + for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) { + val = dbm_fetch(dbm, key); + keystr = rb_tainted_str_new(key.dptr, key.dsize); + valstr = rb_tainted_str_new(val.dptr, val.dsize); + ret = rb_protect(rb_yield, rb_assoc_new(rb_str_dup(keystr), valstr), &status); + if (status != 0) break; + if (RTEST(ret)) rb_ary_push(ary, keystr); + GetDBM2(obj, dbmp, dbm); + } + + for (i = 0; i < RARRAY_LEN(ary); i++) { + keystr = RARRAY_PTR(ary)[i]; + ExportStringValue(keystr); + key.dptr = RSTRING_PTR(keystr); + key.dsize = RSTRING_LEN(keystr); + if (dbm_delete(dbm, key)) { + rb_raise(rb_eDBMError, "dbm_delete failed"); + } + } + if (status) rb_jump_tag(status); + if (n > 0) dbmp->di_size = n - RARRAY_LEN(ary); + + return obj; +} + +static VALUE +fdbm_clear(VALUE obj) +{ + datum key; + struct dbmdata *dbmp; + DBM *dbm; + + fdbm_modify(obj); + GetDBM2(obj, dbmp, dbm); + dbmp->di_size = -1; + while (key = dbm_firstkey(dbm), key.dptr) { + if (dbm_delete(dbm, key)) { + rb_raise(rb_eDBMError, "dbm_delete failed"); + } + } + dbmp->di_size = 0; + + return obj; +} + +static VALUE +fdbm_invert(VALUE obj) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + VALUE keystr, valstr; + VALUE hash = rb_hash_new(); + + GetDBM2(obj, dbmp, dbm); + for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) { + val = dbm_fetch(dbm, key); + keystr = rb_tainted_str_new(key.dptr, key.dsize); + valstr = rb_tainted_str_new(val.dptr, val.dsize); + rb_hash_aset(hash, valstr, keystr); + } + return hash; +} + +static VALUE fdbm_store(VALUE,VALUE,VALUE); + +static VALUE +update_i(VALUE pair, VALUE dbm) 
+{ + Check_Type(pair, T_ARRAY); + if (RARRAY_LEN(pair) < 2) { + rb_raise(rb_eArgError, "pair must be [key, value]"); + } + fdbm_store(dbm, RARRAY_PTR(pair)[0], RARRAY_PTR(pair)[1]); + return Qnil; +} + +static VALUE +fdbm_update(VALUE obj, VALUE other) +{ + rb_block_call(other, rb_intern("each_pair"), 0, 0, update_i, obj); + return obj; +} + +static VALUE +fdbm_replace(VALUE obj, VALUE other) +{ + fdbm_clear(obj); + rb_block_call(other, rb_intern("each_pair"), 0, 0, update_i, obj); + return obj; +} + +static VALUE +fdbm_store(VALUE obj, VALUE keystr, VALUE valstr) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + + fdbm_modify(obj); + keystr = rb_obj_as_string(keystr); + valstr = rb_obj_as_string(valstr); + + key.dptr = RSTRING_PTR(keystr); + key.dsize = RSTRING_LEN(keystr); + + val.dptr = RSTRING_PTR(valstr); + val.dsize = RSTRING_LEN(valstr); + + GetDBM2(obj, dbmp, dbm); + dbmp->di_size = -1; + if (dbm_store(dbm, key, val, DBM_REPLACE)) { +#ifdef HAVE_DBM_CLEARERR + dbm_clearerr(dbm); +#endif + if (errno == EPERM) rb_sys_fail(0); + rb_raise(rb_eDBMError, "dbm_store failed"); + } + + return valstr; +} + +static VALUE +fdbm_length(VALUE obj) +{ + datum key; + struct dbmdata *dbmp; + DBM *dbm; + int i = 0; + + GetDBM2(obj, dbmp, dbm); + if (dbmp->di_size > 0) return INT2FIX(dbmp->di_size); + + for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) { + i++; + } + dbmp->di_size = i; + + return INT2FIX(i); +} + +static VALUE +fdbm_empty_p(VALUE obj) +{ + datum key; + struct dbmdata *dbmp; + DBM *dbm; + int i = 0; + + GetDBM2(obj, dbmp, dbm); + if (dbmp->di_size < 0) { + dbm = dbmp->di_dbm; + + for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) { + i++; + } + } + else { + i = dbmp->di_size; + } + if (i == 0) return Qtrue; + return Qfalse; +} + +static VALUE +fdbm_each_value(VALUE obj) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + + RETURN_ENUMERATOR(obj, 0, 0); + + GetDBM2(obj, dbmp, dbm); + for (key = dbm_firstkey(dbm); 
key.dptr; key = dbm_nextkey(dbm)) { + val = dbm_fetch(dbm, key); + rb_yield(rb_tainted_str_new(val.dptr, val.dsize)); + GetDBM2(obj, dbmp, dbm); + } + return obj; +} + +static VALUE +fdbm_each_key(VALUE obj) +{ + datum key; + struct dbmdata *dbmp; + DBM *dbm; + + RETURN_ENUMERATOR(obj, 0, 0); + + GetDBM2(obj, dbmp, dbm); + for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) { + rb_yield(rb_tainted_str_new(key.dptr, key.dsize)); + GetDBM2(obj, dbmp, dbm); + } + return obj; +} + +static VALUE +fdbm_each_pair(VALUE obj) +{ + datum key, val; + DBM *dbm; + struct dbmdata *dbmp; + VALUE keystr, valstr; + + RETURN_ENUMERATOR(obj, 0, 0); + + GetDBM2(obj, dbmp, dbm); + + for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) { + val = dbm_fetch(dbm, key); + keystr = rb_tainted_str_new(key.dptr, key.dsize); + valstr = rb_tainted_str_new(val.dptr, val.dsize); + rb_yield(rb_assoc_new(keystr, valstr)); + GetDBM2(obj, dbmp, dbm); + } + + return obj; +} + +static VALUE +fdbm_keys(VALUE obj) +{ + datum key; + struct dbmdata *dbmp; + DBM *dbm; + VALUE ary; + + GetDBM2(obj, dbmp, dbm); + + ary = rb_ary_new(); + for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) { + rb_ary_push(ary, rb_tainted_str_new(key.dptr, key.dsize)); + } + + return ary; +} + +static VALUE +fdbm_values(VALUE obj) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + VALUE ary; + + GetDBM2(obj, dbmp, dbm); + ary = rb_ary_new(); + for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) { + val = dbm_fetch(dbm, key); + rb_ary_push(ary, rb_tainted_str_new(val.dptr, val.dsize)); + } + + return ary; +} + +static VALUE +fdbm_has_key(VALUE obj, VALUE keystr) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + + ExportStringValue(keystr); + key.dptr = RSTRING_PTR(keystr); + key.dsize = RSTRING_LEN(keystr); + + GetDBM2(obj, dbmp, dbm); + val = dbm_fetch(dbm, key); + if (val.dptr) return Qtrue; + return Qfalse; +} + +static VALUE +fdbm_has_value(VALUE obj, VALUE 
valstr) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + + ExportStringValue(valstr); + val.dptr = RSTRING_PTR(valstr); + val.dsize = RSTRING_LEN(valstr); + + GetDBM2(obj, dbmp, dbm); + for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) { + val = dbm_fetch(dbm, key); + if (val.dsize == RSTRING_LEN(valstr) && + memcmp(val.dptr, RSTRING_PTR(valstr), val.dsize) == 0) + return Qtrue; + } + return Qfalse; +} + +static VALUE +fdbm_to_a(VALUE obj) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + VALUE ary; + + GetDBM2(obj, dbmp, dbm); + ary = rb_ary_new(); + for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) { + val = dbm_fetch(dbm, key); + rb_ary_push(ary, rb_assoc_new(rb_tainted_str_new(key.dptr, key.dsize), + rb_tainted_str_new(val.dptr, val.dsize))); + } + + return ary; +} + +static VALUE +fdbm_to_hash(VALUE obj) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + VALUE hash; + + GetDBM2(obj, dbmp, dbm); + hash = rb_hash_new(); + for (key = dbm_firstkey(dbm); key.dptr; key = dbm_nextkey(dbm)) { + val = dbm_fetch(dbm, key); + rb_hash_aset(hash, rb_tainted_str_new(key.dptr, key.dsize), + rb_tainted_str_new(val.dptr, val.dsize)); + } + + return hash; +} + +static VALUE +fdbm_reject(VALUE obj) +{ + return rb_hash_delete_if(fdbm_to_hash(obj)); +} + +void +Init_dbm(void) +{ + rb_cDBM = rb_define_class("DBM", rb_cObject); + rb_eDBMError = rb_define_class("DBMError", rb_eStandardError); + rb_include_module(rb_cDBM, rb_mEnumerable); + + rb_define_alloc_func(rb_cDBM, fdbm_alloc); + rb_define_singleton_method(rb_cDBM, "open", fdbm_s_open, -1); + + rb_define_method(rb_cDBM, "initialize", fdbm_initialize, -1); + rb_define_method(rb_cDBM, "close", fdbm_close, 0); + rb_define_method(rb_cDBM, "closed?", fdbm_closed, 0); + rb_define_method(rb_cDBM, "[]", fdbm_aref, 1); + rb_define_method(rb_cDBM, "fetch", fdbm_fetch_m, -1); + rb_define_method(rb_cDBM, "[]=", fdbm_store, 2); + rb_define_method(rb_cDBM, "store", fdbm_store, 2); + 
rb_define_method(rb_cDBM, "index", fdbm_index, 1); + rb_define_method(rb_cDBM, "key", fdbm_key, 1); + rb_define_method(rb_cDBM, "select", fdbm_select, 0); + rb_define_method(rb_cDBM, "values_at", fdbm_values_at, -1); + rb_define_method(rb_cDBM, "length", fdbm_length, 0); + rb_define_method(rb_cDBM, "size", fdbm_length, 0); + rb_define_method(rb_cDBM, "empty?", fdbm_empty_p, 0); + rb_define_method(rb_cDBM, "each", fdbm_each_pair, 0); + rb_define_method(rb_cDBM, "each_value", fdbm_each_value, 0); + rb_define_method(rb_cDBM, "each_key", fdbm_each_key, 0); + rb_define_method(rb_cDBM, "each_pair", fdbm_each_pair, 0); + rb_define_method(rb_cDBM, "keys", fdbm_keys, 0); + rb_define_method(rb_cDBM, "values", fdbm_values, 0); + rb_define_method(rb_cDBM, "shift", fdbm_shift, 0); + rb_define_method(rb_cDBM, "delete", fdbm_delete, 1); + rb_define_method(rb_cDBM, "delete_if", fdbm_delete_if, 0); + rb_define_method(rb_cDBM, "reject!", fdbm_delete_if, 0); + rb_define_method(rb_cDBM, "reject", fdbm_reject, 0); + rb_define_method(rb_cDBM, "clear", fdbm_clear, 0); + rb_define_method(rb_cDBM,"invert", fdbm_invert, 0); + rb_define_method(rb_cDBM,"update", fdbm_update, 1); + rb_define_method(rb_cDBM,"replace", fdbm_replace, 1); + + rb_define_method(rb_cDBM, "include?", fdbm_has_key, 1); + rb_define_method(rb_cDBM, "has_key?", fdbm_has_key, 1); + rb_define_method(rb_cDBM, "member?", fdbm_has_key, 1); + rb_define_method(rb_cDBM, "has_value?", fdbm_has_value, 1); + rb_define_method(rb_cDBM, "key?", fdbm_has_key, 1); + rb_define_method(rb_cDBM, "value?", fdbm_has_value, 1); + + rb_define_method(rb_cDBM, "to_a", fdbm_to_a, 0); + rb_define_method(rb_cDBM, "to_hash", fdbm_to_hash, 0); + + /* flags for dbm_open() */ + rb_define_const(rb_cDBM, "READER", INT2FIX(O_RDONLY|RUBY_DBM_RW_BIT)); + rb_define_const(rb_cDBM, "WRITER", INT2FIX(O_RDWR|RUBY_DBM_RW_BIT)); + rb_define_const(rb_cDBM, "WRCREAT", INT2FIX(O_RDWR|O_CREAT|RUBY_DBM_RW_BIT)); + rb_define_const(rb_cDBM, "NEWDB", 
INT2FIX(O_RDWR|O_CREAT|O_TRUNC|RUBY_DBM_RW_BIT)); + +#ifdef DB_VERSION_STRING + rb_define_const(rb_cDBM, "VERSION", rb_str_new2(DB_VERSION_STRING)); +#else + rb_define_const(rb_cDBM, "VERSION", rb_str_new2("unknown")); +#endif +} diff --git a/ext/dbm/depend b/ext/dbm/depend new file mode 100644 index 0000000..5fae80b --- /dev/null +++ b/ext/dbm/depend @@ -0,0 +1 @@ +dbm.o: dbm.c $(hdrdir)/ruby.h $(topdir)/config.h $(hdrdir)/defines.h diff --git a/ext/dbm/extconf.rb b/ext/dbm/extconf.rb new file mode 100644 index 0000000..4d12264 --- /dev/null +++ b/ext/dbm/extconf.rb @@ -0,0 +1,53 @@ +require 'mkmf' + +dir_config("dbm") + +if dblib = with_config("dbm-type", nil) + dblib = dblib.split(/[ ,]+/) +else + dblib = %w(db db2 db1 dbm gdbm gdbm_compat qdbm) +end + +headers = { + "db" => ["db.h"], + "db1" => ["db1/ndbm.h", "db1.h", "ndbm.h"], + "db2" => ["db2/db.h", "db2.h", "db.h"], + "dbm" => ["ndbm.h"], + "gdbm" => ["gdbm-ndbm.h", "ndbm.h"], + "gdbm_compat" => ["gdbm-ndbm.h", "ndbm.h"], + "qdbm" => ["relic.h"], +} + +def headers.db_check(db) + db_prefix = nil + have_gdbm = false + hsearch = nil + + case db + when /^db2?$/ + db_prefix = "__db_n" + hsearch = "-DDB_DBM_HSEARCH " + when "gdbm" + have_gdbm = true + when "gdbm_compat" + have_gdbm = true + have_library("gdbm") or return false + end + db_prefix ||= "" + + if (have_library(db, db_prefix+"dbm_open") || have_func(db_prefix+"dbm_open")) and + hdr = self.fetch(db, ["ndbm.h"]).find {|h| have_type("DBM", h, hsearch)} + have_func(db_prefix+"dbm_clearerr") unless have_gdbm + $defs << hsearch if hsearch + $defs << '-DDBM_HDR="<'+hdr+'>"' + true + else + false + end +end + +if dblib.any? 
{|db| headers.db_check(db)} + have_header("cdefs.h") + have_header("sys/cdefs.h") + create_makefile("dbm") +end diff --git a/ext/etc/depend b/ext/etc/depend new file mode 100644 index 0000000..ac70647 --- /dev/null +++ b/ext/etc/depend @@ -0,0 +1 @@ +etc.o : etc.c $(hdrdir)/ruby.h $(topdir)/config.h $(hdrdir)/defines.h diff --git a/ext/etc/etc.c b/ext/etc/etc.c new file mode 100644 index 0000000..6390d6d --- /dev/null +++ b/ext/etc/etc.c @@ -0,0 +1,551 @@ +/************************************************ + + etc.c - + + $Author: nobu $ + created at: Tue Mar 22 18:39:19 JST 1994 + +************************************************/ + +#include "ruby.h" + +#include +#ifdef HAVE_UNISTD_H +#include +#endif + +#ifdef HAVE_GETPWENT +#include +#endif + +#ifdef HAVE_GETGRENT +#include +#endif + +#ifndef HAVE_TYPE_UID_T +#define uid_t int +#endif + +static VALUE sPasswd; +#ifdef HAVE_GETGRENT +static VALUE sGroup; +#endif + +#ifndef _WIN32 +char *getenv(); +#endif +char *getlogin(); + +/* Returns the short user name of the currently logged in user. + * Unfortunately, it is often rather easy to fool getlogin(). + * Avoid getlogin() for security-related purposes. + * + * e.g. 
+ * Etc.getlogin -> 'guest' + */ +static VALUE +etc_getlogin(VALUE obj) +{ + char *login; + + rb_secure(4); +#ifdef HAVE_GETLOGIN + login = getlogin(); + if (!login) login = getenv("USER"); +#else + login = getenv("USER"); +#endif + + if (login) + return rb_tainted_str_new2(login); + return Qnil; +} + +#if defined(HAVE_GETPWENT) || defined(HAVE_GETGRENT) +static VALUE +safe_setup_str(const char *str) +{ + if (str == 0) str = ""; + return rb_tainted_str_new2(str); +} +#endif + +#ifdef HAVE_GETPWENT +static VALUE +setup_passwd(struct passwd *pwd) +{ + if (pwd == 0) rb_sys_fail("/etc/passwd"); + return rb_struct_new(sPasswd, + safe_setup_str(pwd->pw_name), +#ifdef HAVE_ST_PW_PASSWD + safe_setup_str(pwd->pw_passwd), +#endif + UIDT2NUM(pwd->pw_uid), + GIDT2NUM(pwd->pw_gid), +#ifdef HAVE_ST_PW_GECOS + safe_setup_str(pwd->pw_gecos), +#endif + safe_setup_str(pwd->pw_dir), + safe_setup_str(pwd->pw_shell), +#ifdef HAVE_ST_PW_CHANGE + INT2NUM(pwd->pw_change), +#endif +#ifdef HAVE_ST_PW_QUOTA + INT2NUM(pwd->pw_quota), +#endif +#ifdef HAVE_ST_PW_AGE + PW_AGE2VAL(pwd->pw_age), +#endif +#ifdef HAVE_ST_PW_CLASS + safe_setup_str(pwd->pw_class), +#endif +#ifdef HAVE_ST_PW_COMMENT + safe_setup_str(pwd->pw_comment), +#endif +#ifdef HAVE_ST_PW_EXPIRE + INT2NUM(pwd->pw_expire), +#endif + 0 /*dummy*/ + ); +} +#endif + +/* Returns the /etc/passwd information for the user with specified integer + * user id (uid). + * + * The information is returned as a Struct::Passwd; see getpwent above for + * details. + * + * e.g. 
* Etc.getpwuid(0) -> # + */ +static VALUE +etc_getpwuid(int argc, VALUE *argv, VALUE obj) +{ +#if defined(HAVE_GETPWENT) + VALUE id; + uid_t uid; + struct passwd *pwd; + + rb_secure(4); + if (rb_scan_args(argc, argv, "01", &id) == 1) { + uid = NUM2UIDT(id); + } + else { + uid = getuid(); + } + pwd = getpwuid(uid); + if (pwd == 0) rb_raise(rb_eArgError, "can't find user for %d", (int)uid); + return setup_passwd(pwd); +#else + return Qnil; +#endif +} + +/* Returns the /etc/passwd information for the user with specified login name. + * + * The information is returned as a Struct::Passwd; see getpwent above for + * details. + * + * e.g. * Etc.getpwnam('root') -> # + */ +static VALUE +etc_getpwnam(VALUE obj, VALUE nam) +{ +#ifdef HAVE_GETPWENT + struct passwd *pwd; + + SafeStringValue(nam); + pwd = getpwnam(RSTRING_PTR(nam)); + if (pwd == 0) rb_raise(rb_eArgError, "can't find user for %s", RSTRING_PTR(nam)); + return setup_passwd(pwd); +#else + return Qnil; +#endif +} + +#ifdef HAVE_GETPWENT +static int passwd_blocking = 0; +static VALUE +passwd_ensure(void) +{ + passwd_blocking = Qfalse; + return Qnil; +} + +static VALUE +passwd_iterate(void) +{ + struct passwd *pw; + + setpwent(); + while (pw = getpwent()) { + rb_yield(setup_passwd(pw)); + } + endpwent(); + return Qnil; +} +#endif + +/* Provides a convenient Ruby iterator which executes a block for each entry + * in the /etc/passwd file. + * + * The code block is passed an Struct::Passwd struct; see getpwent above for + * details. 
+ * + * Example: + * + * require 'etc' + * + * Etc.passwd {|u| + * puts u.name + " = " + u.gecos + * } + * + */ +static VALUE +etc_passwd(VALUE obj) +{ +#ifdef HAVE_GETPWENT + struct passwd *pw; + + rb_secure(4); + if (rb_block_given_p()) { + if (passwd_blocking) { + rb_raise(rb_eRuntimeError, "parallel passwd iteration"); + } + passwd_blocking = Qtrue; + rb_ensure(passwd_iterate, 0, passwd_ensure, 0); + } + if (pw = getpwent()) { + return setup_passwd(pw); + } +#endif + return Qnil; +} + +/* Resets the process of reading the /etc/passwd file, so that the next call + * to getpwent will return the first entry again. + */ +static VALUE +etc_setpwent(VALUE obj) +{ +#ifdef HAVE_GETPWENT + setpwent(); +#endif + return Qnil; +} + +/* Ends the process of scanning through the /etc/passwd file begun with + * getpwent, and closes the file. + */ +static VALUE +etc_endpwent(VALUE obj) +{ +#ifdef HAVE_GETPWENT + endpwent(); +#endif + return Qnil; +} + +/* Returns an entry from the /etc/passwd file. The first time it is called it + * opens the file and returns the first entry; each successive call returns + * the next entry, or nil if the end of the file has been reached. + * + * To close the file when processing is complete, call endpwent. + * + * Each entry is returned as a Struct::Passwd: + * + * - Passwd#name contains the short login name of the user as a String. + * + * - Passwd#passwd contains the encrypted password of the user as a String. + * an 'x' is returned if shadow passwords are in use. An '*' is returned + * if the user cannot log in using a password. + * + * - Passwd#uid contains the integer user ID (uid) of the user. + * + * - Passwd#gid contains the integer group ID (gid) of the user's primary group. + * + * - Passwd#gecos contains a longer String description of the user, such as + * a full name. Some Unix systems provide structured information in the + * gecos field, but this is system-dependent. 
+ * + * - Passwd#dir contains the path to the home directory of the user as a String. + * + * - Passwd#shell contains the path to the login shell of the user as a String. + */ +static VALUE +etc_getpwent(VALUE obj) +{ +#ifdef HAVE_GETPWENT + struct passwd *pw; + + if (pw = getpwent()) { + return setup_passwd(pw); + } +#endif + return Qnil; +} + +#ifdef HAVE_GETGRENT +static VALUE +setup_group(struct group *grp) +{ + VALUE mem; + char **tbl; + + mem = rb_ary_new(); + tbl = grp->gr_mem; + while (*tbl) { + rb_ary_push(mem, safe_setup_str(*tbl)); + tbl++; + } + return rb_struct_new(sGroup, + safe_setup_str(grp->gr_name), +#ifdef HAVE_ST_GR_PASSWD + safe_setup_str(grp->gr_passwd), +#endif + GIDT2NUM(grp->gr_gid), + mem); +} +#endif + +/* Returns information about the group with specified integer group id (gid), + * as found in /etc/group. + * + * The information is returned as a Struct::Group; see getgrent above for + * details. + * + * e.g. Etc.getgrgid(100) -> # + * + */ +static VALUE +etc_getgrgid(int argc, VALUE *argv, VALUE obj) +{ +#ifdef HAVE_GETGRENT + VALUE id; + gid_t gid; + struct group *grp; + + rb_secure(4); + if (rb_scan_args(argc, argv, "01", &id) == 1) { + gid = NUM2GIDT(id); + } + else { + gid = getgid(); + } + grp = getgrgid(gid); + if (grp == 0) rb_raise(rb_eArgError, "can't find group for %d", (int)gid); + return setup_group(grp); +#else + return Qnil; +#endif +} + +/* Returns information about the group with specified String name, as found + * in /etc/group. + * + * The information is returned as a Struct::Group; see getgrent above for + * details. + * + * e.g. 
Etc.getgrnam('users') -> # + * + */ +static VALUE +etc_getgrnam(VALUE obj, VALUE nam) +{ +#ifdef HAVE_GETGRENT + struct group *grp; + + rb_secure(4); + SafeStringValue(nam); + grp = getgrnam(RSTRING_PTR(nam)); + if (grp == 0) rb_raise(rb_eArgError, "can't find group for %s", RSTRING_PTR(nam)); + return setup_group(grp); +#else + return Qnil; +#endif +} + +#ifdef HAVE_GETGRENT +static int group_blocking = 0; +static VALUE +group_ensure(void) +{ + group_blocking = Qfalse; + return Qnil; +} + +static VALUE +group_iterate(void) +{ + struct group *pw; + + setgrent(); + while (pw = getgrent()) { + rb_yield(setup_group(pw)); + } + endgrent(); + return Qnil; +} +#endif + +/* Provides a convenient Ruby iterator which executes a block for each entry + * in the /etc/group file. + * + * The code block is passed an Struct::Group struct; see getgrent above for + * details. + * + * Example: + * + * require 'etc' + * + * Etc.group {|g| + * puts g.name + ": " + g.mem.join(', ') + * } + * + */ +static VALUE +etc_group(VALUE obj) +{ +#ifdef HAVE_GETGRENT + struct group *grp; + + rb_secure(4); + if (rb_block_given_p()) { + if (group_blocking) { + rb_raise(rb_eRuntimeError, "parallel group iteration"); + } + group_blocking = Qtrue; + rb_ensure(group_iterate, 0, group_ensure, 0); + } + if (grp = getgrent()) { + return setup_group(grp); + } +#endif + return Qnil; +} + +/* Resets the process of reading the /etc/group file, so that the next call + * to getgrent will return the first entry again. + */ +static VALUE +etc_setgrent(VALUE obj) +{ +#ifdef HAVE_GETGRENT + setgrent(); +#endif + return Qnil; +} + +/* Ends the process of scanning through the /etc/group file begun by + * getgrent, and closes the file. + */ +static VALUE +etc_endgrent(VALUE obj) +{ +#ifdef HAVE_GETGRENT + endgrent(); +#endif + return Qnil; +} + +/* Returns an entry from the /etc/group file. 
The first time it is called it + * opens the file and returns the first entry; each successive call returns + * the next entry, or nil if the end of the file has been reached. + * + * To close the file when processing is complete, call endgrent. + * + * Each entry is returned as a Struct::Group: + * + * - Group#name contains the name of the group as a String. + * + * - Group#passwd contains the encrypted password as a String. An 'x' is + * returned if password access to the group is not available; an empty + * string is returned if no password is needed to obtain membership of + * the group. + * + * - Group#gid contains the group's numeric ID as an integer. + * + * - Group#mem is an Array of Strings containing the short login names of the + * members of the group. + */ +static VALUE +etc_getgrent(VALUE obj) +{ +#ifdef HAVE_GETGRENT + struct group *gr; + + if (gr = getgrent()) { + return setup_group(gr); + } +#endif + return Qnil; +} + +/* The etc module provides access to information from the /etc/passwd and + * /etc/group files on Linux and Unix systems. + * + * Documented by mathew . 
+ */ +void +Init_etc(void) +{ + VALUE mEtc; + + mEtc = rb_define_module("Etc"); + rb_define_module_function(mEtc, "getlogin", etc_getlogin, 0); + + rb_define_module_function(mEtc, "getpwuid", etc_getpwuid, -1); + rb_define_module_function(mEtc, "getpwnam", etc_getpwnam, 1); + rb_define_module_function(mEtc, "setpwent", etc_setpwent, 0); + rb_define_module_function(mEtc, "endpwent", etc_endpwent, 0); + rb_define_module_function(mEtc, "getpwent", etc_getpwent, 0); + rb_define_module_function(mEtc, "passwd", etc_passwd, 0); + + rb_define_module_function(mEtc, "getgrgid", etc_getgrgid, -1); + rb_define_module_function(mEtc, "getgrnam", etc_getgrnam, 1); + rb_define_module_function(mEtc, "group", etc_group, 0); + rb_define_module_function(mEtc, "setgrent", etc_setgrent, 0); + rb_define_module_function(mEtc, "endgrent", etc_endgrent, 0); + rb_define_module_function(mEtc, "getgrent", etc_getgrent, 0); + + sPasswd = rb_struct_define("Passwd", + "name", "passwd", "uid", "gid", +#ifdef HAVE_ST_PW_GECOS + "gecos", +#endif + "dir", "shell", +#ifdef HAVE_ST_PW_CHANGE + "change", +#endif +#ifdef HAVE_ST_PW_QUOTA + "quota", +#endif +#ifdef HAVE_ST_PW_AGE + "age", +#endif +#ifdef HAVE_ST_PW_CLASS + "uclass", +#endif +#ifdef HAVE_ST_PW_COMMENT + "comment", +#endif +#ifdef HAVE_ST_PW_EXPIRE + "expire", +#endif + NULL); + rb_define_const(mEtc, "Passwd", sPasswd); + +#ifdef HAVE_GETGRENT + sGroup = rb_struct_define("Group", "name", +#ifdef HAVE_ST_GR_PASSWD + "passwd", +#endif + "gid", "mem", NULL); + + rb_define_const(mEtc, "Group", sGroup); +#endif +} diff --git a/ext/etc/etc.txt b/ext/etc/etc.txt new file mode 100644 index 0000000..5347901 --- /dev/null +++ b/ext/etc/etc.txt @@ -0,0 +1,72 @@ +.\" etc.txt - -*- Indented-Text -*- created at: Fri Jul 14 00:47:15 JST 1995 + +** Etc(Module) + +The module to retrieve information under /etc directory. Available +only on UNIX platforms. 
All operations defined in this module are +module functions, so that you can include Etc module into your class. + +Module Function: + + getlogin + + returns login name of the user. If this fails, try getpwuid(). + + getpwnam(name) + + searches in /etc/passwd file (or equivalent database), and + returns password entry for the user. The return value is a + passwd structure, which has members described below. + + struct passwd + name # user name(string) + passwd # encrypted password(string) + uid # user ID(integer) + gid # group ID(integer) + gecos # gecos field(string) + dir # home directory(string) + shell # login shell(string) + # members below are optional + change # password change time(integer) + quota # quota value(integer) + age # password age(integer) + uclass # user access class(string) + comment # comment(string) + expire # account expiration time(integer) + end + + See getpwnam(3) for detail. + + getpwuid([uid]) + + returns passwd entry for the specified user id. If uid is + omitted, use the value from getuid(). See getpwuid(3) for + detail. + + getgrgid([gid]) + + searches in /etc/group file (or equivalent database), and + returns group entry for the group id. If gid is omitted, use + the value from getgid(). The return value is a group + structure, which has members described below. + + struct group + name # group name(string) + passwd # group password(string) + gid # group ID(integer) + mem # array of the group member names + end + + See getgrgid(3) for detail. + + getgrnam(name) + + returns the group entry for the specified name. The return + value is the group structure. See getgrnam(3) for detail. + + group + + iterates over all group entries. + + passwd + + iterates over all passwd entries. 
diff --git a/ext/etc/etc.txt.ja b/ext/etc/etc.txt.ja new file mode 100644 index 0000000..2dddcfb --- /dev/null +++ b/ext/etc/etc.txt.ja @@ -0,0 +1,72 @@ +.\" etc.txt.ja - -*- Indented-Text -*- created at: Fri Jul 14 00:47:15 JST 1995 + +** Etc() + +/etc + + +Module Function: + + getlogin + + logingetpwuid() + + + getpwnam(name) + + /etc/passwd(DBMNIS) + namepasswdpasswd + + + struct passwd + name # () + passwd # () + uid # ID() + gid # ID() + gecos # gecos() + dir # () + shell # () + # + change # () + quota # () + age # () + class # () + comment # () + expire # () + end + + getpwnam(3) + + getpwuid([uid]) + + uidIDpasswdgetpwnam() + getuid() + getpwuid(3) + + getgrgid(gid) + + /etc/group(getpwnam)gid + IDgroup + + + struct group + name # () + passwd # () + gid # ID() + mem # + end + + getgrgid(3) + + getgrnam(name) + + namegetgrgid() + getgrnam(3) + + group + + + + passwd + + passwd diff --git a/ext/etc/extconf.rb b/ext/etc/extconf.rb new file mode 100644 index 0000000..2914bfb --- /dev/null +++ b/ext/etc/extconf.rb @@ -0,0 +1,28 @@ +require 'mkmf' + +have_library("sun", "getpwnam") # NIS (== YP) interface for IRIX 4 +a = have_func("getlogin") +b = have_func("getpwent") +c = have_func("getgrent") +if a or b or c + have_struct_member('struct passwd', 'pw_gecos', 'pwd.h') + have_struct_member('struct passwd', 'pw_change', 'pwd.h') + have_struct_member('struct passwd', 'pw_quota', 'pwd.h') + if have_struct_member('struct passwd', 'pw_age', 'pwd.h') + case what_type?('struct passwd', 'pw_age', 'pwd.h') + when "string" + f = "safe_setup_str" + when "long long" + f = "LL2NUM" + else + f = "INT2NUM" + end + $defs.push("-DPW_AGE2VAL="+f) + end + have_struct_member('struct passwd', 'pw_class', 'pwd.h') + have_struct_member('struct passwd', 'pw_comment', 'pwd.h') unless /cygwin/ === RUBY_PLATFORM + have_struct_member('struct passwd', 'pw_expire', 'pwd.h') + have_struct_member('struct passwd', 'pw_passwd', 'pwd.h') + have_struct_member('struct group', 'gr_passwd', 'grp.h') + 
create_makefile("etc") +end diff --git a/ext/extmk.rb b/ext/extmk.rb new file mode 100644 index 0000000..adca9b4 --- /dev/null +++ b/ext/extmk.rb @@ -0,0 +1,590 @@ +#! /usr/local/bin/ruby +# -*- ruby -*- + +$extension = nil +$extstatic = nil +$force_static = nil +$install = nil +$destdir = nil +$dryrun = false +$clean = nil +$nodynamic = nil +$extinit = nil +$extobjs = nil +$extflags = "" +$extlibs = nil +$extpath = nil +$ignore = nil +$message = nil +$command_output = nil + +$progname = $0 +alias $PROGRAM_NAME $0 +alias $0 $progname + +$extlist = [] +$compiled = {} + +srcdir = File.dirname(File.dirname(__FILE__)) +unless defined?(CROSS_COMPILING) and CROSS_COMPILING + $:.replace([File.expand_path("lib", srcdir), Dir.pwd]) +end +$:.unshift(srcdir) +require 'rbconfig' + +$topdir = "." +$top_srcdir = srcdir + +$" << "mkmf.rb" +load File.expand_path("lib/mkmf.rb", srcdir) +require 'optparse/shellwords' + +def sysquote(x) + @quote ||= /os2/ =~ (CROSS_COMPILING || RUBY_PLATFORM) + @quote ? x.quote : x +end + +def extract_makefile(makefile, keep = true) + m = File.read(makefile) + if !(target = m[/^TARGET[ \t]*=[ \t]*(\S*)/, 1]) + return keep + end + installrb = {} + m.scan(/^install-rb-default:[ \t]*(\S+)\n\1:[ \t]*(\S+)/) {installrb[$2] = $1} + oldrb = installrb.keys.sort + newrb = install_rb(nil, "").collect {|d, *f| f}.flatten.sort + if target_prefix = m[/^target_prefix[ \t]*=[ \t]*\/(.*)/, 1] + target = "#{target_prefix}/#{target}" + end + unless oldrb == newrb + if $extout + newrb.each {|f| installrb.delete(f)} + unless installrb.empty? 
+ config = CONFIG.dup + install_dirs(target_prefix).each {|var, val| config[var] = val} + FileUtils.rm_f(installrb.values.collect {|f| RbConfig.expand(f, config)}, :verbose => true) + end + end + return false + end + $target = target + $extconf_h = m[/^RUBY_EXTCONF_H[ \t]*=[ \t]*(\S+)/, 1] + $static ||= m[/^EXTSTATIC[ \t]*=[ \t]*(\S+)/, 1] || false + /^STATIC_LIB[ \t]*=[ \t]*\S+/ =~ m or $static = nil + $preload = Shellwords.shellwords(m[/^preload[ \t]*=[ \t]*(.*)/, 1] || "") + $DLDFLAGS += " " + (m[/^dldflags[ \t]*=[ \t]*(.*)/, 1] || "") + if s = m[/^LIBS[ \t]*=[ \t]*(.*)/, 1] + s.sub!(/^#{Regexp.quote($LIBRUBYARG)} */, "") + s.sub!(/ *#{Regexp.quote($LIBS)}$/, "") + $libs = s + end + $objs = (m[/^OBJS[ \t]*=[ \t](.*)/, 1] || "").split + $srcs = (m[/^SRCS[ \t]*=[ \t](.*)/, 1] || "").split + $LOCAL_LIBS = m[/^LOCAL_LIBS[ \t]*=[ \t]*(.*)/, 1] || "" + $LIBPATH = Shellwords.shellwords(m[/^libpath[ \t]*=[ \t]*(.*)/, 1] || "") - %w[$(libdir) $(topdir)] + true +end + +def extmake(target) + print "#{$message} #{target}\n" + $stdout.flush + + FileUtils.mkpath target unless File.directory?(target) + begin + dir = Dir.pwd + FileUtils.mkpath target unless File.directory?(target) + Dir.chdir target + top_srcdir = $top_srcdir + topdir = $topdir + hdrdir = $hdrdir + prefix = "../" * (target.count("/")+1) + $top_srcdir = relative_from(top_srcdir, prefix) + $hdrdir = relative_from(hdrdir, prefix) + $topdir = prefix + $topdir + $target = target + $mdir = target + $srcdir = File.join($top_srcdir, "ext", $mdir) + $preload = nil + $objs = "" + $srcs = "" + $compiled[target] = false + makefile = "./Makefile" + ok = File.exist?(makefile) + unless $ignore + rbconfig0 = RbConfig::CONFIG + mkconfig0 = CONFIG + rbconfig = { + "hdrdir" => $hdrdir, + "srcdir" => $srcdir, + "topdir" => $topdir, + } + mkconfig = { + "hdrdir" => ($hdrdir == top_srcdir) ? 
top_srcdir : "$(top_srcdir)/include", + "srcdir" => "$(top_srcdir)/ext/#{$mdir}", + "topdir" => $topdir, + } + rbconfig0.each_pair {|key, val| rbconfig[key] ||= val.dup} + mkconfig0.each_pair {|key, val| mkconfig[key] ||= val.dup} + RbConfig.module_eval { + remove_const(:CONFIG) + const_set(:CONFIG, rbconfig) + remove_const(:MAKEFILE_CONFIG) + const_set(:MAKEFILE_CONFIG, mkconfig) + } + Object.class_eval { + remove_const(:CONFIG) + const_set(:CONFIG, mkconfig) + } + begin + $extconf_h = nil + ok &&= extract_makefile(makefile) + conf = ["#{$srcdir}/makefile.rb", "#{$srcdir}/extconf.rb"].find {|f| File.exist?(f)} + if (($extconf_h && !File.exist?($extconf_h)) || + !(t = modified?(makefile, MTIMES)) || + [conf, "#{$srcdir}/depend"].any? {|f| modified?(f, [t])}) + then + ok = false + init_mkmf + Logging::logfile 'mkmf.log' + rm_f makefile + if conf + load $0 = conf + else + create_makefile(target) + end + $defs << "-DRUBY_EXPORT" if $static + ok = File.exist?(makefile) + end + rescue SystemExit + # ignore + ensure + rm_f "conftest*" + config = $0 + $0 = $PROGRAM_NAME + end + end + ok = yield(ok) if block_given? + unless ok + open(makefile, "w") do |f| + f.print(*dummy_makefile(CONFIG["srcdir"])) + end + return true + end + args = sysquote($mflags) + unless $destdir.to_s.empty? 
or $mflags.include?("DESTDIR") + args += [sysquote("DESTDIR=" + relative_from($destdir, "../"+prefix))] + end + if $static + args += ["static"] unless $clean + $extlist.push [$static, $target, File.basename($target), $preload] + end + unless system($make, *args) + $ignore or $continue or return false + end + $compiled[target] = true + if $clean + FileUtils.rm_f("mkmf.log") + if $clean != true + FileUtils.rm_f([makefile, $extconf_h || "extconf.h"]) + end + end + if $static + $extflags ||= "" + $extlibs ||= [] + $extpath ||= [] + unless $mswin + $extflags = ($extflags.split | $DLDFLAGS.split | $LDFLAGS.split).join(" ") + end + $extlibs = merge_libs($extlibs, $libs.split, $LOCAL_LIBS.split) + $extpath |= $LIBPATH + end + ensure + unless $ignore + RbConfig.module_eval { + remove_const(:CONFIG) + const_set(:CONFIG, rbconfig0) + remove_const(:MAKEFILE_CONFIG) + const_set(:MAKEFILE_CONFIG, mkconfig0) + } + Object.class_eval { + remove_const(:CONFIG) + const_set(:CONFIG, mkconfig0) + } + end + $top_srcdir = top_srcdir + $topdir = topdir + $hdrdir = hdrdir + Dir.chdir dir + end + begin + Dir.rmdir target + target = File.dirname(target) + rescue SystemCallError + break + end while true + true +end + +def compiled?(target) + $compiled[target] +end + +def parse_args() + $mflags = [] + $makeflags = [] + + $optparser ||= OptionParser.new do |opts| + opts.on('-n') {$dryrun = true} + opts.on('--[no-]extension [EXTS]', Array) do |v| + $extension = (v == false ? [] : v) + end + opts.on('--[no-]extstatic [STATIC]', Array) do |v| + if ($extstatic = v) == false + $extstatic = [] + elsif v + $force_static = true if $extstatic.delete("static") + $extstatic = nil if $extstatic.empty? + end + end + opts.on('--dest-dir=DIR') do |v| + $destdir = v + end + opts.on('--extout=DIR') do |v| + $extout = (v unless v.empty?) 
+ end + opts.on('--make=MAKE') do |v| + $make = v || 'make' + end + opts.on('--make-flags=FLAGS', '--mflags', Shellwords) do |v| + v.grep(/\A([-\w]+)=(.*)/) {$configure_args["--#{$1}"] = $2} + if arg = v.first + arg.insert(0, '-') if /\A[^-][^=]*\Z/ =~ arg + end + $makeflags.concat(v.reject {|arg| /\AMINIRUBY=/ =~ arg}.quote) + $mflags.concat(v) + end + opts.on('--message [MESSAGE]', String) do |v| + $message = v + end + opts.on('--command-output=FILE', String) do |v| + $command_output = v + end + end + begin + $optparser.parse!(ARGV) + rescue OptionParser::InvalidOption => e + retry if /^--/ =~ e.args[0] + $optparser.warn(e) + abort $optparser.to_s + end + + $destdir ||= '' + + $make, *rest = Shellwords.shellwords($make) + $mflags.unshift(*rest) unless rest.empty? + + def $mflags.set?(flag) + grep(/\A-(?!-).*#{flag.chr}/i) { return true } + false + end + def $mflags.defined?(var) + grep(/\A#{var}=(.*)/) {return $1} + false + end + + if $mflags.set?(?n) + $dryrun = true + else + $mflags.unshift '-n' if $dryrun + end + + $continue = $mflags.set?(?k) + if $extout + $extout = '$(topdir)/'+$extout + RbConfig::CONFIG["extout"] = CONFIG["extout"] = $extout + $extout_prefix = $extout ? "$(extout)$(target_prefix)/" : "" + $mflags << "extout=#$extout" << "extout_prefix=#$extout_prefix" + end +end + +parse_args() + +if target = ARGV.shift and /^[a-z-]+$/ =~ target + $mflags.push(target) + case target + when /^(dist|real)?(clean)$/ + target = $2 + $ignore ||= true + $clean = $1 ? 
$1[0] : true + when /^install\b/ + $install = true + $ignore ||= true + $mflags.unshift("INSTALL_PROG=install -c -p -m 0755", + "INSTALL_DATA=install -c -p -m 0644", + "MAKEDIRS=mkdir -p") if $dryrun + end +end +unless $message + if target + $message = target.sub(/^(\w+)e?\b/, '\1ing').tr('-', ' ') + else + $message = "compiling" + end +end + +EXEEXT = CONFIG['EXEEXT'] +if CROSS_COMPILING + $ruby = $mflags.defined?("MINIRUBY") || CONFIG['MINIRUBY'] +elsif sep = config_string('BUILD_FILE_SEPARATOR') + $ruby = "$(topdir:/=#{sep})#{sep}miniruby" + EXEEXT +else + $ruby = '$(topdir)/miniruby' + EXEEXT +end +$ruby << " -I'$(topdir)'" +unless CROSS_COMPILING + $ruby << " -I'$(top_srcdir)/lib'" + $ruby << " -I'$(extout)/$(arch)' -I'$(extout)/common'" if $extout + $ruby << " -I./- -I'$(top_srcdir)/ext' -rpurelib.rb" + ENV["RUBYLIB"] = "-" + ENV["RUBYOPT"] = "-r#{File.expand_path('ext/purelib.rb', $top_srcdir)}" +end +$mflags << "ruby=#$ruby" + +MTIMES = [__FILE__, 'rbconfig.rb', srcdir+'/lib/mkmf.rb'].collect {|f| File.mtime(f)} + +# get static-link modules +$static_ext = {} +if $extstatic + $extstatic.each do |t| + target = t + target = target.downcase if File::FNM_SYSCASE.nonzero? + $static_ext[target] = $static_ext.size + end +end +for dir in ["ext", File::join($top_srcdir, "ext")] + setup = File::join(dir, CONFIG['setup']) + if File.file? setup + f = open(setup) + while line = f.gets() + line.chomp! + line.sub!(/#.*$/, '') + next if /^\s*$/ =~ line + target, opt = line.split(nil, 3) + if target == 'option' + case opt + when 'nodynamic' + $nodynamic = true + end + next + end + target = target.downcase if File::FNM_SYSCASE.nonzero? 
+ $static_ext[target] = $static_ext.size + end + MTIMES << f.mtime + $setup = setup + f.close + break + end +end unless $extstatic + +ext_prefix = "#{$top_srcdir}/ext" +exts = $static_ext.sort_by {|t, i| i}.collect {|t, i| t} +if $extension + exts |= $extension.select {|d| File.directory?("#{ext_prefix}/#{d}")} +else + withes, withouts = %w[--with --without].collect {|w| + if not (w = %w[-extensions -ext].collect {|o|arg_config(w+o)}).any? + nil + elsif (w = w.grep(String)).empty? + proc {true} + else + proc {|c1| w.collect {|o| o.split(/,/)}.flatten.any?(&c1)} + end + } + if withes + withouts ||= proc {true} + else + withes = proc {false} + withouts ||= withes + end + cond = proc {|ext, *| + cond1 = proc {|n| File.fnmatch(n, ext)} + withes.call(cond1) or !withouts.call(cond1) + } + exts |= Dir.glob("#{ext_prefix}/*/**/extconf.rb").collect {|d| + d = File.dirname(d) + d.slice!(0, ext_prefix.length + 1) + d + }.find_all {|ext| + with_config(ext, &cond) + }.sort +end + +if $extout + extout = RbConfig.expand("#{$extout}", RbConfig::CONFIG.merge("topdir"=>$topdir)) + unless $ignore + FileUtils.mkpath(extout) + end +end + +dir = Dir.pwd +FileUtils::makedirs('ext') +Dir::chdir('ext') + +hdrdir = $hdrdir +$hdrdir = ($top_srcdir = relative_from(srcdir, $topdir = "..")) + "/include" +exts.each do |d| + $static = $force_static ? $static_ext[target] : false + + if $ignore or !$nodynamic or $static + extmake(d) or abort + end +end +$top_srcdir = srcdir +$topdir = "." +$hdrdir = hdrdir + +extinit = Struct.new(:c, :o) { + def initialize(src) + super("#{src}.c", "#{src}.#{$OBJEXT}") + end +}.new("extinit") +if $ignore + FileUtils.rm_f(extinit.to_a) if $clean + Dir.chdir ".." 
+ if $clean + Dir.rmdir('ext') rescue nil + if $extout + FileUtils.rm_rf([extout+"/common", extout+"/include/ruby", extout+"/rdoc"]) + FileUtils.rm_rf(extout+"/"+CONFIG["arch"]) + if $clean != true + FileUtils.rm_rf(extout+"/include/"+CONFIG["arch"]) + FileUtils.rm_f($mflags.defined?("INSTALLED_LIST")||ENV["INSTALLED_LIST"]||".installed.list") + Dir.rmdir(extout+"/include") rescue nil + Dir.rmdir(extout) rescue nil + end + end + end + exit +end + +$extinit ||= "" +$extobjs ||= "" +$extpath ||= [] +$extflags ||= "" +$extlibs ||= [] +unless $extlist.empty? + $extinit << "\n" unless $extinit.empty? + list = $extlist.dup + built = [] + while e = list.shift + s,t,i,r = e + if r and !(r -= built).empty? + l = list.size + if (while l > 0; break true if r.include?(list[l-=1][1]) end) + list.insert(l + 1, e) + end + next + end + f = format("%s/%s.%s", s, i, $LIBEXT) + if File.exist?(f) + $extinit << " init(Init_#{i}, \"#{t}.so\");\n" + $extobjs << "ext/#{f} " + built << t + end + end + + src = %{\ +#include "ruby.h" + +#define init(func, name) { \\ + extern void func _((void)); \\ + ruby_init_ext(name, func); \\ +} + +void ruby_init_ext _((const char *name, void (*init)(void))); + +void Init_ext _((void))\n{\n#$extinit} +} + if !modified?(extinit.c, MTIMES) || IO.read(extinit.c) != src + open(extinit.c, "w") {|fe| fe.print src} + end + + $extobjs = "ext/#{extinit.o} #{$extobjs}" + if RUBY_PLATFORM =~ /beos/ + $extflags.delete("-L/usr/local/lib") + end + $extpath.delete("$(topdir)") + $extflags = libpathflag($extpath) << " " << $extflags.strip + conf = [ + ['LIBRUBY_SO_UPDATE', '$(LIBRUBY_EXTS)'], + ['SETUP', $setup], + [enable_config("shared", $enable_shared) ? 'DLDOBJS' : 'EXTOBJS', $extobjs], + ['EXTLIBS', $extlibs.join(' ')], ['EXTLDFLAGS', $extflags] + ].map {|n, v| + "#{n}=#{v}" if v and !(v = v.strip).empty? 
+ }.compact + puts(*conf) + $stdout.flush + $mflags.concat(conf) + $makeflags.concat(conf) +else + FileUtils.rm_f(extinit.to_a) +end +rubies = [] +%w[RUBY RUBYW STATIC_RUBY].each {|n| + r = n + if r = arg_config("--"+r.downcase) || config_string(r+"_INSTALL_NAME") + rubies << Config.expand(r+=EXEEXT) + $mflags << "#{n}=#{r}" + end +} + +Dir.chdir ".." +unless $destdir.to_s.empty? + $mflags.defined?("DESTDIR") or $mflags << "DESTDIR=#{$destdir}" +end +message = "making #{rubies.join(', ')}" +$mflags.concat(rubies) +$makeflags.uniq! +$makeflags.concat(rubies) + +if $nmake == ?b + unless (vars = $mflags.grep(/\A\w+=/n)).empty? + open(mkf = "libruby.mk", "wb") do |tmf| + tmf.puts("!include Makefile") + tmf.puts + tmf.puts(*vars.map {|v| v.sub(/=/, " = ")}) + tmf.puts("PRE_LIBRUBY_UPDATE = del #{mkf}") + end + $mflags.unshift("-f#{mkf}") + vars.each {|flag| flag.sub!(/\A/, "-D")} + end +end +$mflags.unshift("topdir=#$topdir") +ENV.delete("RUBYOPT") +if $command_output + message = "echo #{message}" + cmd = [$make, *sysquote($makeflags)].join(' ') + open($command_output, 'wb') do |f| + case $command_output + when /\.sh\z/ + f.puts message, "rm -f $0; exec #{cmd}" + when /\.bat\z/ + ["@echo off", message, cmd, "del %0 & exit %ERRORLEVEL%"].each do |s| + f.print s, "\r\n" + end + else + f.puts cmd + end + f.chmod(0755) + end +else + puts message + $stdout.flush + system($make, *sysquote($mflags)) or exit($?.exitstatus) +end + +#Local variables: +# mode: ruby +#end: diff --git a/ext/fcntl/depend b/ext/fcntl/depend new file mode 100644 index 0000000..10eab64 --- /dev/null +++ b/ext/fcntl/depend @@ -0,0 +1 @@ +fcntl.o: fcntl.c $(hdrdir)/ruby.h $(topdir)/config.h $(hdrdir)/defines.h diff --git a/ext/fcntl/extconf.rb b/ext/fcntl/extconf.rb new file mode 100644 index 0000000..8b717d4 --- /dev/null +++ b/ext/fcntl/extconf.rb @@ -0,0 +1,2 @@ +require 'mkmf' +create_makefile('fcntl') diff --git a/ext/fcntl/fcntl.c b/ext/fcntl/fcntl.c new file mode 100644 index 0000000..b032620 --- 
/dev/null +++ b/ext/fcntl/fcntl.c @@ -0,0 +1,187 @@ +/************************************************ + + fcntl.c - + + $Author: shyouhei $ + created at: Mon Apr 7 18:53:05 JST 1997 + + Copyright (C) 1997-2001 Yukihiro Matsumoto + +************************************************/ + +/************************************************ += NAME + +fcntl - load the C fcntl.h defines + += SYNOPSIS + + require "fcntl" + m = s.fcntl(Fcntl::F_GETFL, 0) + f.fcntl(Fcntl::F_SETFL, Fcntl::O_NONBLOCK|m) + += DESCRIPTION + +This module is just a translation of the C file. + += NOTE + +Only #define symbols get translated; you must still correctly +pack up your own arguments to pass as args for locking functions, etc. + +************************************************/ + +#include "ruby.h" +#include + +/* Fcntl loads the constants defined in the system's C header + * file, and used with both the fcntl(2) and open(2) POSIX system calls. + * + * Copyright (C) 1997-2001 Yukihiro Matsumoto + * + * Documented by mathew + * + * = Usage + * + * To perform a fcntl(2) operation, use IO::fcntl in the core classes. + * + * To perform an open(2) operation, use IO::sysopen. + * + * The set of operations and constants available depends upon specific OS + * platform. Some values listed below may not be supported on your system. + * + * The constants supported by Ruby for use with IO::fcntl are: + * + * - F_DUPFD - duplicate a close-on-exec file handle to a non-close-on-exec + * file handle. + * + * - F_GETFD - read the close-on-exec flag of a file handle. + * + * - F_SETFD - set the close-on-exec flag of a file handle. + * + * - FD_CLOEXEC - the value of the close-on-exec flag. + * + * - F_GETFL - get file descriptor flags. + * + * - F_SETFL - set file descriptor flags. + * + * - O_APPEND, O_NONBLOCK, etc (see below) - file descriptor flag + * values for the above. + * + * - F_GETLK - determine whether a given region of a file is locked. + * + * - F_SETLK - acquire a lock on a region of a file. 
+ * + * - F_SETLKW - acquire a lock on a region of a file, waiting if necessary. + * + * - F_RDLCK, F_WRLCK, F_UNLCK - types of lock for the above. + * + * The constants supported by Ruby for use with IO::sysopen are: + * + * - O_APPEND - open file in append mode. + * + * - O_NOCTTY - open tty without it becoming controlling tty. + * + * - O_CREAT - create file if it doesn't exist. + * + * - O_EXCL - used with O_CREAT, fail if file exists. + * + * - O_TRUNC - truncate file on open. + * + * - O_NONBLOCK / O_NDELAY - open in non-blocking mode. + * + * - O_RDONLY - open read-only. + * + * - O_WRONLY - open write-only. + * + * - O_RDWR - open read-write. + * + * - O_ACCMODE - mask to extract read/write flags. + * + * Example: + * + * require 'fcntl' + * + * fd = IO::sysopen('/tmp/tempfile', + * Fcntl::O_WRONLY | Fcntl::O_EXCL | Fcntl::O_CREAT) + * f = IO.open(fd) + * f.syswrite("TEMP DATA") + * f.close + * + */ +void +Init_fcntl() +{ + VALUE mFcntl = rb_define_module("Fcntl"); +#ifdef F_DUPFD + rb_define_const(mFcntl, "F_DUPFD", INT2NUM(F_DUPFD)); +#endif +#ifdef F_GETFD + rb_define_const(mFcntl, "F_GETFD", INT2NUM(F_GETFD)); +#endif +#ifdef F_GETLK + rb_define_const(mFcntl, "F_GETLK", INT2NUM(F_GETLK)); +#endif +#ifdef F_SETFD + rb_define_const(mFcntl, "F_SETFD", INT2NUM(F_SETFD)); +#endif +#ifdef F_GETFL + rb_define_const(mFcntl, "F_GETFL", INT2NUM(F_GETFL)); +#endif +#ifdef F_SETFL + rb_define_const(mFcntl, "F_SETFL", INT2NUM(F_SETFL)); +#endif +#ifdef F_SETLK + rb_define_const(mFcntl, "F_SETLK", INT2NUM(F_SETLK)); +#endif +#ifdef F_SETLKW + rb_define_const(mFcntl, "F_SETLKW", INT2NUM(F_SETLKW)); +#endif +#ifdef FD_CLOEXEC + rb_define_const(mFcntl, "FD_CLOEXEC", INT2NUM(FD_CLOEXEC)); +#endif +#ifdef F_RDLCK + rb_define_const(mFcntl, "F_RDLCK", INT2NUM(F_RDLCK)); +#endif +#ifdef F_UNLCK + rb_define_const(mFcntl, "F_UNLCK", INT2NUM(F_UNLCK)); +#endif +#ifdef F_WRLCK + rb_define_const(mFcntl, "F_WRLCK", INT2NUM(F_WRLCK)); +#endif +#ifdef O_CREAT + 
rb_define_const(mFcntl, "O_CREAT", INT2NUM(O_CREAT)); +#endif +#ifdef O_EXCL + rb_define_const(mFcntl, "O_EXCL", INT2NUM(O_EXCL)); +#endif +#ifdef O_NOCTTY + rb_define_const(mFcntl, "O_NOCTTY", INT2NUM(O_NOCTTY)); +#endif +#ifdef O_TRUNC + rb_define_const(mFcntl, "O_TRUNC", INT2NUM(O_TRUNC)); +#endif +#ifdef O_APPEND + rb_define_const(mFcntl, "O_APPEND", INT2NUM(O_APPEND)); +#endif +#ifdef O_NONBLOCK + rb_define_const(mFcntl, "O_NONBLOCK", INT2NUM(O_NONBLOCK)); +#endif +#ifdef O_NDELAY + rb_define_const(mFcntl, "O_NDELAY", INT2NUM(O_NDELAY)); +#endif +#ifdef O_RDONLY + rb_define_const(mFcntl, "O_RDONLY", INT2NUM(O_RDONLY)); +#endif +#ifdef O_RDWR + rb_define_const(mFcntl, "O_RDWR", INT2NUM(O_RDWR)); +#endif +#ifdef O_WRONLY + rb_define_const(mFcntl, "O_WRONLY", INT2NUM(O_WRONLY)); +#endif +#ifdef O_ACCMODE + rb_define_const(mFcntl, "O_ACCMODE", INT2FIX(O_ACCMODE)); +#else + rb_define_const(mFcntl, "O_ACCMODE", INT2FIX(O_RDONLY | O_WRONLY | O_RDWR)); +#endif +} diff --git a/ext/fiber/extconf.rb b/ext/fiber/extconf.rb new file mode 100644 index 0000000..904ab94 --- /dev/null +++ b/ext/fiber/extconf.rb @@ -0,0 +1,3 @@ +require 'mkmf' +create_makefile('fiber') + diff --git a/ext/fiber/fiber.c b/ext/fiber/fiber.c new file mode 100644 index 0000000..12fcaad --- /dev/null +++ b/ext/fiber/fiber.c @@ -0,0 +1,8 @@ + +void ruby_Init_Fiber_as_Coroutine(void); + +void +Init_fiber(void) +{ + ruby_Init_Fiber_as_Coroutine(); +} diff --git a/ext/gdbm/README b/ext/gdbm/README new file mode 100644 index 0000000..df7a261 --- /dev/null +++ b/ext/gdbm/README @@ -0,0 +1 @@ +gdbm ext-library for Ruby 1.3 or later diff --git a/ext/gdbm/depend b/ext/gdbm/depend new file mode 100644 index 0000000..c080a81 --- /dev/null +++ b/ext/gdbm/depend @@ -0,0 +1 @@ +gdbm.o: gdbm.c $(hdrdir)/ruby.h $(topdir)/config.h $(hdrdir)/defines.h diff --git a/ext/gdbm/extconf.rb b/ext/gdbm/extconf.rb new file mode 100644 index 0000000..5a09492 --- /dev/null +++ b/ext/gdbm/extconf.rb @@ -0,0 +1,7 @@ +require 
'mkmf' + +dir_config("gdbm") +if have_library("gdbm", "gdbm_open") and + have_header("gdbm.h") + create_makefile("gdbm") +end diff --git a/ext/gdbm/gdbm.c b/ext/gdbm/gdbm.c new file mode 100644 index 0000000..2bf389a --- /dev/null +++ b/ext/gdbm/gdbm.c @@ -0,0 +1,1255 @@ +/************************************************ + + gdbm.c - + + $Author: yugui $ + modified at: Mon Jan 24 15:59:52 JST 1994 + + Documentation by Peter Adolphs < futzilogik at users dot sourceforge dot net > + +************************************************/ + +#include "ruby.h" + +#include +#include +#include + +/* + * Document-class: GDBM + * + * == Summary + * + * Ruby extension for GNU dbm (gdbm) -- a simple database engine for storing + * key-value pairs on disk. + * + * == Description + * + * GNU dbm is a library for simple databases. A database is a file that stores + * key-value pairs. Gdbm allows the user to store, retrieve, and delete data by + * key. It furthermore allows a non-sorted traversal of all key-value pairs. + * A gdbm database thus provides the same functionality as a hash. As + * with objects of the Hash class, elements can be accessed with []. + * Furthermore, GDBM mixes in the Enumerable module, thus providing convenient + * methods such as #find, #collect, #map, etc. + * + * A process is allowed to open several different databases at the same time. + * A process can open a database as a "reader" or a "writer". Whereas a reader + * has only read-access to the database, a writer has read- and write-access. + * A database can be accessed either by any number of readers or by exactly one + * writer at the same time. + * + * == Examples + * + * 1. Opening/creating a database, and filling it with some entries: + * + * require 'gdbm' + * + * gdbm = GDBM.new("fruitstore.db") + * gdbm["ananas"] = "3" + * gdbm["banana"] = "8" + * gdbm["cranberry"] = "4909" + * gdbm.close + * + * 2. 
Reading out a database: + * + * require 'gdbm' + * + * gdbm = GDBM.new("fruitstore.db") + * gdbm.each_pair do |key, value| + * print "#{key}: #{value}\n" + * end + * gdbm.close + * + * produces + * + * banana: 8 + * ananas: 3 + * cranberry: 4909 + * + * == Links + * + * * http://www.gnu.org/software/gdbm/ + */ +static VALUE rb_cGDBM, rb_eGDBMError, rb_eGDBMFatalError; + +#define RUBY_GDBM_RW_BIT 0x20000000 + +#define MY_BLOCK_SIZE (2048) +#define MY_FATAL_FUNC rb_gdbm_fatal +static void +rb_gdbm_fatal(char *msg) +{ + rb_raise(rb_eGDBMFatalError, "%s", msg); +} + +struct dbmdata { + int di_size; + GDBM_FILE di_dbm; +}; + +static void +closed_dbm(void) +{ + rb_raise(rb_eRuntimeError, "closed GDBM file"); +} + +#define GetDBM(obj, dbmp) do {\ + Data_Get_Struct(obj, struct dbmdata, dbmp);\ + if (dbmp == 0) closed_dbm();\ + if (dbmp->di_dbm == 0) closed_dbm();\ +} while (0) + +#define GetDBM2(obj, data, dbm) {\ + GetDBM(obj, data);\ + (dbm) = dbmp->di_dbm;\ +} + +static void +free_dbm(struct dbmdata *dbmp) +{ + if (dbmp) { + if (dbmp->di_dbm) gdbm_close(dbmp->di_dbm); + xfree(dbmp); + } +} + +/* + * call-seq: + * gdbm.close -> nil + * + * Closes the associated database file. + */ +static VALUE +fgdbm_close(VALUE obj) +{ + struct dbmdata *dbmp; + + GetDBM(obj, dbmp); + gdbm_close(dbmp->di_dbm); + dbmp->di_dbm = 0; + + return Qnil; +} + +/* + * call-seq: + * gdbm.closed? -> true or false + * + * Returns true if the associated database file has been closed. + */ +static VALUE +fgdbm_closed(VALUE obj) +{ + struct dbmdata *dbmp; + + Data_Get_Struct(obj, struct dbmdata, dbmp); + if (dbmp == 0) + return Qtrue; + if (dbmp->di_dbm == 0) + return Qtrue; + + return Qfalse; +} + +static VALUE +fgdbm_s_alloc(VALUE klass) +{ + return Data_Wrap_Struct(klass, 0, free_dbm, 0); +} + +/* + * call-seq: + * GDBM.new(filename, mode = 0666, flags = nil) + * + * Creates a new GDBM instance by opening a gdbm file named _filename_. 
+ * If the file does not exist, a new file with file mode _mode_ will be
+ * created. _flags_ may be one of the following:
+ * * *READER*  - open as a reader
+ * * *WRITER*  - open as a writer
+ * * *WRCREAT* - open as a writer; if the database does not exist, create a new one
+ * * *NEWDB*   - open as a writer; overwrite any existing databases
+ *
+ * The values *WRITER*, *WRCREAT* and *NEWDB* may be combined with the following
+ * values by bitwise or:
+ * * *SYNC*    - cause all database operations to be synchronized to the disk
+ * * *NOLOCK*  - do not lock the database file
+ *
+ * If no _flags_ are specified, the GDBM object will try to open the database
+ * file as a writer and will create it if it does not already exist
+ * (cf. flag WRCREAT). If this fails (for instance, if another process
+ * has already opened the database as a reader), it will try to open the
+ * database file as a reader (cf. flag READER).
+ *
+ * Passing nil explicitly as _mode_ skips the creating attempt and makes the
+ * call return nil instead of raising when the database cannot be opened.
+ */
+static VALUE
+fgdbm_initialize(int argc, VALUE *argv, VALUE obj)
+{
+    VALUE file, vmode, vflags;
+    GDBM_FILE dbm;
+    struct dbmdata *dbmp;
+    int mode, flags = 0;
+
+    /* Only the file name was given: use the default creation mode. */
+    if (rb_scan_args(argc, argv, "12", &file, &vmode, &vflags) == 1) {
+        mode = 0666;            /* default value */
+    }
+    else if (NIL_P(vmode)) {
+        mode = -1;              /* return nil if DB does not exist */
+    }
+    else {
+        mode = NUM2INT(vmode);
+    }
+
+    if (!NIL_P(vflags))
+        flags = NUM2INT(vflags);
+
+    SafeStringValue(file);
+
+    if (flags & RUBY_GDBM_RW_BIT) {
+        /* An explicit open mode was requested: one attempt, marker bit removed. */
+        flags &= ~RUBY_GDBM_RW_BIT;
+        dbm = gdbm_open(RSTRING_PTR(file), MY_BLOCK_SIZE,
+                        flags, mode, MY_FATAL_FUNC);
+    }
+    else {
+        /*
+         * No explicit open mode: try WRCREAT (skipped when mode is -1),
+         * then WRITER, then READER, stopping at the first success.
+         */
+        dbm = 0;
+        if (mode >= 0)
+            dbm = gdbm_open(RSTRING_PTR(file), MY_BLOCK_SIZE,
+                            GDBM_WRCREAT|flags, mode, MY_FATAL_FUNC);
+        if (!dbm)
+            dbm = gdbm_open(RSTRING_PTR(file), MY_BLOCK_SIZE,
+                            GDBM_WRITER|flags, 0, MY_FATAL_FUNC);
+        if (!dbm)
+            dbm = gdbm_open(RSTRING_PTR(file), MY_BLOCK_SIZE,
+                            GDBM_READER|flags, 0, MY_FATAL_FUNC);
+    }
+
+    if (!dbm) {
+        if (mode == -1) return Qnil;
+
+        /* These three gdbm errors are reported through rb_sys_fail; all others as GDBMError. */
+        if (gdbm_errno == GDBM_FILE_OPEN_ERROR ||
+            gdbm_errno == GDBM_CANT_BE_READER ||
+            gdbm_errno == GDBM_CANT_BE_WRITER)
+            rb_sys_fail(RSTRING_PTR(file));
+        else
+            rb_raise(rb_eGDBMError, "%s", gdbm_strerror(gdbm_errno));
+    }
+
+    dbmp = ALLOC(struct dbmdata);
+    free_dbm(DATA_PTR(obj));    /* releases state left by an earlier initialize, if any */
+    DATA_PTR(obj) = dbmp;
+    dbmp->di_dbm = dbm;
+    dbmp->di_size = -1;         /* entry count not known yet */
+
+    return obj;
+}
+
+/*
+ * call-seq:
+ *     GDBM.open(filename, mode = 0666, flags = nil)
+ *     GDBM.open(filename, mode = 0666, flags = nil) { |gdbm| ... }
+ *
+ * If called without a block, this is synonymous to GDBM::new.
+ * If a block is given, the new GDBM instance will be passed to the block
+ * as a parameter, and the corresponding database file will be closed
+ * after the execution of the block code has been finished.
+ *
+ * Example for an open call with a block:
+ *
+ *     require 'gdbm'
+ *     GDBM.open("fruitstore.db") do |gdbm|
+ *       gdbm.each_pair do |key, value|
+ *         print "#{key}: #{value}\n"
+ *       end
+ *     end
+ */
+static VALUE
+fgdbm_s_open(int argc, VALUE *argv, VALUE klass)
+{
+    VALUE obj = Data_Wrap_Struct(klass, 0, free_dbm, 0);
+
+    /* fgdbm_initialize returns nil only in the "mode == nil, open failed" case. */
+    if (NIL_P(fgdbm_initialize(argc, argv, obj))) {
+        return Qnil;
+    }
+
+    if (rb_block_given_p()) {
+        /* rb_ensure runs fgdbm_close whether or not the block raises. */
+        return rb_ensure(rb_yield, obj, fgdbm_close, obj);
+    }
+
+    return obj;
+}
+
+/*
+ * Looks up key and returns its value as a new tainted String, or nil when
+ * gdbm_fetch reports no data.  The buffer handed back by gdbm_fetch is
+ * released with free() once it has been copied.
+ */
+static VALUE
+rb_gdbm_fetch(GDBM_FILE dbm, datum key)
+{
+    datum val;
+    VALUE str;
+
+    val = gdbm_fetch(dbm, key);
+    if (val.dptr == 0)
+        return Qnil;
+
+    str = rb_str_new(val.dptr, val.dsize);
+    free(val.dptr);
+    OBJ_TAINT(str);
+    return (VALUE)str;
+}
+
+/* Same as rb_gdbm_fetch, with the key given as a Ruby value coerced to String. */
+static VALUE
+rb_gdbm_fetch2(GDBM_FILE dbm, VALUE keystr)
+{
+    datum key;
+
+    StringValue(keystr);
+    key.dptr = RSTRING_PTR(keystr);
+    key.dsize = RSTRING_LEN(keystr);
+
+    return rb_gdbm_fetch(dbm, key);
+}
+
+/* Same as rb_gdbm_fetch2, starting from the GDBM object; raises if it is closed. */
+static VALUE
+rb_gdbm_fetch3(VALUE obj, VALUE keystr)
+{
+    struct dbmdata *dbmp;
+    GDBM_FILE dbm;
+
+    GetDBM2(obj, dbmp, dbm);
+    return rb_gdbm_fetch2(dbm, keystr);
+}
+
+/*
+ * Returns the first key reported by gdbm_firstkey as a new tainted String,
+ * or nil when there is none.
+ */
+static VALUE
+rb_gdbm_firstkey(GDBM_FILE dbm)
+{
+    datum key;
+    VALUE str;
+
+    key = gdbm_firstkey(dbm);
+    if (key.dptr == 0)
+        return Qnil;
+
+    str = rb_str_new(key.dptr, key.dsize);
+    free(key.dptr);
+    OBJ_TAINT(str);
+    return str;
+}
+
+/*
+ * Returns the key that gdbm_nextkey reports after keystr as a new tainted
+ * String, or nil at the end of the sequence.  keystr is used without a
+ * StringValue check; the callers in this file pass Strings obtained from
+ * rb_gdbm_firstkey or from this function.
+ */
+static VALUE
+rb_gdbm_nextkey(GDBM_FILE dbm, VALUE keystr)
+{
+    datum key, key2;
+    VALUE str;
+
+    key.dptr = RSTRING_PTR(keystr);
+    key.dsize = RSTRING_LEN(keystr);
+    key2 = gdbm_nextkey(dbm, key);
+    if (key2.dptr == 0)
+        return Qnil;
+
+    str = rb_str_new(key2.dptr, key2.dsize);
+    free(key2.dptr);
+    OBJ_TAINT(str);
+    return str;
+}
+
+/*
+ * Shared lookup used by fetch and delete.  Returns the stored value; when the
+ * key is missing it yields the key to the block if ifnone is nil and a block
+ * was given, and returns ifnone otherwise.
+ */
+static VALUE
+fgdbm_fetch(VALUE obj, VALUE keystr, VALUE ifnone)
+{
+    VALUE valstr;
+
+    valstr = rb_gdbm_fetch3(obj, keystr);
+    if (NIL_P(valstr)) {
+        if (ifnone == Qnil && rb_block_given_p())
+            return rb_yield(keystr);
+        return ifnone;
+    }
+    return valstr;
+}
+
+/*
+ * call-seq:
+ *     gdbm[key] -> value
+ *
+ * Retrieves the _value_ corresponding to _key_.
+ */
+static VALUE
+fgdbm_aref(VALUE obj, VALUE keystr)
+{
+    return rb_gdbm_fetch3(obj, keystr);
+}
+
+/*
+ * call-seq:
+ *     gdbm.fetch(key [, default]) -> value
+ *
+ * Retrieves the _value_ corresponding to _key_. If there is no value
+ * associated with _key_, _default_ will be returned instead.
+ *
+ * When neither a _default_ nor a block is given and the lookup yields nil,
+ * IndexError is raised.
+ */
+static VALUE
+fgdbm_fetch_m(int argc, VALUE *argv, VALUE obj)
+{
+    VALUE keystr, valstr, ifnone;
+
+    rb_scan_args(argc, argv, "11", &keystr, &ifnone);
+    valstr = fgdbm_fetch(obj, keystr, ifnone);
+    if (argc == 1 && !rb_block_given_p() && NIL_P(valstr))
+        rb_raise(rb_eIndexError, "key not found");
+
+    return valstr;
+}
+
+/*
+ * call-seq:
+ *     gdbm.key(value) -> key
+ *
+ * Returns the _key_ for a given _value_. If several keys may map to the
+ * same value, the key that is found first will be returned.
+ */ +static VALUE +fgdbm_key(VALUE obj, VALUE valstr) +{ + struct dbmdata *dbmp; + GDBM_FILE dbm; + VALUE keystr, valstr2; + + StringValue(valstr); + GetDBM2(obj, dbmp, dbm); + for (keystr = rb_gdbm_firstkey(dbm); RTEST(keystr); + keystr = rb_gdbm_nextkey(dbm, keystr)) { + + valstr2 = rb_gdbm_fetch2(dbm, keystr); + if (!NIL_P(valstr2) && + RSTRING_LEN(valstr) == RSTRING_LEN(valstr2) && + memcmp(RSTRING_PTR(valstr), RSTRING_PTR(valstr2), + RSTRING_LEN(valstr)) == 0) { + return keystr; + } + } + return Qnil; +} + +/* :nodoc: */ +static VALUE +fgdbm_index(VALUE obj, VALUE value) +{ + rb_warn("GDBM#index is deprecated; use GDBM#key"); + return fgdbm_key(obj, value); +} + +/* + * call-seq: + * gdbm.select { |key, value| block } -> array + * + * Returns a new array of all key-value pairs of the database for which _block_ + * evaluates to true. + */ +static VALUE +fgdbm_select(VALUE obj) +{ + VALUE new = rb_ary_new(); + GDBM_FILE dbm; + struct dbmdata *dbmp; + VALUE keystr; + + GetDBM2(obj, dbmp, dbm); + for (keystr = rb_gdbm_firstkey(dbm); RTEST(keystr); + keystr = rb_gdbm_nextkey(dbm, keystr)) { + VALUE assoc = rb_assoc_new(keystr, rb_gdbm_fetch2(dbm, keystr)); + VALUE v = rb_yield(assoc); + + if (RTEST(v)) { + rb_ary_push(new, assoc); + } + GetDBM2(obj, dbmp, dbm); + } + + return new; +} + +/* + * call-seq: + * gdbm.values_at(key, ...) -> array + * + * Returns an array of the values associated with each specified _key_. + */ +static VALUE +fgdbm_values_at(int argc, VALUE *argv, VALUE obj) +{ + VALUE new = rb_ary_new2(argc); + int i; + + for (i=0; idi_size = -1; + rb_raise(rb_eGDBMError, "%s", gdbm_strerror(gdbm_errno)); + } + else if (dbmp->di_size >= 0) { + dbmp->di_size--; + } + return obj; +} + +/* + * call-seq: + * gdbm.delete(key) -> value or nil + * + * Removes the key-value-pair with the specified _key_ from this database and + * returns the corresponding _value_. Returns nil if the database is empty. 
+ */
+static VALUE
+fgdbm_delete(VALUE obj, VALUE keystr)
+{
+    VALUE valstr;
+
+    /*
+     * Fetch before deleting so the old value can be returned.  With a nil
+     * default, fgdbm_fetch yields the key to a given block when it is missing.
+     */
+    valstr = fgdbm_fetch(obj, keystr, Qnil);
+    rb_gdbm_delete(obj, keystr);
+    return valstr;
+}
+
+/*
+ * call-seq:
+ *     gdbm.shift -> (key, value) or nil
+ *
+ * Removes a key-value-pair from this database and returns it as a
+ * two-item array [ _key_, _value_ ]. Returns nil if the database is empty.
+ */
+static VALUE
+fgdbm_shift(VALUE obj)
+{
+    struct dbmdata *dbmp;
+    GDBM_FILE dbm;
+    VALUE keystr, valstr;
+
+    rb_gdbm_modify(obj);
+    GetDBM2(obj, dbmp, dbm);
+    /* The pair removed is whichever key gdbm_firstkey reports first. */
+    keystr = rb_gdbm_firstkey(dbm);
+    if (NIL_P(keystr)) return Qnil;
+    valstr = rb_gdbm_fetch2(dbm, keystr);
+    rb_gdbm_delete(obj, keystr);
+
+    return rb_assoc_new(keystr, valstr);
+}
+
+/*
+ * call-seq:
+ *     gdbm.delete_if { |key, value| block } -> gdbm
+ *     gdbm.reject! { |key, value| block } -> gdbm
+ *
+ * Deletes every key-value pair from _gdbm_ for which _block_ evaluates to true.
+ *
+ * Keys are collected while iterating and deleted afterwards.  If the block
+ * raises, the keys selected so far are still deleted before the exception
+ * is re-raised.
+ */
+static VALUE
+fgdbm_delete_if(VALUE obj)
+{
+    struct dbmdata *dbmp;
+    GDBM_FILE dbm;
+    VALUE keystr, valstr;
+    VALUE ret, ary = rb_ary_new();
+    int i, status = 0, n;
+
+    rb_gdbm_modify(obj);
+    GetDBM2(obj, dbmp, dbm);
+    n = dbmp->di_size;          /* remember the cached count ... */
+    dbmp->di_size = -1;         /* ... and mark it unknown while entries are removed */
+
+    for (keystr = rb_gdbm_firstkey(dbm); RTEST(keystr);
+         keystr = rb_gdbm_nextkey(dbm, keystr)) {
+
+        valstr = rb_gdbm_fetch2(dbm, keystr);
+        /* rb_protect captures a non-local exit in status instead of unwinding here. */
+        ret = rb_protect(rb_yield, rb_assoc_new(keystr, valstr), &status);
+        if (status != 0) break;
+        if (RTEST(ret)) rb_ary_push(ary, keystr);
+        /* Re-validate the handle: raises if it is no longer open after the block ran. */
+        GetDBM2(obj, dbmp, dbm);
+    }
+
+    for (i = 0; i < RARRAY_LEN(ary); i++)
+        rb_gdbm_delete(obj, RARRAY_PTR(ary)[i]);
+    if (status) rb_jump_tag(status);    /* resume the exit captured above */
+    /* Restore the cache only when a positive count was known beforehand. */
+    if (n > 0) dbmp->di_size = n - RARRAY_LEN(ary);
+
+    return obj;
+}
+
+/*
+ * call-seq:
+ *     gdbm.clear -> gdbm
+ *
+ * Removes all the key-value pairs within _gdbm_.
+ */
+static VALUE
+fgdbm_clear(VALUE obj)
+{
+    datum key, nextkey;
+    struct dbmdata *dbmp;
+    GDBM_FILE dbm;
+
+    rb_gdbm_modify(obj);
+    GetDBM2(obj, dbmp, dbm);
+    dbmp->di_size = -1;         /* count is unknown until the loop below completes */
+
+    /* The #if 0 branch is a disabled alternative that always deletes the first key. */
+#if 0
+    while (key = gdbm_firstkey(dbm), key.dptr) {
+        if (gdbm_delete(dbm, key)) {
+            free(key.dptr);
+            rb_raise(rb_eGDBMError, "%s", gdbm_strerror(gdbm_errno));
+        }
+        free(key.dptr);
+    }
+#else
+    /*
+     * Active variant: the inner loop walks the key sequence, reading the
+     * successor before deleting the current key; the outer loop starts over
+     * from gdbm_firstkey until it reports no key at all.
+     * NOTE(review): presumably the restart guards against keys skipped when
+     * deletion disturbs the iteration order -- confirm against the gdbm manual.
+     */
+    while (key = gdbm_firstkey(dbm), key.dptr) {
+        for (; key.dptr; key = nextkey) {
+            nextkey = gdbm_nextkey(dbm, key);
+            if (gdbm_delete(dbm, key)) {
+                /* Release both key buffers before raising. */
+                free(key.dptr);
+                if (nextkey.dptr) free(nextkey.dptr);
+                rb_raise(rb_eGDBMError, "%s", gdbm_strerror(gdbm_errno));
+            }
+            free(key.dptr);
+        }
+    }
+#endif
+    dbmp->di_size = 0;
+
+    return obj;
+}
+
+/*
+ * call-seq:
+ *     gdbm.invert  -> hash
+ *
+ * Returns a hash created by using _gdbm_'s values as keys, and the keys
+ * as values.
+ */
+static VALUE
+fgdbm_invert(VALUE obj)
+{
+    struct dbmdata *dbmp;
+    GDBM_FILE dbm;
+    VALUE keystr, valstr;
+    VALUE hash = rb_hash_new();
+
+    GetDBM2(obj, dbmp, dbm);
+    for (keystr = rb_gdbm_firstkey(dbm); RTEST(keystr);
+         keystr = rb_gdbm_nextkey(dbm, keystr)) {
+        valstr = rb_gdbm_fetch2(dbm, keystr);
+
+        /* When several keys share a value, the key stored last replaces earlier ones. */
+        rb_hash_aset(hash, valstr, keystr);
+    }
+    return hash;
+}
+
+/*
+ * call-seq:
+ *     gdbm[key]= value -> value
+ *     gdbm.store(key, value) -> value
+ *
+ * Associates the value _value_ with the specified _key_.
+ */ +static VALUE +fgdbm_store(VALUE obj, VALUE keystr, VALUE valstr) +{ + datum key, val; + struct dbmdata *dbmp; + GDBM_FILE dbm; + + rb_gdbm_modify(obj); + StringValue(keystr); + StringValue(valstr); + + key.dptr = RSTRING_PTR(keystr); + key.dsize = RSTRING_LEN(keystr); + + val.dptr = RSTRING_PTR(valstr); + val.dsize = RSTRING_LEN(valstr); + + GetDBM2(obj, dbmp, dbm); + dbmp->di_size = -1; + if (gdbm_store(dbm, key, val, GDBM_REPLACE)) { + if (errno == EPERM) rb_sys_fail(0); + rb_raise(rb_eGDBMError, "%s", gdbm_strerror(gdbm_errno)); + } + + return valstr; +} + +static VALUE +update_i(VALUE pair, VALUE dbm) +{ + Check_Type(pair, T_ARRAY); + if (RARRAY_LEN(pair) < 2) { + rb_raise(rb_eArgError, "pair must be [key, value]"); + } + fgdbm_store(dbm, RARRAY_PTR(pair)[0], RARRAY_PTR(pair)[1]); + return Qnil; +} + +/* + * call-seq: + * gdbm.update(other) -> gdbm + * + * Adds the key-value pairs of _other_ to _gdbm_, overwriting entries with + * duplicate keys with those from _other_. _other_ must have an each_pair + * method. + */ +static VALUE +fgdbm_update(VALUE obj, VALUE other) +{ + rb_block_call(other, rb_intern("each_pair"), 0, 0, update_i, obj); + return obj; +} + +/* + * call-seq: + * gdbm.replace(other) -> gdbm + * + * Replaces the content of _gdbm_ with the key-value pairs of _other_. + * _other_ must have an each_pair method. + */ +static VALUE +fgdbm_replace(VALUE obj, VALUE other) +{ + fgdbm_clear(obj); + rb_block_call(other, rb_intern("each_pair"), 0, 0, update_i, obj); + return obj; +} + +/* + * call-seq: + * gdbm.length -> fixnum + * gdbm.size -> fixnum + * + * Returns the number of key-value pairs in this database. 
+ */
+static VALUE
+fgdbm_length(VALUE obj)
+{
+    datum key, nextkey;
+    struct dbmdata *dbmp;
+    GDBM_FILE dbm;
+    int i = 0;
+
+    GetDBM2(obj, dbmp, dbm);
+    /*
+     * Use the cached count when one is available.
+     * NOTE(review): the test is "> 0", so a cached count of 0 is recounted on
+     * every call, whereas fgdbm_empty_p trusts a cached 0 -- confirm whether
+     * ">= 0" was intended.
+     */
+    if (dbmp->di_size > 0) return INT2FIX(dbmp->di_size);
+
+    /* Count by walking all keys; each key buffer is freed after its successor is read. */
+    for (key = gdbm_firstkey(dbm); key.dptr; key = nextkey) {
+        nextkey = gdbm_nextkey(dbm, key);
+        free(key.dptr);
+        i++;
+    }
+    dbmp->di_size = i;
+
+    return INT2FIX(i);
+}
+
+/*
+ * call-seq:
+ *     gdbm.empty? -> true or false
+ *
+ * Returns true if the database is empty.
+ */
+static VALUE
+fgdbm_empty_p(VALUE obj)
+{
+    datum key;
+    struct dbmdata *dbmp;
+    GDBM_FILE dbm;
+
+    GetDBM(obj, dbmp);
+    if (dbmp->di_size < 0) {
+        /* No cached count: the database is empty exactly when there is no first key. */
+        dbm = dbmp->di_dbm;
+
+        key = gdbm_firstkey(dbm);
+        if (key.dptr) {
+            free(key.dptr);
+            return Qfalse;
+        }
+        return Qtrue;
+    }
+
+    if (dbmp->di_size == 0) return Qtrue;
+    return Qfalse;
+}
+
+/*
+ * call-seq:
+ *     gdbm.each_value { |value| block } -> gdbm
+ *
+ * Executes _block_ for each key in the database, passing the corresponding
+ * _value_ as a parameter.
+ */
+static VALUE
+fgdbm_each_value(VALUE obj)
+{
+    struct dbmdata *dbmp;
+    GDBM_FILE dbm;
+    VALUE keystr;
+
+    RETURN_ENUMERATOR(obj, 0, 0);
+
+    GetDBM2(obj, dbmp, dbm);
+    for (keystr = rb_gdbm_firstkey(dbm); RTEST(keystr);
+         keystr = rb_gdbm_nextkey(dbm, keystr)) {
+
+        rb_yield(rb_gdbm_fetch2(dbm, keystr));
+        /* Re-validate the handle: raises if it is no longer open after the block ran. */
+        GetDBM2(obj, dbmp, dbm);
+    }
+    return obj;
+}
+
+/*
+ * call-seq:
+ *     gdbm.each_key { |key| block } -> gdbm
+ *
+ * Executes _block_ for each key in the database, passing the
+ * _key_ as a parameter.
+ */
+static VALUE
+fgdbm_each_key(VALUE obj)
+{
+    struct dbmdata *dbmp;
+    GDBM_FILE dbm;
+    VALUE keystr;
+
+    RETURN_ENUMERATOR(obj, 0, 0);
+
+    GetDBM2(obj, dbmp, dbm);
+    for (keystr = rb_gdbm_firstkey(dbm); RTEST(keystr);
+         keystr = rb_gdbm_nextkey(dbm, keystr)) {
+
+        rb_yield(keystr);
+        /* Re-validate the handle: raises if it is no longer open after the block ran. */
+        GetDBM2(obj, dbmp, dbm);
+    }
+    return obj;
+}
+
+/*
+ * call-seq:
+ *     gdbm.each_pair { |key, value| block } -> gdbm
+ *
+ * Executes _block_ for each key in the database, passing the _key_ and the
+ * corresponding _value_ as a parameter.
+ */
+static VALUE
+fgdbm_each_pair(VALUE obj)
+{
+    GDBM_FILE dbm;
+    struct dbmdata *dbmp;
+    VALUE keystr;
+
+    RETURN_ENUMERATOR(obj, 0, 0);
+
+    GetDBM2(obj, dbmp, dbm);
+    for (keystr = rb_gdbm_firstkey(dbm); RTEST(keystr);
+         keystr = rb_gdbm_nextkey(dbm, keystr)) {
+
+        /* The pair is yielded as one two-element array. */
+        rb_yield(rb_assoc_new(keystr, rb_gdbm_fetch2(dbm, keystr)));
+        /* Re-validate the handle: raises if it is no longer open after the block ran. */
+        GetDBM2(obj, dbmp, dbm);
+    }
+
+    return obj;
+}
+
+/*
+ * call-seq:
+ *     gdbm.keys -> array
+ *
+ * Returns an array of all keys of this database.
+ */
+static VALUE
+fgdbm_keys(VALUE obj)
+{
+    struct dbmdata *dbmp;
+    GDBM_FILE dbm;
+    VALUE keystr, ary;
+
+    GetDBM2(obj, dbmp, dbm);
+    ary = rb_ary_new();
+    for (keystr = rb_gdbm_firstkey(dbm); RTEST(keystr);
+         keystr = rb_gdbm_nextkey(dbm, keystr)) {
+
+        rb_ary_push(ary, keystr);
+    }
+
+    return ary;
+}
+
+/*
+ * call-seq:
+ *     gdbm.values -> array
+ *
+ * Returns an array of all values of this database.
+ */
+static VALUE
+fgdbm_values(VALUE obj)
+{
+    datum key, nextkey;
+    struct dbmdata *dbmp;
+    GDBM_FILE dbm;
+    VALUE valstr, ary;
+
+    GetDBM2(obj, dbmp, dbm);
+    ary = rb_ary_new();
+    /* Works on raw datum keys, so each key buffer is freed here once it has been used. */
+    for (key = gdbm_firstkey(dbm); key.dptr; key = nextkey) {
+        nextkey = gdbm_nextkey(dbm, key);
+        valstr = rb_gdbm_fetch(dbm, key);
+        free(key.dptr);
+        rb_ary_push(ary, valstr);
+    }
+
+    return ary;
+}
+
+/*
+ * call-seq:
+ *     gdbm.has_key?(k) -> true or false
+ *     gdbm.key?(k) -> true or false
+ *
+ * Returns true if the given key _k_ exists within the database.
+ * Returns false otherwise.
+ */ +static VALUE +fgdbm_has_key(VALUE obj, VALUE keystr) +{ + datum key; + struct dbmdata *dbmp; + GDBM_FILE dbm; + + StringValue(keystr); + key.dptr = RSTRING_PTR(keystr); + key.dsize = RSTRING_LEN(keystr); + + GetDBM2(obj, dbmp, dbm); + if (gdbm_exists(dbm, key)) + return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * gdbm.has_value?(v) -> true or false + * gdbm.value?(v) -> true or false + * + * Returns true if the given value _v_ exists within the database. + * Returns false otherwise. + */ +static VALUE +fgdbm_has_value(VALUE obj, VALUE valstr) +{ + struct dbmdata *dbmp; + GDBM_FILE dbm; + VALUE keystr, valstr2; + + StringValue(valstr); + GetDBM2(obj, dbmp, dbm); + for (keystr = rb_gdbm_firstkey(dbm); RTEST(keystr); + keystr = rb_gdbm_nextkey(dbm, keystr)) { + + valstr2 = rb_gdbm_fetch2(dbm, keystr); + + if (!NIL_P(valstr2) && + RSTRING_LEN(valstr) == RSTRING_LEN(valstr2) && + memcmp(RSTRING_PTR(valstr), RSTRING_PTR(valstr2), + RSTRING_LEN(valstr)) == 0) { + return Qtrue; + } + } + return Qfalse; +} + +/* + * call-seq: + * gdbm.to_a -> array + * + * Returns an array of all key-value pairs contained in the database. + */ +static VALUE +fgdbm_to_a(VALUE obj) +{ + struct dbmdata *dbmp; + GDBM_FILE dbm; + VALUE keystr, ary; + + GetDBM2(obj, dbmp, dbm); + ary = rb_ary_new(); + for (keystr = rb_gdbm_firstkey(dbm); RTEST(keystr); + keystr = rb_gdbm_nextkey(dbm, keystr)) { + + rb_ary_push(ary, rb_assoc_new(keystr, rb_gdbm_fetch2(dbm, keystr))); + } + + return ary; +} + +/* + * call-seq: + * gdbm.reorganize -> gdbm + * + * Reorganizes the database file. This operation removes reserved space of + * elements that have already been deleted. It is only useful after a lot of + * deletions in the database. 
+ */ +static VALUE +fgdbm_reorganize(VALUE obj) +{ + struct dbmdata *dbmp; + GDBM_FILE dbm; + + rb_gdbm_modify(obj); + GetDBM2(obj, dbmp, dbm); + gdbm_reorganize(dbm); + return obj; +} + +/* + * call-seq: + * gdbm.sync -> gdbm + * + * Unless the _gdbm_ object has been opened with the *SYNC* flag, it is not + * guarenteed that database modification operations are immediately applied to + * the database file. This method ensures that all recent modifications + * to the database are written to the file. Blocks until all writing operations + * to the disk have been finished. + */ +static VALUE +fgdbm_sync(VALUE obj) +{ + struct dbmdata *dbmp; + GDBM_FILE dbm; + + rb_gdbm_modify(obj); + GetDBM2(obj, dbmp, dbm); + gdbm_sync(dbm); + return obj; +} + +/* + * call-seq: + * gdbm.cachesize = size -> size + * + * Sets the size of the internal bucket cache to _size_. + */ +static VALUE +fgdbm_set_cachesize(VALUE obj, VALUE val) +{ + struct dbmdata *dbmp; + GDBM_FILE dbm; + int optval; + + GetDBM2(obj, dbmp, dbm); + optval = FIX2INT(val); + if (gdbm_setopt(dbm, GDBM_CACHESIZE, &optval, sizeof(optval)) == -1) { + rb_raise(rb_eGDBMError, "%s", gdbm_strerror(gdbm_errno)); + } + return val; +} + +/* + * call-seq: + * gdbm.fastmode = boolean -> boolean + * + * Turns the database's fast mode on or off. If fast mode is turned on, gdbm + * does not wait for writes to be flushed to the disk before continuing. + * + * This option is obsolete for gdbm >= 1.8 since fast mode is turned on by + * default. See also: #syncmode= + */ +static VALUE +fgdbm_set_fastmode(VALUE obj, VALUE val) +{ + struct dbmdata *dbmp; + GDBM_FILE dbm; + int optval; + + GetDBM2(obj, dbmp, dbm); + optval = 0; + if (RTEST(val)) + optval = 1; + + if (gdbm_setopt(dbm, GDBM_FASTMODE, &optval, sizeof(optval)) == -1) { + rb_raise(rb_eGDBMError, "%s", gdbm_strerror(gdbm_errno)); + } + return val; +} + +/* + * call-seq: + * gdbm.syncmode = boolean -> boolean + * + * Turns the database's synchronization mode on or off. 
If the synchronization + * mode is turned on, the database's in-memory state will be synchronized to + * disk after every database modification operation. If the synchronization + * mode is turned off, GDBM does not wait for writes to be flushed to the disk + * before continuing. + * + * This option is only available for gdbm >= 1.8 where syncmode is turned off + * by default. See also: #fastmode= + */ +static VALUE +fgdbm_set_syncmode(VALUE obj, VALUE val) +{ +#if !defined(GDBM_SYNCMODE) + fgdbm_set_fastmode(obj, RTEST(val) ? Qfalse : Qtrue); + return val; +#else + struct dbmdata *dbmp; + GDBM_FILE dbm; + int optval; + + GetDBM2(obj, dbmp, dbm); + optval = 0; + if (RTEST(val)) + optval = 1; + + if (gdbm_setopt(dbm, GDBM_FASTMODE, &optval, sizeof(optval)) == -1) { + rb_raise(rb_eGDBMError, "%s", gdbm_strerror(gdbm_errno)); + } + return val; +#endif +} + +/* + * call-seq: + * gdbm.to_hash -> hash + * + * Returns a hash of all key-value pairs contained in the database. + */ +static VALUE +fgdbm_to_hash(VALUE obj) +{ + struct dbmdata *dbmp; + GDBM_FILE dbm; + VALUE keystr, hash; + + GetDBM2(obj, dbmp, dbm); + hash = rb_hash_new(); + for (keystr = rb_gdbm_firstkey(dbm); RTEST(keystr); + keystr = rb_gdbm_nextkey(dbm, keystr)) { + + rb_hash_aset(hash, keystr, rb_gdbm_fetch2(dbm, keystr)); + } + + return hash; +} + +/* + * call-seq: + * gdbm.reject { |key, value| block } -> hash + * + * Returns a hash copy of _gdbm_ where all key-value pairs from _gdbm_ for + * which _block_ evaluates to true are removed. 
See also: #delete_if + */ +static VALUE +fgdbm_reject(VALUE obj) +{ + return rb_hash_delete_if(fgdbm_to_hash(obj)); +} + +void +Init_gdbm(void) +{ + rb_cGDBM = rb_define_class("GDBM", rb_cObject); + rb_eGDBMError = rb_define_class("GDBMError", rb_eStandardError); + rb_eGDBMFatalError = rb_define_class("GDBMFatalError", rb_eException); + rb_include_module(rb_cGDBM, rb_mEnumerable); + + rb_define_alloc_func(rb_cGDBM, fgdbm_s_alloc); + rb_define_singleton_method(rb_cGDBM, "open", fgdbm_s_open, -1); + + rb_define_method(rb_cGDBM, "initialize", fgdbm_initialize, -1); + rb_define_method(rb_cGDBM, "close", fgdbm_close, 0); + rb_define_method(rb_cGDBM, "closed?", fgdbm_closed, 0); + rb_define_method(rb_cGDBM, "[]", fgdbm_aref, 1); + rb_define_method(rb_cGDBM, "fetch", fgdbm_fetch_m, -1); + rb_define_method(rb_cGDBM, "[]=", fgdbm_store, 2); + rb_define_method(rb_cGDBM, "store", fgdbm_store, 2); + rb_define_method(rb_cGDBM, "index", fgdbm_index, 1); + rb_define_method(rb_cGDBM, "key", fgdbm_key, 1); + rb_define_method(rb_cGDBM, "select", fgdbm_select, 0); + rb_define_method(rb_cGDBM, "values_at", fgdbm_values_at, -1); + rb_define_method(rb_cGDBM, "length", fgdbm_length, 0); + rb_define_method(rb_cGDBM, "size", fgdbm_length, 0); + rb_define_method(rb_cGDBM, "empty?", fgdbm_empty_p, 0); + rb_define_method(rb_cGDBM, "each", fgdbm_each_pair, 0); + rb_define_method(rb_cGDBM, "each_value", fgdbm_each_value, 0); + rb_define_method(rb_cGDBM, "each_key", fgdbm_each_key, 0); + rb_define_method(rb_cGDBM, "each_pair", fgdbm_each_pair, 0); + rb_define_method(rb_cGDBM, "keys", fgdbm_keys, 0); + rb_define_method(rb_cGDBM, "values", fgdbm_values, 0); + rb_define_method(rb_cGDBM, "shift", fgdbm_shift, 0); + rb_define_method(rb_cGDBM, "delete", fgdbm_delete, 1); + rb_define_method(rb_cGDBM, "delete_if", fgdbm_delete_if, 0); + rb_define_method(rb_cGDBM, "reject!", fgdbm_delete_if, 0); + rb_define_method(rb_cGDBM, "reject", fgdbm_reject, 0); + rb_define_method(rb_cGDBM, "clear", fgdbm_clear, 
0); + rb_define_method(rb_cGDBM, "invert", fgdbm_invert, 0); + rb_define_method(rb_cGDBM, "update", fgdbm_update, 1); + rb_define_method(rb_cGDBM, "replace", fgdbm_replace, 1); + rb_define_method(rb_cGDBM, "reorganize", fgdbm_reorganize, 0); + rb_define_method(rb_cGDBM, "sync", fgdbm_sync, 0); + /* rb_define_method(rb_cGDBM, "setopt", fgdbm_setopt, 2); */ + rb_define_method(rb_cGDBM, "cachesize=", fgdbm_set_cachesize, 1); + rb_define_method(rb_cGDBM, "fastmode=", fgdbm_set_fastmode, 1); + rb_define_method(rb_cGDBM, "syncmode=", fgdbm_set_syncmode, 1); + + rb_define_method(rb_cGDBM, "include?", fgdbm_has_key, 1); + rb_define_method(rb_cGDBM, "has_key?", fgdbm_has_key, 1); + rb_define_method(rb_cGDBM, "member?", fgdbm_has_key, 1); + rb_define_method(rb_cGDBM, "has_value?", fgdbm_has_value, 1); + rb_define_method(rb_cGDBM, "key?", fgdbm_has_key, 1); + rb_define_method(rb_cGDBM, "value?", fgdbm_has_value, 1); + + rb_define_method(rb_cGDBM, "to_a", fgdbm_to_a, 0); + rb_define_method(rb_cGDBM, "to_hash", fgdbm_to_hash, 0); + + /* flag for #new and #open: open database as a reader */ + rb_define_const(rb_cGDBM, "READER", INT2FIX(GDBM_READER|RUBY_GDBM_RW_BIT)); + /* flag for #new and #open: open database as a writer */ + rb_define_const(rb_cGDBM, "WRITER", INT2FIX(GDBM_WRITER|RUBY_GDBM_RW_BIT)); + /* flag for #new and #open: open database as a writer; if the database does not exist, create a new one */ + rb_define_const(rb_cGDBM, "WRCREAT", INT2FIX(GDBM_WRCREAT|RUBY_GDBM_RW_BIT)); + /* flag for #new and #open: open database as a writer; overwrite any existing databases */ + rb_define_const(rb_cGDBM, "NEWDB", INT2FIX(GDBM_NEWDB|RUBY_GDBM_RW_BIT)); + + /* flag for #new and #open. this flag is obsolete for gdbm >= 1.8 */ + rb_define_const(rb_cGDBM, "FAST", INT2FIX(GDBM_FAST)); + /* this flag is obsolete in gdbm 1.8. + On gdbm 1.8, fast mode is default behavior. */ + + /* gdbm version 1.8 specific */ +#if defined(GDBM_SYNC) + /* flag for #new and #open. 
only for gdbm >= 1.8 */ + rb_define_const(rb_cGDBM, "SYNC", INT2FIX(GDBM_SYNC)); +#endif +#if defined(GDBM_NOLOCK) + /* flag for #new and #open */ + rb_define_const(rb_cGDBM, "NOLOCK", INT2FIX(GDBM_NOLOCK)); +#endif + /* version of the gdbm library*/ + rb_define_const(rb_cGDBM, "VERSION", rb_str_new2(gdbm_version)); +} diff --git a/ext/iconv/charset_alias.rb b/ext/iconv/charset_alias.rb new file mode 100644 index 0000000..d4ed913 --- /dev/null +++ b/ext/iconv/charset_alias.rb @@ -0,0 +1,103 @@ +#! /usr/bin/ruby +# :stopdoc: +require 'rbconfig' +require 'optparse' + +# http://www.ctan.org/tex-archive/macros/texinfo/texinfo/intl/config.charset +# Fri, 30 May 2003 00:09:00 GMT' + +OS = RbConfig::CONFIG["target_os"] +SHELL = RbConfig::CONFIG['SHELL'] + +class Hash::Ordered < Hash + def [](key) + val = super and val.last + end + def []=(key, val) + ary = fetch(key) {return super(key, [self.size, key, val])} and + ary << val + end + def sort + values.sort.collect {|i, *rest| rest} + end + def each(&block) + sort.each(&block) + end +end + +def charset_alias(config_charset, mapfile, target = OS) + map = Hash::Ordered.new + comments = [] + open(config_charset) do |input| + input.find {|line| /^case "\$os" in/ =~ line} or break + input.find {|line| + /^\s*([-\w\*]+(?:\s*\|\s*[-\w\*]+)*)(?=\))/ =~ line and + $&.split('|').any? {|pattern| File.fnmatch?(pattern.strip, target)} + } or break + input.find do |line| + case line + when /^\s*echo "(?:\$\w+\.)?([-\w*]+)\s+([-\w]+)"/ + sys, can = $1, $2 + can.downcase! + map[can] = sys + false + when /^\s*;;/ + true + else + false + end + end + end + case target + when /linux|-gnu/ + # map.delete('ascii') + when /cygwin|os2-emx/ + # get rid of tilde/yen problem. + map['shift_jis'] = 'cp932' + end + st = Hash.new(0) + map = map.sort.collect do |can, *sys| + if sys.grep(/^en_us(?=.|$)/i) {break true} == true + noen = %r"^(?!en_us)\w+_\w+#{Regexp.new($')}$"i #" + sys.reject! 
{|s| noen =~ s} + end + sys = sys.first + st[sys] += 1 + [can, sys] + end + st.delete_if {|sys, i| i == 1}.empty? + st.keys.each {|sys| st[sys] = nil} + st.default = nil + writer = proc do |f| + f.puts("require 'iconv.so'") + f.puts + f.puts(comments) + f.puts("class Iconv") + i = 0 + map.each do |can, sys| + if s = st[sys] + sys = s + elsif st.key?(sys) + sys = (st[sys] = "sys#{i+=1}") + " = '#{sys}'.freeze" + else + sys = "'#{sys}'.freeze" + end + f.puts(" charset_map['#{can}'] = #{sys}") + end + f.puts("end") + end + if mapfile + open(mapfile, "w", &writer) + else + writer[STDOUT] + end +end + +target = OS +opt = nil +ARGV.options do |opt| + opt.banner << " config.status map.rb" + opt.on("--target OS") {|t| target = t} + opt.parse! and (1..2) === ARGV.size +end or abort opt.to_s +charset_alias(ARGV[0], ARGV[1], target) diff --git a/ext/iconv/depend b/ext/iconv/depend new file mode 100644 index 0000000..ac55559 --- /dev/null +++ b/ext/iconv/depend @@ -0,0 +1,2 @@ +iconv.o: iconv.c $(hdrdir)/ruby.h $(topdir)/config.h $(hdrdir)/defines.h \ + $(hdrdir)/st.h $(hdrdir)/intern.h $(hdrdir)/encoding.h diff --git a/ext/iconv/extconf.rb b/ext/iconv/extconf.rb new file mode 100644 index 0000000..9632bf4 --- /dev/null +++ b/ext/iconv/extconf.rb @@ -0,0 +1,53 @@ +require 'mkmf' + +dir_config("iconv") + +conf = File.exist?(File.join($srcdir, "config.charset")) +conf = with_config("config-charset", enable_config("config-charset", conf)) + +if have_func("iconv", "iconv.h") or + have_library("iconv", "iconv", "iconv.h") + if checking_for("const of iconv() 2nd argument") do + create_tmpsrc(cpp_include("iconv.h") + "---> iconv(cd,0,0,0,0) <---") + src = xpopen(cpp_command("")) {|f|f.read} + if !(func = src[/^--->\s*(\w+).*\s*<---/, 1]) + Logging::message "iconv function name not found" + false + elsif !(second = src[%r"\b#{func}\s*\(.*?,(.*?),.*?\)\s*;"m, 1]) + Logging::message "prototype for #{func}() not found" + false + else + Logging::message $&+"\n" + /\bconst\b/ =~ second + 
end + end + $defs.push('-DICONV_INPTR_CONST') + end + have_func("iconvlist", "iconv.h") + have_func("__iconv_free_list", "iconv.h") + if conf + prefix = '$(srcdir)' + prefix = $nmake ? "{#{prefix}}" : "#{prefix}/" + if $extout + wrapper = "$(RUBYARCHDIR)/iconv.rb" + else + wrapper = "./iconv.rb" + $INSTALLFILES = [[wrapper, "$(RUBYARCHDIR)"]] + end + if String === conf + require 'uri' + scheme = URI.parse(conf).scheme + else + conf = "$(srcdir)/config.charset" + end + $cleanfiles << wrapper + end + create_makefile("iconv") + if conf + open("Makefile", "a") do |mf| + mf.print("\nall: #{wrapper}\n\n#{wrapper}: #{prefix}charset_alias.rb") + mf.print(" ", conf) unless scheme + mf.print("\n\t$(RUBY) $(srcdir)/charset_alias.rb #{conf} $@\n") + end + end +end diff --git a/ext/iconv/iconv.c b/ext/iconv/iconv.c new file mode 100644 index 0000000..7fc373b --- /dev/null +++ b/ext/iconv/iconv.c @@ -0,0 +1,1203 @@ +/* -*- mode:c; c-file-style:"ruby" -*- */ +/********************************************************************** + + iconv.c - + + $Author: nobu $ + created at: Wed Dec 1 20:28:09 JST 1999 + + All the files in this distribution are covered under the Ruby's + license (see the file COPYING). + + Documentation by Yukihiro Matsumoto and Gavin Sinclair. + +**********************************************************************/ + +#include "ruby/ruby.h" +#include +#include +#include +#include "ruby/st.h" +#include "ruby/encoding.h" + +/* + * Document-class: Iconv + * + * == Summary + * + * Ruby extension for charset conversion. + * + * == Abstract + * + * Iconv is a wrapper class for the UNIX 95 iconv() function family, + * which translates string between various encoding systems. + * + * See Open Group's on-line documents for more details. 
+ * * iconv.h:       http://www.opengroup.org/onlinepubs/007908799/xsh/iconv.h.html
+ * * iconv_open():  http://www.opengroup.org/onlinepubs/007908799/xsh/iconv_open.html
+ * * iconv():       http://www.opengroup.org/onlinepubs/007908799/xsh/iconv.html
+ * * iconv_close(): http://www.opengroup.org/onlinepubs/007908799/xsh/iconv_close.html
+ *
+ * Which coding systems are available is platform-dependent.
+ *
+ * == Examples
+ *
+ * 1. Simple conversion between two charsets.
+ *
+ *      converted_text = Iconv.conv('iso-8859-15', 'utf-8', text)
+ *
+ * 2. Instantiate a new Iconv and use method Iconv#iconv.
+ *
+ *      cd = Iconv.new(to, from)
+ *      begin
+ *        input.each { |s| output << cd.iconv(s) }
+ *        output << cd.iconv(nil)                   # Don't forget this!
+ *      ensure
+ *        cd.close
+ *      end
+ *
+ * 3. Invoke Iconv.open with a block.
+ *
+ *      Iconv.open(to, from) do |cd|
+ *        input.each { |s| output << cd.iconv(s) }
+ *        output << cd.iconv(nil)
+ *      end
+ *
+ * 4. Shorthand for (3).
+ *
+ *      Iconv.iconv(to, from, *input.to_a)
+ *
+ * == Attention
+ *
+ * Even if some implementation-dependent extensions are useful,
+ * DON'T USE those extensions in libraries and scripts meant for wide distribution.
+ * If you want to use those features, use String#encode.
+ */
+
+/* Invalid value for iconv_t is -1 but 0 for VALUE, I hope VALUE is
+   big enough to keep iconv_t */
+#define VALUE2ICONV(v) ((iconv_t)((VALUE)(v) ^ -1))
+#define ICONV2VALUE(c) ((VALUE)(c) ^ -1)
+
+/* Shared state for the class-level converters (Iconv.iconv / Iconv.conv). */
+struct iconv_env_t
+{
+    iconv_t cd;                         /* open conversion descriptor */
+    int argc;                           /* number of inputs still to convert */
+    VALUE *argv;                        /* remaining inputs */
+    VALUE ret;                          /* accumulated result */
+    int toidx;                          /* encoding index of the target, or < 0 */
+    VALUE (*append)_((VALUE, VALUE));   /* how one converted piece joins ret */
+};
+
+/* Converter options; a member left as Qundef means "not specified". */
+struct rb_iconv_opt_t
+{
+    VALUE transliterate;
+    VALUE discard_ilseq;
+};
+
+static ID id_transliterate, id_discard_ilseq;
+
+static VALUE rb_eIconvInvalidEncoding;
+static VALUE rb_eIconvFailure;
+static VALUE rb_eIconvIllegalSeq;
+static VALUE rb_eIconvInvalidChar;
+static VALUE rb_eIconvOutOfRange;
+static VALUE rb_eIconvBrokenLibrary;
+
+static ID rb_success, rb_failed;
+static VALUE iconv_fail _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg));
+static VALUE iconv_failure_initialize _((VALUE error, VALUE mesg, VALUE success, VALUE failed));
+static VALUE iconv_failure_success _((VALUE self));
+static VALUE iconv_failure_failed _((VALUE self));
+
+static iconv_t iconv_create _((VALUE to, VALUE from, struct rb_iconv_opt_t *opt, int *idx));
+static void iconv_dfree _((void *cd));
+static VALUE iconv_free _((VALUE cd));
+static VALUE iconv_try _((iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen));
+static VALUE rb_str_derive _((VALUE str, const char* ptr, long len));
+static VALUE iconv_convert _((iconv_t cd, VALUE str, long start, long length, int toidx,
+                              struct iconv_env_t* env));
+static VALUE iconv_s_allocate _((VALUE klass));
+static VALUE iconv_initialize _((int argc, VALUE *argv, VALUE self));
+static VALUE iconv_s_open _((int argc, VALUE *argv, VALUE self));
+static VALUE iconv_s_convert _((struct iconv_env_t* env));
+static VALUE iconv_s_iconv _((int argc, VALUE *argv, VALUE self));
+static VALUE iconv_init_state _((VALUE cd));
+static VALUE iconv_finish _((VALUE self));
+static VALUE iconv_iconv _((int argc, VALUE *argv, VALUE self));
+static VALUE iconv_conv _((int argc, VALUE *argv, VALUE self));
+
+static VALUE charset_map;
+
+/*
+ * Document-method: charset_map
+ * call-seq: Iconv.charset_map
+ *
+ * Returns the map from canonical name to system dependent name.
+ */
+static VALUE
+charset_map_get(void)
+{
+    return charset_map;
+}
+
+/*
+ * Splits a glibc-style "NAME//OPTION" encoding name.
+ *
+ * Only the first '/' in the name is examined.  When it is immediately
+ * followed by another '/', *code is replaced by the part before it and
+ * the remainder (starting at the first slash) is returned.  Otherwise
+ * *code is left alone (apart from the StringValue conversion) and 0 is
+ * returned.
+ */
+static VALUE
+strip_glibc_option(VALUE *code)
+{
+    VALUE val = StringValue(*code);
+    const char *ptr = RSTRING_PTR(val), *pend = RSTRING_END(val);
+    const char *slash = memchr(ptr, '/', pend - ptr);
+
+    if (slash && slash < pend - 1 && slash[1] == '/') {
+        VALUE opt = rb_str_subseq(val, slash - ptr, pend - slash);
+        val = rb_str_subseq(val, 0, slash - ptr);
+        *code = val;
+        return opt;
+    }
+    return 0;
+}
+
+/*
+ * Translates an encoding name through charset_map.
+ *
+ * When the map is non-empty the downcased name is looked up and, on a
+ * hit, *code is replaced by the mapped value.  Returns the C string of
+ * whatever *code holds afterwards.
+ */
+static char *
+map_charset(VALUE *code)
+{
+    VALUE val = StringValue(*code);
+
+    if (RHASH_SIZE(charset_map)) {
+        VALUE key = rb_funcall2(val, rb_intern("downcase"), 0, 0);
+        StringValuePtr(key);
+        if (st_lookup(RHASH_TBL(charset_map), key, &val)) {
+            *code = val;
+        }
+    }
+    return StringValuePtr(*code);
+}
+
+/*
+ * Opens a conversion descriptor from +from+ to +to+.
+ *
+ * *idx receives rb_enc_find_index() of the mapped target name.  A
+ * "//OPTION" suffix is first passed through to iconv_open(); when that
+ * fails with EINVAL the call is retried without the suffix (target
+ * first, then source).  EMFILE/ENFILE/ENOMEM trigger one rb_gc() and a
+ * retry.  Any other failure raises: Iconv::InvalidEncoding for EINVAL,
+ * a SystemCallError otherwise.  When +opt+ is given, the requested
+ * iconvctl() options are applied where the library supports them.
+ */
+static iconv_t
+iconv_create(VALUE to, VALUE from, struct rb_iconv_opt_t *opt, int *idx)
+{
+    VALUE toopt = strip_glibc_option(&to);
+    VALUE fromopt = strip_glibc_option(&from);
+    VALUE toenc = 0, fromenc = 0;
+    const char* tocode = map_charset(&to);
+    const char* fromcode = map_charset(&from);
+    iconv_t cd;
+    int retry = 0;
+
+    *idx = rb_enc_find_index(tocode);
+
+    if (toopt) {
+        toenc = rb_str_plus(to, toopt);
+        tocode = RSTRING_PTR(toenc);
+    }
+    if (fromopt) {
+        fromenc = rb_str_plus(from, fromopt);
+        fromcode = RSTRING_PTR(fromenc);
+    }
+    while ((cd = iconv_open(tocode, fromcode)) == (iconv_t)-1) {
+        /* keep the failure code: the message building below allocates
+           and formats, either of which may overwrite errno */
+        int err = errno;
+        int inval = 0;
+        switch (err) {
+          case EMFILE:
+          case ENFILE:
+          case ENOMEM:
+            if (!retry++) {
+                rb_gc();
+                continue;
+            }
+            break;
+          case EINVAL:
+            retry = 0;
+            inval = 1;
+            if (toenc) {
+                tocode = RSTRING_PTR(to);
+                rb_str_resize(toenc, 0);
+                toenc = 0;
+                continue;
+            }
+            if (fromenc) {
+                fromcode = RSTRING_PTR(from);
+                rb_str_resize(fromenc, 0);
+                fromenc = 0;
+                continue;
+            }
+            break;
+        }
+        {
+            const char *s = inval ? "invalid encoding " : "iconv";
+            /* 8 = the punctuation added by the format below */
+            volatile VALUE msg = rb_str_new(0, strlen(s) + RSTRING_LEN(to) +
+                                            RSTRING_LEN(from) + 8);
+
+            sprintf(RSTRING_PTR(msg), "%s(\"%s\", \"%s\")",
+                    s, RSTRING_PTR(to), RSTRING_PTR(from));
+            s = RSTRING_PTR(msg);
+            rb_str_set_len(msg, strlen(s));
+            if (!inval) {
+                errno = err;
+                rb_sys_fail(s);
+            }
+            iconv_fail(rb_eIconvInvalidEncoding,
+                       Qnil, rb_ary_new3(2, to, from), NULL, s);
+        }
+    }
+
+    if (toopt || fromopt) {
+        if (toopt && fromopt && RTEST(rb_str_equal(toopt, fromopt))) {
+            fromopt = 0;
+        }
+        if (toopt && fromopt) {
+            rb_warning("encoding option isn't portable: %s, %s",
+                       RSTRING_PTR(toopt) + 2, RSTRING_PTR(fromopt) + 2);
+        }
+        else {
+            rb_warning("encoding option isn't portable: %s",
+                       (toopt ? RSTRING_PTR(toopt) : RSTRING_PTR(fromopt)) + 2);
+        }
+    }
+
+    if (opt) {
+#ifdef ICONV_SET_TRANSLITERATE
+        if (opt->transliterate != Qundef) {
+            int flag = RTEST(opt->transliterate);
+            rb_warning("encoding option isn't portable: transliterate");
+            if (iconvctl(cd, ICONV_SET_TRANSLITERATE, (void *)&flag))
+                rb_sys_fail("ICONV_SET_TRANSLITERATE");
+        }
+#endif
+#ifdef ICONV_SET_DISCARD_ILSEQ
+        if (opt->discard_ilseq != Qundef) {
+            int flag = RTEST(opt->discard_ilseq);
+            rb_warning("encoding option isn't portable: discard_ilseq");
+            if (iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, (void *)&flag))
+                rb_sys_fail("ICONV_SET_DISCARD_ILSEQ");
+        }
+#endif
+    }
+
+    return cd;
+}
+
+/* Data free function: closes the descriptor encoded in the data pointer. */
+static void
+iconv_dfree(void *cd)
+{
+    iconv_close(VALUE2ICONV(cd));
+}
+
+#define ICONV_FREE iconv_dfree
+
+/*
+ * Closes the descriptor encoded in +cd+ unless +cd+ is 0; raises a
+ * SystemCallError when iconv_close() fails.  Always returns Qnil.
+ */
+static VALUE
+iconv_free(VALUE cd)
+{
+    if (cd && iconv_close(VALUE2ICONV(cd)) == -1)
+        rb_sys_fail("iconv_close");
+    return Qnil;
+}
+
+/*
+ * Checks that +obj+ is a T_DATA object whose free function is ICONV_FREE
+ * and returns its data pointer as a VALUE; raises ArgumentError otherwise.
+ */
+static VALUE
+check_iconv(VALUE obj)
+{
+    Check_Type(obj, T_DATA);
+    if (RDATA(obj)->dfree != ICONV_FREE) {
+        rb_raise(rb_eArgError, "Iconv expected (%s)", rb_class2name(CLASS_OF(obj)));
+    }
+    return (VALUE)DATA_PTR(obj);
+}
+
+/*
+ * Runs iconv() once and classifies the outcome.
+ *
+ * Returns Qfalse when nothing needs reporting (including E2BIG, which the
+ * caller handles by looping), Qnil when iconv() reported a non-zero
+ * conversion count, or one of the Iconv exception classes on error.
+ * Unexpected errno values raise a SystemCallError directly.
+ */
+static VALUE
+iconv_try(iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen)
+{
+#ifdef ICONV_INPTR_CONST
+#define ICONV_INPTR_CAST
+#else
+#define ICONV_INPTR_CAST (char **)
+#endif
+    size_t ret;
+
+    errno = 0;
+    ret = iconv(cd, ICONV_INPTR_CAST inptr, inlen, outptr, outlen);
+    if (ret == (size_t)-1) {
+        if (!*inlen)
+            return Qfalse;
+        switch (errno) {
+          case E2BIG:
+            /* try the left in next loop */
+            break;
+          case EILSEQ:
+            return rb_eIconvIllegalSeq;
+          case EINVAL:
+            return rb_eIconvInvalidChar;
+          case 0:
+            return rb_eIconvBrokenLibrary;
+          default:
+            rb_sys_fail("iconv");
+        }
+    }
+    else if (*inlen > 0) {
+        /* something goes wrong */
+        return rb_eIconvIllegalSeq;
+    }
+    else if (ret) {
+        return Qnil;    /* conversion */
+    }
+    return Qfalse;
+}
+
+#define FAILED_MAXLEN 16
+
+/* Iconv::Failure#initialize: stores the success and failed parts. */
+static VALUE
+iconv_failure_initialize(VALUE error, VALUE mesg, VALUE success, VALUE failed)
+{
+    rb_call_super(1, &mesg);
+    rb_ivar_set(error, rb_success, success);
+    rb_ivar_set(error, rb_failed, failed);
+    return error;
+}
+
+/*
+ * Builds an instance of the exception class +error+ and reports it.
+ *
+ * The message is +mesg+ when non-empty, otherwise the inspected +failed+
+ * value (strings of FAILED_MAXLEN bytes or more are cut and suffixed
+ * with "...").  With +env+, the success part becomes a copy of env->ret
+ * with +success+ appended, and the failed part becomes the remaining
+ * arguments with +failed+ stored in the first slot.  Without a block the
+ * exception is raised; with a block it is set as the current error and
+ * +failed+ is yielded, the block's value being returned.
+ */
+static VALUE
+iconv_fail(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg)
+{
+    VALUE args[3];
+
+    if (mesg && *mesg) {
+        args[0] = rb_str_new2(mesg);
+    }
+    else if (TYPE(failed) != T_STRING || RSTRING_LEN(failed) < FAILED_MAXLEN) {
+        args[0] = rb_inspect(failed);
+    }
+    else {
+        args[0] = rb_inspect(rb_str_substr(failed, 0, FAILED_MAXLEN));
+        rb_str_cat2(args[0], "...");
+    }
+    args[1] = success;
+    args[2] = failed;
+    if (env) {
+        args[1] = env->append(rb_obj_dup(env->ret), success);
+        if (env->argc > 0) {
+            *(env->argv) = failed;
+            args[2] = rb_ary_new4(env->argc, env->argv);
+        }
+    }
+    error = rb_class_new_instance(3, args, error);
+    if (!rb_block_given_p()) rb_exc_raise(error);
+    rb_set_errinfo(error);
+    return rb_yield(failed);
+}
+
+/*
+ * Makes a string from +len+ bytes at +ptr+.
+ *
+ * With a nil +str+ the bytes are copied.  Otherwise, when the range ends
+ * exactly at the end of +str+, a subsequence of +str+ is returned; in any
+ * other case the bytes are copied.  The result is infected from +str+.
+ */
+static VALUE
+rb_str_derive(VALUE str, const char* ptr, long len)
+{
+    VALUE ret;
+
+    if (NIL_P(str))
+        return rb_str_new(ptr, len);
+    if (RSTRING_PTR(str) + RSTRING_LEN(str) == ptr + len)
+        ret = rb_str_subseq(str, ptr - RSTRING_PTR(str), len);
+    else
+        ret = rb_str_new(ptr, len);
+    OBJ_INFECT(ret, str);
+    return ret;
+}
+
+/*
+ * Converts +length+ bytes of +str+ starting at byte offset +start+.
+ *
+ * A nil +str+ asks the converter for the sequence that returns it to the
+ * initial shift state.  A negative +length+ means "up to the end of the
+ * string".  The requested window is clamped to the string so that no
+ * byte outside it is ever handed to iconv().  Conversion errors go
+ * through iconv_fail(); the value a rescuing block yields is appended to
+ * the result and, when it is an array with a second element, that
+ * element replaces the remaining input.  Raises ArgumentError when +cd+
+ * is the closed-descriptor marker.
+ */
+static VALUE
+iconv_convert(iconv_t cd, VALUE str, long start, long length, int toidx, struct iconv_env_t* env)
+{
+    VALUE ret = Qfalse;
+    VALUE error = Qfalse;
+    VALUE rescue;
+    const char *inptr, *instart;
+    size_t inlen;
+    /* I believe ONE CHARACTER never exceed this. */
+    char buffer[BUFSIZ];
+    char *outptr;
+    size_t outlen;
+
+    if (cd == (iconv_t)-1)
+        rb_raise(rb_eArgError, "closed iconv");
+
+    if (NIL_P(str)) {
+        /* Reset output pointer or something. */
+        inptr = "";
+        inlen = 0;
+        outptr = buffer;
+        outlen = sizeof(buffer);
+        error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen);
+        if (RTEST(error)) {
+            unsigned int i;
+            rescue = iconv_fail(error, Qnil, Qnil, env, 0);
+            if (TYPE(rescue) == T_ARRAY) {
+                str = RARRAY_LEN(rescue) > 0 ? RARRAY_PTR(rescue)[0] : Qnil;
+            }
+            if (FIXNUM_P(str) && (i = FIX2INT(str)) <= 0xff) {
+                char c = i;
+                str = rb_str_new(&c, 1);
+            }
+            else if (!NIL_P(str)) {
+                StringValue(str);
+            }
+        }
+
+        inptr = NULL;
+        length = 0;
+    }
+    else {
+        long slen;      /* long, not int: RSTRING_LEN() is a long */
+
+        StringValue(str);
+        slen = RSTRING_LEN(str);
+        inptr = RSTRING_PTR(str);
+
+        /* keep the window [start, start + length) inside the string */
+        if (start < 0)
+            start = 0;
+        else if (start > slen)
+            start = slen;
+        inptr += start;
+        if (length < 0 || length > slen - start)
+            length = slen - start;
+    }
+    instart = inptr;
+    inlen = length;
+
+    do {
+        char errmsg[50];
+        const char *tmpstart = inptr;
+        outptr = buffer;
+        outlen = sizeof(buffer);
+
+        errmsg[0] = 0;
+        error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen);
+
+        /* outlen is unsigned, so only the upper bound can be violated */
+        if (outlen <= sizeof(buffer)) {
+            outlen = sizeof(buffer) - outlen;
+            if (NIL_P(error) || /* something converted */
+                outlen > inptr - tmpstart || /* input can't contain output */
+                (outlen < inptr - tmpstart && inlen > 0) || /* something skipped */
+                memcmp(buffer, tmpstart, outlen)) /* something differs */
+            {
+                if (NIL_P(str)) {
+                    ret = rb_str_new(buffer, outlen);
+                    if (toidx >= 0) rb_enc_associate_index(ret, toidx);
+                }
+                else {
+                    if (ret) {
+                        ret = rb_str_buf_cat(ret, instart, tmpstart - instart);
+                    }
+                    else {
+                        ret = rb_str_new(instart, tmpstart - instart);
+                        if (toidx >= 0) rb_enc_associate_index(ret, toidx);
+                        OBJ_INFECT(ret, str);
+                    }
+                    ret = rb_str_buf_cat(ret, buffer, outlen);
+                    instart = inptr;
+                }
+            }
+            else if (!inlen) {
+                inptr = tmpstart + outlen;
+            }
+        }
+        else {
+            /* Some iconv() have a bug, return *outlen out of range */
+            sprintf(errmsg, "bug?(output length = %ld)", (long)(sizeof(buffer) - outlen));
+            error = rb_eIconvOutOfRange;
+        }
+
+        if (RTEST(error)) {
+            long len = 0;
+
+            if (!ret) {
+                ret = rb_str_derive(str, instart, inptr - instart);
+                if (toidx >= 0) rb_enc_associate_index(ret, toidx);
+            }
+            else if (inptr > instart) {
+                rb_str_cat(ret, instart, inptr - instart);
+            }
+            str = rb_str_derive(str, inptr, inlen);
+            rescue = iconv_fail(error, ret, str, env, errmsg);
+            if (TYPE(rescue) == T_ARRAY) {
+                if ((len = RARRAY_LEN(rescue)) > 0)
+                    rb_str_concat(ret, RARRAY_PTR(rescue)[0]);
+                if (len > 1 && !NIL_P(str = RARRAY_PTR(rescue)[1])) {
+                    StringValue(str);
+                    inlen = length = RSTRING_LEN(str);
+                    instart = inptr = RSTRING_PTR(str);
+                    continue;
+                }
+            }
+            else if (!NIL_P(rescue)) {
+                rb_str_concat(ret, rescue);
+            }
+            break;
+        }
+    } while (inlen > 0);
+
+    if (!ret) {
+        ret = rb_str_derive(str, instart, inptr - instart);
+        if (toidx >= 0) rb_enc_associate_index(ret, toidx);
+    }
+    else if (inptr > instart) {
+        rb_str_cat(ret, instart, inptr - instart);
+    }
+    return ret;
+}
+
+/* Allocator: wraps a null data pointer with the Iconv free function. */
+static VALUE
+iconv_s_allocate(VALUE klass)
+{
+    return Data_Wrap_Struct(klass, 0, ICONV_FREE, 0);
+}
+
+/*
+ * Block body used by get_iconv_opt(): stores one (name, value) pair in
+ * the option struct passed as +arg+.  The name may be a Symbol or a
+ * string; unknown names raise ArgumentError, and options the library
+ * does not provide raise NotImplementedError.
+ */
+static VALUE
+get_iconv_opt_i(VALUE i, VALUE arg)
+{
+    struct rb_iconv_opt_t *opt = (struct rb_iconv_opt_t *)arg;
+    VALUE name, val;
+
+    (void)opt;
+    i = rb_Array(i);
+    name = rb_ary_entry(i, 0);
+    val = rb_ary_entry(i, 1);
+    do {
+        if (SYMBOL_P(name)) {
+            ID id = SYM2ID(name);
+            if (id == id_transliterate) {
+#ifdef ICONV_SET_TRANSLITERATE
+                opt->transliterate = val;
+#else
+                rb_notimplement();
+#endif
+                break;
+            }
+            if (id == id_discard_ilseq) {
+#ifdef ICONV_SET_DISCARD_ILSEQ
+                opt->discard_ilseq = val;
+#else
+                rb_notimplement();
+#endif
+                break;
+            }
+        }
+        else {
+            const char *s = StringValueCStr(name);
+            if (strcmp(s, "transliterate") == 0) {
+#ifdef ICONV_SET_TRANSLITERATE
+                opt->transliterate = val;
+#else
+                rb_notimplement();
+#endif
+                break;
+            }
+            if (strcmp(s, "discard_ilseq") == 0) {
+#ifdef ICONV_SET_DISCARD_ILSEQ
+                opt->discard_ilseq = val;
+#else
+                rb_notimplement();
+#endif
+                break;
+            }
+        }
+        name = rb_inspect(name);
+        rb_raise(rb_eArgError, "unknown option - %s", StringValueCStr(name));
+    } while (0);
+    return Qnil;
+}
+
+/*
+ * Resets +opt+ to "nothing specified" and, unless +options+ is nil,
+ * fills it by iterating +options+ with #each.
+ */
+static void
+get_iconv_opt(struct rb_iconv_opt_t *opt, VALUE options)
+{
+    opt->transliterate = Qundef;
+    opt->discard_ilseq = Qundef;
+    if (!NIL_P(options)) {
+        rb_block_call(options, rb_intern("each"), 0, 0, get_iconv_opt_i, (VALUE)opt);
+    }
+}
+
+/* Calls iconvctl() on self's descriptor; raises SystemCallError on failure. */
+#define iconv_ctl(self, func, val) (\
+    iconvctl(VALUE2ICONV(check_iconv(self)), func, (void *)&(val)) ? \
+    rb_sys_fail(#func) : (void)0)
+
+/*
+ * Document-method: new
+ * call-seq: Iconv.new(to, from, [options])
+ *
+ * Creates new code converter from a coding-system designated with +from+
+ * to another one designated with +to+.
+ *
+ * === Parameters
+ *
+ * +to+::   encoding name for destination
+ * +from+:: encoding name for source
+ * +options+:: options for converter
+ *
+ * === Exceptions
+ *
+ * TypeError::       if +to+ or +from+ aren't String
+ * InvalidEncoding:: if the designated converter couldn't be found
+ * SystemCallError:: if iconv_open(3) fails
+ */
+static VALUE
+iconv_initialize(int argc, VALUE *argv, VALUE self)
+{
+    VALUE to, from, options;
+    struct rb_iconv_opt_t opt;
+    int idx;
+
+    rb_scan_args(argc, argv, "21", &to, &from, &options);
+    get_iconv_opt(&opt, options);
+    iconv_free(check_iconv(self));
+    /* cleared first so a failing iconv_create() leaves no stale handle */
+    DATA_PTR(self) = NULL;
+    DATA_PTR(self) = (void *)ICONV2VALUE(iconv_create(to, from, &opt, &idx));
+    if (idx >= 0) ENCODING_SET(self, idx);
+    return self;
+}
+
+/*
+ * Document-method: open
+ * call-seq: Iconv.open(to, from) { |iconv| ...
}
+ *
+ * Works like Iconv.new.  When a block is given, the new converter is
+ * yielded to it, closed once the block finishes, and the value of the
+ * block is returned.
+ */
+static VALUE
+iconv_s_open(int argc, VALUE *argv, VALUE self)
+{
+    VALUE to, from, options, handle;
+    struct rb_iconv_opt_t conv_opt;
+    int enc_idx;
+
+    rb_scan_args(argc, argv, "21", &to, &from, &options);
+    get_iconv_opt(&conv_opt, options);
+    handle = ICONV2VALUE(iconv_create(to, from, &conv_opt, &enc_idx));
+
+    /* from here on `self' is the new converter, not the class */
+    self = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)handle);
+    if (enc_idx >= 0) ENCODING_SET(self, enc_idx);
+
+    if (!rb_block_given_p())
+        return self;
+    return rb_ensure(rb_yield, self, (VALUE(*)())iconv_finish, self);
+}
+
+/*
+ * Body of Iconv.iconv and Iconv.conv: converts every remaining argument
+ * in +env+, appending each piece to env->ret.  Unless the last argument
+ * seen was nil, the converter is then reset and a non-empty reset
+ * sequence is appended as well.  Returns env->ret.
+ */
+static VALUE
+iconv_s_convert(struct iconv_env_t* env)
+{
+    VALUE prev = 0;
+    VALUE piece;
+
+    while (env->argc > 0) {
+        prev = *(env->argv);
+        piece = iconv_convert(env->cd, prev, 0, -1, env->toidx, env);
+        env->append(env->ret, piece);
+        --env->argc;
+        ++env->argv;
+    }
+
+    if (!NIL_P(prev)) {
+        piece = iconv_convert(env->cd, Qnil, 0, 0, env->toidx, env);
+        if (RSTRING_LEN(piece))
+            env->append(env->ret, piece);
+    }
+
+    return env->ret;
+}
+
+/*
+ * Document-method: Iconv::iconv
+ * call-seq: Iconv.iconv(to, from, *strs)
+ *
+ * Shorthand for
+ *   Iconv.open(to, from) { |cd|
+ *     (strs + [nil]).collect { |s| cd.iconv(s) }
+ *   }
+ *
+ * === Parameters
+ *
+ * to, from:: see Iconv.new
+ * strs:: strings to be converted
+ *
+ * === Exceptions
+ *
+ * Exceptions thrown by Iconv.new, Iconv.open and Iconv#iconv.
+ */
+static VALUE
+iconv_s_iconv(int argc, VALUE *argv, VALUE self)
+{
+    struct iconv_env_t env;
+    int nstrs;
+
+    /* `to' and `from' are mandatory */
+    if (argc < 2)
+        rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, 2);
+
+    nstrs = argc - 2;
+    env.argc = nstrs;
+    env.argv = argv + 2;
+    env.append = rb_ary_push;
+    env.ret = rb_ary_new2(nstrs);
+    env.cd = iconv_create(argv[0], argv[1], NULL, &env.toidx);
+    /* the descriptor is closed even when the conversion raises */
+    return rb_ensure(iconv_s_convert, (VALUE)&env, iconv_free, ICONV2VALUE(env.cd));
+}
+
+/*
+ * Document-method: Iconv::conv
+ * call-seq: Iconv.conv(to, from, str)
+ *
+ * Shorthand for
+ *   Iconv.iconv(to, from, str).join
+ * See Iconv.iconv.
+ */
+static VALUE
+iconv_s_conv(VALUE self, VALUE to, VALUE from, VALUE str)
+{
+    struct iconv_env_t env;
+
+    env.argc = 1;
+    env.argv = &str;
+    env.append = rb_str_append;
+    env.ret = rb_str_new(0, 0);
+    env.cd = iconv_create(to, from, NULL, &env.toidx);
+    /* the descriptor is closed even when the conversion raises */
+    return rb_ensure(iconv_s_convert, (VALUE)&env, iconv_free, ICONV2VALUE(env.cd));
+}
+
+/*
+ * Document-method: list
+ * call-seq: Iconv.list {|*aliases| ... }
+ *
+ * Iterates each alias sets.
+ */
+
+#ifdef HAVE_ICONVLIST
+/* One alias set as handed to the iconvlist() callback. */
+struct iconv_name_list
+{
+    unsigned int namescount;
+    const char *const *names;
+    VALUE array;                /* collecting array, or 0 to yield instead */
+};
+
+/*
+ * Protected part of the callback: turns one alias set into an array of
+ * strings, then either pushes it onto the collecting array or yields it.
+ */
+static VALUE
+list_iconv_i(VALUE ptr)
+{
+    struct iconv_name_list *p = (struct iconv_name_list *)ptr;
+    unsigned int i, namescount = p->namescount;
+    const char *const *names = p->names;
+    VALUE ary = rb_ary_new2(namescount);
+
+    for (i = 0; i < namescount; i++) {
+        rb_ary_push(ary, rb_str_new2(names[i]));
+    }
+    if (p->array) {
+        return rb_ary_push(p->array, ary);
+    }
+    return rb_yield(ary);
+}
+
+/*
+ * iconvlist() callback.  +data+ points at a two-element VALUE array:
+ * slot 0 is reused as the int state written by rb_protect(), slot 1 is
+ * the collecting array (or 0).  The state is returned to iconvlist().
+ */
+static int
+list_iconv(unsigned int namescount, const char *const *names, void *data)
+{
+    int *state = data;
+    struct iconv_name_list list;
+
+    list.namescount = namescount;
+    list.names = names;
+    list.array = ((VALUE *)data)[1];
+    rb_protect(list_iconv_i, (VALUE)&list, state);
+    return *state;
+}
+#endif
+
+/*
+ * Implementation of Iconv.list.  With a block each alias set is yielded
+ * and nil is returned; without one the collected sets are returned.
+ * Raises NotImplementedError when the library offers no listing API.
+ */
+static VALUE
+iconv_s_list(void)
+{
+#ifdef HAVE_ICONVLIST
+    int state;
+    VALUE args[2];
+
+    /* slot 0 is only written by the callback; start from "no error" so
+       the read below is defined even if the callback never runs */
+    args[0] = 0;
+    args[1] = rb_block_given_p() ? 0 : rb_ary_new();
+    iconvlist(list_iconv, args);
+    state = *(int *)args;
+    if (state) rb_jump_tag(state);
+    if (args[1]) return args[1];
+#elif defined(HAVE___ICONV_FREE_LIST)
+    char **list;
+    size_t sz, i;
+    VALUE ary;
+
+    if (__iconv_get_list(&list, &sz)) return Qnil;
+
+    ary = rb_ary_new2(sz);
+    for (i = 0; i < sz; i++) {
+        rb_ary_push(ary, rb_str_new2(list[i]));
+    }
+    __iconv_free_list(list, sz);
+
+    if (!rb_block_given_p())
+        return ary;
+    for (i = 0; i < RARRAY_LEN(ary); i++) {
+        rb_yield(RARRAY_PTR(ary)[i]);
+    }
+#else
+    rb_notimplement();
+#endif
+    return Qnil;
+}
+
+/*
+ * Document-method: close
+ *
+ * Finishes conversion.
+ *
+ * After calling this, calling Iconv#iconv will cause an exception, but
+ * multiple calls of #close are guaranteed to end successfully.
+ *
+ * Returns a string containing the byte sequence to change the output buffer to
+ * its initial shift state.
+ */
+static VALUE
+iconv_init_state(VALUE self)
+{
+    iconv_t cd = VALUE2ICONV((VALUE)DATA_PTR(self));
+    /* detach first: the object counts as closed from here on */
+    DATA_PTR(self) = NULL;
+    return iconv_convert(cd, Qnil, 0, 0, ENCODING_GET(self), NULL);
+}
+
+/*
+ * Implementation of Iconv#close.  Returns nil when the converter holds
+ * no descriptor; otherwise emits the reset sequence and closes the
+ * descriptor, even if emitting the sequence raises.
+ */
+static VALUE
+iconv_finish(VALUE self)
+{
+    VALUE cd = check_iconv(self);
+
+    if (!cd) return Qnil;
+    return rb_ensure(iconv_init_state, self, iconv_free, cd);
+}
+
+/*
+ * Document-method: Iconv#iconv
+ * call-seq: iconv(str, start=0, length=-1)
+ *
+ * Converts string and returns the result.
+ * * If +str+ is a String, converts str[start, length] and returns the converted string.
+ * * If +str+ is +nil+, places converter itself into initial shift state and
+ *   just returns a string containing the byte sequence to change the output
+ *   buffer to its initial shift state.  +start+ and +length+ are ignored.
+ * * Otherwise, raises an exception.
+ *
+ * === Parameters
+ *
+ * str:: string to be converted, or nil
+ * start:: starting offset
+ * length:: conversion length; nil or -1 means whole the string from start
+ *
+ * === Exceptions
+ *
+ * * IconvIllegalSequence
+ * * IconvInvalidCharacter
+ * * IconvOutOfRange
+ *
+ * === Examples
+ *
+ * See the Iconv documentation.
+ */
+static VALUE
+iconv_iconv(int argc, VALUE *argv, VALUE self)
+{
+    VALUE str, n1, n2;
+    VALUE cd = check_iconv(self);
+    long start = 0, length = 0, slen = 0;
+
+    rb_scan_args(argc, argv, "12", &str, &n1, &n2);
+    if (!NIL_P(str)) {
+        VALUE n = rb_str_length(StringValue(str));
+        slen = NUM2LONG(n);
+    }
+    if (argc != 2 || !RTEST(rb_range_beg_len(n1, &start, &length, slen, 0))) {
+        if (NIL_P(n1) || ((start = NUM2LONG(n1)) < 0 ? (start += slen) >= 0 : start < slen)) {
+            length = NIL_P(n2) ? -1 : NUM2LONG(n2);
+        }
+    }
+    /* a start before the beginning selects nothing (length is still 0
+       here); never pass a negative offset on to iconv_convert() */
+    if (start < 0)
+        start = 0;
+    /* character offsets become byte offsets; a nil str has no bytes to
+       index and must not be dereferenced */
+    if (!NIL_P(str) && (start > 0 || length > 0)) {
+        rb_encoding *enc = rb_enc_get(str);
+        const char *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
+        const char *ps = s;
+        if (start > 0) {
+            start = (ps = rb_enc_nth(s, e, start, enc)) - s;
+        }
+        if (length > 0) {
+            length = rb_enc_nth(ps, e, length, enc) - ps;
+        }
+    }
+
+    return iconv_convert(VALUE2ICONV(cd), str, start, length, ENCODING_GET(self), NULL);
+}
+
+/*
+ * Document-method: conv
+ * call-seq: conv(str...)
+ *
+ * Equivalent to
+ *
+ *   iconv(nil, str..., nil).join
+ */
+static VALUE
+iconv_conv(int argc, VALUE *argv, VALUE self)
+{
+    iconv_t cd = VALUE2ICONV(check_iconv(self));
+    VALUE str, s;
+    int toidx = ENCODING_GET(self);
+
+    /* the leading reset sequence is the string everything is appended to */
+    str = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
+    if (argc > 0) {
+        do {
+            s = iconv_convert(cd, *argv++, 0, -1, toidx, NULL);
+            if (RSTRING_LEN(s))
+                rb_str_buf_append(str, s);
+        } while (--argc);
+        s = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
+        if (RSTRING_LEN(s))
+            rb_str_buf_append(str, s);
+    }
+
+    return str;
+}
+
+/*
+ * Document-method: trivial?
+ * call-seq: trivial?
+ *
+ * Returns trivial flag.
+ */
+static VALUE
+iconv_trivialp(VALUE self)
+{
+#ifdef ICONV_TRIVIALP
+    int trivial = 0;
+    iconv_ctl(self, ICONV_TRIVIALP, trivial);
+    if (trivial) return Qtrue;
+#else
+    rb_notimplement();
+#endif
+    return Qfalse;
+}
+
+/*
+ * Document-method: transliterate?
+ * call-seq: transliterate?
+ *
+ * Returns transliterate flag.
+ */
+static VALUE
+iconv_get_transliterate(VALUE self)
+{
+#ifdef ICONV_GET_TRANSLITERATE
+    int trans = 0;
+    iconv_ctl(self, ICONV_GET_TRANSLITERATE, trans);
+    if (trans) return Qtrue;
+#else
+    rb_notimplement();
+#endif
+    return Qfalse;
+}
+
+/*
+ * Document-method: transliterate=
+ * call-seq: cd.transliterate = flag
+ *
+ * Sets transliterate flag.
+ */
+static VALUE
+iconv_set_transliterate(VALUE self, VALUE transliterate)
+{
+#ifdef ICONV_SET_TRANSLITERATE
+    /* iconvctl() takes the flag through a pointer to int */
+    int flag = RTEST(transliterate) ? 1 : 0;
+    iconv_ctl(self, ICONV_SET_TRANSLITERATE, flag);
+#else
+    rb_notimplement();
+#endif
+    return self;
+}
+
+/*
+ * Document-method: discard_ilseq?
+ * call-seq: discard_ilseq?
+ *
+ * Tells whether the discard_ilseq flag is currently set.
+ */
+static VALUE
+iconv_get_discard_ilseq(VALUE self)
+{
+    VALUE result = Qfalse;
+#ifdef ICONV_GET_DISCARD_ILSEQ
+    int flag = 0;
+    iconv_ctl(self, ICONV_GET_DISCARD_ILSEQ, flag);
+    if (flag) result = Qtrue;
+#else
+    rb_notimplement();
+#endif
+    return result;
+}
+
+/*
+ * Document-method: discard_ilseq=
+ * call-seq: cd.discard_ilseq = flag
+ *
+ * Turns the discard_ilseq flag on or off.
+ */
+static VALUE
+iconv_set_discard_ilseq(VALUE self, VALUE discard_ilseq)
+{
+#ifdef ICONV_SET_DISCARD_ILSEQ
+    /* iconvctl() takes the flag through a pointer to int */
+    int flag = RTEST(discard_ilseq) ? 1 : 0;
+    iconv_ctl(self, ICONV_SET_DISCARD_ILSEQ, flag);
+#else
+    rb_notimplement();
+#endif
+    return self;
+}
+
+/*
+ * Document-method: ctlmethods
+ * call-seq: Iconv.ctlmethods => array
+ *
+ * Lists, as symbols, the iconvctl()-backed methods this build provides.
+ */
+static VALUE
+iconv_s_ctlmethods(VALUE klass)
+{
+    VALUE names = rb_ary_new();
+#ifdef ICONV_TRIVIALP
+    rb_ary_push(names, ID2SYM(rb_intern("trivial?")));
+#endif
+#ifdef ICONV_GET_TRANSLITERATE
+    rb_ary_push(names, ID2SYM(rb_intern("transliterate?")));
+#endif
+#ifdef ICONV_SET_TRANSLITERATE
+    rb_ary_push(names, ID2SYM(rb_intern("transliterate=")));
+#endif
+#ifdef ICONV_GET_DISCARD_ILSEQ
+    rb_ary_push(names, ID2SYM(rb_intern("discard_ilseq?")));
+#endif
+#ifdef ICONV_SET_DISCARD_ILSEQ
+    rb_ary_push(names, ID2SYM(rb_intern("discard_ilseq=")));
+#endif
+    return names;
+}
+
+/*
+ * Document-class: Iconv::Failure
+ *
+ * Base attributes for Iconv exceptions.
+ */
+
+/*
+ * Document-method: success
+ * call-seq: success
+ *
+ * Returns string(s) translated successfully until the exception occurred.
+ * * In the case of failure occurred within Iconv.iconv, returned
+ *   value is an array of strings translated successfully preceding
+ *   failure and the last element is string on the way.
+ */
+static VALUE
+iconv_failure_success(VALUE self)
+{
+    VALUE converted = rb_attr_get(self, rb_success);
+    return converted;
+}
+
+/*
+ * Document-method: failed
+ * call-seq: failed
+ *
+ * Returns the part of the original input, beginning at the character
+ * that caused the exception.
+ */
+static VALUE
+iconv_failure_failed(VALUE self)
+{
+    VALUE rest = rb_attr_get(self, rb_failed);
+    return rest;
+}
+
+/*
+ * Document-method: inspect
+ * call-seq: inspect
+ *
+ * Returns a description of the form: #<_class_: _success_, _failed_>
+ */
+static VALUE
+iconv_failure_inspect(VALUE self)
+{
+    const char *klass_name = rb_class2name(CLASS_OF(self));
+    VALUE converted = rb_attr_get(self, rb_success);
+    VALUE rest = rb_attr_get(self, rb_failed);
+    VALUE out = rb_str_new2("#<");
+
+    out = rb_str_buf_cat2(out, klass_name);
+    out = rb_str_buf_cat(out, ": ", 2);
+    out = rb_str_buf_append(out, rb_inspect(converted));
+    out = rb_str_buf_cat(out, ", ", 2);
+    out = rb_str_buf_append(out, rb_inspect(rest));
+    out = rb_str_buf_cat(out, ">", 1);
+    return out;
+}
+
+/*
+ * Document-class: Iconv::InvalidEncoding
+ *
+ * Requested coding-system is not available on this system.
+ */
+
+/*
+ * Document-class: Iconv::IllegalSequence
+ *
+ * Input conversion stopped due to an input byte that does not belong to
+ * the input codeset, or the output codeset does not contain the
+ * character.
+ */
+
+/*
+ * Document-class: Iconv::InvalidCharacter
+ *
+ * Input conversion stopped due to an incomplete character or shift
+ * sequence at the end of the input buffer.
+ */
+
+/*
+ * Document-class: Iconv::OutOfRange
+ *
+ * Iconv library internal error.  Must not occur.
+ */
+
+/*
+ * Document-class: Iconv::BrokenLibrary
+ *
+ * Detected a bug of underlying iconv(3) library.
+ * * returns an error without setting errno properly + */ + +void +Init_iconv(void) +{ + VALUE rb_cIconv = rb_define_class("Iconv", rb_cData); + + rb_define_alloc_func(rb_cIconv, iconv_s_allocate); + rb_define_singleton_method(rb_cIconv, "open", iconv_s_open, -1); + rb_define_singleton_method(rb_cIconv, "iconv", iconv_s_iconv, -1); + rb_define_singleton_method(rb_cIconv, "conv", iconv_s_conv, 3); + rb_define_singleton_method(rb_cIconv, "list", iconv_s_list, 0); + rb_define_singleton_method(rb_cIconv, "ctlmethods", iconv_s_ctlmethods, 0); + rb_define_method(rb_cIconv, "initialize", iconv_initialize, -1); + rb_define_method(rb_cIconv, "close", iconv_finish, 0); + rb_define_method(rb_cIconv, "iconv", iconv_iconv, -1); + rb_define_method(rb_cIconv, "conv", iconv_conv, -1); + rb_define_method(rb_cIconv, "trivial?", iconv_trivialp, 0); + rb_define_method(rb_cIconv, "transliterate?", iconv_get_transliterate, 0); + rb_define_method(rb_cIconv, "transliterate=", iconv_set_transliterate, 1); + rb_define_method(rb_cIconv, "discard_ilseq?", iconv_get_discard_ilseq, 0); + rb_define_method(rb_cIconv, "discard_ilseq=", iconv_set_discard_ilseq, 1); + + rb_eIconvFailure = rb_define_module_under(rb_cIconv, "Failure"); + rb_define_method(rb_eIconvFailure, "initialize", iconv_failure_initialize, 3); + rb_define_method(rb_eIconvFailure, "success", iconv_failure_success, 0); + rb_define_method(rb_eIconvFailure, "failed", iconv_failure_failed, 0); + rb_define_method(rb_eIconvFailure, "inspect", iconv_failure_inspect, 0); + + rb_eIconvInvalidEncoding = rb_define_class_under(rb_cIconv, "InvalidEncoding", rb_eArgError); + rb_eIconvIllegalSeq = rb_define_class_under(rb_cIconv, "IllegalSequence", rb_eArgError); + rb_eIconvInvalidChar = rb_define_class_under(rb_cIconv, "InvalidCharacter", rb_eArgError); + rb_eIconvOutOfRange = rb_define_class_under(rb_cIconv, "OutOfRange", rb_eRuntimeError); + rb_eIconvBrokenLibrary = rb_define_class_under(rb_cIconv, "BrokenLibrary", rb_eRuntimeError); + 
rb_include_module(rb_eIconvInvalidEncoding, rb_eIconvFailure); + rb_include_module(rb_eIconvIllegalSeq, rb_eIconvFailure); + rb_include_module(rb_eIconvInvalidChar, rb_eIconvFailure); + rb_include_module(rb_eIconvOutOfRange, rb_eIconvFailure); + rb_include_module(rb_eIconvBrokenLibrary, rb_eIconvFailure); + + rb_success = rb_intern("success"); + rb_failed = rb_intern("failed"); + id_transliterate = rb_intern("transliterate"); + id_discard_ilseq = rb_intern("discard_ilseq"); + + rb_gc_register_address(&charset_map); + charset_map = rb_hash_new(); + rb_define_singleton_method(rb_cIconv, "charset_map", charset_map_get, 0); +} + diff --git a/ext/iconv/mkwrapper.rb b/ext/iconv/mkwrapper.rb new file mode 100644 index 0000000..3471850 --- /dev/null +++ b/ext/iconv/mkwrapper.rb @@ -0,0 +1,53 @@ +#! /usr/bin/ruby +require 'rbconfig' +require 'optparse' + +# http://www.ctan.org/tex-archive/macros/texinfo/texinfo/intl/config.charset +# Fri, 30 May 2003 00:09:00 GMT' + +HEADER = <") + have_header(ioctl_h = "sys/ioctl.h") or ioctl_h = nil + fionread = %w[sys/ioctl.h sys/filio.h sys/socket.h].find do |h| + have_macro("FIONREAD", [h, ioctl_h].compact) + end + if fionread + $defs << "-DFIONREAD_HEADER=\"<#{fionread}>\"" + create_makefile(target) + end +else + if have_func("rb_w32_ioctlsocket", "ruby.h") + have_func("rb_w32_is_socket", "ruby.h") + create_makefile(target) + end +end diff --git a/ext/io/wait/lib/nonblock.rb b/ext/io/wait/lib/nonblock.rb new file mode 100644 index 0000000..2103fdf --- /dev/null +++ b/ext/io/wait/lib/nonblock.rb @@ -0,0 +1,23 @@ +require "fcntl" +class IO + def nonblock? 
+ (fcntl(Fcntl::F_GETFL) & File::NONBLOCK) != 0 + end + + def nonblock=(nb) + f = fcntl(Fcntl::F_GETFL) + if nb + f |= File::NONBLOCK + else + f &= ~File::NONBLOCK + end + fcntl(Fcntl::F_SETFL, f) + end + + def nonblock(nb = true) + nb, self.nonblock = nonblock?, nb + yield + ensure + self.nonblock = nb + end +end if defined?(Fcntl::F_GETFL) diff --git a/ext/io/wait/wait.c b/ext/io/wait/wait.c new file mode 100644 index 0000000..e82b05c --- /dev/null +++ b/ext/io/wait/wait.c @@ -0,0 +1,141 @@ +/********************************************************************** + + io/wait.c - + + $Author: nobu $ + created at: Tue Aug 28 09:08:06 JST 2001 + + All the files in this distribution are covered under the Ruby's + license (see the file COPYING). + +**********************************************************************/ + +#include "ruby.h" +#include "ruby/io.h" + +#include +#if defined(HAVE_SYS_IOCTL_H) +#include +#endif +#if defined(FIONREAD_HEADER) +#include FIONREAD_HEADER +#endif + +#ifdef HAVE_RB_W32_IOCTLSOCKET +#define ioctl ioctlsocket +#define ioctl_arg u_long +#define ioctl_arg2num(i) ULONG2NUM(i) +#else +#define ioctl_arg int +#define ioctl_arg2num(i) INT2NUM(i) +#endif + +#ifdef HAVE_RB_W32_IS_SOCKET +#define FIONREAD_POSSIBLE_P(fd) rb_w32_is_socket(fd) +#else +#define FIONREAD_POSSIBLE_P(fd) ((void)(fd),Qtrue) +#endif + +static VALUE io_ready_p _((VALUE io)); +static VALUE io_wait _((int argc, VALUE *argv, VALUE io)); +void Init_wait _((void)); + +EXTERN struct timeval rb_time_interval _((VALUE time)); + +/* + * call-seq: + * io.ready? -> true, false or nil + * + * Returns non-nil if input available without blocking, or nil. 
+ */ + +static VALUE +io_ready_p(VALUE io) +{ + rb_io_t *fptr; + ioctl_arg n; + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + if (rb_io_read_pending(fptr)) return Qtrue; + if (!FIONREAD_POSSIBLE_P(fptr->fd)) return Qfalse; + if (ioctl(fptr->fd, FIONREAD, &n)) rb_sys_fail(0); + if (n > 0) return ioctl_arg2num(n); + return Qnil; +} + +struct wait_readable_arg { + rb_fdset_t fds; + struct timeval *timeout; +}; + +#ifdef HAVE_RB_FD_INIT +static VALUE +wait_readable(VALUE p) +{ + struct wait_readable_arg *arg = (struct wait_readable_arg *)p; + rb_fdset_t *fds = &arg->fds; + + return (VALUE)rb_thread_select(rb_fd_max(fds), rb_fd_ptr(fds), NULL, NULL, arg->timeout); +} +#endif + +/* + * call-seq: + * io.wait -> IO, true, false or nil + * io.wait(timeout) -> IO, true, false or nil + * + * Waits until input is available or times out and returns self or nil when + * EOF is reached. + */ + +static VALUE +io_wait(int argc, VALUE *argv, VALUE io) +{ + rb_io_t *fptr; + struct wait_readable_arg arg; + int fd, i; + ioctl_arg n; + VALUE timeout; + struct timeval timerec; + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + rb_scan_args(argc, argv, "01", &timeout); + if (NIL_P(timeout)) { + arg.timeout = 0; + } + else { + timerec = rb_time_interval(timeout); + arg.timeout = &timerec; + } + + if (rb_io_read_pending(fptr)) return Qtrue; + if (!FIONREAD_POSSIBLE_P(fptr->fd)) return Qfalse; + fd = fptr->fd; + rb_fd_init(&arg.fds); + rb_fd_set(fd, &arg.fds); +#ifdef HAVE_RB_FD_INIT + i = (int)rb_ensure(wait_readable, (VALUE)&arg, + (VALUE (*)_((VALUE)))rb_fd_term, (VALUE)&arg.fds); +#else + i = rb_thread_select(fd + 1, rb_fd_ptr(&arg.fds), NULL, NULL, arg.timeout); +#endif + if (i < 0) + rb_sys_fail(0); + rb_io_check_closed(fptr); + if (ioctl(fptr->fd, FIONREAD, &n)) rb_sys_fail(0); + if (n > 0) return io; + return Qnil; +} + +/* + * IO wait methods + */ + +void +Init_wait() +{ + rb_define_method(rb_cIO, "ready?", io_ready_p, 0); + rb_define_method(rb_cIO, "wait", 
io_wait, -1); +} diff --git a/ext/mathn/complex/complex.c b/ext/mathn/complex/complex.c new file mode 100644 index 0000000..dce4949 --- /dev/null +++ b/ext/mathn/complex/complex.c @@ -0,0 +1,7 @@ +extern void nucomp_canonicalization(int); + +void +Init_complex(void) +{ + nucomp_canonicalization(1); +} diff --git a/ext/mathn/complex/extconf.rb b/ext/mathn/complex/extconf.rb new file mode 100644 index 0000000..d4d14ff --- /dev/null +++ b/ext/mathn/complex/extconf.rb @@ -0,0 +1,3 @@ +require "mkmf" + +create_makefile "mathn/complex" diff --git a/ext/mathn/rational/extconf.rb b/ext/mathn/rational/extconf.rb new file mode 100644 index 0000000..ba76306 --- /dev/null +++ b/ext/mathn/rational/extconf.rb @@ -0,0 +1,3 @@ +require "mkmf" + +create_makefile "mathn/rational" diff --git a/ext/mathn/rational/rational.c b/ext/mathn/rational/rational.c new file mode 100644 index 0000000..2ac5999 --- /dev/null +++ b/ext/mathn/rational/rational.c @@ -0,0 +1,7 @@ +extern void nurat_canonicalization(int); + +void +Init_rational(void) +{ + nurat_canonicalization(1); +} diff --git a/ext/pty/README b/ext/pty/README new file mode 100644 index 0000000..42c7d4f --- /dev/null +++ b/ext/pty/README @@ -0,0 +1,65 @@ +pty extension version 0.3 by A.ito + +1. Introduction + +This extension module adds ruby a functionality to execute an +arbitrary command through pseudo tty (pty). + +2. Install + +Follow the instruction below. + +(1) Execute + + ruby extconf.rb + + then Makefile is generated. + +(3) Do make; make install. + +3. What you can do + +This extension module defines a module named PTY, which contains +following module fungtions: + + getpty(command) + spawn(command) + + This function reserves a pty, executes command over the pty + and returns an array. The return value is an array with three + elements. The first element in the array is for reading and the + second for writing. The third element is the process ID of the + child process. 
If this function is called with an iterator block, + the array is passed to the block as block parameters, and the + function itself returns nil. + + When the child process is suspended or finished, an exception is + raised. If this function is called with an iterator block, + exception is raised only within the block. Child process + monitor is terminated on block exit. + + protect_signal + reset_signal + + These functions are obsolete in this version of pty. + +4. License + +(C) Copyright 1998 by Akinori Ito. + +This software may be redistributed freely for this purpose, in full +or in part, provided that this entire copyright notice is included +on any copies of this software and applications and derivations thereof. + +This software is provided on an "as is" basis, without warranty of any +kind, either expressed or implied, as to any matter including, but not +limited to warranty of fitness of purpose, or merchantability, or +results obtained from use of this software. + +5. Bug report + +Please feel free to send E-mail to + + aito@ei5sun.yz.yamagata-u.ac.jp + +for any bug report, opinion, contribution, etc. diff --git a/ext/pty/README.expect b/ext/pty/README.expect new file mode 100644 index 0000000..fddbb6f --- /dev/null +++ b/ext/pty/README.expect @@ -0,0 +1,22 @@ + README for expect + by A. Ito, 28 October, 1998 + + Expect library adds IO class a method called expect(), which +does similar act to tcl's expect extension. + +The usage of the method is: + + IO#expect(pattern,timeout=9999999) + +where `pattern' is an instance of String or Regexp and `timeout' +is Fixnum, which can be omitted. + When the method is called without block, it waits until the +input which matches the pattern is obtained from the IO or the time +specified as the timeout passes. When the pattern is obtained from the +IO, the method returns an array. The first element of the array is the +entire string obtained from the IO until the pattern matches. 
The +following elements indicates the specific pattern which matched to the +anchor in the regular expression. If the method ends because of +timeout, it returns nil. + When the method is called with block, the array is passed as +the block parameter. diff --git a/ext/pty/README.expect.ja b/ext/pty/README.expect.ja new file mode 100644 index 0000000..db84695 --- /dev/null +++ b/ext/pty/README.expect.ja @@ -0,0 +1,21 @@ + README for expect + by A. Ito, 28 October, 1998 + + Expecttcl expect +IO + + + + IO#expect(pattern,timeout=9999999) + +pattern String Regexp timeout Fixnum +timeout + +IO pattern + +pattern +2pattern + +nil + + diff --git a/ext/pty/README.ja b/ext/pty/README.ja new file mode 100644 index 0000000..5ae4fb0 --- /dev/null +++ b/ext/pty/README.ja @@ -0,0 +1,89 @@ +pty version 0.3 by A.ito + +1. + +tty (pty) + ruby + +2. + + + +(1) ruby extconf.rb + + Makefile + +(2) make; make install + +3. + +PTY + + + getpty(command) + spawn(command) + + ttytty + 3 + ttyIO + 2IO3 + ID + nil + + + SIGCHLD + + SIGCHLD PTY + (system() IO.popen()) + + protect_signal() + + + SIGCHLD + IO + + + + protect_signal + + + + PTYsystem() IO.popen() + + + PTY.spawn("command_foo") do |r,w| + ... + ... + PTY.protect_signal do + system "some other commands" + end + ... + end + + "some other commands" + + + reset_signal + + PTY + + +4. + + + + + + + + + + + +5. + + + + aito@ei5sun.yz.yamagata-u.ac.jp + + diff --git a/ext/pty/depend b/ext/pty/depend new file mode 100644 index 0000000..2249eb8 --- /dev/null +++ b/ext/pty/depend @@ -0,0 +1 @@ +pty.o: pty.c $(hdrdir)/ruby.h $(topdir)/config.h $(hdrdir)/defines.h $(hdrdir)/io.h diff --git a/ext/pty/expect_sample.rb b/ext/pty/expect_sample.rb new file mode 100644 index 0000000..2a2e29f --- /dev/null +++ b/ext/pty/expect_sample.rb @@ -0,0 +1,48 @@ +# +# sample program of expect.rb +# +# by A. Ito +# +# This program reports the latest version of ruby interpreter +# by connecting to ftp server at ruby-lang.org. 
+# +require 'pty' +require 'expect' + +fnames = [] +PTY.spawn("ftp ftp.ruby-lang.org") do |r_f,w_f,pid| + w_f.sync = true + + $expect_verbose = false + + if !ENV['USER'].nil? + username = ENV['USER'] + elsif !ENV['LOGNAME'].nil? + username = ENV['LOGNAME'] + else + username = 'guest' + end + + r_f.expect(/^(Name).*: |(word):|> /) do + w_f.puts($1 ? "ftp" : $2 ? "#{username}@" : "cd pub/ruby") + end + r_f.expect("> ") do + w_f.print "dir\n" + end + + r_f.expect(/[^\-]> /) do |output| + for x in output[0].split("\n") + if x =~ /(ruby.*?\.tar\.gz)/ then + fnames.push $1 + end + end + end + begin + w_f.print "quit\n" + rescue + end +end + +print "The latest ruby interpreter is " +print fnames.sort.pop +print "\n" diff --git a/ext/pty/extconf.rb b/ext/pty/extconf.rb new file mode 100644 index 0000000..c2fed08 --- /dev/null +++ b/ext/pty/extconf.rb @@ -0,0 +1,16 @@ +require 'mkmf' + +if /mswin|mingw|bccwin/ !~ RUBY_PLATFORM + have_header("sys/stropts.h") + have_func("setresuid") + have_header("libutil.h") + have_header("util.h") # OpenBSD openpty + have_header("pty.h") + have_library("util", "openpty") + if have_func("openpty") or + have_func("_getpty") or + have_func("ptsname") or + have_func("ioctl") + create_makefile('pty') + end +end diff --git a/ext/pty/lib/expect.rb b/ext/pty/lib/expect.rb new file mode 100644 index 0000000..08191b0 --- /dev/null +++ b/ext/pty/lib/expect.rb @@ -0,0 +1,36 @@ +$expect_verbose = false + +class IO + def expect(pat,timeout=9999999) + buf = '' + case pat + when String + e_pat = Regexp.new(Regexp.quote(pat)) + when Regexp + e_pat = pat + end + while true + if !IO.select([self],nil,nil,timeout) or eof? then + result = nil + break + end + c = getc.chr + buf << c + if $expect_verbose + STDOUT.print c + STDOUT.flush + end + if mat=e_pat.match(buf) then + result = [buf,*mat.to_a[1..-1]] + break + end + end + if block_given? 
then + yield result + else + return result + end + nil + end +end + diff --git a/ext/pty/pty.c b/ext/pty/pty.c new file mode 100644 index 0000000..7f5fd7d --- /dev/null +++ b/ext/pty/pty.c @@ -0,0 +1,521 @@ +#include "ruby/config.h" +#ifdef RUBY_EXTCONF_H +#include RUBY_EXTCONF_H +#endif +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_SYS_IOCTL_H +#include +#endif +#ifdef HAVE_LIBUTIL_H +#include +#endif +#ifdef HAVE_UTIL_H +#include +#endif +#ifdef HAVE_PTY_H +#include +#endif +#ifdef HAVE_SYS_WAIT_H +#include +#else +#define WIFSTOPPED(status) (((status) & 0xff) == 0x7f) +#endif +#include + +#include "ruby/ruby.h" +#include "ruby/io.h" +#include "ruby/util.h" + +#include +#ifdef HAVE_SYS_STROPTS_H +#include +#endif + +#ifdef HAVE_UNISTD_H +#include +#endif + +#define DEVICELEN 16 + +#if !defined(HAVE_OPENPTY) +#if defined(__hpux) +static const +char MasterDevice[] = "/dev/ptym/pty%s", + SlaveDevice[] = "/dev/pty/tty%s", + *const deviceNo[] = { + "p0","p1","p2","p3","p4","p5","p6","p7", + "p8","p9","pa","pb","pc","pd","pe","pf", + "q0","q1","q2","q3","q4","q5","q6","q7", + "q8","q9","qa","qb","qc","qd","qe","qf", + "r0","r1","r2","r3","r4","r5","r6","r7", + "r8","r9","ra","rb","rc","rd","re","rf", + "s0","s1","s2","s3","s4","s5","s6","s7", + "s8","s9","sa","sb","sc","sd","se","sf", + "t0","t1","t2","t3","t4","t5","t6","t7", + "t8","t9","ta","tb","tc","td","te","tf", + "u0","u1","u2","u3","u4","u5","u6","u7", + "u8","u9","ua","ub","uc","ud","ue","uf", + "v0","v1","v2","v3","v4","v5","v6","v7", + "v8","v9","va","vb","vc","vd","ve","vf", + "w0","w1","w2","w3","w4","w5","w6","w7", + "w8","w9","wa","wb","wc","wd","we","wf", + 0, + }; +#elif defined(_IBMESA) /* AIX/ESA */ +static const +char MasterDevice[] = "/dev/ptyp%s", + SlaveDevice[] = "/dev/ttyp%s", + *const deviceNo[] = { +"00","01","02","03","04","05","06","07","08","09","0a","0b","0c","0d","0e","0f", 
+"10","11","12","13","14","15","16","17","18","19","1a","1b","1c","1d","1e","1f", +"20","21","22","23","24","25","26","27","28","29","2a","2b","2c","2d","2e","2f", +"30","31","32","33","34","35","36","37","38","39","3a","3b","3c","3d","3e","3f", +"40","41","42","43","44","45","46","47","48","49","4a","4b","4c","4d","4e","4f", +"50","51","52","53","54","55","56","57","58","59","5a","5b","5c","5d","5e","5f", +"60","61","62","63","64","65","66","67","68","69","6a","6b","6c","6d","6e","6f", +"70","71","72","73","74","75","76","77","78","79","7a","7b","7c","7d","7e","7f", +"80","81","82","83","84","85","86","87","88","89","8a","8b","8c","8d","8e","8f", +"90","91","92","93","94","95","96","97","98","99","9a","9b","9c","9d","9e","9f", +"a0","a1","a2","a3","a4","a5","a6","a7","a8","a9","aa","ab","ac","ad","ae","af", +"b0","b1","b2","b3","b4","b5","b6","b7","b8","b9","ba","bb","bc","bd","be","bf", +"c0","c1","c2","c3","c4","c5","c6","c7","c8","c9","ca","cb","cc","cd","ce","cf", +"d0","d1","d2","d3","d4","d5","d6","d7","d8","d9","da","db","dc","dd","de","df", +"e0","e1","e2","e3","e4","e5","e6","e7","e8","e9","ea","eb","ec","ed","ee","ef", +"f0","f1","f2","f3","f4","f5","f6","f7","f8","f9","fa","fb","fc","fd","fe","ff", + }; +#elif !defined(HAVE_PTSNAME) +static const +char MasterDevice[] = "/dev/pty%s", + SlaveDevice[] = "/dev/tty%s", + *const deviceNo[] = { + "p0","p1","p2","p3","p4","p5","p6","p7", + "p8","p9","pa","pb","pc","pd","pe","pf", + "q0","q1","q2","q3","q4","q5","q6","q7", + "q8","q9","qa","qb","qc","qd","qe","qf", + "r0","r1","r2","r3","r4","r5","r6","r7", + "r8","r9","ra","rb","rc","rd","re","rf", + "s0","s1","s2","s3","s4","s5","s6","s7", + "s8","s9","sa","sb","sc","sd","se","sf", + 0, + }; +#endif +#endif /* !defined(HAVE_OPENPTY) */ + +#ifndef HAVE_SETEUID +# ifdef HAVE_SETREUID +# define seteuid(e) setreuid(-1, (e)) +# else /* NOT HAVE_SETREUID */ +# ifdef HAVE_SETRESUID +# define seteuid(e) setresuid(-1, (e), -1) +# else /* NOT HAVE_SETRESUID */ + /* I 
can't set euid. (;_;) */ +# endif /* HAVE_SETRESUID */ +# endif /* HAVE_SETREUID */ +#endif /* NO_SETEUID */ + +static VALUE eChildExited; + +static VALUE +echild_status(VALUE self) +{ + return rb_ivar_get(self, rb_intern("status")); +} + +struct pty_info { + int fd; + rb_pid_t child_pid; + VALUE thread; +}; + +static void +raise_from_wait(const char *state, const struct pty_info *info) +{ + char buf[1024]; + VALUE exc; + + snprintf(buf, sizeof(buf), "pty - %s: %ld", state, (long)info->child_pid); + exc = rb_exc_new2(eChildExited, buf); + rb_iv_set(exc, "status", rb_last_status_get()); + rb_funcall(info->thread, rb_intern("raise"), 1, exc); +} + +static VALUE +pty_syswait(void *arg) +{ + const struct pty_info *const info = arg; + rb_pid_t cpid; + int status; + + for (;;) { + cpid = rb_waitpid(info->child_pid, &status, WUNTRACED); + if (cpid == -1) return Qnil; + +#if defined(WIFSTOPPED) +#elif defined(IF_STOPPED) +#define WIFSTOPPED(status) IF_STOPPED(status) +#else +---->> Either IF_STOPPED or WIFSTOPPED is needed <<---- +#endif /* WIFSTOPPED | IF_STOPPED */ + if (WIFSTOPPED(status)) { /* suspend */ + raise_from_wait("stopped", info); + } + else if (kill(info->child_pid, 0) == 0) { + raise_from_wait("changed", info); + } + else { + raise_from_wait("exited", info); + return Qnil; + } + } +} + +static void getDevice(int*, int*, char [DEVICELEN]); + +struct exec_info { + int argc; + VALUE *argv; +}; + +static VALUE +pty_exec(VALUE v) +{ + struct exec_info *arg = (struct exec_info *)v; + return rb_f_exec(arg->argc, arg->argv); +} + +struct child_info { + int master, slave; + char *slavename; + int argc; + VALUE *argv; +}; + +static int +chfunc(void *data) +{ + struct child_info *carg = data; + int master = carg->master; + int slave = carg->slave; + int argc = carg->argc; + VALUE *argv = carg->argv; + + struct exec_info arg; + int status; + + /* + * Set free from process group and controlling terminal + */ +#ifdef HAVE_SETSID + (void) setsid(); +#else /* HAS_SETSID */ 
+# ifdef HAVE_SETPGRP +# ifdef SETGRP_VOID + if (setpgrp() == -1) + perror("setpgrp()"); +# else /* SETGRP_VOID */ + if (setpgrp(0, getpid()) == -1) + rb_sys_fail("setpgrp()"); + { + int i = open("/dev/tty", O_RDONLY); + if (i < 0) rb_sys_fail("/dev/tty"); + if (ioctl(i, TIOCNOTTY, (char *)0)) + perror("ioctl(TIOCNOTTY)"); + close(i); + } +# endif /* SETGRP_VOID */ +# endif /* HAVE_SETPGRP */ +#endif /* HAS_SETSID */ + + /* + * obtain new controlling terminal + */ +#if defined(TIOCSCTTY) + close(master); + (void) ioctl(slave, TIOCSCTTY, (char *)0); + /* errors ignored for sun */ +#else + close(slave); + slave = open(carg->slavename, O_RDWR); + if (slave < 0) { + perror("open: pty slave"); + _exit(1); + } + close(master); +#endif + write(slave, "", 1); + dup2(slave,0); + dup2(slave,1); + dup2(slave,2); + close(slave); +#if defined(HAVE_SETEUID) || defined(HAVE_SETREUID) || defined(HAVE_SETRESUID) + seteuid(getuid()); +#endif + + arg.argc = argc; + arg.argv = argv; + rb_protect(pty_exec, (VALUE)&arg, &status); + sleep(1); + _exit(1); +} + +static void +establishShell(int argc, VALUE *argv, struct pty_info *info, + char SlaveName[DEVICELEN]) +{ + int master,slave; + rb_pid_t pid; + char *p, tmp, *getenv(); + struct passwd *pwent; + VALUE v; + struct child_info carg; + + if (argc == 0) { + const char *shellname; + + if ((p = getenv("SHELL")) != NULL) { + shellname = p; + } + else { + pwent = getpwuid(getuid()); + if (pwent && pwent->pw_shell) + shellname = pwent->pw_shell; + else + shellname = "/bin/sh"; + } + v = rb_str_new2(shellname); + argc = 1; + argv = &v; + } + + getDevice(&master, &slave, SlaveName); + + carg.master = master; + carg.slave = slave; + carg.slavename = SlaveName; + carg.argc = argc; + carg.argv = argv; + pid = rb_fork(0, chfunc, &carg, Qnil); + + info->thread = rb_thread_current(); + if (pid < 0) { + close(master); + close(slave); + rb_sys_fail("fork failed"); + } + + read(master, &tmp, 1); + close(slave); + + info->child_pid = pid; + info->fd = 
master; +} + +static VALUE +pty_finalize_syswait(struct pty_info *info) +{ + rb_thread_kill(info->thread); + rb_funcall(info->thread, rb_intern("value"), 0); + rb_detach_process(info->child_pid); + return Qnil; +} + +static int +get_device_once(int *master, int *slave, char SlaveName[DEVICELEN], int fail) +{ /* returns 0 on success; on failure returns -1, or raises RuntimeError when fail is nonzero */ +#if defined HAVE_OPENPTY +/* + * Use openpty(3) of 4.3BSD Reno and later, + * or the same interface function. + */ + if (openpty(master, slave, SlaveName, + (struct termios *)0, (struct winsize *)0) == -1) { + if (!fail) return -1; + rb_raise(rb_eRuntimeError, "openpty() failed"); + } + + return 0; +#elif defined HAVE__GETPTY + char *name; + + if (!(name = _getpty(master, O_RDWR, 0622, 0))) { + if (!fail) return -1; + rb_raise(rb_eRuntimeError, "_getpty() failed"); + } + + *slave = open(name, O_RDWR); + strlcpy(SlaveName, name, DEVICELEN); /* SlaveName is a pointer parameter; sizeof would give the pointer size */ + + return 0; +#else /* HAVE__GETPTY */ + int i,j; + +#ifdef HAVE_PTSNAME + char *pn; + void (*s)(); + + extern char *ptsname(int); + extern int unlockpt(int); + extern int grantpt(int); + + if((i = open("/dev/ptmx", O_RDWR, 0)) != -1) { + s = signal(SIGCHLD, SIG_DFL); + if(grantpt(i) != -1) { + signal(SIGCHLD, s); + if(unlockpt(i) != -1) { + if((pn = ptsname(i)) != NULL) { + if((j = open(pn, O_RDWR, 0)) != -1) { +#if defined I_PUSH && !defined linux + if(ioctl(j, I_PUSH, "ptem") != -1) { + if(ioctl(j, I_PUSH, "ldterm") != -1) { + ioctl(j, I_PUSH, "ttcompat"); +#endif + *master = i; + *slave = j; + strlcpy(SlaveName, pn, DEVICELEN); /* buffer size, not sizeof the pointer parameter */ + return 0; +#if defined I_PUSH && !defined linux + } + } +#endif + } + } + } + } + close(i); + } + if (fail) rb_raise(rb_eRuntimeError, "can't get Master/Slave device"); /* raise only on the final attempt, as the other branches do */ + return -1; +#else + const char *const *p; + char MasterName[DEVICELEN]; + + for (p = deviceNo; *p != NULL; p++) { + snprintf(MasterName, sizeof MasterName, MasterDevice, *p); + if ((i = open(MasterName,O_RDWR,0)) >= 0) { + *master = i; + snprintf(SlaveName, DEVICELEN, SlaveDevice, *p); /* SlaveName is a pointer here, unlike the MasterName array */ + if ((j = 
open(SlaveName,O_RDWR,0)) >= 0) { + *slave = j; + chown(SlaveName, getuid(), getgid()); + chmod(SlaveName, 0622); + return 0; + } + close(i); + } + } + if (fail) rb_raise(rb_eRuntimeError, "can't get %s", SlaveName); + return -1; +#endif +#endif +} + +static void +getDevice(int *master, int *slave, char SlaveName[DEVICELEN]) +{ + if (get_device_once(master, slave, SlaveName, 0)) { + rb_gc(); + get_device_once(master, slave, SlaveName, 1); + } + return; +} + +/* + * call-seq: + * PTY.spawn(command...) {|r, w, pid| ... } => nil + * PTY.spawn(command...) => r, w, pid + * PTY.getpty(command...) {|r, w, pid| ... } => nil + * PTY.getpty(command...) => r, w, pid + * + * spawns the specified command on a newly allocated pty. + * + * The command's controlling tty is set to the slave device of the pty. + * Also its standard input/output/error is redirected to the slave device. + * + * PTY.spawn returns two IO objects and PID. + * PID is the process ID of the command. + * The two IO objects are connected to the master device of the pty. + * The first IO object is opened as read mode and + * The second is opened as write mode. + * + * If a block is given, two IO objects and PID is yielded. 
+ * + */ +static VALUE +pty_getpty(int argc, VALUE *argv, VALUE self) +{ + VALUE res; + struct pty_info info; + struct pty_info thinfo; + rb_io_t *wfptr,*rfptr; + VALUE rport = rb_obj_alloc(rb_cFile); + VALUE wport = rb_obj_alloc(rb_cFile); + char SlaveName[DEVICELEN]; + + MakeOpenFile(rport, rfptr); + MakeOpenFile(wport, wfptr); + + establishShell(argc, argv, &info, SlaveName); + + rfptr->mode = rb_io_mode_flags("r"); + rfptr->fd = info.fd; + rfptr->pathv = rb_obj_freeze(rb_str_new_cstr(SlaveName)); + + wfptr->mode = rb_io_mode_flags("w") | FMODE_SYNC; + wfptr->fd = dup(info.fd); + if (wfptr->fd == -1) + rb_sys_fail("dup()"); + wfptr->pathv = rfptr->pathv; + + res = rb_ary_new2(3); + rb_ary_store(res,0,(VALUE)rport); + rb_ary_store(res,1,(VALUE)wport); + rb_ary_store(res,2,PIDT2NUM(info.child_pid)); + + thinfo.thread = rb_thread_create(pty_syswait, (void*)&info); + thinfo.child_pid = info.child_pid; + rb_thread_schedule(); + + if (rb_block_given_p()) { + rb_ensure(rb_yield, res, pty_finalize_syswait, (VALUE)&thinfo); + return Qnil; + } + return res; +} + +/* ruby function: protect_signal - obsolete */ +static VALUE +pty_protect(VALUE self) +{ + rb_warn("PTY::protect_signal is no longer needed"); + rb_yield(Qnil); + return self; +} + +/* ruby function: reset_signal - obsolete */ +static VALUE +pty_reset_signal(VALUE self) +{ + rb_warn("PTY::reset_signal is no longer needed"); + return self; +} + +static VALUE cPTY; + +void +Init_pty() +{ + cPTY = rb_define_module("PTY"); + rb_define_module_function(cPTY,"getpty",pty_getpty,-1); + rb_define_module_function(cPTY,"spawn",pty_getpty,-1); + rb_define_module_function(cPTY,"protect_signal",pty_protect,0); + rb_define_module_function(cPTY,"reset_signal",pty_reset_signal,0); + + eChildExited = rb_define_class_under(cPTY,"ChildExited",rb_eRuntimeError); + rb_define_method(eChildExited,"status",echild_status,0); +} diff --git a/ext/pty/script.rb b/ext/pty/script.rb new file mode 100644 index 0000000..dbb9331 --- /dev/null +++ 
b/ext/pty/script.rb @@ -0,0 +1,37 @@ +require 'pty' + +if ARGV.size == 0 then + ofile = "typescript" +else + ofile = ARGV[0] +end + +logfile = File.open(ofile,"a") + +system "stty -echo raw lnext ^_" + +PTY.spawn("/bin/csh") do |r_pty,w_pty,pid| + + Thread.new do + while true + w_pty.print STDIN.getc.chr + w_pty.flush + end + end + + begin + while true + c = r_pty.sysread(512) + break if c.nil? + print c + STDOUT.flush + logfile.print c + end + rescue + # print $@,':',$!,"\n" + logfile.close + end +end + +system "stty echo -raw lnext ^v" + diff --git a/ext/pty/shl.rb b/ext/pty/shl.rb new file mode 100644 index 0000000..cdaf8d7 --- /dev/null +++ b/ext/pty/shl.rb @@ -0,0 +1,92 @@ +# +# old-fashioned 'shl' like program +# by A. Ito +# +# commands: +# c creates new shell +# C-z suspends shell +# p lists all shell +# 0,1,... choose shell +# q quit + +require 'pty' + +$shells = [] +$n_shells = 0 + +$r_pty = nil +$w_pty = nil + +def writer + system "stty -echo raw" + begin + while true + c = STDIN.getc + if c == 26 then # C-z + $reader.raise(nil) + return 'Suspend' + end + $w_pty.print c.chr + $w_pty.flush + end + rescue + $reader.raise(nil) + return 'Exit' + ensure + system "stty echo -raw" + end +end + +$reader = Thread.new { + while true + begin + next if $r_pty.nil? + c = $r_pty.getc + if c.nil? then + Thread.stop + end + print c.chr + STDOUT.flush + rescue + Thread.stop + end + end +} + +# $reader.raise(nil) + + +while true + print ">> " + STDOUT.flush + case gets + when /^c/i + $shells[$n_shells] = PTY.spawn("/bin/csh") + $r_pty,$w_pty = $shells[$n_shells] + $n_shells += 1 + $reader.run + if writer == 'Exit' + $n_shells -= 1 + $shells[$n_shells] = nil + end + when /^p/i + for i in 0..$n_shells + unless $shells[i].nil? + print i,"\n" + end + end + when /^([0-9]+)/ + n = $1.to_i + if $shells[n].nil? 
+ print "\##{i} doesn't exist\n" + else + $r_pty,$w_pty = $shells[n] + $reader.run + if writer == 'Exit' then + $shells[n] = nil + end + end + when /^q/i + exit + end +end diff --git a/ext/purelib.rb b/ext/purelib.rb new file mode 100644 index 0000000..dbe514c --- /dev/null +++ b/ext/purelib.rb @@ -0,0 +1,10 @@ +nul = nil +$:.each_with_index {|path, index| + if /\A(?:\.\/)*-\z/ =~ path + nul = index + break + end +} +if nul + $:[nul..-1] = ["."] +end diff --git a/ext/racc/cparse/README b/ext/racc/cparse/README new file mode 100644 index 0000000..80b4dce --- /dev/null +++ b/ext/racc/cparse/README @@ -0,0 +1,10 @@ +Racc Runtime README +=================== + +This directory contains a runtime library of +Racc parser generator. If you want to generate +your own parser, you must get Racc full package. +Get it from: + + http://raa.ruby-lang.org/list.rhtml?name=racc + diff --git a/ext/racc/cparse/cparse.c b/ext/racc/cparse/cparse.c new file mode 100644 index 0000000..d085158 --- /dev/null +++ b/ext/racc/cparse/cparse.c @@ -0,0 +1,824 @@ +/* + + cparse.c -- Racc Runtime Core + + Copyright (c) 1999-2006 Minero Aoki + + This library is free software. + You can distribute/modify this program under the same terms of ruby. 
+ + $originalId: cparse.c,v 1.8 2006/07/06 11:39:46 aamine Exp $ + +*/ + +#include "ruby/ruby.h" + +/* ----------------------------------------------------------------------- + Important Constants +----------------------------------------------------------------------- */ + +#define RACC_VERSION "1.4.5" + +#define DEFAULT_TOKEN -1 +#define ERROR_TOKEN 1 +#define FINAL_TOKEN 0 + +#define vDEFAULT_TOKEN INT2FIX(DEFAULT_TOKEN) +#define vERROR_TOKEN INT2FIX(ERROR_TOKEN) +#define vFINAL_TOKEN INT2FIX(FINAL_TOKEN) + +/* ----------------------------------------------------------------------- + File Local Variables +----------------------------------------------------------------------- */ + +static VALUE RaccBug; +static VALUE CparseParams; + +static ID id_yydebug; +static ID id_nexttoken; +static ID id_onerror; +static ID id_noreduce; +static ID id_errstatus; + +static ID id_d_shift; +static ID id_d_reduce; +static ID id_d_accept; +static ID id_d_read_token; +static ID id_d_next_state; +static ID id_d_e_pop; + +/* ----------------------------------------------------------------------- + Utils +----------------------------------------------------------------------- */ + +/* For backward compatibility */ +#ifndef ID2SYM +# define ID2SYM(i) ULONG2NUM(i) +#endif +#ifndef SYM2ID +# define SYM2ID(v) ((ID)NUM2ULONG(v)) +#endif +#ifndef SYMBOL_P +# define SYMBOL_P(v) FIXNUM_P(v) +#endif +#ifndef LONG2NUM +# define LONG2NUM(i) INT2NUM(i) +#endif + +static ID value_to_id _((VALUE v)); +static inline long num_to_long _((VALUE n)); + +static ID +value_to_id(VALUE v) +{ + if (! SYMBOL_P(v)) { + rb_raise(rb_eTypeError, "not symbol"); + } + return SYM2ID(v); +} + +static inline long +num_to_long(VALUE n) +{ + return NUM2LONG(n); +} + +#define AREF(s, idx) \ + ((0 <= idx && idx < RARRAY_LEN(s)) ? 
RARRAY_PTR(s)[idx] : Qnil) + +/* ----------------------------------------------------------------------- + Parser Stack Interfaces +----------------------------------------------------------------------- */ + +static VALUE get_stack_tail _((VALUE stack, long len)); +static void cut_stack_tail _((VALUE stack, long len)); + +static VALUE +get_stack_tail(VALUE stack, long len) +{ + if (len < 0) return Qnil; /* system error */ + if (len > RARRAY_LEN(stack)) len = RARRAY_LEN(stack); + return rb_ary_new4(len, RARRAY_PTR(stack) + RARRAY_LEN(stack) - len); +} + +static void +cut_stack_tail(VALUE stack, long len) +{ + while (len > 0) { + rb_ary_pop(stack); + len--; + } +} + +#define STACK_INIT_LEN 64 +#define NEW_STACK() rb_ary_new2(STACK_INIT_LEN) +#define PUSH(s, i) rb_ary_store(s, RARRAY_LEN(s), i) +#define POP(s) rb_ary_pop(s) +#define LAST_I(s) \ + ((RARRAY_LEN(s) > 0) ? RARRAY_PTR(s)[RARRAY_LEN(s) - 1] : Qnil) +#define GET_TAIL(s, len) get_stack_tail(s, len) +#define CUT_TAIL(s, len) cut_stack_tail(s, len) + +/* ----------------------------------------------------------------------- + struct cparse_params +----------------------------------------------------------------------- */ + +struct cparse_params { + VALUE value_v; /* VALUE version of this struct */ + + VALUE parser; /* parser object */ + + int lex_is_iterator; + VALUE lexer; /* scanner object */ + ID lexmid; /* name of scanner method (must be an iterator) */ + + /* State transition tables (immutable) + Data structure is from Dragon Book 4.9 */ + /* action table */ + VALUE action_table; + VALUE action_check; + VALUE action_default; + VALUE action_pointer; + /* goto table */ + VALUE goto_table; + VALUE goto_check; + VALUE goto_default; + VALUE goto_pointer; + + long nt_base; /* NonTerminal BASE index */ + VALUE reduce_table; /* reduce data table */ + VALUE token_table; /* token conversion table */ + + /* parser stacks and parameters */ + VALUE state; + long curstate; + VALUE vstack; + VALUE tstack; + VALUE t; + 
long shift_n; + long reduce_n; + long ruleno; + + long errstatus; /* nonzero in error recovering mode */ + long nerr; /* number of error */ + + int use_result_var; + + VALUE retval; /* return value of parser routine */ + long fin; /* parse result status */ +#define CP_FIN_ACCEPT 1 +#define CP_FIN_EOT 2 +#define CP_FIN_CANTPOP 3 + + int debug; /* user level debug */ + int sys_debug; /* system level debug */ + + long i; /* table index */ +}; + +/* ----------------------------------------------------------------------- + Parser Main Routines +----------------------------------------------------------------------- */ + +static VALUE racc_cparse _((VALUE parser, VALUE arg, VALUE sysdebug)); +static VALUE racc_yyparse _((VALUE parser, VALUE lexer, VALUE lexmid, + VALUE arg, VALUE sysdebug)); + +static void call_lexer _((struct cparse_params *v)); +static VALUE lexer_i _((VALUE block_args, VALUE data, VALUE self)); + +static VALUE assert_array _((VALUE a)); +static long assert_integer _((VALUE n)); +static VALUE assert_hash _((VALUE h)); +static VALUE initialize_params _((VALUE vparams, VALUE parser, VALUE arg, + VALUE lexer, VALUE lexmid)); +static void cparse_params_mark _((void *ptr)); + +static void parse_main _((struct cparse_params *v, + VALUE tok, VALUE val, int resume)); +static void extract_user_token _((struct cparse_params *v, + VALUE block_args, VALUE *tok, VALUE *val)); +static void shift _((struct cparse_params* v, long act, VALUE tok, VALUE val)); +static int reduce _((struct cparse_params* v, long act)); +static VALUE reduce0 _((VALUE block_args, VALUE data, VALUE self)); + +#ifdef DEBUG +# define D_puts(msg) if (v->sys_debug) puts(msg) +# define D_printf(fmt,arg) if (v->sys_debug) printf(fmt,arg) +#else +# define D_puts(msg) +# define D_printf(fmt,arg) +#endif + +static VALUE +racc_cparse(VALUE parser, VALUE arg, VALUE sysdebug) +{ + volatile VALUE vparams; + struct cparse_params *v; + + vparams = Data_Make_Struct(CparseParams, struct cparse_params, + 
cparse_params_mark, -1, v); + D_puts("starting cparse"); + v->sys_debug = RTEST(sysdebug); + vparams = initialize_params(vparams, parser, arg, Qnil, Qnil); + v->lex_is_iterator = Qfalse; + parse_main(v, Qnil, Qnil, 0); + + return v->retval; +} + +static VALUE +racc_yyparse(VALUE parser, VALUE lexer, VALUE lexmid, VALUE arg, VALUE sysdebug) +{ + volatile VALUE vparams; + struct cparse_params *v; + + vparams = Data_Make_Struct(CparseParams, struct cparse_params, + cparse_params_mark, -1, v); + v->sys_debug = RTEST(sysdebug); + D_puts("start C yyparse"); + vparams = initialize_params(vparams, parser, arg, lexer, lexmid); + v->lex_is_iterator = Qtrue; + D_puts("params initialized"); + parse_main(v, Qnil, Qnil, 0); + call_lexer(v); + if (!v->fin) { + rb_raise(rb_eArgError, "%s() is finished before EndOfToken", + rb_id2name(v->lexmid)); + } + + return v->retval; +} + +#ifdef HAVE_RB_BLOCK_CALL +static void +call_lexer(struct cparse_params *v) +{ + rb_block_call(v->lexer, v->lexmid, 0, NULL, lexer_i, v->value_v); +} +#else +static VALUE +lexer_iter(VALUE data) +{ + struct cparse_params *v; + + Data_Get_Struct(data, struct cparse_params, v); + rb_funcall(v->lexer, v->lexmid, 0); + return Qnil; +} + +static void +call_lexer(struct cparse_params *v) +{ + rb_iterate(lexer_iter, v->value_v, lexer_i, v->value_v); +} +#endif + +static VALUE +lexer_i(VALUE block_args, VALUE data, VALUE self) +{ + struct cparse_params *v; + VALUE tok, val; + + Data_Get_Struct(data, struct cparse_params, v); + if (v->fin) + rb_raise(rb_eArgError, "extra token after EndOfToken"); + extract_user_token(v, block_args, &tok, &val); + parse_main(v, tok, val, 1); + if (v->fin && v->fin != CP_FIN_ACCEPT) + rb_iter_break(); + return Qnil; +} + +static VALUE +assert_array(VALUE a) +{ + Check_Type(a, T_ARRAY); + return a; +} + +static VALUE +assert_hash(VALUE h) +{ + Check_Type(h, T_HASH); + return h; +} + +static long +assert_integer(VALUE n) +{ + return NUM2LONG(n); +} + +static VALUE 
+initialize_params(VALUE vparams, VALUE parser, VALUE arg, VALUE lexer, VALUE lexmid) +{ + struct cparse_params *v; + + Data_Get_Struct(vparams, struct cparse_params, v); + v->value_v = vparams; + v->parser = parser; + v->lexer = lexer; + if (! NIL_P(lexmid)) + v->lexmid = value_to_id(lexmid); + + v->debug = RTEST(rb_ivar_get(parser, id_yydebug)); + + Check_Type(arg, T_ARRAY); + if (!(13 <= RARRAY_LEN(arg) && RARRAY_LEN(arg) <= 14)) + rb_raise(RaccBug, "[Racc Bug] wrong arg.size %ld", RARRAY_LEN(arg)); + v->action_table = assert_array (RARRAY_PTR(arg)[ 0]); + v->action_check = assert_array (RARRAY_PTR(arg)[ 1]); + v->action_default = assert_array (RARRAY_PTR(arg)[ 2]); + v->action_pointer = assert_array (RARRAY_PTR(arg)[ 3]); + v->goto_table = assert_array (RARRAY_PTR(arg)[ 4]); + v->goto_check = assert_array (RARRAY_PTR(arg)[ 5]); + v->goto_default = assert_array (RARRAY_PTR(arg)[ 6]); + v->goto_pointer = assert_array (RARRAY_PTR(arg)[ 7]); + v->nt_base = assert_integer(RARRAY_PTR(arg)[ 8]); + v->reduce_table = assert_array (RARRAY_PTR(arg)[ 9]); + v->token_table = assert_hash (RARRAY_PTR(arg)[10]); + v->shift_n = assert_integer(RARRAY_PTR(arg)[11]); + v->reduce_n = assert_integer(RARRAY_PTR(arg)[12]); + if (RARRAY_LEN(arg) > 13) { + v->use_result_var = RTEST(RARRAY_PTR(arg)[13]); + } + else { + v->use_result_var = Qtrue; + } + + v->tstack = v->debug ? 
NEW_STACK() : Qnil; + v->vstack = NEW_STACK(); + v->state = NEW_STACK(); + v->curstate = 0; + PUSH(v->state, INT2FIX(0)); + v->t = INT2FIX(FINAL_TOKEN + 1); /* must not init to FINAL_TOKEN */ + v->nerr = 0; + v->errstatus = 0; + rb_ivar_set(parser, id_errstatus, LONG2NUM(v->errstatus)); + + v->retval = Qnil; + v->fin = 0; + + v->lex_is_iterator = Qfalse; + + rb_iv_set(parser, "@vstack", v->vstack); + if (v->debug) { + rb_iv_set(parser, "@tstack", v->tstack); + } + else { + rb_iv_set(parser, "@tstack", Qnil); + } + + return vparams; +} + +static void +cparse_params_mark(void *ptr) +{ + struct cparse_params *v = (struct cparse_params*)ptr; + + rb_gc_mark(v->value_v); + rb_gc_mark(v->parser); + rb_gc_mark(v->lexer); + rb_gc_mark(v->action_table); + rb_gc_mark(v->action_check); + rb_gc_mark(v->action_default); + rb_gc_mark(v->action_pointer); + rb_gc_mark(v->goto_table); + rb_gc_mark(v->goto_check); + rb_gc_mark(v->goto_default); + rb_gc_mark(v->goto_pointer); + rb_gc_mark(v->reduce_table); + rb_gc_mark(v->token_table); + rb_gc_mark(v->state); + rb_gc_mark(v->vstack); + rb_gc_mark(v->tstack); + rb_gc_mark(v->t); + rb_gc_mark(v->retval); +} + +static void +extract_user_token(struct cparse_params *v, VALUE block_args, + VALUE *tok, VALUE *val) +{ + if (NIL_P(block_args)) { + /* EOF */ + *tok = Qfalse; + *val = rb_str_new("$", 1); + return; + } + + if (TYPE(block_args) != T_ARRAY) { + rb_raise(rb_eTypeError, + "%s() %s %s (must be Array[2])", + v->lex_is_iterator ? rb_id2name(v->lexmid) : "next_token", + v->lex_is_iterator ? "yielded" : "returned", + rb_class2name(CLASS_OF(block_args))); + } + if (RARRAY_LEN(block_args) != 2) { + rb_raise(rb_eArgError, + "%s() %s wrong size of array (%ld for 2)", + v->lex_is_iterator ? rb_id2name(v->lexmid) : "next_token", + v->lex_is_iterator ? 
"yielded" : "returned", + RARRAY_LEN(block_args)); + } + *tok = AREF(block_args, 0); + *val = AREF(block_args, 1); +} + +#define SHIFT(v,act,tok,val) shift(v,act,tok,val) +#define REDUCE(v,act) do {\ + switch (reduce(v,act)) { \ + case 0: /* normal */ \ + break; \ + case 1: /* yyerror */ \ + goto user_yyerror; \ + case 2: /* yyaccept */ \ + D_puts("u accept"); \ + goto accept; \ + default: \ + break; \ + } \ +} while (0) + +static void +parse_main(struct cparse_params *v, VALUE tok, VALUE val, int resume) +{ + long i; /* table index */ + long act; /* action type */ + VALUE act_value; /* action type, VALUE version */ + int read_next = 1; /* true if we need to read next token */ + VALUE tmp; + + if (resume) + goto resume; + + while (1) { + D_puts(""); + D_puts("---- enter new loop ----"); + D_puts(""); + + D_printf("(act) k1=%ld\n", v->curstate); + tmp = AREF(v->action_pointer, v->curstate); + if (NIL_P(tmp)) goto notfound; + D_puts("(act) pointer[k1] ok"); + i = NUM2LONG(tmp); + + D_printf("read_next=%d\n", read_next); + if (read_next && (v->t != vFINAL_TOKEN)) { + if (v->lex_is_iterator) { + D_puts("resuming..."); + if (v->fin) rb_raise(rb_eArgError, "token given after EOF"); + v->i = i; /* save i */ + return; + resume: + D_puts("resumed"); + i = v->i; /* load i */ + } + else { + D_puts("next_token"); + tmp = rb_funcall(v->parser, id_nexttoken, 0); + extract_user_token(v, tmp, &tok, &val); + } + /* convert token */ + v->t = rb_hash_aref(v->token_table, tok); + if (NIL_P(v->t)) { + v->t = vERROR_TOKEN; + } + D_printf("(act) t(k2)=%ld\n", NUM2LONG(v->t)); + if (v->debug) { + rb_funcall(v->parser, id_d_read_token, + 3, v->t, tok, val); + } + } + read_next = 0; + + i += NUM2LONG(v->t); + D_printf("(act) i=%ld\n", i); + if (i < 0) goto notfound; + + act_value = AREF(v->action_table, i); + if (NIL_P(act_value)) goto notfound; + act = NUM2LONG(act_value); + D_printf("(act) table[i]=%ld\n", act); + + tmp = AREF(v->action_check, i); + if (NIL_P(tmp)) goto notfound; + if 
(NUM2LONG(tmp) != v->curstate) goto notfound; + D_printf("(act) check[i]=%ld\n", NUM2LONG(tmp)); + + D_puts("(act) found"); + act_fixed: + D_printf("act=%ld\n", act); + goto handle_act; + + notfound: + D_puts("(act) not found: use default"); + act_value = AREF(v->action_default, v->curstate); + act = NUM2LONG(act_value); + goto act_fixed; + + + handle_act: + if (act > 0 && act < v->shift_n) { + D_puts("shift"); + if (v->errstatus > 0) { + v->errstatus--; + rb_ivar_set(v->parser, id_errstatus, LONG2NUM(v->errstatus)); + } + SHIFT(v, act, v->t, val); + read_next = 1; + } + else if (act < 0 && act > -(v->reduce_n)) { + D_puts("reduce"); + REDUCE(v, act); + } + else if (act == -(v->reduce_n)) { + goto error; + error_recovered: + ; /* goto label requires stmt */ + } + else if (act == v->shift_n) { + D_puts("accept"); + goto accept; + } + else { + rb_raise(RaccBug, "[Racc Bug] unknown act value %ld", act); + } + + if (v->debug) { + rb_funcall(v->parser, id_d_next_state, + 2, LONG2NUM(v->curstate), v->state); + } + } + /* not reach */ + + + accept: + if (v->debug) rb_funcall(v->parser, id_d_accept, 0); + v->retval = RARRAY_PTR(v->vstack)[0]; + v->fin = CP_FIN_ACCEPT; + return; + + + error: + D_printf("error detected, status=%ld\n", v->errstatus); + if (v->errstatus == 0) { + v->nerr++; + rb_funcall(v->parser, id_onerror, + 3, v->t, val, v->vstack); + } + user_yyerror: + if (v->errstatus == 3) { + if (v->t == vFINAL_TOKEN) { + v->retval = Qfalse; + v->fin = CP_FIN_EOT; + return; + } + read_next = 1; + } + v->errstatus = 3; + rb_ivar_set(v->parser, id_errstatus, LONG2NUM(v->errstatus)); + + /* check if we can shift/reduce error token */ + D_printf("(err) k1=%ld\n", v->curstate); + D_printf("(err) k2=%d (error)\n", ERROR_TOKEN); + while (1) { + tmp = AREF(v->action_pointer, v->curstate); + if (NIL_P(tmp)) goto error_pop; + D_puts("(err) pointer[k1] ok"); + + i = NUM2LONG(tmp) + ERROR_TOKEN; + D_printf("(err) i=%ld\n", i); + if (i < 0) goto error_pop; + + act_value = 
AREF(v->action_table, i); + if (NIL_P(act_value)) { + D_puts("(err) table[i] == nil"); + goto error_pop; + } + act = NUM2LONG(act_value); + D_printf("(err) table[i]=%ld\n", act); + + tmp = AREF(v->action_check, i); + if (NIL_P(tmp)) { + D_puts("(err) check[i] == nil"); + goto error_pop; + } + if (NUM2LONG(tmp) != v->curstate) { + D_puts("(err) check[i] != k1"); + goto error_pop; + } + + D_puts("(err) found: can handle error token"); + break; + + error_pop: + D_puts("(err) act not found: can't handle error token; pop"); + + if (RARRAY_LEN(v->state) <= 1) { + v->retval = Qnil; + v->fin = CP_FIN_CANTPOP; + return; + } + POP(v->state); + POP(v->vstack); + v->curstate = num_to_long(LAST_I(v->state)); + if (v->debug) { + POP(v->tstack); + rb_funcall(v->parser, id_d_e_pop, + 3, v->state, v->tstack, v->vstack); + } + } + + /* shift/reduce error token */ + if (act > 0 && act < v->shift_n) { + D_puts("e shift"); + SHIFT(v, act, ERROR_TOKEN, val); + } + else if (act < 0 && act > -(v->reduce_n)) { + D_puts("e reduce"); + REDUCE(v, act); + } + else if (act == v->shift_n) { + D_puts("e accept"); + goto accept; + } + else { + rb_raise(RaccBug, "[Racc Bug] unknown act value %ld", act); + } + goto error_recovered; +} + +static void +shift(struct cparse_params *v, long act, VALUE tok, VALUE val) +{ + PUSH(v->vstack, val); + if (v->debug) { + PUSH(v->tstack, tok); + rb_funcall(v->parser, id_d_shift, + 3, tok, v->tstack, v->vstack); + } + v->curstate = act; + PUSH(v->state, LONG2NUM(v->curstate)); +} + +static int +reduce(struct cparse_params *v, long act) +{ + VALUE code; + v->ruleno = -act * 3; + code = rb_catch("racc_jump", reduce0, v->value_v); + v->errstatus = num_to_long(rb_ivar_get(v->parser, id_errstatus)); + return NUM2INT(code); +} + +static VALUE +reduce0(VALUE val, VALUE data, VALUE self) +{ + struct cparse_params *v; + VALUE reduce_to, reduce_len, method_id; + long len; + ID mid; + VALUE tmp, tmp_t = Qundef, tmp_v = Qundef; + long i, k1, k2; + VALUE goto_state; + + 
Data_Get_Struct(data, struct cparse_params, v); + reduce_len = RARRAY_PTR(v->reduce_table)[v->ruleno]; + reduce_to = RARRAY_PTR(v->reduce_table)[v->ruleno+1]; + method_id = RARRAY_PTR(v->reduce_table)[v->ruleno+2]; + len = NUM2LONG(reduce_len); + mid = value_to_id(method_id); + + /* call action */ + if (len == 0) { + tmp = Qnil; + if (mid != id_noreduce) + tmp_v = rb_ary_new(); + if (v->debug) + tmp_t = rb_ary_new(); + } + else { + if (mid != id_noreduce) { + tmp_v = GET_TAIL(v->vstack, len); + tmp = RARRAY_PTR(tmp_v)[0]; + } + else { + tmp = RARRAY_PTR(v->vstack)[ RARRAY_LEN(v->vstack) - len ]; + } + CUT_TAIL(v->vstack, len); + if (v->debug) { + tmp_t = GET_TAIL(v->tstack, len); + CUT_TAIL(v->tstack, len); + } + CUT_TAIL(v->state, len); + } + if (mid != id_noreduce) { + if (v->use_result_var) { + tmp = rb_funcall(v->parser, mid, + 3, tmp_v, v->vstack, tmp); + } + else { + tmp = rb_funcall(v->parser, mid, + 2, tmp_v, v->vstack); + } + } + + /* then push result */ + PUSH(v->vstack, tmp); + if (v->debug) { + PUSH(v->tstack, reduce_to); + rb_funcall(v->parser, id_d_reduce, + 4, tmp_t, reduce_to, v->tstack, v->vstack); + } + + /* calculate transition state */ + if (RARRAY_LEN(v->state) == 0) + rb_raise(RaccBug, "state stack unexpectedly empty"); + k2 = num_to_long(LAST_I(v->state)); + k1 = num_to_long(reduce_to) - v->nt_base; + D_printf("(goto) k1=%ld\n", k1); + D_printf("(goto) k2=%ld\n", k2); + + tmp = AREF(v->goto_pointer, k1); + if (NIL_P(tmp)) goto notfound; + + i = NUM2LONG(tmp) + k2; + D_printf("(goto) i=%ld\n", i); + if (i < 0) goto notfound; + + goto_state = AREF(v->goto_table, i); + if (NIL_P(goto_state)) { + D_puts("(goto) table[i] == nil"); + goto notfound; + } + D_printf("(goto) table[i]=%ld (goto_state)\n", NUM2LONG(goto_state)); + + tmp = AREF(v->goto_check, i); + if (NIL_P(tmp)) { + D_puts("(goto) check[i] == nil"); + goto notfound; + } + if (tmp != LONG2NUM(k1)) { + D_puts("(goto) check[i] != table[i]"); + goto notfound; + } + D_printf("(goto) 
check[i]=%ld\n", NUM2LONG(tmp)); + + D_puts("(goto) found"); + transit: + PUSH(v->state, goto_state); + v->curstate = NUM2LONG(goto_state); + return INT2FIX(0); + + notfound: + D_puts("(goto) not found: use default"); + /* overwrite `goto-state' by default value */ + goto_state = AREF(v->goto_default, k1); + goto transit; +} + +/* ----------------------------------------------------------------------- + Ruby Interface +----------------------------------------------------------------------- */ + +void +Init_cparse(void) +{ + VALUE Racc, Parser; + ID id_racc = rb_intern("Racc"); + + if (rb_const_defined(rb_cObject, id_racc)) { + Racc = rb_const_get(rb_cObject, id_racc); + Parser = rb_const_get_at(Racc, rb_intern("Parser")); + } + else { + Racc = rb_define_module("Racc"); + Parser = rb_define_class_under(Racc, "Parser", rb_cObject); + } + rb_define_private_method(Parser, "_racc_do_parse_c", racc_cparse, 2); + rb_define_private_method(Parser, "_racc_yyparse_c", racc_yyparse, 4); + rb_define_const(Parser, "Racc_Runtime_Core_Version_C", + rb_str_new2(RACC_VERSION)); + rb_define_const(Parser, "Racc_Runtime_Core_Id_C", + rb_str_new2("$originalId: cparse.c,v 1.8 2006/07/06 11:39:46 aamine Exp $")); + + CparseParams = rb_define_class_under(Racc, "CparseParams", rb_cObject); + + RaccBug = rb_eRuntimeError; + + id_yydebug = rb_intern("@yydebug"); + id_nexttoken = rb_intern("next_token"); + id_onerror = rb_intern("on_error"); + id_noreduce = rb_intern("_reduce_none"); + id_errstatus = rb_intern("@racc_error_status"); + + id_d_shift = rb_intern("racc_shift"); + id_d_reduce = rb_intern("racc_reduce"); + id_d_accept = rb_intern("racc_accept"); + id_d_read_token = rb_intern("racc_read_token"); + id_d_next_state = rb_intern("racc_next_state"); + id_d_e_pop = rb_intern("racc_e_pop"); +} diff --git a/ext/racc/cparse/depend b/ext/racc/cparse/depend new file mode 100644 index 0000000..7b06a88 --- /dev/null +++ b/ext/racc/cparse/depend @@ -0,0 +1 @@ +cparse.o: cparse.c $(hdrdir)/ruby.h 
$(topdir)/config.h $(hdrdir)/defines.h diff --git a/ext/racc/cparse/extconf.rb b/ext/racc/cparse/extconf.rb new file mode 100644 index 0000000..1089ecb --- /dev/null +++ b/ext/racc/cparse/extconf.rb @@ -0,0 +1,5 @@ +# $Id: extconf.rb 12501 2007-06-10 03:06:15Z nobu $ + +require 'mkmf' +have_func('rb_block_call', 'ruby/ruby.h') +create_makefile 'racc/cparse' diff --git a/ext/readline/README b/ext/readline/README new file mode 100644 index 0000000..57c51b5 --- /dev/null +++ b/ext/readline/README @@ -0,0 +1,10 @@ +The Readline module provides interface for GNU Readline. +This module defines a number of methods to facilitate completion +and accesses input history from the Ruby interpreter. +This module supported Edit Line(libedit) too. +libedit is compatible with GNU Readline. + +GNU Readline:: http://www.gnu.org/directory/readline.html +libedit:: http://www.thrysoee.dk/editline/ + +See RDoc for Readline module. diff --git a/ext/readline/README.ja b/ext/readline/README.ja new file mode 100644 index 0000000..3c6c6f3 --- /dev/null +++ b/ext/readline/README.ja @@ -0,0 +1,422 @@ +GNU Readline +GNU Readline Edit Line(libedit) + + +GNU Readline:: http://www.gnu.org/directory/readline.html +libedit:: http://www.thrysoee.dk/editline/ + +Readline.readline +GNU Readline Emacs + + require "readline" + while buf = Readline.readline("> ", true) + p buf + end + +() + Readline::HISTORY + + require "readline" + while buf = Readline.readline("> ", true) + p Readline::HISTORY.to_a + print("-> ", buf, "\n") + end + + NotImplementedError + + +$SAFE 4 SecurityError + +== Readline + +=== + +readline([prompt, [add_hist]]) -> String | nil + + prompt + + + add_hist true + + EOF(UNIX ^D) + nil + + IOError + 1. tty + 2. 
(isatty(2) errno EBADF ) + + + + + vi Emacs + Emacs + + + ^C ruby + 3 + + * ^CInterrupt: + + require "readline" + + stty_save = `stty -g`.chomp + begin + while buf = Readline.readline + p buf + end + rescue Interrupt + system("stty", stty_save) + exit + end + end + end + + * INT: + + require "readline" + + stty_save = `stty -g`.chomp + trap("INT") { system "stty", stty_save; exit } + + while buf = Readline.readline + p buf + end + + * ^C : + + require "readline" + + trap("INT", "SIG_IGN") + + while buf = Readline.readline + p buf + end + + Readline::HISTORY + + + require "readline" + + while buf = Readline.readline("> ", true) + # p Readline::HISTORY.to_a + Readline::HISTORY.pop if /^\s*$/ =~ buf + + begin + if Readline::HISTORY[Readline::HISTORY.length-2] == buf + Readline::HISTORY.pop + end + rescue IndexError + end + + # p Readline::HISTORY.to_a + print "-> ", buf, "\n" + end + + $SAFE 4 SecurityError + +=== + +Readline.input = input + + Readline.readline File input + + + $SAFE 4 SecurityError + +Readline.output = output + + Readline.readline File + output + + $SAFE 4 SecurityError + +Readline.completion_proc = proc + + Proc proc + proc + 1. call + call ArgumentError + 2. (1) + 3. + + 1:/var/lib /v + proc /v + + Readline.completer_word_break_characters + + proc + + $SAFE 4 SecurityError + +Readline.completion_proc -> proc + + Proc proc + + + $SAFE 4 SecurityError + +Readline.completion_case_fold = bool + + + bool bool + + $SAFE 4 SecurityError + +Readline.completion_case_fold -> bool + + + bool bool + + Readline.completion_case_fold= + + + require "readline" + + Readline.completion_case_fold = "This is a String." + p Readline.completion_case_fold # => "This is a String." + + $SAFE 4 SecurityError + +Readline.vi_editing_mode -> nil + + vi + vi GNU Readline + + NotImplementedError + + $SAFE 4 SecurityError + +Readline.vi_editing_mode? 
-> bool + + vi true false + + + NotImplementedError + + $SAFE 4 SecurityError + +Readline.emacs_editing_mode -> nil + + Emacs + Emacs + Emacs GNU Readline + + NotImplementedError + + $SAFE 4 SecurityError + +Readline.emacs_editing_mode? -> bool + + Emacs true false + + + NotImplementedError + + $SAFE 4 SecurityError + +Readline.completion_append_character = char + + char + " " + + + : + + require "readline" + + Readline.readline("> ", true) + Readline.completion_append_character = " " + + : + + > + "/var/li" + + > /var/li + TAB + + > /var/lib + "b" " " "/usr" + + > /var/lib /usr + + 1 + "string""s" + + require "readline" + + Readline.completion_append_character = "string" + p Readline.completion_append_character # => "s" + + NotImplementedError + + $SAFE 4 SecurityError + +Readline.completion_append_character -> char + + + (" ") + + NotImplementedError + + $SAFE 4 SecurityError + +Readline.basic_word_break_characters = string + + + string + + GNU Readline Bash + " \t\n\"\\'`@$><=;|&{(" () + + NotImplementedError + + $SAFE 4 SecurityError + +Readline.basic_word_break_characters -> string + + + + + NotImplementedError + + $SAFE 4 SecurityError + +Readline.completer_word_break_characters = string + + + string + Readline.basic_word_break_characters= + GNU Readline rl_complete_internal + + GNU Readline + Readline.basic_word_break_characters + + NotImplementedError + + $SAFE 4 SecurityError + +Readline.completer_word_break_characters -> string + + + + + NotImplementedError + + $SAFE 4 SecurityError + +Readline.basic_quote_characters = string + + + string + + NotImplementedError + + $SAFE 4 SecurityError + +Readline.basic_quote_characters -> string + + + + + NotImplementedError + + $SAFE 4 SecurityError + +Readline.completer_quote_characters = string + + + string + Readline.completer_word_break_characters= + + + NotImplementedError + + $SAFE 4 SecurityError + +Readline.completer_quote_characters -> string + + + + + NotImplementedError + + $SAFE 4 SecurityError + 
+Readline.filename_quote_characters = string + + + string + + GNU Readline nil + + NotImplementedError + + $SAFE 4 SecurityError + +Readline.filename_quote_characters -> string + + + + + NotImplementedError + + $SAFE 4 SecurityError + +=== + +HISTORY + + HISTORY + Enumerable extend + + HISTORY[4] 5 + + require "readline" + + Readline::HISTORY.push("a", "b", "c", "d", "e") + p Readline::HISTORY[4] # => "e" + + + * HISTORY.to_s -> "HISTORY" + * HISTORY[index] -> string + * HISTORY[index] = string + * HISTORY.push(string[, string, ...]) -> self + * HISTORY << string -> self + * HISTORY.pop -> string + * HISTORY.shift -> string + * HISTORY.each -> Enumerator + * HISTORY.each { |i| } -> [string] + * HISTORY.length -> Integer + * HISTORY.empty? -> true or false + * HISTORY.delete_at(index) -> string + * HISTORY.clear -> self + + NotImplementedError + + * HISTORY[index] = string + * HISTORY.pop -> string + * HISTORY.shift -> string + * HISTORY.delete_at(index) -> string + * HISTORY.clear -> self + + $SAFE 4 SecurityError + + +FILENAME_COMPLETION_PROC + + call + + Readline.completion_proc= + + +USERNAME_COMPLETION_PROC + + call + + Readline.completion_proc= + + +VERSION + + GNU Readline libedit diff --git a/ext/readline/depend b/ext/readline/depend new file mode 100644 index 0000000..6570c5a --- /dev/null +++ b/ext/readline/depend @@ -0,0 +1 @@ +readline.o: readline.c $(hdrdir)/ruby.h $(topdir)/config.h $(hdrdir)/defines.h diff --git a/ext/readline/extconf.rb b/ext/readline/extconf.rb new file mode 100644 index 0000000..d497612 --- /dev/null +++ b/ext/readline/extconf.rb @@ -0,0 +1,73 @@ +require "mkmf" + +$readline_headers = ["stdio.h"] + +def have_readline_header(header) + if have_header(header) + $readline_headers.push(header) + return true + else + return false + end +end + +def have_readline_var(var) + return have_var(var, $readline_headers) +end + +def have_readline_func(func) + return have_func(func, $readline_headers) +end + +dir_config('curses') 
+dir_config('ncurses') +dir_config('termcap') +dir_config("readline") +enable_libedit = enable_config("libedit") +have_library("user32", nil) if /cygwin/ === RUBY_PLATFORM +have_library("ncurses", "tgetnum") || + have_library("termcap", "tgetnum") || + have_library("curses", "tgetnum") + +if enable_libedit + unless (have_readline_header("editline/readline.h") || + have_readline_header("readline/readline.h")) && + have_library("edit", "readline") + exit + end +else + unless ((have_readline_header("readline/readline.h") && + have_readline_header("readline/history.h")) && + (have_library("readline", "readline") || + have_library("edit", "readline"))) || + (have_readline_header("editline/readline.h") && + have_library("edit", "readline")) + exit + end +end + +have_readline_func("rl_filename_completion_function") +have_readline_func("rl_username_completion_function") +have_readline_func("rl_completion_matches") +have_readline_var("rl_deprep_term_function") +have_readline_var("rl_completion_append_character") +have_readline_var("rl_basic_word_break_characters") +have_readline_var("rl_completer_word_break_characters") +have_readline_var("rl_basic_quote_characters") +have_readline_var("rl_completer_quote_characters") +have_readline_var("rl_filename_quote_characters") +have_readline_var("rl_attempted_completion_over") +have_readline_var("rl_library_version") +have_readline_var("rl_editing_mode") +# workaround for native windows. 
+/mswin|bccwin|mingw/ !~ RUBY_PLATFORM && have_readline_var("rl_event_hook") +/mswin|bccwin|mingw/ !~ RUBY_PLATFORM && have_readline_var("rl_catch_sigwinch") +/mswin|bccwin|mingw/ !~ RUBY_PLATFORM && have_readline_var("rl_catch_signals") +have_readline_func("rl_cleanup_after_signal") +have_readline_func("rl_clear_signals") +have_readline_func("rl_vi_editing_mode") +have_readline_func("rl_emacs_editing_mode") +have_readline_func("replace_history_entry") +have_readline_func("remove_history") +have_readline_func("clear_history") +create_makefile("readline") diff --git a/ext/readline/readline.c b/ext/readline/readline.c new file mode 100644 index 0000000..796b7f1 --- /dev/null +++ b/ext/readline/readline.c @@ -0,0 +1,1320 @@ +/************************************************ + + readline.c - GNU Readline module + + $Author: yugui $ + created at: Wed Jan 20 13:59:32 JST 1999 + + Copyright (C) 1997-2008 Shugo Maeda + Copyright (C) 2008 TAKAO Kouji + + $Id: readline.c 25540 2009-10-28 14:15:47Z yugui $ + + Contact: + - TAKAO Kouji (current maintainer) + +************************************************/ + +#ifdef RUBY_EXTCONF_H +#include RUBY_EXTCONF_H +#endif + +#include "ruby/config.h" +#include +#include +#include +#ifdef HAVE_READLINE_READLINE_H +#include +#endif +#ifdef HAVE_READLINE_HISTORY_H +#include +#endif +#ifdef HAVE_EDITLINE_READLINE_H +#include +#endif + +#include "ruby/ruby.h" +#include "ruby/io.h" + +#ifdef HAVE_UNISTD_H +#include +#endif + +static VALUE mReadline; + +#define EDIT_LINE_LIBRARY_VERSION "EditLine wrapper" + +#define COMPLETION_PROC "completion_proc" +#define COMPLETION_CASE_FOLD "completion_case_fold" +static ID completion_proc, completion_case_fold; + +#ifndef HAVE_RL_FILENAME_COMPLETION_FUNCTION +# define rl_filename_completion_function filename_completion_function +#endif +#ifndef HAVE_RL_USERNAME_COMPLETION_FUNCTION +# define rl_username_completion_function username_completion_function +#endif +#ifndef HAVE_RL_COMPLETION_MATCHES +# 
define rl_completion_matches completion_matches +#endif + +static int (*history_get_offset_func)(int); + +static char **readline_attempted_completion_function(const char *text, + int start, int end); + +#define OutputStringValue(str) do {\ + SafeStringValue(str);\ + str = rb_str_conv_enc(str, rb_enc_get(str), rb_locale_encoding());\ +} while (0)\ + +#ifdef HAVE_RL_EVENT_HOOK +#define BUSY_WAIT 0 + +/* + * Document-class: Readline + * + * The Readline module provides interface for GNU Readline. + * This module defines a number of methods to facilitate completion + * and accesses input history from the Ruby interpreter. + * This module supported Edit Line(libedit) too. + * libedit is compatible with GNU Readline. + * + * GNU Readline:: http://www.gnu.org/directory/readline.html + * libedit:: http://www.thrysoee.dk/editline/ + * + * Reads one inputted line with line edit by Readline.readline method. + * At this time, the facilitatation completion and the key + * bind like Emacs can be operated like GNU Readline. + * + * require "readline" + * while buf = Readline.readline("> ", true) + * p buf + * end + * + * The content that the user input can be recorded to the history. + * The history can be accessed by Readline::HISTORY constant. + * + * require "readline" + * while buf = Readline.readline("> ", true) + * p Readline::HISTORY.to_a + * print("-> ", buf, "\n") + * end + * + * Most of methods raise SecurityError exception if $SAFE is 4. + * + * Documented by TAKAO Kouji . + */ + +static int readline_event(void); +static int +readline_event(void) +{ +#if BUSY_WAIT + rb_thread_schedule(); +#else + fd_set rset; + + FD_ZERO(&rset); + FD_SET(fileno(rl_instream), &rset); + rb_thread_select(fileno(rl_instream) + 1, &rset, NULL, NULL, NULL); + return 0; +#endif +} +#endif + +/* + * call-seq: + * Readline.readline(prompt = "", add_hist = false) -> string or nil + * + * Shows the +prompt+ and reads the inputted line with line editing. 
+ * The inputted line is added to the history if +add_hist+ is true. + * + * Returns nil when the inputted line is empty and user inputs EOF + * (Presses ^D on UNIX). + * + * Raises IOError exception if below conditions are satisfied. + * 1. stdin is not tty. + * 2. stdin was closed. (errno is EBADF after called isatty(2).) + * + * This method supports thread. Switchs the thread context when waits + * inputting line. + * + * Supports line edit when inputs line. Provides VI and Emacs editing mode. + * Default is Emacs editing mode. + * + * NOTE: Terminates ruby interpreter and does not return the terminal + * status after user pressed '^C' when wait inputting line. + * Give 3 examples that avoid it. + * + * * Catches the Interrupt exception by pressed ^C after returns + * terminal status: + * + * require "readline" + * + * stty_save = `stty -g`.chomp + * begin + * while buf = Readline.readline + * p buf + * end + * rescue Interrupt + * system("stty", stty_save) + * exit + * end + * end + * end + * + * * Catches the INT signal by pressed ^C after returns terminal + * status: + * + * require "readline" + * + * stty_save = `stty -g`.chomp + * trap("INT") { system "stty", stty_save; exit } + * + * while buf = Readline.readline + * p buf + * end + * + * * Ignores pressing ^C: + * + * require "readline" + * + * trap("INT", "SIG_IGN") + * + * while buf = Readline.readline + * p buf + * end + * + * Can make as follows with Readline::HISTORY constant. + * It does not record to the history if the inputted line is empty or + * the same it as last one. + * + * require "readline" + * + * while buf = Readline.readline("> ", true) + * # p Readline::HISTORY.to_a + * Readline::HISTORY.pop if /^\s*$/ =~ buf + * + * begin + * if Readline::HISTORY[Readline::HISTORY.length-2] == buf + * Readline::HISTORY.pop + * end + * rescue IndexError + * end + * + * # p Readline::HISTORY.to_a + * print "-> ", buf, "\n" + * end + * + * Raises SecurityError exception if $SAFE is 4. 
+ */ +static VALUE +readline_readline(int argc, VALUE *argv, VALUE self) +{ + VALUE tmp, add_hist, result; + char *prompt = NULL; + char *buff; + int status; + + rb_secure(4); + if (rb_scan_args(argc, argv, "02", &tmp, &add_hist) > 0) { + OutputStringValue(tmp); + prompt = RSTRING_PTR(tmp); + } + + if (!isatty(0) && errno == EBADF) rb_raise(rb_eIOError, "closed stdin"); + +#ifdef _WIN32 + rl_prep_terminal(1); +#endif + buff = (char*)rb_protect((VALUE(*)_((VALUE)))readline, (VALUE)prompt, + &status); + if (status) { +#if defined HAVE_RL_CLEANUP_AFTER_SIGNAL + /* restore terminal mode and signal handler*/ + rl_cleanup_after_signal(); +#elif defined HAVE_RL_DEPREP_TERM_FUNCTION + /* restore terminal mode */ + if (rl_deprep_term_function != NULL) /* NULL in libedit. [ruby-dev:29116] */ + (*rl_deprep_term_function)(); + else +#else + rl_deprep_terminal(); +#endif + rb_jump_tag(status); + } + + if (RTEST(add_hist) && buff) { + add_history(buff); + } + if (buff) { + result = rb_locale_str_new_cstr(buff); + } + else + result = Qnil; + if (buff) free(buff); + return result; +} + +/* + * call-seq: + * Readline.input = input + * + * Specifies a File object +input+ that is input stream for + * Readline.readline method. + * + * Raises SecurityError exception if $SAFE is 4. + */ +static VALUE +readline_s_set_input(VALUE self, VALUE input) +{ + rb_io_t *ifp; + + rb_secure(4); + Check_Type(input, T_FILE); + GetOpenFile(input, ifp); + rl_instream = rb_io_stdio_file(ifp); + return input; +} + +/* + * call-seq: + * Readline.output = output + * + * Specifies a File object +output+ that is output stream for + * Readline.readline method. + * + * Raises SecurityError exception if $SAFE is 4. 
+ */ +static VALUE +readline_s_set_output(VALUE self, VALUE output) +{ + rb_io_t *ofp; + + rb_secure(4); + Check_Type(output, T_FILE); + GetOpenFile(output, ofp); + rl_outstream = rb_io_stdio_file(ofp); + return output; +} + +/* + * call-seq: + * Readline.completion_proc = proc + * + * Specifies a Proc object +proc+ to determine completion behavior. It + * should take input-string, and return an array of completion + * candidates. + * + * Raises ArgumentError exception if +proc+ does not respond to call method. + * + * Raises SecurityError exception if $SAFE is 4. + */ +static VALUE +readline_s_set_completion_proc(VALUE self, VALUE proc) +{ + rb_secure(4); + if (!rb_respond_to(proc, rb_intern("call"))) + rb_raise(rb_eArgError, "argument must respond to `call'"); + return rb_ivar_set(mReadline, completion_proc, proc); +} + +/* + * call-seq: + * Readline.completion_proc -> proc + * + * Returns the completion Proc object. + * + * Raises SecurityError exception if $SAFE is 4. + */ +static VALUE +readline_s_get_completion_proc(VALUE self) +{ + rb_secure(4); + return rb_attr_get(mReadline, completion_proc); +} + +/* + * call-seq: + * Readline.completion_case_fold = bool + * + * Sets whether or not to ignore case on completion. + * + * Raises SecurityError exception if $SAFE is 4. + */ +static VALUE +readline_s_set_completion_case_fold(VALUE self, VALUE val) +{ + rb_secure(4); + return rb_ivar_set(mReadline, completion_case_fold, val); +} + +/* + * call-seq: + * Readline.completion_case_fold -> bool + * + * Returns true if completion ignores case. If no, returns false. + * + * NOTE: Returns the same object that is specified by + * Readline.completion_case_fold= method. + * + * require "readline" + * + * Readline.completion_case_fold = "This is a String." + * p Readline.completion_case_fold # => "This is a String." + * + * Raises SecurityError exception if $SAFE is 4. 
+ */ +static VALUE +readline_s_get_completion_case_fold(VALUE self) +{ + rb_secure(4); + return rb_attr_get(mReadline, completion_case_fold); +} + +static char ** +readline_attempted_completion_function(const char *text, int start, int end) +{ + VALUE proc, ary, temp; + char **result; + int case_fold; + int i, matches; + + proc = rb_attr_get(mReadline, completion_proc); + if (NIL_P(proc)) + return NULL; +#ifdef HAVE_RL_ATTEMPTED_COMPLETION_OVER + rl_attempted_completion_over = 1; +#endif + case_fold = RTEST(rb_attr_get(mReadline, completion_case_fold)); + ary = rb_funcall(proc, rb_intern("call"), 1, rb_locale_str_new_cstr(text)); + if (TYPE(ary) != T_ARRAY) + ary = rb_Array(ary); + matches = RARRAY_LEN(ary); + if (matches == 0) + return NULL; + result = ALLOC_N(char *, matches + 2); + for (i = 0; i < matches; i++) { + temp = rb_obj_as_string(RARRAY_PTR(ary)[i]); + result[i + 1] = ALLOC_N(char, RSTRING_LEN(temp) + 1); + strcpy(result[i + 1], RSTRING_PTR(temp)); + } + result[matches + 1] = NULL; + + if (matches == 1) { + result[0] = strdup(result[1]); + } + else { + register int i = 1; + int low = 100000; + + while (i < matches) { + register int c1, c2, si; + + if (case_fold) { + for (si = 0; + (c1 = TOLOWER(result[i][si])) && + (c2 = TOLOWER(result[i + 1][si])); + si++) + if (c1 != c2) break; + } else { + for (si = 0; + (c1 = result[i][si]) && + (c2 = result[i + 1][si]); + si++) + if (c1 != c2) break; + } + + if (low > si) low = si; + i++; + } + result[0] = ALLOC_N(char, low + 1); + strncpy(result[0], result[1], low); + result[0][low] = '\0'; + } + + return result; +} + +/* + * call-seq: + * Readline.vi_editing_mode -> nil + * + * Specifies VI editing mode. See the manual of GNU Readline for + * details of VI editing mode. + * + * Raises NotImplementedError if the using readline library does not support. + * + * Raises SecurityError exception if $SAFE is 4. 
+ */ +static VALUE +readline_s_vi_editing_mode(VALUE self) +{ +#ifdef HAVE_RL_VI_EDITING_MODE + rb_secure(4); + rl_vi_editing_mode(1,0); + return Qnil; +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif /* HAVE_RL_VI_EDITING_MODE */ +} + +/* + * call-seq: + * Readline.vi_editing_mode? -> bool + * + * Returns true if vi mode is active. Returns false if not. + * + * Raises NotImplementedError if the using readline library does not support. + * + * Raises SecurityError exception if $SAFE is 4. + */ +static VALUE +readline_s_vi_editing_mode_p(VALUE self) +{ +#ifdef HAVE_RL_EDITING_MODE + rb_secure(4); + return rl_editing_mode == 0 ? Qtrue : Qfalse; +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif /* HAVE_RL_EDITING_MODE */ +} + +/* + * call-seq: + * Readline.emacs_editing_mode -> nil + * + * Specifies Emacs editing mode. The default is this mode. See the + * manual of GNU Readline for details of Emacs editing mode. + * + * Raises NotImplementedError if the using readline library does not support. + * + * Raises SecurityError exception if $SAFE is 4. + */ +static VALUE +readline_s_emacs_editing_mode(VALUE self) +{ +#ifdef HAVE_RL_EMACS_EDITING_MODE + rb_secure(4); + rl_emacs_editing_mode(1,0); + return Qnil; +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif /* HAVE_RL_EMACS_EDITING_MODE */ +} + +/* + * call-seq: + * Readline.emacs_editing_mode? -> bool + * + * Returns true if emacs mode is active. Returns false if not. + * + * Raises NotImplementedError if the using readline library does not support. + * + * Raises SecurityError exception if $SAFE is 4. + */ +static VALUE +readline_s_emacs_editing_mode_p(VALUE self) +{ +#ifdef HAVE_RL_EDITING_MODE + rb_secure(4); + return rl_editing_mode == 1 ? 
Qtrue : Qfalse; +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif /* HAVE_RL_EDITING_MODE */ +} + +/* + * call-seq: + * Readline.completion_append_character = char + * + * Specifies a character to be appended on completion. + * Nothing will be appended if an empty string ("") or nil is + * specified. + * + * For example: + * require "readline" + * + * Readline.readline("> ", true) + * Readline.completion_append_character = " " + * + * Result: + * > + * Input "/var/li". + * + * > /var/li + * Press TAB key. + * + * > /var/lib + * Completes "b" and appends " ". So, you can continuously input "/usr". + * + * > /var/lib /usr + * + * NOTE: Only one character can be specified. When "string" is + * specified, sets only "s" that is the first. + * + * require "readline" + * + * Readline.completion_append_character = "string" + * p Readline.completion_append_character # => "s" + * + * Raises NotImplementedError if the using readline library does not support. + * + * Raises SecurityError exception if $SAFE is 4. + */ +static VALUE +readline_s_set_completion_append_character(VALUE self, VALUE str) +{ +#ifdef HAVE_RL_COMPLETION_APPEND_CHARACTER + rb_secure(4); + if (NIL_P(str)) { + rl_completion_append_character = '\0'; + } + else { + OutputStringValue(str); + if (RSTRING_LEN(str) == 0) { + rl_completion_append_character = '\0'; + } else { + rl_completion_append_character = RSTRING_PTR(str)[0]; + } + } + return self; +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif /* HAVE_RL_COMPLETION_APPEND_CHARACTER */ +} + +/* + * call-seq: + * Readline.completion_append_character -> char + * + * Returns a string containing a character to be appended on + * completion. The default is a space (" "). + * + * Raises NotImplementedError if the using readline library does not support. + * + * Raises SecurityError exception if $SAFE is 4. 
+ */ +static VALUE +readline_s_get_completion_append_character(VALUE self) +{ +#ifdef HAVE_RL_COMPLETION_APPEND_CHARACTER + char buf[1]; + + rb_secure(4); + if (rl_completion_append_character == '\0') + return Qnil; + + buf[0] = (char) rl_completion_append_character; + return rb_locale_str_new(buf, 1); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif /* HAVE_RL_COMPLETION_APPEND_CHARACTER */ +} + +/* + * call-seq: + * Readline.basic_word_break_characters = string + * + * Sets the basic list of characters that signal a break between words + * for the completer routine. The default is the characters which + * break words for completion in Bash: "\t\n\"\\'`@$><=;|&{(". + * + * Raises NotImplementedError if the using readline library does not support. + * + * Raises SecurityError exception if $SAFE is 4. + */ +static VALUE +readline_s_set_basic_word_break_characters(VALUE self, VALUE str) +{ +#ifdef HAVE_RL_BASIC_WORD_BREAK_CHARACTERS + static char *basic_word_break_characters = NULL; + + rb_secure(4); + OutputStringValue(str); + if (basic_word_break_characters == NULL) { + basic_word_break_characters = + ALLOC_N(char, RSTRING_LEN(str) + 1); + } + else { + REALLOC_N(basic_word_break_characters, char, RSTRING_LEN(str) + 1); + } + strncpy(basic_word_break_characters, + RSTRING_PTR(str), RSTRING_LEN(str)); + basic_word_break_characters[RSTRING_LEN(str)] = '\0'; + rl_basic_word_break_characters = basic_word_break_characters; + return self; +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif /* HAVE_RL_BASIC_WORD_BREAK_CHARACTERS */ +} + +/* + * call-seq: + * Readline.basic_word_break_characters -> string + * + * Gets the basic list of characters that signal a break between words + * for the completer routine. + * + * Raises NotImplementedError if the using readline library does not support. + * + * Raises SecurityError exception if $SAFE is 4. 
+ */ +static VALUE +readline_s_get_basic_word_break_characters(VALUE self, VALUE str) +{ +#ifdef HAVE_RL_BASIC_WORD_BREAK_CHARACTERS + rb_secure(4); + if (rl_basic_word_break_characters == NULL) + return Qnil; + return rb_locale_str_new_cstr(rl_basic_word_break_characters); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif /* HAVE_RL_BASIC_WORD_BREAK_CHARACTERS */ +} + +/* + * call-seq: + * Readline.completer_word_break_characters = string + * + * Sets the basic list of characters that signal a break between words + * for rl_complete_internal(). The default is the value of + * Readline.basic_word_break_characters. + * + * Raises NotImplementedError if the using readline library does not support. + * + * Raises SecurityError exception if $SAFE is 4. + */ +static VALUE +readline_s_set_completer_word_break_characters(VALUE self, VALUE str) +{ +#ifdef HAVE_RL_COMPLETER_WORD_BREAK_CHARACTERS + static char *completer_word_break_characters = NULL; + + rb_secure(4); + OutputStringValue(str); + if (completer_word_break_characters == NULL) { + completer_word_break_characters = + ALLOC_N(char, RSTRING_LEN(str) + 1); + } + else { + REALLOC_N(completer_word_break_characters, char, RSTRING_LEN(str) + 1); + } + strncpy(completer_word_break_characters, + RSTRING_PTR(str), RSTRING_LEN(str)); + completer_word_break_characters[RSTRING_LEN(str)] = '\0'; + rl_completer_word_break_characters = completer_word_break_characters; + return self; +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif /* HAVE_RL_COMPLETER_WORD_BREAK_CHARACTERS */ +} + +/* + * call-seq: + * Readline.completer_word_break_characters -> string + * + * Gets the basic list of characters that signal a break between words + * for rl_complete_internal(). + * + * Raises NotImplementedError if the using readline library does not support. + * + * Raises SecurityError exception if $SAFE is 4. 
+ */ +static VALUE +readline_s_get_completer_word_break_characters(VALUE self, VALUE str) +{ +#ifdef HAVE_RL_COMPLETER_WORD_BREAK_CHARACTERS + rb_secure(4); + if (rl_completer_word_break_characters == NULL) + return Qnil; + return rb_locale_str_new_cstr(rl_completer_word_break_characters); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif /* HAVE_RL_COMPLETER_WORD_BREAK_CHARACTERS */ +} + +/* + * call-seq: + * Readline.basic_quote_characters = string + * + * Sets a list of quote characters which can cause a word break. + * + * Raises NotImplementedError if the using readline library does not support. + * + * Raises SecurityError exception if $SAFE is 4. + */ +static VALUE +readline_s_set_basic_quote_characters(VALUE self, VALUE str) +{ +#ifdef HAVE_RL_BASIC_QUOTE_CHARACTERS + static char *basic_quote_characters = NULL; + + rb_secure(4); + OutputStringValue(str); + if (basic_quote_characters == NULL) { + basic_quote_characters = + ALLOC_N(char, RSTRING_LEN(str) + 1); + } + else { + REALLOC_N(basic_quote_characters, char, RSTRING_LEN(str) + 1); + } + strncpy(basic_quote_characters, + RSTRING_PTR(str), RSTRING_LEN(str)); + basic_quote_characters[RSTRING_LEN(str)] = '\0'; + rl_basic_quote_characters = basic_quote_characters; + + return self; +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif /* HAVE_RL_BASIC_QUOTE_CHARACTERS */ +} + +/* + * call-seq: + * Readline.basic_quote_characters -> string + * + * Gets a list of quote characters which can cause a word break. + * + * Raises NotImplementedError if the using readline library does not support. + * + * Raises SecurityError exception if $SAFE is 4. 
+ */ +static VALUE +readline_s_get_basic_quote_characters(VALUE self, VALUE str) +{ +#ifdef HAVE_RL_BASIC_QUOTE_CHARACTERS + rb_secure(4); + if (rl_basic_quote_characters == NULL) + return Qnil; + return rb_locale_str_new_cstr(rl_basic_quote_characters); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif /* HAVE_RL_BASIC_QUOTE_CHARACTERS */ +} + +/* + * call-seq: + * Readline.completer_quote_characters = string + * + * Sets a list of characters which can be used to quote a substring of + * the line. Completion occurs on the entire substring, and within + * the substring Readline.completer_word_break_characters are treated + * as any other character, unless they also appear within this list. + * + * Raises NotImplementedError if the using readline library does not support. + * + * Raises SecurityError exception if $SAFE is 4. + */ +static VALUE +readline_s_set_completer_quote_characters(VALUE self, VALUE str) +{ +#ifdef HAVE_RL_COMPLETER_QUOTE_CHARACTERS + static char *completer_quote_characters = NULL; + + rb_secure(4); + OutputStringValue(str); + if (completer_quote_characters == NULL) { + completer_quote_characters = + ALLOC_N(char, RSTRING_LEN(str) + 1); + } + else { + REALLOC_N(completer_quote_characters, char, RSTRING_LEN(str) + 1); + } + strncpy(completer_quote_characters, RSTRING_PTR(str), RSTRING_LEN(str)); + completer_quote_characters[RSTRING_LEN(str)] = '\0'; + rl_completer_quote_characters = completer_quote_characters; + + return self; +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif /* HAVE_RL_COMPLETER_QUOTE_CHARACTERS */ +} + +/* + * call-seq: + * Readline.completer_quote_characters -> string + * + * Gets a list of characters which can be used to quote a substring of + * the line. + * + * Raises NotImplementedError if the using readline library does not support. + * + * Raises SecurityError exception if $SAFE is 4. 
+ */ +static VALUE +readline_s_get_completer_quote_characters(VALUE self, VALUE str) +{ +#ifdef HAVE_RL_COMPLETER_QUOTE_CHARACTERS + rb_secure(4); + if (rl_completer_quote_characters == NULL) + return Qnil; + return rb_locale_str_new_cstr(rl_completer_quote_characters); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif /* HAVE_RL_COMPLETER_QUOTE_CHARACTERS */ +} + +/* + * call-seq: + * Readline.filename_quote_characters = string + * + * Sets a list of characters that cause a filename to be quoted by the completer + * when they appear in a completed filename. The default is nil. + * + * Raises NotImplementedError if the using readline library does not support. + * + * Raises SecurityError exception if $SAFE is 4. + */ +static VALUE +readline_s_set_filename_quote_characters(VALUE self, VALUE str) +{ +#ifdef HAVE_RL_FILENAME_QUOTE_CHARACTERS + static char *filename_quote_characters = NULL; + + rb_secure(4); + OutputStringValue(str); + if (filename_quote_characters == NULL) { + filename_quote_characters = + ALLOC_N(char, RSTRING_LEN(str) + 1); + } + else { + REALLOC_N(filename_quote_characters, char, RSTRING_LEN(str) + 1); + } + strncpy(filename_quote_characters, RSTRING_PTR(str), RSTRING_LEN(str)); + filename_quote_characters[RSTRING_LEN(str)] = '\0'; + rl_filename_quote_characters = filename_quote_characters; + + return self; +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif /* HAVE_RL_FILENAME_QUOTE_CHARACTERS */ +} + +/* + * call-seq: + * Readline.filename_quote_characters -> string + * + * Gets a list of characters that cause a filename to be quoted by the completer + * when they appear in a completed filename. + * + * Raises NotImplementedError if the using readline library does not support. + * + * Raises SecurityError exception if $SAFE is 4. 
+ */ +static VALUE +readline_s_get_filename_quote_characters(VALUE self, VALUE str) +{ +#ifdef HAVE_RL_FILENAME_QUOTE_CHARACTERS + rb_secure(4); + if (rl_filename_quote_characters == NULL) + return Qnil; + return rb_locale_str_new_cstr(rl_filename_quote_characters); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif /* HAVE_RL_FILENAME_QUOTE_CHARACTERS */ +} + +static VALUE +hist_to_s(VALUE self) +{ + return rb_str_new_cstr("HISTORY"); +} + +static int +history_get_offset_history_base(int offset) +{ + return history_base + offset; +} + +static int +history_get_offset_0(int offset) +{ + return offset; +} + +static VALUE +hist_get(VALUE self, VALUE index) +{ + HIST_ENTRY *entry = NULL; + int i; + + rb_secure(4); + i = NUM2INT(index); + if (i < 0) { + i += history_length; + } + if (i >= 0) { + entry = history_get(history_get_offset_func(i)); + } + if (entry == NULL) { + rb_raise(rb_eIndexError, "invalid index"); + } + return rb_locale_str_new_cstr(entry->line); +} + +static VALUE +hist_set(VALUE self, VALUE index, VALUE str) +{ +#ifdef HAVE_REPLACE_HISTORY_ENTRY + HIST_ENTRY *entry = NULL; + int i; + + rb_secure(4); + i = NUM2INT(index); + OutputStringValue(str); + if (i < 0) { + i += history_length; + } + if (i >= 0) { + entry = replace_history_entry(i, RSTRING_PTR(str), NULL); + } + if (entry == NULL) { + rb_raise(rb_eIndexError, "invalid index"); + } + return str; +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + +static VALUE +hist_push(VALUE self, VALUE str) +{ + rb_secure(4); + OutputStringValue(str); + add_history(RSTRING_PTR(str)); + return self; +} + +static VALUE +hist_push_method(int argc, VALUE *argv, VALUE self) +{ + VALUE str; + + rb_secure(4); + while (argc--) { + str = *argv++; + OutputStringValue(str); + add_history(RSTRING_PTR(str)); + } + return self; +} + +static VALUE +rb_remove_history(int index) +{ +#ifdef HAVE_REMOVE_HISTORY + HIST_ENTRY *entry; + VALUE val; + + rb_secure(4); + entry = 
remove_history(index); + if (entry) { + val = rb_locale_str_new_cstr(entry->line); + free((void *) entry->line); + free(entry); + return val; + } + return Qnil; +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + +static VALUE +hist_pop(VALUE self) +{ + rb_secure(4); + if (history_length > 0) { + return rb_remove_history(history_length - 1); + } else { + return Qnil; + } +} + +static VALUE +hist_shift(VALUE self) +{ + rb_secure(4); + if (history_length > 0) { + return rb_remove_history(0); + } else { + return Qnil; + } +} + +static VALUE +hist_each(VALUE self) +{ + HIST_ENTRY *entry; + int i; + + RETURN_ENUMERATOR(self, 0, 0); + + rb_secure(4); + for (i = 0; i < history_length; i++) { + entry = history_get(history_get_offset_func(i)); + if (entry == NULL) + break; + rb_yield(rb_locale_str_new_cstr(entry->line)); + } + return self; +} + +static VALUE +hist_length(VALUE self) +{ + rb_secure(4); + return INT2NUM(history_length); +} + +static VALUE +hist_empty_p(VALUE self) +{ + rb_secure(4); + return history_length == 0 ? 
Qtrue : Qfalse; +} + +static VALUE +hist_delete_at(VALUE self, VALUE index) +{ + int i; + + rb_secure(4); + i = NUM2INT(index); + if (i < 0) + i += history_length; + if (i < 0 || i > history_length - 1) { + rb_raise(rb_eIndexError, "invalid index"); + } + return rb_remove_history(i); +} + +static VALUE +hist_clear(VALUE self) +{ +#ifdef HAVE_CLEAR_HISTORY + rb_secure(4); + clear_history(); + return self; +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + +static VALUE +filename_completion_proc_call(VALUE self, VALUE str) +{ + VALUE result; + char **matches; + int i; + + matches = rl_completion_matches(StringValuePtr(str), + rl_filename_completion_function); + if (matches) { + result = rb_ary_new(); + for (i = 0; matches[i]; i++) { + rb_ary_push(result, rb_locale_str_new_cstr(matches[i])); + free(matches[i]); + } + free(matches); + if (RARRAY_LEN(result) >= 2) + rb_ary_shift(result); + } + else { + result = Qnil; + } + return result; +} + +static VALUE +username_completion_proc_call(VALUE self, VALUE str) +{ + VALUE result; + char **matches; + int i; + + matches = rl_completion_matches(StringValuePtr(str), + rl_username_completion_function); + if (matches) { + result = rb_ary_new(); + for (i = 0; matches[i]; i++) { + rb_ary_push(result, rb_locale_str_new_cstr(matches[i])); + free(matches[i]); + } + free(matches); + if (RARRAY_LEN(result) >= 2) + rb_ary_shift(result); + } + else { + result = Qnil; + } + return result; +} + +void +Init_readline() +{ + VALUE history, fcomp, ucomp, version; + + /* Allow conditional parsing of the ~/.inputrc file. 
*/ + rl_readline_name = (char *)"Ruby"; + + using_history(); + + completion_proc = rb_intern(COMPLETION_PROC); + completion_case_fold = rb_intern(COMPLETION_CASE_FOLD); + + mReadline = rb_define_module("Readline"); + rb_define_module_function(mReadline, "readline", + readline_readline, -1); + rb_define_singleton_method(mReadline, "input=", + readline_s_set_input, 1); + rb_define_singleton_method(mReadline, "output=", + readline_s_set_output, 1); + rb_define_singleton_method(mReadline, "completion_proc=", + readline_s_set_completion_proc, 1); + rb_define_singleton_method(mReadline, "completion_proc", + readline_s_get_completion_proc, 0); + rb_define_singleton_method(mReadline, "completion_case_fold=", + readline_s_set_completion_case_fold, 1); + rb_define_singleton_method(mReadline, "completion_case_fold", + readline_s_get_completion_case_fold, 0); + rb_define_singleton_method(mReadline, "vi_editing_mode", + readline_s_vi_editing_mode, 0); + rb_define_singleton_method(mReadline, "vi_editing_mode?", + readline_s_vi_editing_mode_p, 0); + rb_define_singleton_method(mReadline, "emacs_editing_mode", + readline_s_emacs_editing_mode, 0); + rb_define_singleton_method(mReadline, "emacs_editing_mode?", + readline_s_emacs_editing_mode_p, 0); + rb_define_singleton_method(mReadline, "completion_append_character=", + readline_s_set_completion_append_character, 1); + rb_define_singleton_method(mReadline, "completion_append_character", + readline_s_get_completion_append_character, 0); + rb_define_singleton_method(mReadline, "basic_word_break_characters=", + readline_s_set_basic_word_break_characters, 1); + rb_define_singleton_method(mReadline, "basic_word_break_characters", + readline_s_get_basic_word_break_characters, 0); + rb_define_singleton_method(mReadline, "completer_word_break_characters=", + readline_s_set_completer_word_break_characters, 1); + rb_define_singleton_method(mReadline, "completer_word_break_characters", + readline_s_get_completer_word_break_characters, 0); + 
rb_define_singleton_method(mReadline, "basic_quote_characters=", + readline_s_set_basic_quote_characters, 1); + rb_define_singleton_method(mReadline, "basic_quote_characters", + readline_s_get_basic_quote_characters, 0); + rb_define_singleton_method(mReadline, "completer_quote_characters=", + readline_s_set_completer_quote_characters, 1); + rb_define_singleton_method(mReadline, "completer_quote_characters", + readline_s_get_completer_quote_characters, 0); + rb_define_singleton_method(mReadline, "filename_quote_characters=", + readline_s_set_filename_quote_characters, 1); + rb_define_singleton_method(mReadline, "filename_quote_characters", + readline_s_get_filename_quote_characters, 0); + + history = rb_obj_alloc(rb_cObject); + rb_extend_object(history, rb_mEnumerable); + rb_define_singleton_method(history,"to_s", hist_to_s, 0); + rb_define_singleton_method(history,"[]", hist_get, 1); + rb_define_singleton_method(history,"[]=", hist_set, 2); + rb_define_singleton_method(history,"<<", hist_push, 1); + rb_define_singleton_method(history,"push", hist_push_method, -1); + rb_define_singleton_method(history,"pop", hist_pop, 0); + rb_define_singleton_method(history,"shift", hist_shift, 0); + rb_define_singleton_method(history,"each", hist_each, 0); + rb_define_singleton_method(history,"length", hist_length, 0); + rb_define_singleton_method(history,"size", hist_length, 0); + rb_define_singleton_method(history,"empty?", hist_empty_p, 0); + rb_define_singleton_method(history,"delete_at", hist_delete_at, 1); + rb_define_singleton_method(history,"clear", hist_clear, 0); + + /* + * The history buffer. It extends Enumerable module, so it behaves + * just like an array. + * For example, gets the fifth content that the user input by + * HISTORY[4]. 
+ */ + rb_define_const(mReadline, "HISTORY", history); + + fcomp = rb_obj_alloc(rb_cObject); + rb_define_singleton_method(fcomp, "call", + filename_completion_proc_call, 1); + /* + * The Object with the call method that is a completion for filename. + * This is sets by Readline.completion_proc= method. + */ + rb_define_const(mReadline, "FILENAME_COMPLETION_PROC", fcomp); + + ucomp = rb_obj_alloc(rb_cObject); + rb_define_singleton_method(ucomp, "call", + username_completion_proc_call, 1); + /* + * The Object with the call method that is a completion for usernames. + * This is sets by Readline.completion_proc= method. + */ + rb_define_const(mReadline, "USERNAME_COMPLETION_PROC", ucomp); + history_get_offset_func = history_get_offset_history_base; +#if defined HAVE_RL_LIBRARY_VERSION + version = rb_str_new_cstr(rl_library_version); +#if defined HAVE_CLEAR_HISTORY || defined HAVE_REMOVE_HISTORY + if (strncmp(rl_library_version, EDIT_LINE_LIBRARY_VERSION, + strlen(EDIT_LINE_LIBRARY_VERSION)) == 0) { + add_history("1"); + if (history_get(history_get_offset_func(0)) == NULL) { + history_get_offset_func = history_get_offset_0; + } +#if !defined HAVE_CLEAR_HISTORY + clear_history(); +#else + { + HIST_ENTRY *entry = remove_history(0); + if (entry) { + free((char *)entry->line); + free(entry); + } + } +#endif + } +#endif +#else + version = rb_str_new_cstr("2.0 or prior version"); +#endif + /* Version string of GNU Readline or libedit. 
*/ + rb_define_const(mReadline, "VERSION", version); + + rl_attempted_completion_function = readline_attempted_completion_function; +#ifdef HAVE_RL_EVENT_HOOK + rl_event_hook = readline_event; +#endif +#ifdef HAVE_RL_CATCH_SIGNALS + rl_catch_signals = 0; +#endif +#ifdef HAVE_RL_CATCH_SIGWINCH + rl_catch_sigwinch = 0; +#endif +#ifdef HAVE_RL_CLEAR_SIGNALS + rl_clear_signals(); +#endif +} diff --git a/ext/sdbm/_sdbm.c b/ext/sdbm/_sdbm.c new file mode 100644 index 0000000..3ce79dc --- /dev/null +++ b/ext/sdbm/_sdbm.c @@ -0,0 +1,923 @@ +/* + * sdbm - ndbm work-alike hashed database library + * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978). + * author: oz@nexus.yorku.ca + * status: public domain. + * + * core routines + */ + +#ifndef lint +/*char sdbm_rcsid[] = "$Id: _sdbm.c 23225 2009-04-19 13:34:30Z yugui $";*/ +#endif + +#include "sdbm.h" +#include "ruby/config.h" +#include "ruby/defines.h" + +/* + * sdbm - ndbm work-alike hashed database library + * tuning and portability constructs [not nearly enough] + * author: oz@nexus.yorku.ca + */ + +#define BYTESIZ 8 + +#ifdef HAVE_UNISTD_H +#include +#endif + +#ifdef BSD42 +#define SEEK_SET L_SET +#define memset(s,c,n) bzero(s, n) /* only when c is zero */ +#define memcpy(s1,s2,n) bcopy(s2, s1, n) +#define memcmp(s1,s2,n) bcmp(s1,s2,n) +#endif + +/* + * important tuning parms (hah) + */ + +#define SEEDUPS /* always detect duplicates */ +#define BADMESS /* generate a message for worst case: + cannot make room after SPLTMAX splits */ +/* + * misc + */ +#ifdef DEBUG +#define debug(x) printf x +#else +#define debug(x) +#endif + +#ifdef BIG_E +#define GET_SHORT(p, i) (((unsigned)((unsigned char *)(p))[(i)*2] << 8) + (((unsigned char *)(p))[(i)*2 + 1])) +#define PUT_SHORT(p, i, s) (((unsigned char *)(p))[(i)*2] = (unsigned char)((s) >> 8), ((unsigned char *)(p))[(i)*2 + 1] = (unsigned char)(s)) +#else +#define GET_SHORT(p, i) ((p)[i]) +#define PUT_SHORT(p, i, s) ((p)[i] = (s)) +#endif + +/*#include 
/*
 * useful macros
 *
 * bad(x)     - true when datum x has no data pointer or a negative size
 * exhash(x)  - hash of datum x via sdbm_hash()
 * ioerr(db)  - flag an I/O error on the database handle
 * OFF_PAG(n) - byte offset of page block n in the page file
 * OFF_DIR(n) - byte offset of directory block n in the directory file
 *
 * OFF_PAG and OFF_DIR are fully parenthesised so that they remain a
 * single operand when used inside a larger expression.
 */
#define bad(x)		((x).dptr == NULL || (x).dsize < 0)
#define exhash(item)	sdbm_hash((item).dptr, (item).dsize)
#define ioerr(db)	((db)->flags |= DBM_IOERR)

#define OFF_PAG(off)	((long) (off) * PBLKSIZ)
#define OFF_DIR(off)	((long) (off) * DBLKSIZ)
register int mode) +{ + register DBM *db; + register char *dirname; + register char *pagname; + register int n; + + if (file == NULL || !*file) + return errno = EINVAL, (DBM *) NULL; +/* + * need space for two seperate filenames + */ + n = strlen(file) * 2 + strlen(DIRFEXT) + strlen(PAGFEXT) + 2; + + if ((dirname = malloc((unsigned) n)) == NULL) + return errno = ENOMEM, (DBM *) NULL; +/* + * build the file names + */ + dirname = strcat(strcpy(dirname, file), DIRFEXT); + pagname = strcpy(dirname + strlen(dirname) + 1, file); + pagname = strcat(pagname, PAGFEXT); + + db = sdbm_prep(dirname, pagname, flags, mode); + free((char *) dirname); + return db; +} + +DBM * +sdbm_prep(char *dirname, char *pagname, int flags, int mode) +{ + register DBM *db; + struct stat dstat; + + if ((db = (DBM *) malloc(sizeof(DBM))) == NULL) + return errno = ENOMEM, (DBM *) NULL; + + db->flags = 0; + db->hmask = 0; + db->blkptr = 0; + db->keyptr = 0; +/* + * adjust user flags so that WRONLY becomes RDWR, + * as required by this package. Also set our internal + * flag for RDONLY. + */ + if (flags & O_WRONLY) + flags = (flags & ~O_WRONLY) | O_RDWR; + if (flags & O_RDONLY) + db->flags = DBM_RDONLY; +/* + * open the files in sequence, and stat the dirfile. + * If we fail anywhere, undo everything, return NULL. + */ + flags |= O_BINARY; + if ((db->pagf = open(pagname, flags, mode)) > -1) { + if ((db->dirf = open(dirname, flags, mode)) > -1) { +/* + * need the dirfile size to establish max bit number. + */ + if (fstat(db->dirf, &dstat) == 0) { +/* + * zero size: either a fresh database, or one with a single, + * unsplit data page: dirpage is all zeros. + */ + db->dirbno = (!dstat.st_size) ? 
0 : -1; + db->pagbno = -1; + db->maxbno = dstat.st_size * (long) BYTESIZ; + + (void) memset(db->pagbuf, 0, PBLKSIZ); + (void) memset(db->dirbuf, 0, DBLKSIZ); + /* + * success + */ + return db; + } + (void) close(db->dirf); + } + (void) close(db->pagf); + } + free((char *) db); + return (DBM *) NULL; +} + +void +sdbm_close(register DBM *db) +{ + if (db == NULL) + errno = EINVAL; + else { + (void) close(db->dirf); + (void) close(db->pagf); + free((char *) db); + } +} + +datum +sdbm_fetch(register DBM *db, datum key) +{ + if (db == NULL || bad(key)) + return errno = EINVAL, nullitem; + + if (getpage(db, exhash(key))) + return getpair(db->pagbuf, key); + + return ioerr(db), nullitem; +} + +int +sdbm_delete(register DBM *db, datum key) +{ + if (db == NULL || bad(key)) + return errno = EINVAL, -1; + if (sdbm_rdonly(db)) + return errno = EPERM, -1; + + if (getpage(db, exhash(key))) { + if (!delpair(db->pagbuf, key)) + return -1; +/* + * update the page file + */ + if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 + || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) + return ioerr(db), -1; + + return 0; + } + + return ioerr(db), -1; +} + +int +sdbm_store(register DBM *db, datum key, datum val, int flags) +{ + int need; + register long hash; + + if (db == NULL || bad(key)) + return errno = EINVAL, -1; + if (sdbm_rdonly(db)) + return errno = EPERM, -1; + + need = key.dsize + val.dsize; +/* + * is the pair too big (or too small) for this database ?? + */ + if (need < 0 || need > PAIRMAX) + return errno = EINVAL, -1; + + if (getpage(db, (hash = exhash(key)))) { +/* + * if we need to replace, delete the key/data pair + * first. If it is not there, ignore. + */ + if (flags == DBM_REPLACE) + (void) delpair(db->pagbuf, key); +#ifdef SEEDUPS + else if (duppair(db->pagbuf, key)) + return 1; +#endif +/* + * if we do not have enough room, we have to split. 
/*
 * makroom - make room by splitting the overfull page
 * this routine will attempt to make room for SPLTMAX times before
 * giving up.
 *
 * db   - database handle; db->pagbuf holds the page being split
 * hash - hash of the key that is about to be inserted
 * need - number of bytes the incoming pair requires (as tested by
 *        fitpair)
 *
 * Returns 1 as soon as the current page can hold `need' bytes.
 * Returns 0 when an lseek/write fails, when setdbit fails, or when
 * SPLTMAX splits were not enough.
 */
static int
makroom(register DBM *db, long int hash, int need)
{
	long newp;
	char twin[PBLKSIZ];
#if defined _WIN32 && !defined __CYGWIN__
	char zer[PBLKSIZ];
	long oldtail;
#endif
	char *pag = db->pagbuf;
	char *new = twin;
	register int smax = SPLTMAX;

	do {
/*
 * split the current page
 */
		(void) splpage(pag, new, db->hmask + 1);
/*
 * address of the new page
 */
		newp = (hash & db->hmask) | (db->hmask + 1);
		debug(("newp: %ld\n", newp));
/*
 * write delay, read avoidance/cache shuffle:
 * select the page for incoming pair: if key is to go to the new page,
 * write out the previous one, and copy the new one over, thus making
 * it the current page. If not, simply write the new page, and we are
 * still looking at the page of interest. current page is not updated
 * here, as sdbm_store will do so, after it inserts the incoming pair.
 */

#if defined _WIN32 && !defined __CYGWIN__
		/*
		 * If writing the new page would leave a hole in the page
		 * file, fill that hole with zero pages first.
		 * (a hole is NOT read back as 0 on this platform)
		 */
		oldtail = lseek(db->pagf, 0L, SEEK_END);
		memset(zer, 0, PBLKSIZ);
		while (OFF_PAG(newp) > oldtail) {
			if (lseek(db->pagf, 0L, SEEK_END) < 0 ||
			    write(db->pagf, zer, PBLKSIZ) < 0) {

				return 0;
			}
			oldtail += PBLKSIZ;
		}
#endif

		/* the key hashes to the new page: flush old, switch to new */
		if (hash & (db->hmask + 1)) {
			if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
			    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
				return 0;
			db->pagbno = newp;
			(void) memcpy(pag, new, PBLKSIZ);
		}
		/* the key stays on the current page: just write the new one */
		else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
			 || write(db->pagf, new, PBLKSIZ) < 0)
			return 0;

		/* record the split in the directory bitmap */
		if (!setdbit(db, db->curbit))
			return 0;
/*
 * see if we have enough room now
 */
		if (fitpair(pag, need))
			return 1;
/*
 * try again... update curbit and hmask as getpage would have
 * done. because of our update of the current page, we do not
 * need to read in anything. BUT we have to write the current
 * [deferred] page out, as the window of failure is too great.
 */
		db->curbit = 2 * db->curbit +
			((hash & (db->hmask + 1)) ? 2 : 1);
		db->hmask |= (db->hmask + 1);

		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
			return 0;

	} while (--smax);
/*
 * if we are here, this is real bad news. After SPLTMAX splits,
 * we still cannot fit the key. say goodnight.
 */
#ifdef BADMESS
	/* 44 is the byte length of the message, written to fd 2 */
	(void) write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
#endif
	return 0;

}
(ndbm bug) + */ +datum +sdbm_firstkey(register DBM *db) +{ + if (db == NULL) + return errno = EINVAL, nullitem; +/* + * start at page 0 + */ + (void) memset(db->pagbuf, 0, PBLKSIZ); + if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0 + || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) + return ioerr(db), nullitem; + db->pagbno = 0; + db->blkptr = 0; + db->keyptr = 0; + + return getnext(db); +} + +datum +sdbm_nextkey(register DBM *db) +{ + if (db == NULL) + return errno = EINVAL, nullitem; + return getnext(db); +} + +/* + * all important binary trie traversal + */ +static int +getpage(register DBM *db, register long int hash) +{ + register int hbit; + register long dbit; + register long pagb; + + dbit = 0; + hbit = 0; + while (dbit < db->maxbno && getdbit(db, dbit)) + dbit = 2 * dbit + ((hash & ((long) 1 << hbit++)) ? 2 : 1); + + debug(("dbit: %d...", dbit)); + + db->curbit = dbit; + db->hmask = masks[hbit]; + + pagb = hash & db->hmask; +/* + * see if the block we need is already in memory. + * note: this lookaside cache has about 10% hit rate. + */ + if (pagb != db->pagbno) { +/* + * note: here, we assume a "hole" is read as 0s. + * if not, must zero pagbuf first. 
+ */ + (void) memset(db->pagbuf, 0, PBLKSIZ); + + if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0 + || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) + return 0; + if (!chkpage(db->pagbuf)) { + return 0; + } + db->pagbno = pagb; + + debug(("pag read: %d\n", pagb)); + } + return 1; +} + +static int +getdbit(register DBM *db, register long int dbit) +{ + register long c; + register long dirb; + + c = dbit / BYTESIZ; + dirb = c / DBLKSIZ; + + if (dirb != db->dirbno) { + if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 + || read(db->dirf, db->dirbuf, DBLKSIZ) < 0) + return 0; + db->dirbno = dirb; + + debug(("dir read: %d\n", dirb)); + } + + return db->dirbuf[c % DBLKSIZ] & (1 << (dbit % BYTESIZ)); +} + +static int +setdbit(register DBM *db, register long int dbit) +{ + register long c; + register long dirb; + + c = dbit / BYTESIZ; + dirb = c / DBLKSIZ; + + if (dirb != db->dirbno) { + if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 + || read(db->dirf, db->dirbuf, DBLKSIZ) < 0) + return 0; + db->dirbno = dirb; + + debug(("dir read: %d\n", dirb)); + } + + db->dirbuf[c % DBLKSIZ] |= (1 << (dbit % BYTESIZ)); + + if (dbit >= db->maxbno) + db->maxbno += (long) DBLKSIZ * BYTESIZ; + + if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 + || write(db->dirf, db->dirbuf, DBLKSIZ) < 0) + return 0; + + return 1; +} + +/* + * getnext - get the next key in the page, and if done with + * the page, try the next page in sequence + */ +static datum +getnext(register DBM *db) +{ + datum key; + + for (;;) { + db->keyptr++; + key = getnkey(db->pagbuf, db->keyptr); + if (key.dptr != NULL) + return key; +/* + * we either run out, or there is nothing on this page.. + * try the next one... If we lost our position on the + * file, we will have to seek. 
+ */ + db->keyptr = 0; + if (db->pagbno != db->blkptr++) + if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0) + break; + db->pagbno = db->blkptr; + if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0) + break; + if (!chkpage(db->pagbuf)) { + break; + } + } + + return ioerr(db), nullitem; +} + +/* pair.c */ +/* + * sdbm - ndbm work-alike hashed database library + * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978). + * author: oz@nexus.yorku.ca + * status: public domain. + * + * page-level routines + */ + +#ifndef lint +/*char pair_rcsid[] = "$Id: _sdbm.c 23225 2009-04-19 13:34:30Z yugui $";*/ +#endif + +#ifndef BSD42 +/*#include */ +#endif + +#define exhash(item) sdbm_hash((item).dptr, (item).dsize) + +/* + * forward + */ +static int seepair proto((char *, int, char *, int)); + +/* + * page format: + * +------------------------------+ + * ino | n | keyoff | datoff | keyoff | + * +------------+--------+--------+ + * | datoff | - - - ----> | + * +--------+---------------------+ + * | F R E E A R E A | + * +--------------+---------------+ + * | <---- - - - | data | + * +--------+-----+----+----------+ + * | key | data | key | + * +--------+----------+----------+ + * + * calculating the offsets for free area: if the number + * of entries (ino[0]) is zero, the offset to the END of + * the free area is the block size. Otherwise, it is the + * nth (ino[ino[0]]) entry's offset. + */ + +static int +fitpair(char *pag, int need) +{ + register int n; + register int off; + register int free; + register short *ino = (short *) pag; + + off = ((n = GET_SHORT(ino,0)) > 0) ? GET_SHORT(ino,n) : PBLKSIZ; + free = off - (n + 1) * sizeof(short); + need += 2 * sizeof(short); + + debug(("free %d need %d\n", free, need)); + + return need <= free; +} + +static void +putpair(char *pag, datum key, datum val) +{ + register int n; + register int off; + register short *ino = (short *) pag; + + off = ((n = GET_SHORT(ino,0)) > 0) ? 
GET_SHORT(ino,n) : PBLKSIZ; +/* + * enter the key first + */ + off -= key.dsize; + if (key.dsize) + (void) memcpy(pag + off, key.dptr, key.dsize); + PUT_SHORT(ino,n + 1,off); +/* + * now the data + */ + off -= val.dsize; + if (val.dsize) + (void) memcpy(pag + off, val.dptr, val.dsize); + PUT_SHORT(ino,n + 2,off); +/* + * adjust item count + */ + PUT_SHORT(ino,0,GET_SHORT(ino,0) + 2); +} + +static datum +getpair(char *pag, datum key) +{ + register int i; + register int n; + datum val; + register short *ino = (short *) pag; + + if ((n = GET_SHORT(ino,0)) == 0) + return nullitem; + + if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0) + return nullitem; + + val.dptr = pag + GET_SHORT(ino,i + 1); + val.dsize = GET_SHORT(ino,i) - GET_SHORT(ino,i + 1); + return val; +} + +#ifdef SEEDUPS +static int +duppair(char *pag, datum key) +{ + register short *ino = (short *) pag; + return GET_SHORT(ino,0) > 0 && + seepair(pag, GET_SHORT(ino,0), key.dptr, key.dsize) > 0; +} +#endif + +static datum +getnkey(char *pag, int num) +{ + datum key; + register int off; + register short *ino = (short *) pag; + + num = num * 2 - 1; + if (GET_SHORT(ino,0) == 0 || num > GET_SHORT(ino,0)) + return nullitem; + + off = (num > 1) ? GET_SHORT(ino,num - 1) : PBLKSIZ; + + key.dptr = pag + GET_SHORT(ino,num); + key.dsize = off - GET_SHORT(ino,num); + + return key; +} + +static int +delpair(char *pag, datum key) +{ + register int n; + register int i; + register short *ino = (short *) pag; + + if ((n = GET_SHORT(ino,0)) == 0) + return 0; + + if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0) + return 0; +/* + * found the key. if it is the last entry + * [i.e. i == n - 1] we just adjust the entry count. + * hard case: move all data down onto the deleted pair, + * shift offsets onto deleted offsets, and adjust them. + * [note: 0 < i < n] + */ + if (i < n - 1) { + register int m; + register char *dst = pag + (i == 1 ? 
PBLKSIZ : GET_SHORT(ino,i - 1)); + register char *src = pag + GET_SHORT(ino,i + 1); + register int zoo = dst - src; + + debug(("free-up %d ", zoo)); +/* + * shift data/keys down + */ + m = GET_SHORT(ino,i + 1) - GET_SHORT(ino,n); +#ifdef DUFF +#define MOVB *--dst = *--src + + if (m > 0) { + register int loop = (m + 8 - 1) >> 3; + + switch (m & (8 - 1)) { + case 0: do { + MOVB; case 7: MOVB; + case 6: MOVB; case 5: MOVB; + case 4: MOVB; case 3: MOVB; + case 2: MOVB; case 1: MOVB; + } while (--loop); + } + } +#else +#ifdef MEMMOVE + memmove(dst, src, m); +#else + while (m--) + *--dst = *--src; +#endif +#endif +/* + * adjust offset index up + */ + while (i < n - 1) { + PUT_SHORT(ino,i, GET_SHORT(ino,i + 2) + zoo); + i++; + } + } + PUT_SHORT(ino, 0, GET_SHORT(ino, 0) - 2); + return 1; +} + +/* + * search for the key in the page. + * return offset index in the range 0 < i < n. + * return 0 if not found. + */ +static int +seepair(char *pag, register int n, register char *key, register int siz) +{ + register int i; + register int off = PBLKSIZ; + register short *ino = (short *) pag; + + for (i = 1; i < n; i += 2) { + if (siz == off - GET_SHORT(ino,i) && + memcmp(key, pag + GET_SHORT(ino,i), siz) == 0) + return i; + off = GET_SHORT(ino,i + 1); + } + return 0; +} + +static void +splpage(char *pag, char *new, long int sbit) +{ + datum key; + datum val; + + register int n; + register int off = PBLKSIZ; + char cur[PBLKSIZ]; + register short *ino = (short *) cur; + + (void) memcpy(cur, pag, PBLKSIZ); + (void) memset(pag, 0, PBLKSIZ); + (void) memset(new, 0, PBLKSIZ); + + n = GET_SHORT(ino,0); + for (ino++; n > 0; ino += 2) { + key.dptr = cur + GET_SHORT(ino,0); + key.dsize = off - GET_SHORT(ino,0); + val.dptr = cur + GET_SHORT(ino,1); + val.dsize = GET_SHORT(ino,0) - GET_SHORT(ino,1); +/* + * select the page pointer (by looking at sbit) and insert + */ + (void) putpair((exhash(key) & sbit) ? 
new : pag, key, val); + + off = GET_SHORT(ino,1); + n -= 2; + } + + debug(("%d split %d/%d\n", ((short *) cur)[0] / 2, + ((short *) new)[0] / 2, + ((short *) pag)[0] / 2)); +} + +/* + * check page sanity: + * number of entries should be something + * reasonable, and all offsets in the index should be in order. + * this could be made more rigorous. + */ +static int +chkpage(char *pag) +{ + register int n; + register int off; + register short *ino = (short *) pag; + + if ((n = GET_SHORT(ino,0)) < 0 || n > PBLKSIZ / sizeof(short)) + return 0; + + if (n > 0) { + off = PBLKSIZ; + for (ino++; n > 0; ino += 2) { + if (GET_SHORT(ino,0) > off || GET_SHORT(ino,1) > off || + GET_SHORT(ino,1) > GET_SHORT(ino,0)) + return 0; + off = GET_SHORT(ino,1); + n -= 2; + } + } + return 1; +} + +/* hash.c */ +/* + * sdbm - ndbm work-alike hashed database library + * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978). + * author: oz@nexus.yorku.ca + * status: public domain. keep it that way. + * + * hashing routine + */ + +/* + * polynomial conversion ignoring overflows + * [this seems to work remarkably well, in fact better + * then the ndbm hash function. Replace at your own risk] + * use: 65599 nice. + * 65587 even better. 
+ */ +long +sdbm_hash(register char *str, register int len) +{ + register unsigned long n = 0; + +#ifdef DUFF + +#define HASHC n = *str++ + 65599 * n + + if (len > 0) { + register int loop = (len + 8 - 1) >> 3; + + switch(len & (8 - 1)) { + case 0: do { + HASHC; case 7: HASHC; + case 6: HASHC; case 5: HASHC; + case 4: HASHC; case 3: HASHC; + case 2: HASHC; case 1: HASHC; + } while (--loop); + } + + } +#else + while (len--) + n = ((*str++) & 255) + 65587L * n; +#endif + return n; +} diff --git a/ext/sdbm/depend b/ext/sdbm/depend new file mode 100644 index 0000000..1d8b13d --- /dev/null +++ b/ext/sdbm/depend @@ -0,0 +1,2 @@ +_sdbm.o: _sdbm.c $(hdrdir)/ruby.h $(topdir)/config.h $(hdrdir)/defines.h +init.o: init.c $(hdrdir)/ruby.h $(topdir)/config.h $(hdrdir)/defines.h diff --git a/ext/sdbm/extconf.rb b/ext/sdbm/extconf.rb new file mode 100644 index 0000000..cc6c8ce --- /dev/null +++ b/ext/sdbm/extconf.rb @@ -0,0 +1,3 @@ +require 'mkmf' + +create_makefile("sdbm") diff --git a/ext/sdbm/init.c b/ext/sdbm/init.c new file mode 100644 index 0000000..d227f32 --- /dev/null +++ b/ext/sdbm/init.c @@ -0,0 +1,713 @@ +/************************************************ + + sdbminit.c - + + $Author: yugui $ + created at: Fri May 7 08:34:24 JST 1999 + + Copyright (C) 1995-2001 Yukihiro Matsumoto + +************************************************/ + +#include "ruby.h" + +#include "sdbm.h" +#include +#include + +static VALUE rb_cDBM, rb_eDBMError; + +struct dbmdata { + int di_size; + DBM *di_dbm; +}; + +static void +closed_sdbm() +{ + rb_raise(rb_eDBMError, "closed SDBM file"); +} + +#define GetDBM(obj, dbmp) {\ + Data_Get_Struct(obj, struct dbmdata, dbmp);\ + if (dbmp == 0) closed_sdbm();\ + if (dbmp->di_dbm == 0) closed_sdbm();\ +} + +#define GetDBM2(obj, data, dbm) {\ + GetDBM(obj, data);\ + (dbm) = dbmp->di_dbm;\ +} + +static void +free_sdbm(struct dbmdata *dbmp) +{ + + if (dbmp->di_dbm) sdbm_close(dbmp->di_dbm); + ruby_xfree(dbmp); +} + +static VALUE +fsdbm_close(VALUE obj) +{ + 
struct dbmdata *dbmp; + + GetDBM(obj, dbmp); + sdbm_close(dbmp->di_dbm); + dbmp->di_dbm = 0; + + return Qnil; +} + +static VALUE +fsdbm_closed(VALUE obj) +{ + struct dbmdata *dbmp; + + Data_Get_Struct(obj, struct dbmdata, dbmp); + if (dbmp == 0) + return Qtrue; + if (dbmp->di_dbm == 0) + return Qtrue; + + return Qfalse; +} + +static VALUE +fsdbm_alloc(VALUE klass) +{ + return Data_Wrap_Struct(klass, 0, free_sdbm, 0); +} + +static VALUE +fsdbm_initialize(int argc, VALUE *argv, VALUE obj) +{ + volatile VALUE file; + VALUE vmode; + DBM *dbm; + struct dbmdata *dbmp; + int mode; + + if (rb_scan_args(argc, argv, "11", &file, &vmode) == 1) { + mode = 0666; /* default value */ + } + else if (NIL_P(vmode)) { + mode = -1; /* return nil if DB not exist */ + } + else { + mode = NUM2INT(vmode); + } + FilePathValue(file); + + dbm = 0; + if (mode >= 0) + dbm = sdbm_open(RSTRING_PTR(file), O_RDWR|O_CREAT, mode); + if (!dbm) + dbm = sdbm_open(RSTRING_PTR(file), O_RDWR, 0); + if (!dbm) + dbm = sdbm_open(RSTRING_PTR(file), O_RDONLY, 0); + + if (!dbm) { + if (mode == -1) return Qnil; + rb_sys_fail(RSTRING_PTR(file)); + } + + dbmp = ALLOC(struct dbmdata); + DATA_PTR(obj) = dbmp; + dbmp->di_dbm = dbm; + dbmp->di_size = -1; + + return obj; +} + +static VALUE +fsdbm_s_open(int argc, VALUE *argv, VALUE klass) +{ + VALUE obj = Data_Wrap_Struct(klass, 0, free_sdbm, 0); + + if (NIL_P(fsdbm_initialize(argc, argv, obj))) { + return Qnil; + } + + if (rb_block_given_p()) { + return rb_ensure(rb_yield, obj, fsdbm_close, obj); + } + + return obj; +} + +static VALUE +fsdbm_fetch(VALUE obj, VALUE keystr, VALUE ifnone) +{ + datum key, value; + struct dbmdata *dbmp; + DBM *dbm; + + ExportStringValue(keystr); + key.dptr = RSTRING_PTR(keystr); + key.dsize = RSTRING_LEN(keystr); + + GetDBM2(obj, dbmp, dbm); + value = sdbm_fetch(dbm, key); + if (value.dptr == 0) { + if (ifnone == Qnil && rb_block_given_p()) + return rb_yield(rb_external_str_new(key.dptr, key.dsize)); + return ifnone; + } + return 
rb_external_str_new(value.dptr, value.dsize); +} + +static VALUE +fsdbm_aref(VALUE obj, VALUE keystr) +{ + return fsdbm_fetch(obj, keystr, Qnil); +} + +static VALUE +fsdbm_fetch_m(int argc, VALUE *argv, VALUE obj) +{ + VALUE keystr, valstr, ifnone; + + rb_scan_args(argc, argv, "11", &keystr, &ifnone); + valstr = fsdbm_fetch(obj, keystr, ifnone); + if (argc == 1 && !rb_block_given_p() && NIL_P(valstr)) + rb_raise(rb_eIndexError, "key not found"); + + return valstr; +} + +static VALUE +fsdbm_key(VALUE obj, VALUE valstr) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + + ExportStringValue(valstr); + val.dptr = RSTRING_PTR(valstr); + val.dsize = RSTRING_LEN(valstr); + + GetDBM2(obj, dbmp, dbm); + for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { + val = sdbm_fetch(dbm, key); + if (val.dsize == RSTRING_LEN(valstr) && + memcmp(val.dptr, RSTRING_PTR(valstr), val.dsize) == 0) + return rb_external_str_new(key.dptr, key.dsize); + } + return Qnil; +} + +static VALUE +fsdbm_index(VALUE hash, VALUE value) +{ + rb_warn("SDBM#index is deprecated; use SDBM#key"); + return fsdbm_key(hash, value); +} + +static VALUE +fsdbm_select(VALUE obj) +{ + VALUE new = rb_ary_new(); + datum key, val; + DBM *dbm; + struct dbmdata *dbmp; + + GetDBM2(obj, dbmp, dbm); + for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { + VALUE assoc, v; + val = sdbm_fetch(dbm, key); + assoc = rb_assoc_new(rb_external_str_new(key.dptr, key.dsize), + rb_external_str_new(val.dptr, val.dsize)); + v = rb_yield(assoc); + if (RTEST(v)) { + rb_ary_push(new, assoc); + } + GetDBM2(obj, dbmp, dbm); + } + + return new; +} + +static VALUE +fsdbm_values_at(int argc, VALUE *argv, VALUE obj) +{ + VALUE new = rb_ary_new2(argc); + int i; + + for (i=0; idi_size = -1; + + value = sdbm_fetch(dbm, key); + if (value.dptr == 0) { + if (rb_block_given_p()) return rb_yield(keystr); + return Qnil; + } + + /* need to save value before sdbm_delete() */ + valstr = rb_external_str_new(value.dptr, 
value.dsize); + + if (sdbm_delete(dbm, key)) { + dbmp->di_size = -1; + rb_raise(rb_eDBMError, "dbm_delete failed"); + } + else if (dbmp->di_size >= 0) { + dbmp->di_size--; + } + return valstr; +} + +static VALUE +fsdbm_shift(VALUE obj) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + VALUE keystr, valstr; + + fdbm_modify(obj); + GetDBM2(obj, dbmp, dbm); + key = sdbm_firstkey(dbm); + if (!key.dptr) return Qnil; + val = sdbm_fetch(dbm, key); + keystr = rb_external_str_new(key.dptr, key.dsize); + valstr = rb_external_str_new(val.dptr, val.dsize); + sdbm_delete(dbm, key); + if (dbmp->di_size >= 0) { + dbmp->di_size--; + } + + return rb_assoc_new(keystr, valstr); +} + +static VALUE +fsdbm_delete_if(VALUE obj) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + VALUE keystr, valstr; + VALUE ret, ary = rb_ary_new(); + int i, status = 0, n; + + fdbm_modify(obj); + GetDBM2(obj, dbmp, dbm); + n = dbmp->di_size; + dbmp->di_size = -1; + for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { + val = sdbm_fetch(dbm, key); + keystr = rb_external_str_new(key.dptr, key.dsize); + valstr = rb_external_str_new(val.dptr, val.dsize); + ret = rb_protect(rb_yield, rb_assoc_new(rb_str_dup(keystr), valstr), &status); + if (status != 0) break; + if (RTEST(ret)) rb_ary_push(ary, keystr); + GetDBM2(obj, dbmp, dbm); + } + + for (i = 0; i < RARRAY_LEN(ary); i++) { + keystr = RARRAY_PTR(ary)[i]; + ExportStringValue(keystr); + key.dptr = RSTRING_PTR(keystr); + key.dsize = RSTRING_LEN(keystr); + if (sdbm_delete(dbm, key)) { + rb_raise(rb_eDBMError, "sdbm_delete failed"); + } + } + if (status) rb_jump_tag(status); + if (n > 0) dbmp->di_size = n - RARRAY_LEN(ary); + + return obj; +} + +static VALUE +fsdbm_clear(VALUE obj) +{ + datum key; + struct dbmdata *dbmp; + DBM *dbm; + + fdbm_modify(obj); + GetDBM2(obj, dbmp, dbm); + dbmp->di_size = -1; + while (key = sdbm_firstkey(dbm), key.dptr) { + if (sdbm_delete(dbm, key)) { + rb_raise(rb_eDBMError, "sdbm_delete failed"); + } 
+ } + dbmp->di_size = 0; + + return obj; +} + +static VALUE +fsdbm_invert(VALUE obj) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + VALUE keystr, valstr; + VALUE hash = rb_hash_new(); + + GetDBM2(obj, dbmp, dbm); + for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { + val = sdbm_fetch(dbm, key); + keystr = rb_external_str_new(key.dptr, key.dsize); + valstr = rb_external_str_new(val.dptr, val.dsize); + rb_hash_aset(hash, valstr, keystr); + } + return hash; +} + +static VALUE +fsdbm_store(VALUE obj, VALUE keystr, VALUE valstr) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + + if (valstr == Qnil) { + fsdbm_delete(obj, keystr); + return Qnil; + } + + fdbm_modify(obj); + ExportStringValue(keystr); + ExportStringValue(valstr); + + key.dptr = RSTRING_PTR(keystr); + key.dsize = RSTRING_LEN(keystr); + + val.dptr = RSTRING_PTR(valstr); + val.dsize = RSTRING_LEN(valstr); + + GetDBM2(obj, dbmp, dbm); + dbmp->di_size = -1; + if (sdbm_store(dbm, key, val, DBM_REPLACE)) { +#ifdef HAVE_DBM_CLAERERR + sdbm_clearerr(dbm); +#endif + if (errno == EPERM) rb_sys_fail(0); + rb_raise(rb_eDBMError, "sdbm_store failed"); + } + + return valstr; +} + +static VALUE +update_i(VALUE pair, VALUE dbm) +{ + Check_Type(pair, T_ARRAY); + if (RARRAY_LEN(pair) < 2) { + rb_raise(rb_eArgError, "pair must be [key, value]"); + } + fsdbm_store(dbm, RARRAY_PTR(pair)[0], RARRAY_PTR(pair)[1]); + return Qnil; +} + +static VALUE +fsdbm_update(VALUE obj, VALUE other) +{ + rb_block_call(other, rb_intern("each_pair"), 0, 0, update_i, obj); + return obj; +} + +static VALUE +fsdbm_replace(VALUE obj, VALUE other) +{ + fsdbm_clear(obj); + rb_block_call(other, rb_intern("each_pair"), 0, 0, update_i, obj); + return obj; +} + +static VALUE +fsdbm_length(VALUE obj) +{ + datum key; + struct dbmdata *dbmp; + DBM *dbm; + int i = 0; + + GetDBM2(obj, dbmp, dbm); + if (dbmp->di_size > 0) return INT2FIX(dbmp->di_size); + + for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { + 
i++; + } + dbmp->di_size = i; + + return INT2FIX(i); +} + +static VALUE +fsdbm_empty_p(VALUE obj) +{ + datum key; + struct dbmdata *dbmp; + DBM *dbm; + int i = 0; + + GetDBM(obj, dbmp); + if (dbmp->di_size < 0) { + dbm = dbmp->di_dbm; + + for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { + i++; + } + } + else { + i = dbmp->di_size; + } + if (i == 0) return Qtrue; + return Qfalse; +} + +static VALUE +fsdbm_each_value(VALUE obj) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + + RETURN_ENUMERATOR(obj, 0, 0); + + GetDBM2(obj, dbmp, dbm); + for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { + val = sdbm_fetch(dbm, key); + rb_yield(rb_external_str_new(val.dptr, val.dsize)); + GetDBM2(obj, dbmp, dbm); + } + return obj; +} + +static VALUE +fsdbm_each_key(VALUE obj) +{ + datum key; + struct dbmdata *dbmp; + DBM *dbm; + + RETURN_ENUMERATOR(obj, 0, 0); + + GetDBM2(obj, dbmp, dbm); + for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { + rb_yield(rb_external_str_new(key.dptr, key.dsize)); + GetDBM2(obj, dbmp, dbm); + } + return obj; +} + +static VALUE +fsdbm_each_pair(VALUE obj) +{ + datum key, val; + DBM *dbm; + struct dbmdata *dbmp; + VALUE keystr, valstr; + + RETURN_ENUMERATOR(obj, 0, 0); + + GetDBM2(obj, dbmp, dbm); + for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { + val = sdbm_fetch(dbm, key); + keystr = rb_external_str_new(key.dptr, key.dsize); + valstr = rb_external_str_new(val.dptr, val.dsize); + rb_yield(rb_assoc_new(keystr, valstr)); + GetDBM2(obj, dbmp, dbm); + } + + return obj; +} + +static VALUE +fsdbm_keys(VALUE obj) +{ + datum key; + struct dbmdata *dbmp; + DBM *dbm; + VALUE ary; + + GetDBM2(obj, dbmp, dbm); + ary = rb_ary_new(); + for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { + rb_ary_push(ary, rb_external_str_new(key.dptr, key.dsize)); + } + + return ary; +} + +static VALUE +fsdbm_values(VALUE obj) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + 
VALUE ary; + + GetDBM2(obj, dbmp, dbm); + ary = rb_ary_new(); + for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { + val = sdbm_fetch(dbm, key); + rb_ary_push(ary, rb_external_str_new(val.dptr, val.dsize)); + } + + return ary; +} + +static VALUE +fsdbm_has_key(VALUE obj, VALUE keystr) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + + ExportStringValue(keystr); + key.dptr = RSTRING_PTR(keystr); + key.dsize = RSTRING_LEN(keystr); + + GetDBM2(obj, dbmp, dbm); + val = sdbm_fetch(dbm, key); + if (val.dptr) return Qtrue; + return Qfalse; +} + +static VALUE +fsdbm_has_value(VALUE obj, VALUE valstr) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + + ExportStringValue(valstr); + val.dptr = RSTRING_PTR(valstr); + val.dsize = RSTRING_LEN(valstr); + + GetDBM2(obj, dbmp, dbm); + for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { + val = sdbm_fetch(dbm, key); + if (val.dsize == RSTRING_LEN(valstr) && + memcmp(val.dptr, RSTRING_PTR(valstr), val.dsize) == 0) + return Qtrue; + } + return Qfalse; +} + +static VALUE +fsdbm_to_a(VALUE obj) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + VALUE ary; + + GetDBM2(obj, dbmp, dbm); + ary = rb_ary_new(); + for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { + val = sdbm_fetch(dbm, key); + rb_ary_push(ary, rb_assoc_new(rb_external_str_new(key.dptr, key.dsize), + rb_external_str_new(val.dptr, val.dsize))); + } + + return ary; +} + +static VALUE +fsdbm_to_hash(VALUE obj) +{ + datum key, val; + struct dbmdata *dbmp; + DBM *dbm; + VALUE hash; + + GetDBM2(obj, dbmp, dbm); + hash = rb_hash_new(); + for (key = sdbm_firstkey(dbm); key.dptr; key = sdbm_nextkey(dbm)) { + val = sdbm_fetch(dbm, key); + rb_hash_aset(hash, rb_external_str_new(key.dptr, key.dsize), + rb_external_str_new(val.dptr, val.dsize)); + } + + return hash; +} + +static VALUE +fsdbm_reject(VALUE obj) +{ + return rb_hash_delete_if(fsdbm_to_hash(obj)); +} + +void +Init_sdbm() +{ + rb_cDBM = 
rb_define_class("SDBM", rb_cObject); + rb_eDBMError = rb_define_class("SDBMError", rb_eStandardError); + rb_include_module(rb_cDBM, rb_mEnumerable); + + rb_define_alloc_func(rb_cDBM, fsdbm_alloc); + rb_define_singleton_method(rb_cDBM, "open", fsdbm_s_open, -1); + + rb_define_method(rb_cDBM, "initialize", fsdbm_initialize, -1); + rb_define_method(rb_cDBM, "close", fsdbm_close, 0); + rb_define_method(rb_cDBM, "closed?", fsdbm_closed, 0); + rb_define_method(rb_cDBM, "[]", fsdbm_aref, 1); + rb_define_method(rb_cDBM, "fetch", fsdbm_fetch_m, -1); + rb_define_method(rb_cDBM, "[]=", fsdbm_store, 2); + rb_define_method(rb_cDBM, "store", fsdbm_store, 2); + rb_define_method(rb_cDBM, "index", fsdbm_index, 1); + rb_define_method(rb_cDBM, "key", fsdbm_key, 1); + rb_define_method(rb_cDBM, "select", fsdbm_select, 0); + rb_define_method(rb_cDBM, "values_at", fsdbm_values_at, -1); + rb_define_method(rb_cDBM, "length", fsdbm_length, 0); + rb_define_method(rb_cDBM, "size", fsdbm_length, 0); + rb_define_method(rb_cDBM, "empty?", fsdbm_empty_p, 0); + rb_define_method(rb_cDBM, "each", fsdbm_each_pair, 0); + rb_define_method(rb_cDBM, "each_value", fsdbm_each_value, 0); + rb_define_method(rb_cDBM, "each_key", fsdbm_each_key, 0); + rb_define_method(rb_cDBM, "each_pair", fsdbm_each_pair, 0); + rb_define_method(rb_cDBM, "keys", fsdbm_keys, 0); + rb_define_method(rb_cDBM, "values", fsdbm_values, 0); + rb_define_method(rb_cDBM, "shift", fsdbm_shift, 0); + rb_define_method(rb_cDBM, "delete", fsdbm_delete, 1); + rb_define_method(rb_cDBM, "delete_if", fsdbm_delete_if, 0); + rb_define_method(rb_cDBM, "reject!", fsdbm_delete_if, 0); + rb_define_method(rb_cDBM, "reject", fsdbm_reject, 0); + rb_define_method(rb_cDBM, "clear", fsdbm_clear, 0); + rb_define_method(rb_cDBM,"invert", fsdbm_invert, 0); + rb_define_method(rb_cDBM,"update", fsdbm_update, 1); + rb_define_method(rb_cDBM,"replace", fsdbm_replace, 1); + + rb_define_method(rb_cDBM, "include?", fsdbm_has_key, 1); + rb_define_method(rb_cDBM, 
"has_key?", fsdbm_has_key, 1); + rb_define_method(rb_cDBM, "member?", fsdbm_has_key, 1); + rb_define_method(rb_cDBM, "has_value?", fsdbm_has_value, 1); + rb_define_method(rb_cDBM, "key?", fsdbm_has_key, 1); + rb_define_method(rb_cDBM, "value?", fsdbm_has_value, 1); + + rb_define_method(rb_cDBM, "to_a", fsdbm_to_a, 0); + rb_define_method(rb_cDBM, "to_hash", fsdbm_to_hash, 0); +} diff --git a/ext/sdbm/sdbm.h b/ext/sdbm/sdbm.h new file mode 100644 index 0000000..0c7ed1b --- /dev/null +++ b/ext/sdbm/sdbm.h @@ -0,0 +1,84 @@ +/* + * sdbm - ndbm work-alike hashed database library + * based on Per-Ake Larson's Dynamic Hashing algorithms. BIT 18 (1978). + * author: oz@nexus.yorku.ca + * status: public domain. + */ +#ifndef _SDBM_H_ +#define _SDBM_H_ + +#define DBLKSIZ 4096 +#define PBLKSIZ 1024 +#define PAIRMAX 1008 /* arbitrary on PBLKSIZ-N */ +#define SPLTMAX 10 /* maximum allowed splits */ + /* for a single insertion */ +#define DIRFEXT ".dir" +#define PAGFEXT ".pag" + +typedef struct { + int dirf; /* directory file descriptor */ + int pagf; /* page file descriptor */ + int flags; /* status/error flags, see below */ + long maxbno; /* size of dirfile in bits */ + long curbit; /* current bit number */ + long hmask; /* current hash mask */ + long blkptr; /* current block for nextkey */ + int keyptr; /* current key for nextkey */ + long blkno; /* current page to read/write */ + long pagbno; /* current page in pagbuf */ + char pagbuf[PBLKSIZ]; /* page file block buffer */ + long dirbno; /* current block in dirbuf */ + char dirbuf[DBLKSIZ]; /* directory file block buffer */ +} DBM; + +#define DBM_RDONLY 0x1 /* data base open read-only */ +#define DBM_IOERR 0x2 /* data base I/O error */ + +/* + * utility macros + */ +#define sdbm_rdonly(db) ((db)->flags & DBM_RDONLY) +#define sdbm_error(db) ((db)->flags & DBM_IOERR) + +#define sdbm_clearerr(db) ((db)->flags &= ~DBM_IOERR) /* ouch */ + +#define sdbm_dirfno(db) ((db)->dirf) +#define sdbm_pagfno(db) ((db)->pagf) + +typedef struct 
{ + char *dptr; + int dsize; +} datum; + +extern datum nullitem; + +#if defined(__STDC__) +#define proto(p) p +#else +#define proto(p) () +#endif + +/* + * flags to sdbm_store + */ +#define DBM_INSERT 0 +#define DBM_REPLACE 1 + +/* + * ndbm interface + */ +extern DBM *sdbm_open proto((char *, int, int)); +extern void sdbm_close proto((DBM *)); +extern datum sdbm_fetch proto((DBM *, datum)); +extern int sdbm_delete proto((DBM *, datum)); +extern int sdbm_store proto((DBM *, datum, datum, int)); +extern datum sdbm_firstkey proto((DBM *)); +extern datum sdbm_nextkey proto((DBM *)); + +/* + * other + */ +extern DBM *sdbm_prep proto((char *, char *, int, int)); +extern long sdbm_hash proto((char *, int)); + +#endif /* _SDBM_H_ */ diff --git a/ext/stringio/README b/ext/stringio/README new file mode 100644 index 0000000..5fe2ab2 --- /dev/null +++ b/ext/stringio/README @@ -0,0 +1,18 @@ +-*- rd -*- +$Author: akr $ + +=begin + += StringIO +Pseudo (({IO})) class from/to (({String})). + +This library is based on MoonWolf version written in Ruby. Thanks a lot. + += Differences to (({IO})) + +* not implemented: (({fcntl})), (({reopen})). +* (({fileno})) returns nil. +* (({pos=})) returns new position, not 0. +* (({ungetc})) does nothing at start of the string. 
+ +=end diff --git a/ext/stringio/depend b/ext/stringio/depend new file mode 100644 index 0000000..338ebde --- /dev/null +++ b/ext/stringio/depend @@ -0,0 +1,3 @@ +stringio.o: stringio.c $(hdrdir)/ruby/ruby.h $(arch_hdrdir)/ruby/config.h \ + $(hdrdir)/ruby/defines.h $(hdrdir)/ruby/intern.h $(hdrdir)/ruby/io.h \ + $(hdrdir)/ruby/encoding.h diff --git a/ext/stringio/extconf.rb b/ext/stringio/extconf.rb new file mode 100644 index 0000000..8fc84b3 --- /dev/null +++ b/ext/stringio/extconf.rb @@ -0,0 +1,2 @@ +require 'mkmf' +create_makefile('stringio') diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c new file mode 100644 index 0000000..fd114bf --- /dev/null +++ b/ext/stringio/stringio.c @@ -0,0 +1,1398 @@ +/********************************************************************** + + stringio.c - + + $Author: yugui $ + $RoughId: stringio.c,v 1.13 2002/03/14 03:24:18 nobu Exp $ + created at: Tue Feb 19 04:10:38 JST 2002 + + All the files in this distribution are covered under the Ruby's + license (see the file COPYING). 
+ +**********************************************************************/ + +#include "ruby.h" +#include "ruby/io.h" +#include "ruby/encoding.h" +#if defined(HAVE_FCNTL_H) || defined(_WIN32) +#include +#elif defined(HAVE_SYS_FCNTL_H) +#include +#endif + +struct StringIO { + VALUE string; + long pos; + long lineno; + int flags; + int count; +}; + +static void strio_mark _((struct StringIO *)); +static void strio_free _((struct StringIO *)); +static void strio_init(int, VALUE *, struct StringIO *); + +#define IS_STRIO(obj) (RDATA(obj)->dmark == (RUBY_DATA_FUNC)strio_mark) +#define error_inval(msg) (errno = EINVAL, rb_sys_fail(msg)) + +static struct StringIO * +strio_alloc() +{ + struct StringIO *ptr = ALLOC(struct StringIO); + ptr->string = Qnil; + ptr->pos = 0; + ptr->lineno = 0; + ptr->flags = 0; + ptr->count = 1; + return ptr; +} + +static void +strio_mark(struct StringIO *ptr) +{ + if (ptr) { + rb_gc_mark(ptr->string); + } +} + +static void +strio_free(struct StringIO *ptr) +{ + if (--ptr->count <= 0) { + xfree(ptr); + } +} + +static struct StringIO* +check_strio(VALUE self) +{ + Check_Type(self, T_DATA); + if (!IS_STRIO(self)) { + rb_raise(rb_eTypeError, "wrong argument type %s (expected StringIO)", + rb_class2name(CLASS_OF(self))); + } + return DATA_PTR(self); +} + +static struct StringIO* +get_strio(VALUE self) +{ + struct StringIO *ptr = check_strio(self); + + if (!ptr) { + rb_raise(rb_eIOError, "uninitialized stream"); + } + return ptr; +} + +static VALUE +strio_substr(struct StringIO *ptr, int pos, int len) +{ + VALUE str = ptr->string; + rb_encoding *enc = rb_enc_get(str); + int rlen = RSTRING_LEN(str) - pos; + + if (len > rlen) len = rlen; + if (len < 0) len = 0; + return rb_enc_str_new(RSTRING_PTR(str)+pos, len, enc); +} + +#define StringIO(obj) get_strio(obj) + +#define CLOSED(ptr) (!((ptr)->flags & FMODE_READWRITE)) +#define READABLE(ptr) ((ptr)->flags & FMODE_READABLE) +#define WRITABLE(ptr) ((ptr)->flags & FMODE_WRITABLE) + +static struct StringIO* 
+readable(struct StringIO *ptr) +{ + if (!READABLE(ptr)) { + rb_raise(rb_eIOError, "not opened for reading"); + } + return ptr; +} + +static struct StringIO* +writable(struct StringIO *ptr) +{ + if (!WRITABLE(ptr)) { + rb_raise(rb_eIOError, "not opened for writing"); + } + if (!OBJ_TAINTED(ptr->string)) { + rb_secure(4); + } + return ptr; +} + +static void +check_modifiable(struct StringIO *ptr) +{ + if (OBJ_FROZEN(ptr->string)) { + rb_raise(rb_eIOError, "not modifiable string"); + } +} + +static VALUE +strio_s_allocate(VALUE klass) +{ + return Data_Wrap_Struct(klass, strio_mark, strio_free, 0); +} + +/* + * call-seq: StringIO.new(string=""[, mode]) + * + * Creates new StringIO instance from with _string_ and _mode_. + */ +static VALUE +strio_initialize(int argc, VALUE *argv, VALUE self) +{ + struct StringIO *ptr = check_strio(self); + + if (!ptr) { + DATA_PTR(self) = ptr = strio_alloc(); + } + rb_call_super(0, 0); + strio_init(argc, argv, ptr); + return self; +} + +static void +strio_init(int argc, VALUE *argv, struct StringIO *ptr) +{ + VALUE string, mode; + int trunc = Qfalse; + + switch (rb_scan_args(argc, argv, "02", &string, &mode)) { + case 2: + if (FIXNUM_P(mode)) { + int flags = FIX2INT(mode); + ptr->flags = rb_io_modenum_flags(flags); + trunc = flags & O_TRUNC; + } + else { + const char *m = StringValueCStr(mode); + ptr->flags = rb_io_mode_flags(m); + trunc = *m == 'w'; + } + StringValue(string); + if ((ptr->flags & FMODE_WRITABLE) && OBJ_FROZEN(string)) { + errno = EACCES; + rb_sys_fail(0); + } + if (trunc) { + rb_str_resize(string, 0); + } + break; + case 1: + StringValue(string); + ptr->flags = OBJ_FROZEN(string) ? 
FMODE_READABLE : FMODE_READWRITE; + break; + case 0: + string = rb_enc_str_new("", 0, rb_default_external_encoding()); + ptr->flags = FMODE_READWRITE; + break; + } + ptr->string = string; + ptr->pos = 0; + ptr->lineno = 0; +} + +static VALUE +strio_finalize(VALUE self) +{ + struct StringIO *ptr = StringIO(self); + ptr->string = Qnil; + ptr->flags &= ~FMODE_READWRITE; + return self; +} + +/* + * call-seq: StringIO.open(string=""[, mode]) {|strio| ...} + * + * Equivalent to StringIO.new except that when it is called with a block, it + * yields with the new instance and closes it, and returns the result which + * returned from the block. + */ +static VALUE +strio_s_open(int argc, VALUE *argv, VALUE klass) +{ + VALUE obj = rb_class_new_instance(argc, argv, klass); + if (!rb_block_given_p()) return obj; + return rb_ensure(rb_yield, obj, strio_finalize, obj); +} + +/* + * Returns +false+. Just for compatibility to IO. + */ +static VALUE +strio_false(VALUE self) +{ + StringIO(self); + return Qfalse; +} + +/* + * Returns +nil+. Just for compatibility to IO. + */ +static VALUE +strio_nil(VALUE self) +{ + StringIO(self); + return Qnil; +} + +/* + * Returns *strio* itself. Just for compatibility to IO. + */ +static VALUE +strio_self(VALUE self) +{ + StringIO(self); + return self; +} + +/* + * Returns 0. Just for compatibility to IO. + */ +static VALUE +strio_0(VALUE self) +{ + StringIO(self); + return INT2FIX(0); +} + +/* + * Returns the argument unchanged. Just for compatibility to IO. + */ +static VALUE +strio_first(VALUE self, VALUE arg) +{ + StringIO(self); + return arg; +} + +/* + * Raises NotImplementedError. + */ +static VALUE +strio_unimpl(int argc, VALUE *argv, VALUE self) +{ + StringIO(self); + rb_notimplement(); + return Qnil; /* not reached */ +} + +/* + * call-seq: strio.string -> string + * + * Returns underlying String object, the subject of IO. 
+ */ +static VALUE +strio_get_string(VALUE self) +{ + return StringIO(self)->string; +} + +/* + * call-seq: + * strio.string = string -> string + * + * Changes underlying String object, the subject of IO. + */ +static VALUE +strio_set_string(VALUE self, VALUE string) +{ + struct StringIO *ptr = StringIO(self); + + if (!OBJ_TAINTED(self)) rb_secure(4); + ptr->flags &= ~FMODE_READWRITE; + StringValue(string); + ptr->flags = OBJ_FROZEN(string) ? FMODE_READABLE : FMODE_READWRITE; + ptr->pos = 0; + ptr->lineno = 0; + return ptr->string = string; +} + +/* + * call-seq: + * strio.close -> nil + * + * Closes strio. The *strio* is unavailable for any further data + * operations; an +IOError+ is raised if such an attempt is made. + */ +static VALUE +strio_close(VALUE self) +{ + struct StringIO *ptr = StringIO(self); + if (CLOSED(ptr)) { + rb_raise(rb_eIOError, "closed stream"); + } + ptr->flags &= ~FMODE_READWRITE; + return Qnil; +} + +/* + * call-seq: + * strio.close_read -> nil + * + * Closes the read end of a StringIO. Will raise an +IOError+ if the + * *strio* is not readable. + */ +static VALUE +strio_close_read(VALUE self) +{ + struct StringIO *ptr = StringIO(self); + if (!READABLE(ptr)) { + rb_raise(rb_eIOError, "closing non-duplex IO for reading"); + } + ptr->flags &= ~FMODE_READABLE; + return Qnil; +} + +/* + * call-seq: + * strio.close_write -> nil + * + * Closes the write end of a StringIO. Will raise an +IOError+ if the + * *strio* is not writeable. + */ +static VALUE +strio_close_write(VALUE self) +{ + struct StringIO *ptr = StringIO(self); + if (!WRITABLE(ptr)) { + rb_raise(rb_eIOError, "closing non-duplex IO for writing"); + } + ptr->flags &= ~FMODE_WRITABLE; + return Qnil; +} + +/* + * call-seq: + * strio.closed? -> true or false + * + * Returns +true+ if *strio* is completely closed, +false+ otherwise. 
+ */ +static VALUE +strio_closed(VALUE self) +{ + struct StringIO *ptr = StringIO(self); + if (!CLOSED(ptr)) return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * strio.closed_read? -> true or false + * + * Returns +true+ if *strio* is not readable, +false+ otherwise. + */ +static VALUE +strio_closed_read(VALUE self) +{ + struct StringIO *ptr = StringIO(self); + if (READABLE(ptr)) return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * strio.closed_write? -> true or false + * + * Returns +true+ if *strio* is not writable, +false+ otherwise. + */ +static VALUE +strio_closed_write(VALUE self) +{ + struct StringIO *ptr = StringIO(self); + if (WRITABLE(ptr)) return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * strio.eof -> true or false + * strio.eof? -> true or false + * + * Returns true if *strio* is at end of file. The stringio must be + * opened for reading or an +IOError+ will be raised. + */ +static VALUE +strio_eof(VALUE self) +{ + struct StringIO *ptr = readable(StringIO(self)); + if (ptr->pos < RSTRING_LEN(ptr->string)) return Qfalse; + return Qtrue; +} + +/* :nodoc: */ +static VALUE +strio_copy(VALUE copy, VALUE orig) +{ + struct StringIO *ptr; + + orig = rb_convert_type(orig, T_DATA, "StringIO", "to_strio"); + if (copy == orig) return copy; + ptr = StringIO(orig); + if (check_strio(copy)) { + strio_free(DATA_PTR(copy)); + } + DATA_PTR(copy) = ptr; + OBJ_INFECT(copy, orig); + ++ptr->count; + return copy; +} + +/* + * call-seq: + * strio.lineno -> integer + * + * Returns the current line number in *strio*. The stringio must be + * opened for reading. +lineno+ counts the number of times +gets+ is + * called, rather than the number of newlines encountered. The two + * values will differ if +gets+ is called with a separator other than + * newline. See also the $. variable. 
+ */ +static VALUE +strio_get_lineno(VALUE self) +{ + return LONG2NUM(StringIO(self)->lineno); +} + +/* + * call-seq: + * strio.lineno = integer -> integer + * + * Manually sets the current line number to the given value. + * $. is updated only on the next read. + */ +static VALUE +strio_set_lineno(VALUE self, VALUE lineno) +{ + StringIO(self)->lineno = NUM2LONG(lineno); + return lineno; +} + +/* call-seq: strio.binmode -> true */ +#define strio_binmode strio_self + +/* call-seq: strio.fcntl */ +#define strio_fcntl strio_unimpl + +/* call-seq: strio.flush -> strio */ +#define strio_flush strio_self + +/* call-seq: strio.fsync -> 0 */ +#define strio_fsync strio_0 + +/* + * call-seq: + * strio.reopen(other_StrIO) -> strio + * strio.reopen(string, mode) -> strio + * + * Reinitializes *strio* with the given other_StrIO or _string_ + * and _mode_ (see StringIO#new). + */ +static VALUE +strio_reopen(int argc, VALUE *argv, VALUE self) +{ + if (!OBJ_TAINTED(self)) rb_secure(4); + if (argc == 1 && TYPE(*argv) != T_STRING) { + return strio_copy(self, *argv); + } + strio_init(argc, argv, StringIO(self)); + return self; +} + +/* + * call-seq: + * strio.pos -> integer + * strio.tell -> integer + * + * Returns the current offset (in bytes) of *strio*. + */ +static VALUE +strio_get_pos(VALUE self) +{ + return LONG2NUM(StringIO(self)->pos); +} + +/* + * call-seq: + * strio.pos = integer -> integer + * + * Seeks to the given position (in bytes) in *strio*. + */ +static VALUE +strio_set_pos(VALUE self, VALUE pos) +{ + struct StringIO *ptr = StringIO(self); + long p = NUM2LONG(pos); + if (p < 0) { + error_inval(0); + } + ptr->pos = p; + return pos; +} + +/* + * call-seq: + * strio.rewind -> 0 + * + * Positions *strio* to the beginning of input, resetting + * +lineno+ to zero. 
+ */ +static VALUE +strio_rewind(VALUE self) +{ + struct StringIO *ptr = StringIO(self); + ptr->pos = 0; + ptr->lineno = 0; + return INT2FIX(0); +} + +/* + * call-seq: + * strio.seek(amount, whence=SEEK_SET) -> 0 + * + * Seeks to a given offset _amount_ in the stream according to + * the value of _whence_ (see IO#seek). + */ +static VALUE +strio_seek(int argc, VALUE *argv, VALUE self) +{ + VALUE whence; + struct StringIO *ptr = StringIO(self); + long offset; + + rb_scan_args(argc, argv, "11", NULL, &whence); + offset = NUM2LONG(argv[0]); + if (CLOSED(ptr)) { + rb_raise(rb_eIOError, "closed stream"); + } + switch (NIL_P(whence) ? 0 : NUM2LONG(whence)) { + case 0: + break; + case 1: + offset += ptr->pos; + break; + case 2: + offset += RSTRING_LEN(ptr->string); + break; + default: + error_inval("invalid whence"); + } + if (offset < 0) { + error_inval(0); + } + ptr->pos = offset; + return INT2FIX(0); +} + +/* + * call-seq: + * strio.sync -> true + * + * Returns +true+ always. + */ +static VALUE +strio_get_sync(VALUE self) +{ + StringIO(self); + return Qtrue; +} + +/* call-seq: strio.sync = boolean -> boolean */ +#define strio_set_sync strio_first + +#define strio_tell strio_get_pos + +/* + * call-seq: + * strio.each_byte {|byte| block } -> strio + * + * See IO#each_byte. + */ +static VALUE +strio_each_byte(VALUE self) +{ + struct StringIO *ptr = readable(StringIO(self)); + + RETURN_ENUMERATOR(self, 0, 0); + + while (ptr->pos < RSTRING_LEN(ptr->string)) { + char c = RSTRING_PTR(ptr->string)[ptr->pos++]; + rb_yield(CHR2FIX(c)); + } + return self; +} + +/* + * call-seq: + * strio.getc -> string or nil + * + * See IO#getc. 
+ */ +static VALUE +strio_getc(VALUE self) +{ + struct StringIO *ptr = readable(StringIO(self)); + rb_encoding *enc = rb_enc_get(ptr->string); + int len; + char *p; + + if (ptr->pos >= RSTRING_LEN(ptr->string)) { + return Qnil; + } + p = RSTRING_PTR(ptr->string)+ptr->pos; + len = rb_enc_mbclen(p, RSTRING_END(ptr->string), enc); + ptr->pos += len; + return rb_enc_str_new(p, len, rb_enc_get(ptr->string)); +} + +/* + * call-seq: + * strio.getbyte -> fixnum or nil + * + * See IO#getbyte. + */ +static VALUE +strio_getbyte(VALUE self) +{ + struct StringIO *ptr = readable(StringIO(self)); + int c; + if (ptr->pos >= RSTRING_LEN(ptr->string)) { + return Qnil; + } + c = RSTRING_PTR(ptr->string)[ptr->pos++]; + return CHR2FIX(c); +} + +static void +strio_extend(struct StringIO *ptr, long pos, long len) +{ + long olen; + + check_modifiable(ptr); + olen = RSTRING_LEN(ptr->string); + if (pos + len > olen) { + rb_str_resize(ptr->string, pos + len); + if (pos > olen) + MEMZERO(RSTRING_PTR(ptr->string) + olen, char, pos - olen); + } + else { + rb_str_modify(ptr->string); + } +} + +/* + * call-seq: + * strio.ungetc(string) -> nil + * + * Pushes back one character (passed as a parameter) onto *strio* + * such that a subsequent buffered read will return it. Pushing back + * behind the beginning of the buffer string is not possible. Nothing + * will be done if such an attempt is made. + * In other case, there is no limitation for multiple pushbacks. 
+ */ +static VALUE +strio_ungetc(VALUE self, VALUE c) +{ + struct StringIO *ptr = readable(StringIO(self)); + long lpos, clen; + char *p, *pend; + rb_encoding *enc, *enc2; + + if (NIL_P(c)) return Qnil; + if (FIXNUM_P(c)) { + int cc = FIX2INT(c); + char buf[16]; + + enc = rb_enc_get(ptr->string); + rb_enc_mbcput(cc, buf, enc); + c = rb_enc_str_new(buf, rb_enc_codelen(cc, enc), enc); + } + else { + SafeStringValue(c); + enc = rb_enc_get(ptr->string); + enc2 = rb_enc_get(c); + if (enc != enc2 && enc != rb_ascii8bit_encoding()) { + c = rb_str_conv_enc(c, enc2, enc); + } + } + /* get logical position */ + lpos = 0; p = RSTRING_PTR(ptr->string); pend = p + ptr->pos; + for (;;) { + clen = rb_enc_mbclen(p, pend, enc); + if (p+clen >= pend) break; + p += clen; + lpos++; + } + clen = p - RSTRING_PTR(ptr->string); + rb_str_update(ptr->string, lpos, ptr->pos ? 1 : 0, c); + ptr->pos = clen; + + return Qnil; +} + +/* + * call-seq: + * strio.ungetbyte(fixnum) -> nil + * + * See IO#ungetbyte + */ +static VALUE +strio_ungetbyte(VALUE self, VALUE c) +{ + struct StringIO *ptr = readable(StringIO(self)); + char buf[1], *cp = buf; + long pos = ptr->pos, cl = 1; + VALUE str = ptr->string; + + if (NIL_P(c)) return Qnil; + if (FIXNUM_P(c)) { + buf[0] = (char)FIX2INT(c); + } + else { + SafeStringValue(c); + cp = RSTRING_PTR(c); + cl = RSTRING_LEN(c); + if (cl == 0) return Qnil; + } + rb_str_modify(str); + if (cl > pos) { + char *s; + long rest = RSTRING_LEN(str) - pos; + rb_str_resize(str, rest + cl); + s = RSTRING_PTR(str); + memmove(s + cl, s + pos, rest); + pos = 0; + } + else { + pos -= cl; + } + memcpy(RSTRING_PTR(str) + pos, cp, cl); + ptr->pos = pos; + RB_GC_GUARD(c); + return Qnil; +} + +/* + * call-seq: + * strio.readchar -> string + * + * See IO#readchar. + */ +static VALUE +strio_readchar(VALUE self) +{ + VALUE c = strio_getc(self); + if (NIL_P(c)) rb_eof_error(); + return c; +} + +/* + * call-seq: + * strio.readbyte -> fixnum + * + * See IO#readbyte. 
+ */ +static VALUE +strio_readbyte(VALUE self) +{ + VALUE c = strio_getbyte(self); + if (NIL_P(c)) rb_eof_error(); + return c; +} + +/* + * call-seq: + * strio.each_char {|char| block } -> strio + * + * See IO#each_char. + */ +static VALUE +strio_each_char(VALUE self) +{ + VALUE c; + + RETURN_ENUMERATOR(self, 0, 0); + + while (!NIL_P(c = strio_getc(self))) { + rb_yield(c); + } + return self; +} + +/* Boyer-Moore search: copied from regex.c */ +static void +bm_init_skip(long *skip, const char *pat, long m) +{ + int c; + + for (c = 0; c < (1 << CHAR_BIT); c++) { + skip[c] = m; + } + while (--m) { + skip[(unsigned char)*pat++] = m; + } +} + +static long +bm_search(const char *little, long llen, const char *big, long blen, const long *skip) +{ + long i, j, k; + + i = llen - 1; + while (i < blen) { + k = i; + j = llen - 1; + while (j >= 0 && big[k] == little[j]) { + k--; + j--; + } + if (j < 0) return k + 1; + i += skip[(unsigned char)big[i]]; + } + return -1; +} + +static VALUE +strio_getline(int argc, VALUE *argv, struct StringIO *ptr) +{ + const char *s, *e, *p; + long n, limit = 0; + VALUE str; + + if (argc == 0) { + str = rb_rs; + } + else { + VALUE lim, tmp; + + rb_scan_args(argc, argv, "11", &str, &lim); + if (!NIL_P(lim)) limit = NUM2LONG(lim); + else if (!NIL_P(str) && TYPE(str) != T_STRING) { + tmp = rb_check_string_type(str); + if (NIL_P(tmp)) { + limit = NUM2LONG(str); + if (limit == 0) return rb_str_new(0,0); + str = rb_rs; + } + else { + str = tmp; + } + } + else if (!NIL_P(str)) { + StringValue(str); + } + } + + if (ptr->pos >= (n = RSTRING_LEN(ptr->string))) { + return Qnil; + } + s = RSTRING_PTR(ptr->string); + e = s + RSTRING_LEN(ptr->string); + s += ptr->pos; + if (limit > 0 && s + limit < e) { + e = s + limit; + } + if (NIL_P(str)) { + str = strio_substr(ptr, ptr->pos, e - s); + } + else if ((n = RSTRING_LEN(str)) == 0) { + p = s; + while (*p == '\n') { + if (++p == e) { + return Qnil; + } + } + s = p; + while ((p = memchr(p, '\n', e - p)) && (p != 
e)) { + if (*++p == '\n') { + e = p + 1; + break; + } + } + str = strio_substr(ptr, s - RSTRING_PTR(ptr->string), e - s); + } + else if (n == 1) { + if ((p = memchr(s, RSTRING_PTR(str)[0], e - s)) != 0) { + e = p + 1; + } + str = strio_substr(ptr, ptr->pos, e - s); + } + else { + if (n < e - s) { + if (e - s < 1024) { + for (p = s; p + n <= e; ++p) { + if (MEMCMP(p, RSTRING_PTR(str), char, n) == 0) { + e = p + n; + break; + } + } + } + else { + long skip[1 << CHAR_BIT], pos; + p = RSTRING_PTR(str); + bm_init_skip(skip, p, n); + if ((pos = bm_search(p, n, s, e - s, skip)) >= 0) { + e = s + pos + n; + } + } + } + str = strio_substr(ptr, ptr->pos, e - s); + } + ptr->pos = e - RSTRING_PTR(ptr->string); + ptr->lineno++; + return str; +} + +/* + * call-seq: + * strio.gets(sep=$/) -> string or nil + * strio.gets(limit) -> string or nil + * strio.gets(sep, limit) -> string or nil + * + * See IO#gets. + */ +static VALUE +strio_gets(int argc, VALUE *argv, VALUE self) +{ + VALUE str = strio_getline(argc, argv, readable(StringIO(self))); + + rb_lastline_set(str); + return str; +} + +/* + * call-seq: + * strio.readline(sep=$/) -> string + * strio.readline(limit) -> string or nil + * strio.readline(sep, limit) -> string or nil + * + * See IO#readline. + */ +static VALUE +strio_readline(int argc, VALUE *argv, VALUE self) +{ + VALUE line = strio_gets(argc, argv, self); + if (NIL_P(line)) rb_eof_error(); + return line; +} + +/* + * call-seq: + * strio.each(sep=$/) {|line| block } -> strio + * strio.each(limit) {|line| block } -> strio + * strio.each(sep, limit) {|line| block } -> strio + * strio.each_line(sep=$/) {|line| block } -> strio + * strio.each_line(limit) {|line| block } -> strio + * strio.each_line(sep,limit) {|line| block } -> strio + * + * See IO#each. 
+ */ +static VALUE +strio_each(int argc, VALUE *argv, VALUE self) +{ + struct StringIO *ptr = StringIO(self); + VALUE line; + + RETURN_ENUMERATOR(self, argc, argv); + + while (!NIL_P(line = strio_getline(argc, argv, readable(ptr)))) { + rb_yield(line); + } + return self; +} + +/* + * call-seq: + * strio.readlines(sep=$/) -> array + * strio.readlines(limit) -> array + * strio.readlines(sep,limit) -> array + * + * See IO#readlines. + */ +static VALUE +strio_readlines(int argc, VALUE *argv, VALUE self) +{ + struct StringIO *ptr = StringIO(self); + VALUE ary = rb_ary_new(), line; + while (!NIL_P(line = strio_getline(argc, argv, readable(ptr)))) { + rb_ary_push(ary, line); + } + return ary; +} + +/* + * call-seq: + * strio.write(string) -> integer + * strio.syswrite(string) -> integer + * + * Appends the given string to the underlying buffer string of *strio*. + * The stream must be opened for writing. If the argument is not a + * string, it will be converted to a string using to_s. + * Returns the number of bytes written. See IO#write. + */ +static VALUE +strio_write(VALUE self, VALUE str) +{ + struct StringIO *ptr = writable(StringIO(self)); + long len, olen; + rb_encoding *enc, *enc2; + + if (TYPE(str) != T_STRING) + str = rb_obj_as_string(str); + enc = rb_enc_get(ptr->string); + enc2 = rb_enc_get(str); + if (enc != enc2 && enc != rb_ascii8bit_encoding()) { + str = rb_str_conv_enc(str, enc2, enc); + } + len = RSTRING_LEN(str); + if (len == 0) return INT2FIX(0); + check_modifiable(ptr); + olen = RSTRING_LEN(ptr->string); + if (ptr->flags & FMODE_APPEND) { + ptr->pos = olen; + } + if (ptr->pos == olen) { + rb_str_cat(ptr->string, RSTRING_PTR(str), len); + } + else { + strio_extend(ptr, ptr->pos, len); + memmove(RSTRING_PTR(ptr->string)+ptr->pos, RSTRING_PTR(str), len); + OBJ_INFECT(ptr->string, str); + } + OBJ_INFECT(ptr->string, self); + ptr->pos += len; + return LONG2NUM(len); +} + +/* + * call-seq: + * strio << obj -> strio + * + * See IO#<<. 
+ */ +#define strio_addstr rb_io_addstr + +/* + * call-seq: + * strio.print() -> nil + * strio.print(obj, ...) -> nil + * + * See IO#print. + */ +#define strio_print rb_io_print + +/* + * call-seq: + * strio.printf(format_string [, obj, ...] ) -> nil + * + * See IO#printf. + */ +#define strio_printf rb_io_printf + +/* + * call-seq: + * strio.putc(obj) -> obj + * + * See IO#putc. + */ +static VALUE +strio_putc(VALUE self, VALUE ch) +{ + struct StringIO *ptr = writable(StringIO(self)); + int c = NUM2CHR(ch); + long olen; + + check_modifiable(ptr); + olen = RSTRING_LEN(ptr->string); + if (ptr->flags & FMODE_APPEND) { + ptr->pos = olen; + } + strio_extend(ptr, ptr->pos, 1); + RSTRING_PTR(ptr->string)[ptr->pos++] = c; + OBJ_INFECT(ptr->string, self); + return ch; +} + +/* + * call-seq: + * strio.puts(obj, ...) -> nil + * + * See IO#puts. + */ +#define strio_puts rb_io_puts + +/* + * call-seq: + * strio.read([length [, buffer]]) -> string, buffer, or nil + * + * See IO#read. + */ +static VALUE +strio_read(int argc, VALUE *argv, VALUE self) +{ + struct StringIO *ptr = readable(StringIO(self)); + VALUE str = Qnil; + long len; + + switch (argc) { + case 2: + str = argv[1]; + StringValue(str); + rb_str_modify(str); + case 1: + if (!NIL_P(argv[0])) { + len = NUM2LONG(argv[0]); + if (len < 0) { + rb_raise(rb_eArgError, "negative length %ld given", len); + } + if (len > 0 && ptr->pos >= RSTRING_LEN(ptr->string)) { + if (!NIL_P(str)) rb_str_resize(str, 0); + return Qnil; + } + break; + } + /* fall through */ + case 0: + len = RSTRING_LEN(ptr->string); + if (len <= ptr->pos) { + if (NIL_P(str)) { + str = rb_str_new(0, 0); + } + else { + rb_str_resize(str, 0); + } + return str; + } + else { + len -= ptr->pos; + } + break; + default: + rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", argc); + } + if (NIL_P(str)) { + str = strio_substr(ptr, ptr->pos, len); + if (argc > 0) rb_enc_associate(str, rb_ascii8bit_encoding()); + } + else { + long rest = 
RSTRING_LEN(ptr->string) - ptr->pos; + if (len > rest) len = rest; + rb_str_resize(str, len); + MEMCPY(RSTRING_PTR(str), RSTRING_PTR(ptr->string) + ptr->pos, char, len); + } + if (NIL_P(str)) { + str = rb_str_new(0, 0); + len = 0; + } + else { + ptr->pos += len = RSTRING_LEN(str); + } + return str; +} + +/* + * call-seq: + * strio.sysread(integer[, outbuf]) -> string + * + * Similar to #read, but raises +EOFError+ at end of string instead of + * returning +nil+, as well as IO#sysread does. + */ +static VALUE +strio_sysread(int argc, VALUE *argv, VALUE self) +{ + VALUE val = strio_read(argc, argv, self); + if (NIL_P(val)) { + rb_eof_error(); + } + return val; +} + +#define strio_syswrite strio_write + +/* call-seq: strio.path -> nil */ +#define strio_path strio_nil + +/* + * call-seq: + * strio.isatty -> false + * strio.tty? -> false + * + */ +#define strio_isatty strio_false + +/* call-seq: strio.pid -> nil */ +#define strio_pid strio_nil + +/* call-seq: strio.fileno -> nil */ +#define strio_fileno strio_nil + +/* + * call-seq: + * strio.size -> integer + * + * Returns the size of the buffer string. + */ +static VALUE +strio_size(VALUE self) +{ + VALUE string = StringIO(self)->string; + if (NIL_P(string)) { + rb_raise(rb_eIOError, "not opened"); + } + return ULONG2NUM(RSTRING_LEN(string)); +} + +/* + * call-seq: + * strio.truncate(integer) -> integer + * + * Resizes the buffer string to _integer_ bytes, zero-filling any bytes added + * when that is longer than the current string. The *strio* must be opened + * for writing. A negative _integer_ is rejected with EINVAL. Returns the + * _integer_ argument. + */ +static VALUE +strio_truncate(VALUE self, VALUE len) +{ + VALUE string = writable(StringIO(self))->string; + long l = NUM2LONG(len); + long plen = RSTRING_LEN(string); + if (l < 0) { + error_inval("negative length"); + } + rb_str_resize(string, l); + if (plen < l) { + /* growing: clear the newly exposed tail */ + MEMZERO(RSTRING_PTR(string) + plen, char, l - plen); + } + return len; +} + +/* + * call-seq: + * strio.external_encoding => encoding + * + * Returns the Encoding object that represents the encoding of the file. 
+ * If strio is write mode and no encoding is specified, returns nil. + */ + +static VALUE +strio_external_encoding(VALUE self) +{ + return rb_enc_from_encoding(rb_enc_get(StringIO(self)->string)); +} + +/* + * call-seq: + * strio.internal_encoding => encoding + * + * Returns the Encoding of the internal string if conversion is + * specified. Otherwise returns nil. + */ + +static VALUE +strio_internal_encoding(VALUE self) +{ + return Qnil; +} + +/* + * call-seq: + * strio.set_encoding(ext_enc) => strio + * + * Tagged with the encoding specified. + */ + +static VALUE +strio_set_encoding(VALUE self, VALUE ext_enc) +{ + rb_encoding* enc; + VALUE str = StringIO(self)->string; + enc = rb_to_encoding(ext_enc); + rb_enc_associate(str, enc); + return self; +} + +/* + * Pseudo I/O on String object. + */ +void +Init_stringio() +{ + VALUE StringIO = rb_define_class("StringIO", rb_cData); + + rb_include_module(StringIO, rb_mEnumerable); + rb_define_alloc_func(StringIO, strio_s_allocate); + rb_define_singleton_method(StringIO, "open", strio_s_open, -1); + rb_define_method(StringIO, "initialize", strio_initialize, -1); + rb_define_method(StringIO, "initialize_copy", strio_copy, 1); + rb_define_method(StringIO, "reopen", strio_reopen, -1); + + rb_define_method(StringIO, "string", strio_get_string, 0); + rb_define_method(StringIO, "string=", strio_set_string, 1); + rb_define_method(StringIO, "lineno", strio_get_lineno, 0); + rb_define_method(StringIO, "lineno=", strio_set_lineno, 1); + + rb_define_method(StringIO, "binmode", strio_binmode, 0); + rb_define_method(StringIO, "close", strio_close, 0); + rb_define_method(StringIO, "close_read", strio_close_read, 0); + rb_define_method(StringIO, "close_write", strio_close_write, 0); + rb_define_method(StringIO, "closed?", strio_closed, 0); + rb_define_method(StringIO, "closed_read?", strio_closed_read, 0); + rb_define_method(StringIO, "closed_write?", strio_closed_write, 0); + rb_define_method(StringIO, "eof", strio_eof, 0); + 
rb_define_method(StringIO, "eof?", strio_eof, 0); + rb_define_method(StringIO, "fcntl", strio_fcntl, -1); + rb_define_method(StringIO, "flush", strio_flush, 0); + rb_define_method(StringIO, "fsync", strio_fsync, 0); + rb_define_method(StringIO, "pos", strio_get_pos, 0); + rb_define_method(StringIO, "pos=", strio_set_pos, 1); + rb_define_method(StringIO, "rewind", strio_rewind, 0); + rb_define_method(StringIO, "seek", strio_seek, -1); + rb_define_method(StringIO, "sync", strio_get_sync, 0); + rb_define_method(StringIO, "sync=", strio_set_sync, 1); + rb_define_method(StringIO, "tell", strio_tell, 0); + rb_define_method(StringIO, "path", strio_path, 0); + + rb_define_method(StringIO, "each", strio_each, -1); + rb_define_method(StringIO, "each_line", strio_each, -1); + rb_define_method(StringIO, "lines", strio_each, -1); + rb_define_method(StringIO, "each_byte", strio_each_byte, 0); + rb_define_method(StringIO, "bytes", strio_each_byte, 0); + rb_define_method(StringIO, "each_char", strio_each_char, 0); + rb_define_method(StringIO, "chars", strio_each_char, 0); + rb_define_method(StringIO, "getc", strio_getc, 0); + rb_define_method(StringIO, "ungetc", strio_ungetc, 1); + rb_define_method(StringIO, "ungetbyte", strio_ungetbyte, 1); + rb_define_method(StringIO, "readchar", strio_readchar, 0); + rb_define_method(StringIO, "getbyte", strio_getbyte, 0); + rb_define_method(StringIO, "readbyte", strio_readbyte, 0); + rb_define_method(StringIO, "gets", strio_gets, -1); + rb_define_method(StringIO, "readline", strio_readline, -1); + rb_define_method(StringIO, "readlines", strio_readlines, -1); + rb_define_method(StringIO, "read", strio_read, -1); + rb_define_method(StringIO, "sysread", strio_sysread, -1); + rb_define_method(StringIO, "readpartial", strio_sysread, -1); + + rb_define_method(StringIO, "write", strio_write, 1); + rb_define_method(StringIO, "<<", strio_addstr, 1); + rb_define_method(StringIO, "print", strio_print, -1); + rb_define_method(StringIO, "printf", 
strio_printf, -1); + rb_define_method(StringIO, "putc", strio_putc, 1); + rb_define_method(StringIO, "puts", strio_puts, -1); + rb_define_method(StringIO, "syswrite", strio_syswrite, 1); + + rb_define_method(StringIO, "isatty", strio_isatty, 0); + rb_define_method(StringIO, "tty?", strio_isatty, 0); + rb_define_method(StringIO, "pid", strio_pid, 0); + rb_define_method(StringIO, "fileno", strio_fileno, 0); + rb_define_method(StringIO, "size", strio_size, 0); + rb_define_method(StringIO, "length", strio_size, 0); + rb_define_method(StringIO, "truncate", strio_truncate, 1); + + rb_define_method(StringIO, "external_encoding", strio_external_encoding, 0); + rb_define_method(StringIO, "internal_encoding", strio_internal_encoding, 0); + rb_define_method(StringIO, "set_encoding", strio_set_encoding, 1); +} diff --git a/ext/strscan/depend b/ext/strscan/depend new file mode 100644 index 0000000..76f6e0b --- /dev/null +++ b/ext/strscan/depend @@ -0,0 +1,2 @@ +strscan.o: strscan.c $(hdrdir)/ruby.h $(hdrdir)/re.h $(hdrdir)/regex.h \ + $(hdrdir)/oniguruma.h $(topdir)/config.h $(hdrdir)/defines.h diff --git a/ext/strscan/extconf.rb b/ext/strscan/extconf.rb new file mode 100644 index 0000000..0d21966 --- /dev/null +++ b/ext/strscan/extconf.rb @@ -0,0 +1,2 @@ +require 'mkmf' +create_makefile 'strscan' diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c new file mode 100644 index 0000000..f44f265 --- /dev/null +++ b/ext/strscan/strscan.c @@ -0,0 +1,1329 @@ +/* + $Id: strscan.c 25546 2009-10-28 16:11:20Z yugui $ + + Copyright (c) 1999-2006 Minero Aoki + + This program is free software. + You can distribute/modify this program under the terms of + the Ruby License. For details, see the file COPYING. 
+*/ + +#include "ruby/ruby.h" +#include "ruby/re.h" +#include "ruby/encoding.h" + +#define STRSCAN_VERSION "0.7.0" + +/* ======================================================================= + Data Type Definitions + ======================================================================= */ + +static VALUE StringScanner; +static VALUE ScanError; + +struct strscanner +{ + /* multi-purpose flags */ + unsigned long flags; +#define FLAG_MATCHED (1 << 0) + + /* the string to scan */ + VALUE str; + + /* scan pointers */ + long prev; /* legal only when MATCHED_P(s) */ + long curr; /* always legal */ + + /* the regexp register; legal only when MATCHED_P(s) */ + struct re_registers regs; +}; + +/* match-status helpers; expansions are parenthesized so they compose safely */ +#define MATCHED_P(s) ((s)->flags & FLAG_MATCHED) +#define MATCHED(s) ((s)->flags |= FLAG_MATCHED) +#define CLEAR_MATCH_STATUS(s) ((s)->flags &= ~FLAG_MATCHED) + +#define S_PBEG(s) (RSTRING_PTR((s)->str)) +#define S_LEN(s) (RSTRING_LEN((s)->str)) +#define S_PEND(s) (S_PBEG(s) + S_LEN(s)) +#define CURPTR(s) (S_PBEG(s) + (s)->curr) +#define S_RESTLEN(s) (S_LEN(s) - (s)->curr) + +/* true when the scan pointer is at or past the end of the string; uses its + own argument rather than a caller variable that happens to be named p */ +#define EOS_P(s) ((s)->curr >= RSTRING_LEN((s)->str)) + +/* fetches the scanner struct into var and raises ArgumentError if it has no string yet */ +#define GET_SCANNER(obj,var) do {\ + Data_Get_Struct(obj, struct strscanner, var);\ + if (NIL_P(var->str)) rb_raise(rb_eArgError, "uninitialized StringScanner object");\ +} while (0) + +/* ======================================================================= + Function Prototypes + ======================================================================= */ + +static VALUE infect _((VALUE str, struct strscanner *p)); +static VALUE extract_range _((struct strscanner *p, long beg_i, long end_i)); +static VALUE extract_beg_len _((struct strscanner *p, long beg_i, long len)); + +void check_strscan _((VALUE obj)); +static void strscan_mark _((struct strscanner *p)); +static void strscan_free _((struct strscanner *p)); +static VALUE strscan_s_allocate _((VALUE klass)); +static VALUE strscan_initialize _((int argc, VALUE *argv, VALUE self)); +static VALUE 
strscan_init_copy _((VALUE vself, VALUE vorig)); + +static VALUE strscan_s_mustc _((VALUE self)); +static VALUE strscan_terminate _((VALUE self)); +static VALUE strscan_clear _((VALUE self)); +static VALUE strscan_get_string _((VALUE self)); +static VALUE strscan_set_string _((VALUE self, VALUE str)); +static VALUE strscan_concat _((VALUE self, VALUE str)); +static VALUE strscan_get_pos _((VALUE self)); +static VALUE strscan_set_pos _((VALUE self, VALUE pos)); +static VALUE strscan_do_scan _((VALUE self, VALUE regex, + int succptr, int getstr, int headonly)); +static VALUE strscan_scan _((VALUE self, VALUE re)); +static VALUE strscan_match_p _((VALUE self, VALUE re)); +static VALUE strscan_skip _((VALUE self, VALUE re)); +static VALUE strscan_check _((VALUE self, VALUE re)); +static VALUE strscan_scan_full _((VALUE self, VALUE re, + VALUE succp, VALUE getp)); +static VALUE strscan_scan_until _((VALUE self, VALUE re)); +static VALUE strscan_skip_until _((VALUE self, VALUE re)); +static VALUE strscan_check_until _((VALUE self, VALUE re)); +static VALUE strscan_search_full _((VALUE self, VALUE re, + VALUE succp, VALUE getp)); +static void adjust_registers_to_matched _((struct strscanner *p)); +static VALUE strscan_getch _((VALUE self)); +static VALUE strscan_get_byte _((VALUE self)); +static VALUE strscan_getbyte _((VALUE self)); +static VALUE strscan_peek _((VALUE self, VALUE len)); +static VALUE strscan_peep _((VALUE self, VALUE len)); +static VALUE strscan_unscan _((VALUE self)); +static VALUE strscan_bol_p _((VALUE self)); +static VALUE strscan_eos_p _((VALUE self)); +static VALUE strscan_empty_p _((VALUE self)); +static VALUE strscan_rest_p _((VALUE self)); +static VALUE strscan_matched_p _((VALUE self)); +static VALUE strscan_matched _((VALUE self)); +static VALUE strscan_matched_size _((VALUE self)); +static VALUE strscan_aref _((VALUE self, VALUE idx)); +static VALUE strscan_pre_match _((VALUE self)); +static VALUE strscan_post_match _((VALUE self)); +static 
VALUE strscan_rest _((VALUE self)); +static VALUE strscan_rest_size _((VALUE self)); + +static VALUE strscan_inspect _((VALUE self)); +static VALUE inspect1 _((struct strscanner *p)); +static VALUE inspect2 _((struct strscanner *p)); + +/* ======================================================================= + Utils + ======================================================================= */ + +static VALUE +infect(VALUE str, struct strscanner *p) +{ + OBJ_INFECT(str, p->str); + return str; +} + +static VALUE +str_new(struct strscanner *p, const char *ptr, long len) +{ + VALUE str = rb_str_new(ptr, len); + rb_enc_copy(str, p->str); + return str; +} + +static VALUE +extract_range(struct strscanner *p, long beg_i, long end_i) +{ + if (beg_i > S_LEN(p)) return Qnil; + if (end_i > S_LEN(p)) + end_i = S_LEN(p); + return infect(str_new(p, S_PBEG(p) + beg_i, end_i - beg_i), p); +} + +static VALUE +extract_beg_len(struct strscanner *p, long beg_i, long len) +{ + if (beg_i > S_LEN(p)) return Qnil; + if (beg_i + len > S_LEN(p)) + len = S_LEN(p) - beg_i; + return infect(str_new(p, S_PBEG(p) + beg_i, len), p); +} + +/* ======================================================================= + Constructor + ======================================================================= */ + +static void +strscan_mark(struct strscanner *p) +{ + rb_gc_mark(p->str); +} + +static void +strscan_free(struct strscanner *p) +{ + onig_region_free(&(p->regs), 0); + ruby_xfree(p); +} + +static VALUE +strscan_s_allocate(VALUE klass) +{ + struct strscanner *p; + + p = ALLOC(struct strscanner); + MEMZERO(p, struct strscanner, 1); + CLEAR_MATCH_STATUS(p); + onig_region_init(&(p->regs)); + p->str = Qnil; + return Data_Wrap_Struct(klass, strscan_mark, strscan_free, p); +} + +/* + * call-seq: StringScanner.new(string, dup = false) + * + * Creates a new StringScanner object to scan over the given +string+. + * +dup+ argument is obsolete and not used now. 
+ */ +static VALUE +strscan_initialize(int argc, VALUE *argv, VALUE self) +{ + struct strscanner *p; + VALUE str, need_dup; + + Data_Get_Struct(self, struct strscanner, p); + rb_scan_args(argc, argv, "11", &str, &need_dup); + StringValue(str); + p->str = str; + + return self; +} + +void +check_strscan(VALUE obj) +{ + if (TYPE(obj) != T_DATA || RDATA(obj)->dmark != (RUBY_DATA_FUNC)strscan_mark) { + rb_raise(rb_eTypeError, + "wrong argument type %s (expected StringScanner)", + rb_obj_classname(obj)); + } +} + +/* + * call-seq: + * dup + * clone + * + * Duplicates a StringScanner object. + */ +static VALUE +strscan_init_copy(VALUE vself, VALUE vorig) +{ + struct strscanner *self, *orig; + + Data_Get_Struct(vself, struct strscanner, self); + check_strscan(vorig); + Data_Get_Struct(vorig, struct strscanner, orig); + if (self != orig) { + self->flags = orig->flags; + self->str = orig->str; + self->prev = orig->prev; + self->curr = orig->curr; + onig_region_copy(&self->regs, &orig->regs); + } + + return vself; +} + +/* ======================================================================= + Instance Methods + ======================================================================= */ + +/* + * call-seq: StringScanner.must_C_version + * + * This method is defined for backward compatibility. + */ +static VALUE +strscan_s_mustc(VALUE self) +{ + return self; +} + +/* + * Reset the scan pointer (index 0) and clear matching data. + */ +static VALUE +strscan_reset(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + p->curr = 0; + CLEAR_MATCH_STATUS(p); + return self; +} + +/* + * call-seq: + * terminate + * clear + * + * Set the scan pointer to the end of the string and clear matching data. + */ +static VALUE +strscan_terminate(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + p->curr = S_LEN(p); + CLEAR_MATCH_STATUS(p); + return self; +} + +/* + * Equivalent to #terminate. + * This method is obsolete; use #terminate instead. 
+ */ +static VALUE +strscan_clear(VALUE self) +{ + rb_warning("StringScanner#clear is obsolete; use #terminate instead"); + return strscan_terminate(self); +} + +/* + * Returns the string being scanned. + */ +static VALUE +strscan_get_string(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + return p->str; +} + +/* + * call-seq: string=(str) + * + * Changes the string being scanned to +str+ and resets the scanner. + * Returns +str+. + */ +static VALUE +strscan_set_string(VALUE self, VALUE str) +{ + struct strscanner *p; + + Data_Get_Struct(self, struct strscanner, p); + StringValue(str); + p->str = str; + p->curr = 0; + CLEAR_MATCH_STATUS(p); + return str; +} + +/* + * call-seq: + * concat(str) + * <<(str) + * + * Appends +str+ to the string being scanned. + * This method does not affect scan pointer. + * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.scan(/Fri /) + * s << " +1000 GMT" + * s.string # -> "Fri Dec 12 1975 14:39 +1000 GMT" + * s.scan(/Dec/) # -> "Dec" + */ +static VALUE +strscan_concat(VALUE self, VALUE str) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + StringValue(str); + rb_str_append(p->str, str); + return self; +} + +/* + * Returns the position of the scan pointer. In the 'reset' position, this + * value is zero. In the 'terminated' position (i.e. the string is exhausted), + * this value is the length of the string. + * + * In short, it's a 0-based index into the string. + * + * s = StringScanner.new('test string') + * s.pos # -> 0 + * s.scan_until /str/ # -> "test str" + * s.pos # -> 8 + * s.terminate # -> # + * s.pos # -> 11 + */ +static VALUE +strscan_get_pos(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + return INT2FIX(p->curr); +} + +/* + * call-seq: pos=(n) + * + * Modify the scan pointer. 
+ * + * s = StringScanner.new('test string') + * s.pos = 7 # -> 7 + * s.rest # -> "ring" + */ +static VALUE +strscan_set_pos(VALUE self, VALUE v) +{ + struct strscanner *p; + long i; + + GET_SCANNER(self, p); + i = NUM2INT(v); + if (i < 0) i += S_LEN(p); + if (i < 0) rb_raise(rb_eRangeError, "index out of range"); + if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range"); + p->curr = i; + return INT2NUM(i); +} + +static VALUE +strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly) +{ + regex_t *rb_reg_prepare_re(VALUE re, VALUE str); + struct strscanner *p; + regex_t *re; + int ret; + int tmpreg; + + Check_Type(regex, T_REGEXP); + GET_SCANNER(self, p); + + CLEAR_MATCH_STATUS(p); + if (S_RESTLEN(p) < 0) { + return Qnil; + } + re = rb_reg_prepare_re(regex, p->str); + tmpreg = re != RREGEXP(regex)->ptr; + if (!tmpreg) RREGEXP(regex)->usecnt++; + + if (headonly) { + ret = onig_match(re, (UChar* )CURPTR(p), + (UChar* )(CURPTR(p) + S_RESTLEN(p)), + (UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE); + } + else { + ret = onig_search(re, + (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)), + (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)), + &(p->regs), ONIG_OPTION_NONE); + } + if (!tmpreg) RREGEXP(regex)->usecnt--; + if (tmpreg) { + if (RREGEXP(regex)->usecnt) { + onig_free(re); + } + else { + onig_free(RREGEXP(regex)->ptr); + RREGEXP(regex)->ptr = re; + } + } + + if (ret == -2) rb_raise(ScanError, "regexp buffer overflow"); + if (ret < 0) { + /* not matched */ + return Qnil; + } + + MATCHED(p); + p->prev = p->curr; + if (succptr) { + p->curr += p->regs.end[0]; + } + if (getstr) { + return extract_beg_len(p, p->prev, p->regs.end[0]); + } + else { + return INT2FIX(p->regs.end[0]); + } +} + +/* + * call-seq: scan(pattern) => String + * + * Tries to match with +pattern+ at the current position. If there's a match, + * the scanner advances the "scan pointer" and returns the matched string. 
+ * Otherwise, the scanner returns +nil+. + * + * s = StringScanner.new('test string') + * p s.scan(/\w+/) # -> "test" + * p s.scan(/\w+/) # -> nil + * p s.scan(/\s+/) # -> " " + * p s.scan(/\w+/) # -> "string" + * p s.scan(/./) # -> nil + * + */ +static VALUE +strscan_scan(VALUE self, VALUE re) +{ + return strscan_do_scan(self, re, 1, 1, 1); +} + +/* + * call-seq: match?(pattern) + * + * Tests whether the given +pattern+ is matched from the current scan pointer. + * Returns the length of the match, or +nil+. The scan pointer is not advanced. + * + * s = StringScanner.new('test string') + * p s.match?(/\w+/) # -> 4 + * p s.match?(/\w+/) # -> 4 + * p s.match?(/\s+/) # -> nil + */ +static VALUE +strscan_match_p(VALUE self, VALUE re) +{ + return strscan_do_scan(self, re, 0, 0, 1); +} + +/* + * call-seq: skip(pattern) + * + * Attempts to skip over the given +pattern+ beginning with the scan pointer. + * If it matches, the scan pointer is advanced to the end of the match, and the + * length of the match is returned. Otherwise, +nil+ is returned. + * + * It's similar to #scan, but without returning the matched string. + * + * s = StringScanner.new('test string') + * p s.skip(/\w+/) # -> 4 + * p s.skip(/\w+/) # -> nil + * p s.skip(/\s+/) # -> 1 + * p s.skip(/\w+/) # -> 6 + * p s.skip(/./) # -> nil + * + */ +static VALUE +strscan_skip(VALUE self, VALUE re) +{ + return strscan_do_scan(self, re, 1, 0, 1); +} + +/* + * call-seq: check(pattern) + * + * This returns the value that #scan would return, without advancing the scan + * pointer. The match register is affected, though. + * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.check /Fri/ # -> "Fri" + * s.pos # -> 0 + * s.matched # -> "Fri" + * s.check /12/ # -> nil + * s.matched # -> nil + * + * Mnemonic: it "checks" to see whether a #scan will return a value. 
+ */ +static VALUE +strscan_check(VALUE self, VALUE re) +{ + return strscan_do_scan(self, re, 0, 1, 1); +} + +/* + * call-seq: scan_full(pattern, advance_pointer_p, return_string_p) + * + * Tests whether the given +pattern+ is matched from the current scan pointer. + * Advances the scan pointer if +advance_pointer_p+ is true. + * Returns the matched string if +return_string_p+ is true. + * The match register is affected. + * + * "full" means "#scan with full parameters". + */ +static VALUE +strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f) +{ + return strscan_do_scan(self, re, RTEST(s), RTEST(f), 1); +} + +/* + * call-seq: scan_until(pattern) + * + * Scans the string _until_ the +pattern+ is matched. Returns the substring up + * to and including the end of the match, advancing the scan pointer to that + * location. If there is no match, +nil+ is returned. + * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.scan_until(/1/) # -> "Fri Dec 1" + * s.pre_match # -> "Fri Dec " + * s.scan_until(/XYZ/) # -> nil + */ +static VALUE +strscan_scan_until(VALUE self, VALUE re) +{ + return strscan_do_scan(self, re, 1, 1, 0); +} + +/* + * call-seq: exist?(pattern) + * + * Looks _ahead_ to see if the +pattern+ exists _anywhere_ in the string, + * without advancing the scan pointer. This predicates whether a #scan_until + * will return a value. + * + * s = StringScanner.new('test string') + * s.exist? /s/ # -> 3 + * s.scan /test/ # -> "test" + * s.exist? /s/ # -> 2 + * s.exist? /e/ # -> nil + */ +static VALUE +strscan_exist_p(VALUE self, VALUE re) +{ + return strscan_do_scan(self, re, 0, 0, 0); +} + +/* + * call-seq: skip_until(pattern) + * + * Advances the scan pointer until +pattern+ is matched and consumed. Returns + * the number of bytes advanced, or +nil+ if no match was found. + * + * Look ahead to match +pattern+, and advance the scan pointer to the _end_ + * of the match. Return the number of characters advanced, or +nil+ if the + * match was unsuccessful. 
+ * + * It's similar to #scan_until, but without returning the intervening string. + * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.skip_until /12/ # -> 10 + * s # + */ +static VALUE +strscan_skip_until(VALUE self, VALUE re) +{ + return strscan_do_scan(self, re, 1, 0, 0); +} + +/* + * call-seq: check_until(pattern) + * + * This returns the value that #scan_until would return, without advancing the + * scan pointer. The match register is affected, though. + * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.check_until /12/ # -> "Fri Dec 12" + * s.pos # -> 0 + * s.matched # -> 12 + * + * Mnemonic: it "checks" to see whether a #scan_until will return a value. + */ +static VALUE +strscan_check_until(VALUE self, VALUE re) +{ + return strscan_do_scan(self, re, 0, 1, 0); +} + +/* + * call-seq: search_full(pattern, advance_pointer_p, return_string_p) + * + * Scans the string _until_ the +pattern+ is matched. + * Advances the scan pointer if +advance_pointer_p+, otherwise not. + * Returns the matched string if +return_string_p+ is true, otherwise + * returns the number of bytes advanced. + * This method does affect the match register. + */ +static VALUE +strscan_search_full(VALUE self, VALUE re, VALUE s, VALUE f) +{ + return strscan_do_scan(self, re, RTEST(s), RTEST(f), 0); +} + +static void +adjust_registers_to_matched(struct strscanner *p) +{ + onig_region_clear(&(p->regs)); + onig_region_set(&(p->regs), 0, 0, p->curr - p->prev); +} + +/* + * Scans one character and returns it. + * This method is multibyte character sensitive. 
+ * + * s = StringScanner.new("ab") + * s.getch # => "a" + * s.getch # => "b" + * s.getch # => nil + * + * $KCODE = 'EUC' + * s = StringScanner.new("\244\242") + * s.getch # => "\244\242" # Japanese hira-kana "A" in EUC-JP + * s.getch # => nil + */ +static VALUE +strscan_getch(VALUE self) +{ + struct strscanner *p; + long len; + + GET_SCANNER(self, p); + CLEAR_MATCH_STATUS(p); + if (EOS_P(p)) + return Qnil; + + len = rb_enc_mbclen(CURPTR(p), S_PEND(p), rb_enc_get(p->str)); + if (p->curr + len > S_LEN(p)) { + len = S_LEN(p) - p->curr; + } + p->prev = p->curr; + p->curr += len; + MATCHED(p); + adjust_registers_to_matched(p); + return extract_range(p, p->prev + p->regs.beg[0], + p->prev + p->regs.end[0]); +} + +/* + * Scans one byte and returns it. + * This method is not multibyte character sensitive. + * See also: #getch. + * + * s = StringScanner.new('ab') + * s.get_byte # => "a" + * s.get_byte # => "b" + * s.get_byte # => nil + * + * $KCODE = 'EUC' + * s = StringScanner.new("\244\242") + * s.get_byte # => "\244" + * s.get_byte # => "\242" + * s.get_byte # => nil + */ +static VALUE +strscan_get_byte(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + CLEAR_MATCH_STATUS(p); + if (EOS_P(p)) + return Qnil; + + p->prev = p->curr; + p->curr++; + MATCHED(p); + adjust_registers_to_matched(p); + return extract_range(p, p->prev + p->regs.beg[0], + p->prev + p->regs.end[0]); +} + +/* + * Equivalent to #get_byte. + * This method is obsolete; use #get_byte instead. + */ +static VALUE +strscan_getbyte(VALUE self) +{ + rb_warning("StringScanner#getbyte is obsolete; use #get_byte instead"); + return strscan_get_byte(self); +} + +/* + * call-seq: peek(len) + * + * Extracts a string corresponding to string[pos,len], without + * advancing the scan pointer. 
+ * + * s = StringScanner.new('test string') + * s.peek(7) # => "test st" + * s.peek(7) # => "test st" + * + */ +static VALUE +strscan_peek(VALUE self, VALUE vlen) +{ + struct strscanner *p; + long len; + + GET_SCANNER(self, p); + + len = NUM2LONG(vlen); + if (EOS_P(p)) + return infect(str_new(p, "", 0), p); + + if (p->curr + len > S_LEN(p)) + len = S_LEN(p) - p->curr; + return extract_beg_len(p, p->curr, len); +} + +/* + * Equivalent to #peek. + * This method is obsolete; use #peek instead. + */ +static VALUE +strscan_peep(VALUE self, VALUE vlen) +{ + rb_warning("StringScanner#peep is obsolete; use #peek instead"); + return strscan_peek(self, vlen); +} + +/* + * Set the scan pointer to the previous position. Only one previous position is + * remembered, and it changes with each scanning operation. + * + * s = StringScanner.new('test string') + * s.scan(/\w+/) # => "test" + * s.unscan + * s.scan(/../) # => "te" + * s.scan(/\d/) # => nil + * s.unscan # ScanError: unscan failed: previous match record not exist + */ +static VALUE +strscan_unscan(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + if (! MATCHED_P(p)) + rb_raise(ScanError, "unscan failed: previous match record not exist"); + p->curr = p->prev; + CLEAR_MATCH_STATUS(p); + return self; +} + +/* + * Returns +true+ iff the scan pointer is at the beginning of the line. + * + * s = StringScanner.new("test\ntest\n") + * s.bol? # => true + * s.scan(/te/) + * s.bol? # => false + * s.scan(/st\n/) + * s.bol? # => true + * s.terminate + * s.bol? # => true + */ +static VALUE +strscan_bol_p(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + if (CURPTR(p) > S_PEND(p)) return Qnil; + if (p->curr == 0) return Qtrue; + return (*(CURPTR(p) - 1) == '\n') ? Qtrue : Qfalse; +} + +/* + * Returns +true+ if the scan pointer is at the end of the string. + * + * s = StringScanner.new('test string') + * p s.eos? # => false + * s.scan(/test/) + * p s.eos? # => false + * s.terminate + * p s.eos? 
# => true + */ +static VALUE +strscan_eos_p(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + return EOS_P(p) ? Qtrue : Qfalse; +} + +/* + * Equivalent to #eos?. + * This method is obsolete, use #eos? instead. + */ +static VALUE +strscan_empty_p(VALUE self) +{ + rb_warning("StringScanner#empty? is obsolete; use #eos? instead"); + return strscan_eos_p(self); +} + +/* + * Returns true iff there is more data in the string. See #eos?. + * This method is obsolete; use #eos? instead. + * + * s = StringScanner.new('test string') + * s.eos? # These two + * s.rest? # are opposites. + */ +static VALUE +strscan_rest_p(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + return EOS_P(p) ? Qfalse : Qtrue; +} + +/* + * Returns +true+ iff the last match was successful. + * + * s = StringScanner.new('test string') + * s.match?(/\w+/) # => 4 + * s.matched? # => true + * s.match?(/\d+/) # => nil + * s.matched? # => false + */ +static VALUE +strscan_matched_p(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + return MATCHED_P(p) ? Qtrue : Qfalse; +} + +/* + * Returns the last matched string. + * + * s = StringScanner.new('test string') + * s.match?(/\w+/) # -> 4 + * s.matched # -> "test" + */ +static VALUE +strscan_matched(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + if (! MATCHED_P(p)) return Qnil; + return extract_range(p, p->prev + p->regs.beg[0], + p->prev + p->regs.end[0]); +} + +/* + * Returns the size of the most recent match (see #matched), or +nil+ if there + * was no recent match. + * + * s = StringScanner.new('test string') + * s.check /\w+/ # -> "test" + * s.matched_size # -> 4 + * s.check /\d+/ # -> nil + * s.matched_size # -> nil + */ +static VALUE +strscan_matched_size(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + if (! MATCHED_P(p)) return Qnil; + return INT2NUM(p->regs.end[0] - p->regs.beg[0]); +} + +/* + * Equivalent to #matched_size. 
+ * This method is obsolete; use #matched_size instead. + */ +static VALUE +strscan_matchedsize(VALUE self) +{ + rb_warning("StringScanner#matchedsize is obsolete; use #matched_size instead"); + return strscan_matched_size(self); +} + +/* + * call-seq: [](n) + * + * Return the n-th subgroup in the most recent match. + * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 " + * s[0] # -> "Fri Dec 12 " + * s[1] # -> "Fri" + * s[2] # -> "Dec" + * s[3] # -> "12" + * s.post_match # -> "1975 14:39" + * s.pre_match # -> "" + */ +static VALUE +strscan_aref(VALUE self, VALUE idx) +{ + struct strscanner *p; + long i; + + GET_SCANNER(self, p); + if (! MATCHED_P(p)) return Qnil; + + i = NUM2LONG(idx); + if (i < 0) + i += p->regs.num_regs; + if (i < 0) return Qnil; + if (i >= p->regs.num_regs) return Qnil; + if (p->regs.beg[i] == -1) return Qnil; + + return extract_range(p, p->prev + p->regs.beg[i], + p->prev + p->regs.end[i]); +} + +/* + * Return the pre-match (in the regular expression sense) of the last scan. + * + * s = StringScanner.new('test string') + * s.scan(/\w+/) # -> "test" + * s.scan(/\s+/) # -> " " + * s.pre_match # -> "test" + * s.post_match # -> "string" + */ +static VALUE +strscan_pre_match(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + if (! MATCHED_P(p)) return Qnil; + return extract_range(p, 0, p->prev + p->regs.beg[0]); +} + +/* + * Return the post-match (in the regular expression sense) of the last scan. + * + * s = StringScanner.new('test string') + * s.scan(/\w+/) # -> "test" + * s.scan(/\s+/) # -> " " + * s.pre_match # -> "test" + * s.post_match # -> "string" + */ +static VALUE +strscan_post_match(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + if (! MATCHED_P(p)) return Qnil; + return extract_range(p, p->prev + p->regs.end[0], S_LEN(p)); +} + +/* + * Returns the "rest" of the string (i.e. everything after the scan pointer). + * If there is no more data (eos? 
= true), it returns "". + */ +static VALUE +strscan_rest(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + if (EOS_P(p)) { + return infect(str_new(p, "", 0), p); + } + return extract_range(p, p->curr, S_LEN(p)); +} + +/* + * s.rest_size is equivalent to s.rest.size. + */ +static VALUE +strscan_rest_size(VALUE self) +{ + struct strscanner *p; + long i; + + GET_SCANNER(self, p); + if (EOS_P(p)) { + return INT2FIX(0); + } + i = S_LEN(p) - p->curr; + return INT2FIX(i); +} + +/* + * s.restsize is equivalent to s.rest_size. + * This method is obsolete; use #rest_size instead. + */ +static VALUE +strscan_restsize(VALUE self) +{ + rb_warning("StringScanner#restsize is obsolete; use #rest_size instead"); + return strscan_rest_size(self); +} + +#define INSPECT_LENGTH 5 +#define BUFSIZE 256 + +/* + * Returns a string that represents the StringScanner object, showing: + * - the current position + * - the size of the string + * - the characters surrounding the scan pointer + * + * s = StringScanner.new("Fri Dec 12 1975 14:39") + * s.inspect # -> '#' + * s.scan_until /12/ # -> "Fri Dec 12" + * s.inspect # -> '#' + */ +static VALUE +strscan_inspect(VALUE self) +{ + struct strscanner *p; + char buf[BUFSIZE]; + long len; + VALUE a, b; + + Data_Get_Struct(self, struct strscanner, p); + if (NIL_P(p->str)) { + len = snprintf(buf, BUFSIZE, "#<%s (uninitialized)>", + rb_class2name(CLASS_OF(self))); + return infect(rb_str_new(buf, len), p); + } + if (EOS_P(p)) { + len = snprintf(buf, BUFSIZE, "#<%s fin>", + rb_class2name(CLASS_OF(self))); + return infect(rb_str_new(buf, len), p); + } + if (p->curr == 0) { + b = inspect2(p); + len = snprintf(buf, BUFSIZE, "#<%s %ld/%ld @ %s>", + rb_class2name(CLASS_OF(self)), + p->curr, S_LEN(p), + RSTRING_PTR(b)); + return infect(rb_str_new(buf, len), p); + } + a = inspect1(p); + b = inspect2(p); + len = snprintf(buf, BUFSIZE, "#<%s %ld/%ld %s @ %s>", + rb_class2name(CLASS_OF(self)), + p->curr, S_LEN(p), + RSTRING_PTR(a), + 
RSTRING_PTR(b)); + return infect(rb_str_new(buf, len), p); +} + +static VALUE +inspect1(struct strscanner *p) +{ + char buf[BUFSIZE]; + char *bp = buf; + long len; + + if (p->curr == 0) return rb_str_new2(""); + if (p->curr > INSPECT_LENGTH) { + strcpy(bp, "..."); bp += 3; + len = INSPECT_LENGTH; + } + else { + len = p->curr; + } + memcpy(bp, CURPTR(p) - len, len); bp += len; + return rb_str_dump(rb_str_new(buf, bp - buf)); +} + +static VALUE +inspect2(struct strscanner *p) +{ + char buf[BUFSIZE]; + char *bp = buf; + long len; + + if (EOS_P(p)) return rb_str_new2(""); + len = S_LEN(p) - p->curr; + if (len > INSPECT_LENGTH) { + len = INSPECT_LENGTH; + memcpy(bp, CURPTR(p), len); bp += len; + strcpy(bp, "..."); bp += 3; + } + else { + memcpy(bp, CURPTR(p), len); bp += len; + } + return rb_str_dump(rb_str_new(buf, bp - buf)); +} + +/* ======================================================================= + Ruby Interface + ======================================================================= */ + +/* + * Document-class: StringScanner + * + * StringScanner provides for lexical scanning operations on a String. Here is + * an example of its usage: + * + * s = StringScanner.new('This is an example string') + * s.eos? # -> false + * + * p s.scan(/\w+/) # -> "This" + * p s.scan(/\w+/) # -> nil + * p s.scan(/\s+/) # -> " " + * p s.scan(/\s+/) # -> nil + * p s.scan(/\w+/) # -> "is" + * s.eos? # -> false + * + * p s.scan(/\s+/) # -> " " + * p s.scan(/\w+/) # -> "an" + * p s.scan(/\s+/) # -> " " + * p s.scan(/\w+/) # -> "example" + * p s.scan(/\s+/) # -> " " + * p s.scan(/\w+/) # -> "string" + * s.eos? # -> true + * + * p s.scan(/\s+/) # -> nil + * p s.scan(/\w+/) # -> nil + * + * Scanning a string means remembering the position of a scan pointer, + * which is just an index. The point of scanning is to move forward a bit at + * a time, so matches are sought after the scan pointer; usually immediately + * after it. 
+ * + * Given the string "test string", here are the pertinent scan pointer + * positions: + * + * t e s t s t r i n g + * 0 1 2 ... 1 + * 0 + * + * When you #scan for a pattern (a regular expression), the match must occur + * at the character after the scan pointer. If you use #scan_until, then the + * match can occur anywhere after the scan pointer. In both cases, the scan + * pointer moves just beyond the last character of the match, ready to + * scan again from the next character onwards. This is demonstrated by the + * example above. + * + * == Method Categories + * + * There are other methods besides the plain scanners. You can look ahead in + * the string without actually scanning. You can access the most recent match. + * You can modify the string being scanned, reset or terminate the scanner, + * find out or change the position of the scan pointer, skip ahead, and so on. + * + * === Advancing the Scan Pointer + * + * - #getch + * - #get_byte + * - #scan + * - #scan_until + * - #skip + * - #skip_until + * + * === Looking Ahead + * + * - #check + * - #check_until + * - #exist? + * - #match? + * - #peek + * + * === Finding Where we Are + * + * - #beginning_of_line? (#bol?) + * - #eos? + * - #rest? + * - #rest_size + * - #pos + * + * === Setting Where we Are + * + * - #reset + * - #terminate + * - #pos= + * + * === Match Data + * + * - #matched + * - #matched? + * - #matched_size + * - [] + * - #pre_match + * - #post_match + * + * === Miscellaneous + * + * - << + * - #concat + * - #string + * - #string= + * - #unscan + * + * There are aliases to several of the methods. 
+ */ +void +Init_strscan() +{ + ID id_scanerr = rb_intern("ScanError"); + VALUE tmp; + + StringScanner = rb_define_class("StringScanner", rb_cObject); + ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError); + if (!rb_const_defined(rb_cObject, id_scanerr)) { + rb_const_set(rb_cObject, id_scanerr, ScanError); + } + tmp = rb_str_new2(STRSCAN_VERSION); + rb_obj_freeze(tmp); + rb_const_set(StringScanner, rb_intern("Version"), tmp); + tmp = rb_str_new2("$Id: strscan.c 25546 2009-10-28 16:11:20Z yugui $"); + rb_obj_freeze(tmp); + rb_const_set(StringScanner, rb_intern("Id"), tmp); + + rb_define_alloc_func(StringScanner, strscan_s_allocate); + rb_define_private_method(StringScanner, "initialize", strscan_initialize, -1); + rb_define_private_method(StringScanner, "initialize_copy", strscan_init_copy, 1); + rb_define_singleton_method(StringScanner, "must_C_version", strscan_s_mustc, 0); + rb_define_method(StringScanner, "reset", strscan_reset, 0); + rb_define_method(StringScanner, "terminate", strscan_terminate, 0); + rb_define_method(StringScanner, "clear", strscan_clear, 0); + rb_define_method(StringScanner, "string", strscan_get_string, 0); + rb_define_method(StringScanner, "string=", strscan_set_string, 1); + rb_define_method(StringScanner, "concat", strscan_concat, 1); + rb_define_method(StringScanner, "<<", strscan_concat, 1); + rb_define_method(StringScanner, "pos", strscan_get_pos, 0); + rb_define_method(StringScanner, "pos=", strscan_set_pos, 1); + rb_define_method(StringScanner, "pointer", strscan_get_pos, 0); + rb_define_method(StringScanner, "pointer=", strscan_set_pos, 1); + + rb_define_method(StringScanner, "scan", strscan_scan, 1); + rb_define_method(StringScanner, "skip", strscan_skip, 1); + rb_define_method(StringScanner, "match?", strscan_match_p, 1); + rb_define_method(StringScanner, "check", strscan_check, 1); + rb_define_method(StringScanner, "scan_full", strscan_scan_full, 3); + + rb_define_method(StringScanner, "scan_until", 
strscan_scan_until, 1); + rb_define_method(StringScanner, "skip_until", strscan_skip_until, 1); + rb_define_method(StringScanner, "exist?", strscan_exist_p, 1); + rb_define_method(StringScanner, "check_until", strscan_check_until, 1); + rb_define_method(StringScanner, "search_full", strscan_search_full, 3); + + rb_define_method(StringScanner, "getch", strscan_getch, 0); + rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0); + rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0); + rb_define_method(StringScanner, "peek", strscan_peek, 1); + rb_define_method(StringScanner, "peep", strscan_peep, 1); + + rb_define_method(StringScanner, "unscan", strscan_unscan, 0); + + rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0); + rb_alias(StringScanner, rb_intern("bol?"), rb_intern("beginning_of_line?")); + rb_define_method(StringScanner, "eos?", strscan_eos_p, 0); + rb_define_method(StringScanner, "empty?", strscan_empty_p, 0); + rb_define_method(StringScanner, "rest?", strscan_rest_p, 0); + + rb_define_method(StringScanner, "matched?", strscan_matched_p, 0); + rb_define_method(StringScanner, "matched", strscan_matched, 0); + rb_define_method(StringScanner, "matched_size", strscan_matched_size, 0); + rb_define_method(StringScanner, "matchedsize", strscan_matchedsize, 0); + rb_define_method(StringScanner, "[]", strscan_aref, 1); + rb_define_method(StringScanner, "pre_match", strscan_pre_match, 0); + rb_define_method(StringScanner, "post_match", strscan_post_match, 0); + + rb_define_method(StringScanner, "rest", strscan_rest, 0); + rb_define_method(StringScanner, "rest_size", strscan_rest_size, 0); + rb_define_method(StringScanner, "restsize", strscan_restsize, 0); + + rb_define_method(StringScanner, "inspect", strscan_inspect, 0); +} diff --git a/ext/syslog/depend b/ext/syslog/depend new file mode 100644 index 0000000..45cbea2 --- /dev/null +++ b/ext/syslog/depend @@ -0,0 +1,2 @@ +syslog.o: syslog.c $(hdrdir)/ruby.h 
$(topdir)/config.h $(hdrdir)/defines.h \ + $(hdrdir)/intern.h diff --git a/ext/syslog/extconf.rb b/ext/syslog/extconf.rb new file mode 100644 index 0000000..c354a77 --- /dev/null +++ b/ext/syslog/extconf.rb @@ -0,0 +1,10 @@ +# $RoughId: extconf.rb,v 1.3 2001/11/24 17:49:26 knu Exp $ +# $Id: extconf.rb 11708 2007-02-12 23:01:19Z shyouhei $ + +require 'mkmf' + +have_header("syslog.h") && + have_func("openlog") && + have_func("setlogmask") && + create_makefile("syslog") + diff --git a/ext/syslog/syslog.c b/ext/syslog/syslog.c new file mode 100644 index 0000000..8f4bf7b --- /dev/null +++ b/ext/syslog/syslog.c @@ -0,0 +1,393 @@ +/* + * UNIX Syslog extension for Ruby + * Amos Gouaux, University of Texas at Dallas + * + * + * $RoughId: syslog.c,v 1.21 2002/02/25 12:21:17 knu Exp $ + * $Id: syslog.c 18530 2008-08-12 09:35:12Z knu $ + */ + +#include "ruby/ruby.h" +#include "ruby/util.h" +#include + +/* Syslog class */ +static VALUE mSyslog, mSyslogConstants; +static const char *syslog_ident = NULL; +static int syslog_options = -1, syslog_facility = -1, syslog_mask = -1; +static int syslog_opened = 0; + +/* Package helper routines */ +static void syslog_write(int pri, int argc, VALUE *argv) +{ + VALUE str; + + rb_secure(4); + if (argc < 1) { + rb_raise(rb_eArgError, "no log message supplied"); + } + + if (!syslog_opened) { + rb_raise(rb_eRuntimeError, "must open syslog before write"); + } + + str = rb_f_sprintf(argc, argv); + + syslog(pri, "%s", RSTRING_PTR(str)); +} + +/* Syslog module methods */ +static VALUE mSyslog_close(VALUE self) +{ + rb_secure(4); + if (!syslog_opened) { + rb_raise(rb_eRuntimeError, "syslog not opened"); + } + + closelog(); + + free((void *)syslog_ident); + syslog_ident = NULL; + syslog_options = syslog_facility = syslog_mask = -1; + syslog_opened = 0; + + return Qnil; +} + +static VALUE mSyslog_open(int argc, VALUE *argv, VALUE self) +{ + VALUE ident, opt, fac; + + if (syslog_opened) { + rb_raise(rb_eRuntimeError, "syslog already open"); + } + + 
rb_scan_args(argc, argv, "03", &ident, &opt, &fac); + + if (NIL_P(ident)) { + ident = rb_gv_get("$0"); + } + SafeStringValue(ident); + syslog_ident = strdup(RSTRING_PTR(ident)); + + if (NIL_P(opt)) { + syslog_options = LOG_PID | LOG_CONS; + } else { + syslog_options = NUM2INT(opt); + } + + if (NIL_P(fac)) { + syslog_facility = LOG_USER; + } else { + syslog_facility = NUM2INT(fac); + } + + openlog(syslog_ident, syslog_options, syslog_facility); + + syslog_opened = 1; + + setlogmask(syslog_mask = setlogmask(0)); + + /* be like File.new.open {...} */ + if (rb_block_given_p()) { + rb_ensure(rb_yield, self, mSyslog_close, self); + } + + return self; +} + +static VALUE mSyslog_reopen(int argc, VALUE *argv, VALUE self) +{ + mSyslog_close(self); + + return mSyslog_open(argc, argv, self); +} + +static VALUE mSyslog_isopen(VALUE self) +{ + return syslog_opened ? Qtrue : Qfalse; +} + +static VALUE mSyslog_ident(VALUE self) +{ + return syslog_opened ? rb_str_new2(syslog_ident) : Qnil; +} + +static VALUE mSyslog_options(VALUE self) +{ + return syslog_opened ? INT2NUM(syslog_options) : Qnil; +} + +static VALUE mSyslog_facility(VALUE self) +{ + return syslog_opened ? INT2NUM(syslog_facility) : Qnil; +} + +static VALUE mSyslog_get_mask(VALUE self) +{ + return syslog_opened ? 
INT2NUM(syslog_mask) : Qnil; +} + +static VALUE mSyslog_set_mask(VALUE self, VALUE mask) +{ + rb_secure(4); + if (!syslog_opened) { + rb_raise(rb_eRuntimeError, "must open syslog before setting log mask"); + } + + setlogmask(syslog_mask = NUM2INT(mask)); + + return mask; +} + +static VALUE mSyslog_log(int argc, VALUE *argv, VALUE self) +{ + VALUE pri; + + if (argc < 2) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 2+)", argc); + } + + argc--; + pri = *argv++; + + if (!FIXNUM_P(pri)) { + rb_raise(rb_eTypeError, "type mismatch: %s given", rb_class2name(CLASS_OF(pri))); + } + + syslog_write(FIX2INT(pri), argc, argv); + + return self; +} + +static VALUE mSyslog_inspect(VALUE self) +{ + char buf[1024]; + + if (syslog_opened) { + snprintf(buf, sizeof(buf), + "<#%s: opened=true, ident=\"%s\", options=%d, facility=%d, mask=%d>", + rb_class2name(self), + syslog_ident, + syslog_options, + syslog_facility, + syslog_mask); + } else { + snprintf(buf, sizeof(buf), + "<#%s: opened=false>", rb_class2name(self)); + } + + return rb_str_new2(buf); +} + +static VALUE mSyslog_instance(VALUE self) +{ + return self; +} + +#define define_syslog_shortcut_method(pri, name) \ +static VALUE mSyslog_##name(int argc, VALUE *argv, VALUE self) \ +{ \ + syslog_write(pri, argc, argv); \ +\ + return self; \ +} + +#ifdef LOG_EMERG +define_syslog_shortcut_method(LOG_EMERG, emerg) +#endif +#ifdef LOG_ALERT +define_syslog_shortcut_method(LOG_ALERT, alert) +#endif +#ifdef LOG_CRIT +define_syslog_shortcut_method(LOG_CRIT, crit) +#endif +#ifdef LOG_ERR +define_syslog_shortcut_method(LOG_ERR, err) +#endif +#ifdef LOG_WARNING +define_syslog_shortcut_method(LOG_WARNING, warning) +#endif +#ifdef LOG_NOTICE +define_syslog_shortcut_method(LOG_NOTICE, notice) +#endif +#ifdef LOG_INFO +define_syslog_shortcut_method(LOG_INFO, info) +#endif +#ifdef LOG_DEBUG +define_syslog_shortcut_method(LOG_DEBUG, debug) +#endif + +static VALUE mSyslogConstants_LOG_MASK(VALUE klass, VALUE pri) +{ + return 
INT2FIX(LOG_MASK(NUM2INT(pri))); +} + +static VALUE mSyslogConstants_LOG_UPTO(VALUE klass, VALUE pri) +{ + return INT2FIX(LOG_UPTO(NUM2INT(pri))); +} + +/* Init for package syslog */ +void Init_syslog() +{ + mSyslog = rb_define_module("Syslog"); + + mSyslogConstants = rb_define_module_under(mSyslog, "Constants"); + + rb_include_module(mSyslog, mSyslogConstants); + + rb_define_module_function(mSyslog, "open", mSyslog_open, -1); + rb_define_module_function(mSyslog, "reopen", mSyslog_reopen, -1); + rb_define_module_function(mSyslog, "open!", mSyslog_reopen, -1); + rb_define_module_function(mSyslog, "opened?", mSyslog_isopen, 0); + + rb_define_module_function(mSyslog, "ident", mSyslog_ident, 0); + rb_define_module_function(mSyslog, "options", mSyslog_options, 0); + rb_define_module_function(mSyslog, "facility", mSyslog_facility, 0); + + rb_define_module_function(mSyslog, "log", mSyslog_log, -1); + rb_define_module_function(mSyslog, "close", mSyslog_close, 0); + rb_define_module_function(mSyslog, "mask", mSyslog_get_mask, 0); + rb_define_module_function(mSyslog, "mask=", mSyslog_set_mask, 1); + + rb_define_module_function(mSyslog, "LOG_MASK", mSyslogConstants_LOG_MASK, 1); + rb_define_module_function(mSyslog, "LOG_UPTO", mSyslogConstants_LOG_UPTO, 1); + + rb_define_module_function(mSyslog, "inspect", mSyslog_inspect, 0); + rb_define_module_function(mSyslog, "instance", mSyslog_instance, 0); + + rb_define_module_function(mSyslogConstants, "LOG_MASK", mSyslogConstants_LOG_MASK, 1); + rb_define_module_function(mSyslogConstants, "LOG_UPTO", mSyslogConstants_LOG_UPTO, 1); + +#define rb_define_syslog_const(id) \ + rb_define_const(mSyslogConstants, #id, INT2NUM(id)) + + /* Various options when opening log */ +#ifdef LOG_PID + rb_define_syslog_const(LOG_PID); +#endif +#ifdef LOG_CONS + rb_define_syslog_const(LOG_CONS); +#endif +#ifdef LOG_ODELAY + rb_define_syslog_const(LOG_ODELAY); /* deprecated */ +#endif +#ifdef LOG_NDELAY + rb_define_syslog_const(LOG_NDELAY); +#endif 
+#ifdef LOG_NOWAIT + rb_define_syslog_const(LOG_NOWAIT); /* deprecated */ +#endif +#ifdef LOG_PERROR + rb_define_syslog_const(LOG_PERROR); +#endif + + /* Various syslog facilities */ +#ifdef LOG_AUTH + rb_define_syslog_const(LOG_AUTH); +#endif +#ifdef LOG_AUTHPRIV + rb_define_syslog_const(LOG_AUTHPRIV); +#endif +#ifdef LOG_CONSOLE + rb_define_syslog_const(LOG_CONSOLE); +#endif +#ifdef LOG_CRON + rb_define_syslog_const(LOG_CRON); +#endif +#ifdef LOG_DAEMON + rb_define_syslog_const(LOG_DAEMON); +#endif +#ifdef LOG_FTP + rb_define_syslog_const(LOG_FTP); +#endif +#ifdef LOG_KERN + rb_define_syslog_const(LOG_KERN); +#endif +#ifdef LOG_LPR + rb_define_syslog_const(LOG_LPR); +#endif +#ifdef LOG_MAIL + rb_define_syslog_const(LOG_MAIL); +#endif +#ifdef LOG_NEWS + rb_define_syslog_const(LOG_NEWS); +#endif +#ifdef LOG_NTP + rb_define_syslog_const(LOG_NTP); +#endif +#ifdef LOG_SECURITY + rb_define_syslog_const(LOG_SECURITY); +#endif +#ifdef LOG_SYSLOG + rb_define_syslog_const(LOG_SYSLOG); +#endif +#ifdef LOG_USER + rb_define_syslog_const(LOG_USER); +#endif +#ifdef LOG_UUCP + rb_define_syslog_const(LOG_UUCP); +#endif +#ifdef LOG_LOCAL0 + rb_define_syslog_const(LOG_LOCAL0); +#endif +#ifdef LOG_LOCAL1 + rb_define_syslog_const(LOG_LOCAL1); +#endif +#ifdef LOG_LOCAL2 + rb_define_syslog_const(LOG_LOCAL2); +#endif +#ifdef LOG_LOCAL3 + rb_define_syslog_const(LOG_LOCAL3); +#endif +#ifdef LOG_LOCAL4 + rb_define_syslog_const(LOG_LOCAL4); +#endif +#ifdef LOG_LOCAL5 + rb_define_syslog_const(LOG_LOCAL5); +#endif +#ifdef LOG_LOCAL6 + rb_define_syslog_const(LOG_LOCAL6); +#endif +#ifdef LOG_LOCAL7 + rb_define_syslog_const(LOG_LOCAL7); +#endif + +#define rb_define_syslog_shortcut(name) \ + rb_define_module_function(mSyslog, #name, mSyslog_##name, -1) + + /* Various syslog priorities and the shortcut methods */ +#ifdef LOG_EMERG + rb_define_syslog_const(LOG_EMERG); + rb_define_syslog_shortcut(emerg); +#endif +#ifdef LOG_ALERT + rb_define_syslog_const(LOG_ALERT); + 
rb_define_syslog_shortcut(alert); +#endif +#ifdef LOG_CRIT + rb_define_syslog_const(LOG_CRIT); + rb_define_syslog_shortcut(crit); +#endif +#ifdef LOG_ERR + rb_define_syslog_const(LOG_ERR); + rb_define_syslog_shortcut(err); +#endif +#ifdef LOG_WARNING + rb_define_syslog_const(LOG_WARNING); + rb_define_syslog_shortcut(warning); +#endif +#ifdef LOG_NOTICE + rb_define_syslog_const(LOG_NOTICE); + rb_define_syslog_shortcut(notice); +#endif +#ifdef LOG_INFO + rb_define_syslog_const(LOG_INFO); + rb_define_syslog_shortcut(info); +#endif +#ifdef LOG_DEBUG + rb_define_syslog_const(LOG_DEBUG); + rb_define_syslog_shortcut(debug); +#endif +} diff --git a/ext/syslog/syslog.txt b/ext/syslog/syslog.txt new file mode 100644 index 0000000..b49dc75 --- /dev/null +++ b/ext/syslog/syslog.txt @@ -0,0 +1,124 @@ +.\" syslog.txt - -*- Indented-Text -*- +$RoughId: syslog.txt,v 1.18 2002/02/25 08:20:14 knu Exp $ +$Id: syslog.txt 15821 2008-03-21 12:15:06Z knu $ + +UNIX Syslog extension for Ruby +Amos Gouaux, University of Texas at Dallas + +& +Akinori MUSHA + + +Contact: + - Akinori MUSHA (current maintainer) + +** Syslog(Module) + +Included Modules: Syslog::Constants + +require 'syslog' + +A Simple wrapper for the UNIX syslog system calls that might be handy +if you're writing a server in Ruby. For the details of the syslog(8) +architecture and constants, see the syslog(3) manual page of your +platform. + +Module Methods: + + open(ident = $0, logopt = Syslog::LOG_PID | Syslog::LOG_CONS, + facility = Syslog::LOG_USER) [{ |syslog| ... }] + + Opens syslog with the given options and returns the module + itself. If a block is given, calls it with an argument of + itself. If syslog is already opened, raises RuntimeError. 
+ + Example: + Syslog.open('ftpd', Syslog::LOG_PID | Syslog::LOG_NDELAY, + Syslog::LOG_FTP) + + open!(ident = $0, logopt = Syslog::LOG_PID | Syslog::LOG_CONS, + facility = Syslog::LOG_USER) + reopen(ident = $0, logopt = Syslog::LOG_PID | Syslog::LOG_CONS, + facility = Syslog::LOG_USER) + + Same as open, but does a close first. + + opened? + + Returns true if syslog is opened, otherwise false. + + ident + options + facility + + Returns the parameters given in the last open, respectively. + Every call of Syslog::open resets these values. + + log(pri, message, ...) + + Writes message to syslog. + + Example: + Syslog.log(Syslog::LOG_CRIT, "the sky is falling in %d seconds!", 10) + + crit(message, ...) + emerg(message, ...) + alert(message, ...) + err(message, ...) + warning(message, ...) + notice(message, ...) + info(message, ...) + debug(message, ...) + + These are shortcut methods for Syslog::log(). The lineup may + vary depending on what priorities are defined on your system. + + Example: + Syslog.crit("the sky is falling in %d seconds!", 5) + + mask + mask=(mask) + + Returns or sets the log priority mask. The value of the mask + is persistent and will not be reset by Syslog::open or + Syslog::close. + + Example: + Syslog.mask = Syslog::LOG_UPTO(Syslog::LOG_ERR) + + close + + Closes syslog. Raises RuntimeError if syslog is not opened. + + inspect + + Returns the "inspect" string of the Syslog module. + + instance + + Returns the module itself. (Kept only for backward compatibility.) + + LOG_MASK(pri) + + Creates a mask for one priority. + + LOG_UPTO(pri) + + Creates a mask for all priorities up to pri. + +** Syslog::Constants(Module) + +require 'syslog' +include Syslog::Constants + +This module includes the LOG_* constants available on the system. + +Module Methods: + + LOG_MASK(pri) + + Creates a mask for one priority. + + LOG_UPTO(pri) + + Creates a mask for all priorities up to pri. 
diff --git a/ext/syslog/test.rb b/ext/syslog/test.rb new file mode 100644 index 0000000..a200280 --- /dev/null +++ b/ext/syslog/test.rb @@ -0,0 +1,164 @@ +#!/usr/bin/env ruby +# $RoughId: test.rb,v 1.9 2002/02/25 08:20:14 knu Exp $ +# $Id: test.rb 11708 2007-02-12 23:01:19Z shyouhei $ + +# Please only run this test on machines reasonable for testing. +# If in doubt, ask your admin. + +require 'test/unit' + +# Prepend current directory to load path for testing. +$:.unshift('.') + +require 'syslog' + +class TestSyslog < Test::Unit::TestCase + def test_new + assert_raises(NoMethodError) { + Syslog.new + } + end + + def test_instance + sl1 = Syslog.instance + sl2 = Syslog.open + sl3 = Syslog.instance + + assert_equal(Syslog, sl1) + assert_equal(Syslog, sl2) + assert_equal(Syslog, sl3) + ensure + Syslog.close if Syslog.opened? + end + + def test_open + # default parameters + Syslog.open + + assert_equal($0, Syslog.ident) + assert_equal(Syslog::LOG_PID | Syslog::LOG_CONS, Syslog.options) + assert_equal(Syslog::LOG_USER, Syslog.facility) + + # open without close + assert_raises(RuntimeError) { + Syslog.open + } + + Syslog.close + + # given parameters + Syslog.open("foo", Syslog::LOG_NDELAY | Syslog::LOG_PERROR, Syslog::LOG_DAEMON) + + assert_equal('foo', Syslog.ident) + assert_equal(Syslog::LOG_NDELAY | Syslog::LOG_PERROR, Syslog.options) + assert_equal(Syslog::LOG_DAEMON, Syslog.facility) + + Syslog.close + + # default parameters again (after close) + Syslog.open + Syslog.close + + assert_equal(nil, Syslog.ident) + assert_equal(nil, Syslog.options) + assert_equal(nil, Syslog.facility) + + # block + param = nil + Syslog.open { |param| } + assert_equal(Syslog, param) + ensure + Syslog.close if Syslog.opened? + end + + def test_opened? + assert_equal(false, Syslog.opened?) + + Syslog.open + assert_equal(true, Syslog.opened?) + + Syslog.close + assert_equal(false, Syslog.opened?) + + Syslog.open { + assert_equal(true, Syslog.opened?) 
+ } + + assert_equal(false, Syslog.opened?) + end + + def test_close + assert_raises(RuntimeError) { + Syslog.close + } + end + + def test_mask + assert_equal(nil, Syslog.mask) + + Syslog.open + + orig = Syslog.mask + + Syslog.mask = Syslog.LOG_UPTO(Syslog::LOG_ERR) + assert_equal(Syslog.LOG_UPTO(Syslog::LOG_ERR), Syslog.mask) + + Syslog.mask = Syslog.LOG_MASK(Syslog::LOG_CRIT) + assert_equal(Syslog.LOG_MASK(Syslog::LOG_CRIT), Syslog.mask) + + Syslog.mask = orig + ensure + Syslog.close if Syslog.opened? + end + + def test_log + stderr = IO::pipe + + pid = fork { + stderr[0].close + STDERR.reopen(stderr[1]) + stderr[1].close + + options = Syslog::LOG_PERROR | Syslog::LOG_NDELAY + + Syslog.open("syslog_test", options) { |sl| + sl.log(Syslog::LOG_NOTICE, "test1 - hello, %s!", "world") + sl.notice("test1 - hello, %s!", "world") + } + + Syslog.open("syslog_test", options | Syslog::LOG_PID) { |sl| + sl.log(Syslog::LOG_CRIT, "test2 - pid") + sl.crit("test2 - pid") + } + exit! + } + + stderr[1].close + Process.waitpid(pid) + + # LOG_PERROR is not yet implemented on Cygwin. + return if RUBY_PLATFORM =~ /cygwin/ + + 2.times { + assert_equal("syslog_test: test1 - hello, world!\n", stderr[0].gets) + } + + 2.times { + assert_equal(format("syslog_test[%d]: test2 - pid\n", pid), stderr[0].gets) + } + end + + def test_inspect + Syslog.open { |sl| + assert_equal(format('<#%s: opened=true, ident="%s", options=%d, facility=%d, mask=%d>', + Syslog, + sl.ident, + sl.options, + sl.facility, + sl.mask), + sl.inspect) + } + + assert_equal(format('<#%s: opened=false>', Syslog), Syslog.inspect) + end +end diff --git a/file.c b/file.c new file mode 100644 index 0000000..b0fd4e3 --- /dev/null +++ b/file.c @@ -0,0 +1,4820 @@ +/********************************************************************** + + file.c - + + $Author: yugui $ + created at: Mon Nov 15 12:24:34 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#ifdef _WIN32 +#include "missing/file.h" +#endif +#ifdef __CYGWIN__ +#include +#include +#endif + +#include "ruby/ruby.h" +#include "ruby/io.h" +#include "ruby/util.h" +#include "dln.h" + +#ifdef HAVE_UNISTD_H +#include +#endif + +#ifdef HAVE_SYS_FILE_H +# include +#else +int flock(int, int); +#endif + +#ifdef HAVE_SYS_PARAM_H +# include +#endif +#ifndef MAXPATHLEN +# define MAXPATHLEN 1024 +#endif + +#include + +#include + +#ifdef HAVE_UTIME_H +#include +#elif defined HAVE_SYS_UTIME_H +#include +#endif + +#ifdef HAVE_PWD_H +#include +#endif + +#include +#include + +#ifdef HAVE_SYS_MKDEV_H +#include +#endif + +#if defined(HAVE_FCNTL_H) +#include +#endif + +#if !defined HAVE_LSTAT && !defined lstat +#define lstat stat +#endif + +#ifdef __BEOS__ /* should not change ID if -1 */ +static int +be_chown(const char *path, uid_t owner, gid_t group) +{ + if (owner == -1 || group == -1) { + struct stat st; + if (stat(path, &st) < 0) return -1; + if (owner == -1) owner = st.st_uid; + if (group == -1) group = st.st_gid; + } + return chown(path, owner, group); +} +#define chown be_chown +static int +be_fchown(int fd, uid_t owner, gid_t group) +{ + if (owner == -1 || group == -1) { + struct stat st; + if (fstat(fd, &st) < 0) return -1; + if (owner == -1) owner = st.st_uid; + if (group == -1) group = st.st_gid; + } + return fchown(fd, owner, group); +} +#define fchown be_fchown +#endif /* __BEOS__ */ + +VALUE rb_cFile; +VALUE rb_mFileTest; +VALUE rb_cStat; + +static VALUE +rb_get_path_check(VALUE obj, int check) +{ + VALUE tmp; + ID to_path; + + if (check) rb_check_safe_obj(obj); + tmp = rb_check_string_type(obj); + if (!NIL_P(tmp)) goto exit; + + CONST_ID(to_path, "to_path"); + if (rb_respond_to(obj, to_path)) { + tmp = rb_funcall(obj, to_path, 0, 0); + } + else { + tmp = obj; + } + exit: + StringValueCStr(tmp); + if (check && obj != 
tmp) { + rb_check_safe_obj(tmp); + } + return rb_str_new4(tmp); +} + +VALUE +rb_get_path_no_checksafe(VALUE obj) +{ + return rb_get_path_check(obj, 0); +} + +VALUE +rb_get_path(VALUE obj) +{ + return rb_get_path_check(obj, 1); +} + +static long +apply2files(void (*func)(const char *, void *), VALUE vargs, void *arg) +{ + long i; + volatile VALUE path; + + rb_secure(4); + for (i=0; i filename + * + * Returns the pathname used to create file as a string. Does + * not normalize the name. + * + * File.new("testfile").path #=> "testfile" + * File.new("/tmp/../tmp/xxx", "w").path #=> "/tmp/../tmp/xxx" + * + */ + +static VALUE +rb_file_path(VALUE obj) +{ + rb_io_t *fptr; + + fptr = RFILE(rb_io_taint_check(obj))->fptr; + rb_io_check_initialized(fptr); + if (NIL_P(fptr->pathv)) return Qnil; + return rb_obj_taint(rb_str_dup(fptr->pathv)); +} + +static VALUE +stat_new_0(VALUE klass, struct stat *st) +{ + struct stat *nst = 0; + + if (st) { + nst = ALLOC(struct stat); + *nst = *st; + } + return Data_Wrap_Struct(klass, NULL, -1, nst); +} + +static VALUE +stat_new(struct stat *st) +{ + return stat_new_0(rb_cStat, st); +} + +static struct stat* +get_stat(VALUE self) +{ + struct stat* st; + Data_Get_Struct(self, struct stat, st); + if (!st) rb_raise(rb_eTypeError, "uninitialized File::Stat"); + return st; +} + +static struct timespec stat_mtimespec(struct stat *st); + +/* + * call-seq: + * stat <=> other_stat => -1, 0, 1 + * + * Compares File::Stat objects by comparing their + * respective modification times. 
+ *
+ *    f1 = File.new("f1", "w")
+ *    sleep 1
+ *    f2 = File.new("f2", "w")
+ *    f1.stat <=> f2.stat   #=> -1
+ */
+
+static VALUE
+rb_stat_cmp(VALUE self, VALUE other)
+{
+    if (rb_obj_is_kind_of(other, rb_obj_class(self))) {
+        struct timespec ts1 = stat_mtimespec(get_stat(self));
+        struct timespec ts2 = stat_mtimespec(get_stat(other));
+        if (ts1.tv_sec == ts2.tv_sec) {
+            if (ts1.tv_nsec == ts2.tv_nsec) return INT2FIX(0);
+            if (ts1.tv_nsec < ts2.tv_nsec) return INT2FIX(-1);
+            return INT2FIX(1);
+        }
+        if (ts1.tv_sec < ts2.tv_sec) return INT2FIX(-1);
+        return INT2FIX(1);
+    }
+    return Qnil;
+}
+
+#define ST2UINT(val) ((val) & ~(~1UL << (sizeof(val) * CHAR_BIT - 1)))
+
+/*
+ *  call-seq:
+ *     stat.dev    => fixnum
+ *
+ *  Returns an integer representing the device on which stat
+ *  resides.
+ *
+ *     File.stat("testfile").dev   #=> 774
+ */
+
+static VALUE
+rb_stat_dev(VALUE self)
+{
+    return INT2NUM(get_stat(self)->st_dev);
+}
+
+/*
+ *  call-seq:
+ *     stat.dev_major   => fixnum
+ *
+ *  Returns the major part of File_Stat#dev or
+ *  nil.
+ *
+ *     File.stat("/dev/fd1").dev_major   #=> 2
+ *     File.stat("/dev/tty").dev_major   #=> 5
+ */
+
+static VALUE
+rb_stat_dev_major(VALUE self)
+{
+#if defined(major)
+    dev_t dev = get_stat(self)->st_dev; /* dev_t, not long: avoids truncation where dev_t is wider than long */
+    return ULONG2NUM(major(dev));
+#else
+    return Qnil;
+#endif
+}
+
+/*
+ *  call-seq:
+ *     stat.dev_minor   => fixnum
+ *
+ *  Returns the minor part of File_Stat#dev or
+ *  nil.
+ *
+ *     File.stat("/dev/fd1").dev_minor   #=> 1
+ *     File.stat("/dev/tty").dev_minor   #=> 0
+ */
+
+static VALUE
+rb_stat_dev_minor(VALUE self)
+{
+#if defined(minor)
+    dev_t dev = get_stat(self)->st_dev; /* dev_t, not long: avoids truncation where dev_t is wider than long */
+    return ULONG2NUM(minor(dev));
+#else
+    return Qnil;
+#endif
+}
+
+/*
+ *  call-seq:
+ *     stat.ino   => fixnum
+ *
+ *  Returns the inode number for stat.
+ * + * File.stat("testfile").ino #=> 1083669 + * + */ + +static VALUE +rb_stat_ino(VALUE self) +{ +#ifdef HUGE_ST_INO + return ULL2NUM(get_stat(self)->st_ino); +#else + return ULONG2NUM(get_stat(self)->st_ino); +#endif +} + +/* + * call-seq: + * stat.mode => fixnum + * + * Returns an integer representing the permission bits of + * stat. The meaning of the bits is platform dependent; on + * Unix systems, see stat(2). + * + * File.chmod(0644, "testfile") #=> 1 + * s = File.stat("testfile") + * sprintf("%o", s.mode) #=> "100644" + */ + +static VALUE +rb_stat_mode(VALUE self) +{ + return UINT2NUM(ST2UINT(get_stat(self)->st_mode)); +} + +/* + * call-seq: + * stat.nlink => fixnum + * + * Returns the number of hard links to stat. + * + * File.stat("testfile").nlink #=> 1 + * File.link("testfile", "testfile.bak") #=> 0 + * File.stat("testfile").nlink #=> 2 + * + */ + +static VALUE +rb_stat_nlink(VALUE self) +{ + return UINT2NUM(get_stat(self)->st_nlink); +} + +/* + * call-seq: + * stat.uid => fixnum + * + * Returns the numeric user id of the owner of stat. + * + * File.stat("testfile").uid #=> 501 + * + */ + +static VALUE +rb_stat_uid(VALUE self) +{ + return UIDT2NUM(get_stat(self)->st_uid); +} + +/* + * call-seq: + * stat.gid => fixnum + * + * Returns the numeric group id of the owner of stat. + * + * File.stat("testfile").gid #=> 500 + * + */ + +static VALUE +rb_stat_gid(VALUE self) +{ + return GIDT2NUM(get_stat(self)->st_gid); +} + +/* + * call-seq: + * stat.rdev => fixnum or nil + * + * Returns an integer representing the device type on which + * stat resides. Returns nil if the operating + * system doesn't support this feature. + * + * File.stat("/dev/fd1").rdev #=> 513 + * File.stat("/dev/tty").rdev #=> 1280 + */ + +static VALUE +rb_stat_rdev(VALUE self) +{ +#ifdef HAVE_ST_RDEV + return ULONG2NUM(get_stat(self)->st_rdev); +#else + return Qnil; +#endif +} + +/* + * call-seq: + * stat.rdev_major => fixnum + * + * Returns the major part of File_Stat#rdev or + * nil. 
+ *
+ *     File.stat("/dev/fd1").rdev_major   #=> 2
+ *     File.stat("/dev/tty").rdev_major   #=> 5
+ */
+
+static VALUE
+rb_stat_rdev_major(VALUE self)
+{
+#if defined(HAVE_ST_RDEV) && defined(major)
+    dev_t rdev = get_stat(self)->st_rdev; /* dev_t, not long: avoids truncation where dev_t is wider than long */
+    return ULONG2NUM(major(rdev));
+#else
+    return Qnil;
+#endif
+}
+
+/*
+ *  call-seq:
+ *     stat.rdev_minor   => fixnum
+ *
+ *  Returns the minor part of File_Stat#rdev or
+ *  nil.
+ *
+ *     File.stat("/dev/fd1").rdev_minor   #=> 1
+ *     File.stat("/dev/tty").rdev_minor   #=> 0
+ */
+
+static VALUE
+rb_stat_rdev_minor(VALUE self)
+{
+#if defined(HAVE_ST_RDEV) && defined(minor)
+    dev_t rdev = get_stat(self)->st_rdev; /* dev_t, not long: avoids truncation where dev_t is wider than long */
+    return ULONG2NUM(minor(rdev));
+#else
+    return Qnil;
+#endif
+}
+
+/*
+ *  call-seq:
+ *     stat.size    => fixnum
+ *
+ *  Returns the size of stat in bytes.
+ *
+ *     File.stat("testfile").size   #=> 66
+ */
+
+static VALUE
+rb_stat_size(VALUE self)
+{
+    return OFFT2NUM(get_stat(self)->st_size);
+}
+
+/*
+ *  call-seq:
+ *     stat.blksize   => integer or nil
+ *
+ *  Returns the native file system's block size. Will return nil
+ *  on platforms that don't support this information.
+ *
+ *     File.stat("testfile").blksize   #=> 4096
+ *
+ */
+
+static VALUE
+rb_stat_blksize(VALUE self)
+{
+#ifdef HAVE_ST_BLKSIZE
+    return ULONG2NUM(get_stat(self)->st_blksize);
+#else
+    return Qnil;
+#endif
+}
+
+/*
+ *  call-seq:
+ *     stat.blocks    => integer or nil
+ *
+ *  Returns the number of native file system blocks allocated for this
+ *  file, or nil if the operating system doesn't
+ *  support this feature.
+ * + * File.stat("testfile").blocks #=> 2 + */ + +static VALUE +rb_stat_blocks(VALUE self) +{ +#ifdef HAVE_ST_BLOCKS + return ULONG2NUM(get_stat(self)->st_blocks); +#else + return Qnil; +#endif +} + +static struct timespec +stat_atimespec(struct stat *st) +{ + struct timespec ts; + ts.tv_sec = st->st_atime; +#if defined(HAVE_STRUCT_STAT_ST_ATIM) + ts.tv_nsec = st->st_atim.tv_nsec; +#elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC) + ts.tv_nsec = st->st_atimespec.tv_nsec; +#elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) + ts.tv_nsec = st->st_atimensec; +#else + ts.tv_nsec = 0; +#endif + return ts; +} + +static VALUE +stat_atime(struct stat *st) +{ + struct timespec ts = stat_atimespec(st); + return rb_time_nano_new(ts.tv_sec, ts.tv_nsec); +} + +static struct timespec +stat_mtimespec(struct stat *st) +{ + struct timespec ts; + ts.tv_sec = st->st_mtime; +#if defined(HAVE_STRUCT_STAT_ST_MTIM) + ts.tv_nsec = st->st_mtim.tv_nsec; +#elif defined(HAVE_STRUCT_STAT_ST_MTIMESPEC) + ts.tv_nsec = st->st_mtimespec.tv_nsec; +#elif defined(HAVE_STRUCT_STAT_ST_MTIMENSEC) + ts.tv_nsec = st->st_mtimensec; +#else + ts.tv_nsec = 0; +#endif + return ts; +} + +static VALUE +stat_mtime(struct stat *st) +{ + struct timespec ts = stat_mtimespec(st); + return rb_time_nano_new(ts.tv_sec, ts.tv_nsec); +} + +static struct timespec +stat_ctimespec(struct stat *st) +{ + struct timespec ts; + ts.tv_sec = st->st_ctime; +#if defined(HAVE_STRUCT_STAT_ST_CTIM) + ts.tv_nsec = st->st_ctim.tv_nsec; +#elif defined(HAVE_STRUCT_STAT_ST_CTIMESPEC) + ts.tv_nsec = st->st_ctimespec.tv_nsec; +#elif defined(HAVE_STRUCT_STAT_ST_CTIMENSEC) + ts.tv_nsec = st->st_ctimensec; +#else + ts.tv_nsec = 0; +#endif + return ts; +} + +static VALUE +stat_ctime(struct stat *st) +{ + struct timespec ts = stat_ctimespec(st); + return rb_time_nano_new(ts.tv_sec, ts.tv_nsec); +} + +/* + * call-seq: + * stat.atime => time + * + * Returns the last access time for this file as an object of class + * Time. 
+ * + * File.stat("testfile").atime #=> Wed Dec 31 18:00:00 CST 1969 + * + */ + +static VALUE +rb_stat_atime(VALUE self) +{ + return stat_atime(get_stat(self)); +} + +/* + * call-seq: + * stat.mtime -> aTime + * + * Returns the modification time of stat. + * + * File.stat("testfile").mtime #=> Wed Apr 09 08:53:14 CDT 2003 + * + */ + +static VALUE +rb_stat_mtime(VALUE self) +{ + return stat_mtime(get_stat(self)); +} + +/* + * call-seq: + * stat.ctime -> aTime + * + * Returns the change time for stat (that is, the time + * directory information about the file was changed, not the file + * itself). + * + * File.stat("testfile").ctime #=> Wed Apr 09 08:53:14 CDT 2003 + * + */ + +static VALUE +rb_stat_ctime(VALUE self) +{ + return stat_ctime(get_stat(self)); +} + +/* + * call-seq: + * stat.inspect => string + * + * Produce a nicely formatted description of stat. + * + * File.stat("/etc/passwd").inspect + * #=> "#" + */ + +static VALUE +rb_stat_inspect(VALUE self) +{ + VALUE str; + int i; + static const struct { + const char *name; + VALUE (*func)(VALUE); + } member[] = { + {"dev", rb_stat_dev}, + {"ino", rb_stat_ino}, + {"mode", rb_stat_mode}, + {"nlink", rb_stat_nlink}, + {"uid", rb_stat_uid}, + {"gid", rb_stat_gid}, + {"rdev", rb_stat_rdev}, + {"size", rb_stat_size}, + {"blksize", rb_stat_blksize}, + {"blocks", rb_stat_blocks}, + {"atime", rb_stat_atime}, + {"mtime", rb_stat_mtime}, + {"ctime", rb_stat_ctime}, + }; + + struct stat* st; + Data_Get_Struct(self, struct stat, st); + if (!st) { + return rb_sprintf("#<%s: uninitialized>", rb_obj_classname(self)); + } + + str = rb_str_buf_new2("#<"); + rb_str_buf_cat2(str, rb_obj_classname(self)); + rb_str_buf_cat2(str, " "); + + for (i = 0; i < sizeof(member)/sizeof(member[0]); i++) { + VALUE v; + + if (i > 0) { + rb_str_buf_cat2(str, ", "); + } + rb_str_buf_cat2(str, member[i].name); + rb_str_buf_cat2(str, "="); + v = (*member[i].func)(self); + if (i == 2) { /* mode */ + rb_str_catf(str, "0%lo", NUM2ULONG(v)); + } + else 
if (i == 0 || i == 6) { /* dev/rdev */ + rb_str_catf(str, "0x%lx", NUM2ULONG(v)); + } + else { + rb_str_append(str, rb_inspect(v)); + } + } + rb_str_buf_cat2(str, ">"); + OBJ_INFECT(str, self); + + return str; +} + +static int +rb_stat(VALUE file, struct stat *st) +{ + VALUE tmp; + + rb_secure(2); + tmp = rb_check_convert_type(file, T_FILE, "IO", "to_io"); + if (!NIL_P(tmp)) { + rb_io_t *fptr; + + GetOpenFile(tmp, fptr); + return fstat(fptr->fd, st); + } + FilePathValue(file); + return stat(StringValueCStr(file), st); +} + +#ifdef _WIN32 +static HANDLE +w32_io_info(VALUE *file, BY_HANDLE_FILE_INFORMATION *st) +{ + VALUE tmp; + HANDLE f, ret = 0; + + tmp = rb_check_convert_type(*file, T_FILE, "IO", "to_io"); + if (!NIL_P(tmp)) { + rb_io_t *fptr; + + GetOpenFile(tmp, fptr); + f = (HANDLE)rb_w32_get_osfhandle(fptr->fd); + if (f == (HANDLE)-1) return INVALID_HANDLE_VALUE; + } + else { + FilePathValue(*file); + f = CreateFile(StringValueCStr(*file), 0, + FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, + rb_w32_iswin95() ? 0 : FILE_FLAG_BACKUP_SEMANTICS, NULL); + if (f == INVALID_HANDLE_VALUE) return f; + ret = f; + } + if (GetFileType(f) == FILE_TYPE_DISK) { + ZeroMemory(st, sizeof(*st)); + if (GetFileInformationByHandle(f, st)) return ret; + } + if (ret) CloseHandle(ret); + return INVALID_HANDLE_VALUE; +} +#endif + +/* + * call-seq: + * File.stat(file_name) => stat + * + * Returns a File::Stat object for the named file (see + * File::Stat). + * + * File.stat("testfile").mtime #=> Tue Apr 08 12:58:04 CDT 2003 + * + */ + +static VALUE +rb_file_s_stat(VALUE klass, VALUE fname) +{ + struct stat st; + + rb_secure(4); + FilePathValue(fname); + if (rb_stat(fname, &st) < 0) { + FilePathValue(fname); + rb_sys_fail(RSTRING_PTR(fname)); + } + return stat_new(&st); +} + +/* + * call-seq: + * ios.stat => stat + * + * Returns status information for ios as an object of type + * File::Stat. 
+ * + * f = File.new("testfile") + * s = f.stat + * "%o" % s.mode #=> "100644" + * s.blksize #=> 4096 + * s.atime #=> Wed Apr 09 08:53:54 CDT 2003 + * + */ + +static VALUE +rb_io_stat(VALUE obj) +{ + rb_io_t *fptr; + struct stat st; + +#define rb_sys_fail_path(path) rb_sys_fail(NIL_P(path) ? 0 : RSTRING_PTR(path)) + GetOpenFile(obj, fptr); + if (fstat(fptr->fd, &st) == -1) { + rb_sys_fail_path(fptr->pathv); + } + return stat_new(&st); +} + +/* + * call-seq: + * File.lstat(file_name) => stat + * + * Same as File::stat, but does not follow the last symbolic + * link. Instead, reports on the link itself. + * + * File.symlink("testfile", "link2test") #=> 0 + * File.stat("testfile").size #=> 66 + * File.lstat("link2test").size #=> 8 + * File.stat("link2test").size #=> 66 + * + */ + +static VALUE +rb_file_s_lstat(VALUE klass, VALUE fname) +{ +#ifdef HAVE_LSTAT + struct stat st; + + rb_secure(2); + FilePathValue(fname); + if (lstat(StringValueCStr(fname), &st) == -1) { + rb_sys_fail(RSTRING_PTR(fname)); + } + return stat_new(&st); +#else + return rb_file_s_stat(klass, fname); +#endif +} + +/* + * call-seq: + * file.lstat => stat + * + * Same as IO#stat, but does not follow the last symbolic + * link. Instead, reports on the link itself. 
+ *
+ *     File.symlink("testfile", "link2test")   #=> 0
+ *     File.stat("testfile").size              #=> 66
+ *     f = File.new("link2test")
+ *     f.lstat.size                            #=> 8
+ *     f.stat.size                             #=> 66
+ */
+
+static VALUE
+rb_file_lstat(VALUE obj)
+{
+#ifdef HAVE_LSTAT
+    rb_io_t *fptr;
+    struct stat st;
+
+    rb_secure(2);
+    GetOpenFile(obj, fptr);
+    if (NIL_P(fptr->pathv)) return Qnil;
+    if (lstat(RSTRING_PTR(fptr->pathv), &st) == -1) {
+        rb_sys_fail_path(fptr->pathv);
+    }
+    return stat_new(&st);
+#else
+    return rb_io_stat(obj);
+#endif
+}
+
+#ifndef HAVE_GROUP_MEMBER
+/*
+ * Fallback used when the platform provides no group_member().
+ * Returns 1 when gid equals the real or effective group id of the process
+ * or, with HAVE_GETGROUPS, appears in the list filled in by getgroups();
+ * returns 0 otherwise (always 0 on _WIN32).  The result is a plain C truth
+ * value, matching the declared int return type, not a Ruby VALUE.
+ */
+static int
+group_member(GETGROUPS_T gid)
+{
+#ifndef _WIN32
+    if (getgid() == gid || getegid() == gid)
+        return 1;
+
+# ifdef HAVE_GETGROUPS
+#  ifndef NGROUPS
+#   ifdef NGROUPS_MAX
+#    define NGROUPS NGROUPS_MAX
+#   else
+#    define NGROUPS 32
+#   endif
+#  endif
+    {
+        GETGROUPS_T gary[NGROUPS];
+        int anum;
+
+        /* a failed getgroups() yields -1, so the loop body never runs */
+        anum = getgroups(NGROUPS, gary);
+        while (--anum >= 0) {
+            if (gary[anum] == gid)
+                return 1;
+        }
+    }
+# endif
+#endif
+    return 0;
+}
+#endif
+
+#ifndef S_IXUGO
+#  define S_IXUGO		(S_IXUSR | S_IXGRP | S_IXOTH)
+#endif
+
+#if defined(S_IXGRP) && !defined(_WIN32) && !defined(__CYGWIN__)
+#define USE_GETEUID 1
+#endif
+
+#ifndef HAVE_EACCESS
+/*
+ * Fallback used when the platform provides no eaccess().
+ * With USE_GETEUID it stat()s path and tests the requested mode bits against
+ * the permission class chosen by the effective uid (owner, group via
+ * group_member(), else other); without it the call is passed to access().
+ * Returns 0 when access is granted, -1 when stat() fails or a bit is missing.
+ */
+int
+eaccess(const char *path, int mode)
+{
+#ifdef USE_GETEUID
+    struct stat st;
+    rb_uid_t euid;
+
+    if (stat(path, &st) < 0) return -1;
+
+    euid = geteuid();
+
+    if (euid == 0) {
+        /* Root can read or write any file. */
+        if (!(mode & X_OK))
+            return 0;
+
+        /* Root can execute any file that has any one of the execute
+           bits set. */
+        if (st.st_mode & S_IXUGO)
+            return 0;
+
+        return -1;
+    }
+
+    /* NOTE(review): the shifts assume the R_OK/W_OK/X_OK values line up with
+       the "other" permission bits of st_mode - confirm for new platforms. */
+    if (st.st_uid == euid)        /* owner */
+        mode <<= 6;
+    else if (group_member(st.st_gid))        /* group */
+        mode <<= 3;
+
+    if ((st.st_mode & mode) == mode) return 0;
+
+    return -1;
+#else
+    return access(path, mode);
+#endif
+}
+#endif
+
+
+/*
+ * Document-class: FileTest
+ *
+ *  FileTest implements file test operations similar to
+ *  those used in File::Stat.
It exists as a standalone + * module, and its methods are also insinuated into the File + * class. (Note that this is not done by inclusion: the interpreter cheats). + * + */ + +/* + * File.directory?(file_name) => true or false + * File.directory?(file_name) => true or false + * + * Returns true if the named file is a directory, + * false otherwise. + * + * File.directory?(".") + */ + +/* + * Document-method: exist? + * + * call-seq: + * Dir.exist?(file_name) => true or false + * Dir.exists?(file_name) => true or false + * + * Returns true if the named file is a directory, + * false otherwise. + * + */ + +/* + * Document-method: directory? + * + * call-seq: + * File.directory?(file_name) => true or false + * + * Returns true if the named file is a directory, + * false otherwise. + * + * File.directory?(".") + */ + +VALUE +rb_file_directory_p(VALUE obj, VALUE fname) +{ +#ifndef S_ISDIR +# define S_ISDIR(m) ((m & S_IFMT) == S_IFDIR) +#endif + + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (S_ISDIR(st.st_mode)) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * File.pipe?(file_name) => true or false + * + * Returns true if the named file is a pipe. + */ + +static VALUE +rb_file_pipe_p(VALUE obj, VALUE fname) +{ +#ifdef S_IFIFO +# ifndef S_ISFIFO +# define S_ISFIFO(m) ((m & S_IFMT) == S_IFIFO) +# endif + + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (S_ISFIFO(st.st_mode)) return Qtrue; + +#endif + return Qfalse; +} + +/* + * call-seq: + * File.symlink?(file_name) => true or false + * + * Returns true if the named file is a symbolic link. 
+ */ + +static VALUE +rb_file_symlink_p(VALUE obj, VALUE fname) +{ +#ifndef S_ISLNK +# ifdef _S_ISLNK +# define S_ISLNK(m) _S_ISLNK(m) +# else +# ifdef _S_IFLNK +# define S_ISLNK(m) ((m & S_IFMT) == _S_IFLNK) +# else +# ifdef S_IFLNK +# define S_ISLNK(m) ((m & S_IFMT) == S_IFLNK) +# endif +# endif +# endif +#endif + +#ifdef S_ISLNK + struct stat st; + + rb_secure(2); + FilePathValue(fname); + if (lstat(StringValueCStr(fname), &st) < 0) return Qfalse; + if (S_ISLNK(st.st_mode)) return Qtrue; +#endif + + return Qfalse; +} + +/* + * call-seq: + * File.socket?(file_name) => true or false + * + * Returns true if the named file is a socket. + */ + +static VALUE +rb_file_socket_p(VALUE obj, VALUE fname) +{ +#ifndef S_ISSOCK +# ifdef _S_ISSOCK +# define S_ISSOCK(m) _S_ISSOCK(m) +# else +# ifdef _S_IFSOCK +# define S_ISSOCK(m) ((m & S_IFMT) == _S_IFSOCK) +# else +# ifdef S_IFSOCK +# define S_ISSOCK(m) ((m & S_IFMT) == S_IFSOCK) +# endif +# endif +# endif +#endif + +#ifdef S_ISSOCK + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (S_ISSOCK(st.st_mode)) return Qtrue; + +#endif + return Qfalse; +} + +/* + * call-seq: + * File.blockdev?(file_name) => true or false + * + * Returns true if the named file is a block device. + */ + +static VALUE +rb_file_blockdev_p(VALUE obj, VALUE fname) +{ +#ifndef S_ISBLK +# ifdef S_IFBLK +# define S_ISBLK(m) ((m & S_IFMT) == S_IFBLK) +# else +# define S_ISBLK(m) (0) /* anytime false */ +# endif +#endif + +#ifdef S_ISBLK + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (S_ISBLK(st.st_mode)) return Qtrue; + +#endif + return Qfalse; +} + +/* + * call-seq: + * File.chardev?(file_name) => true or false + * + * Returns true if the named file is a character device. 
+ */ +static VALUE +rb_file_chardev_p(VALUE obj, VALUE fname) +{ +#ifndef S_ISCHR +# define S_ISCHR(m) ((m & S_IFMT) == S_IFCHR) +#endif + + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (S_ISCHR(st.st_mode)) return Qtrue; + + return Qfalse; +} + +/* + * call-seq: + * File.exist?(file_name) => true or false + * File.exists?(file_name) => true or false + * + * Return true if the named file exists. + */ + +static VALUE +rb_file_exist_p(VALUE obj, VALUE fname) +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * File.readable?(file_name) => true or false + * + * Returns true if the named file is readable by the effective + * user id of this process. + */ + +static VALUE +rb_file_readable_p(VALUE obj, VALUE fname) +{ + rb_secure(2); + FilePathValue(fname); + if (eaccess(StringValueCStr(fname), R_OK) < 0) return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * File.readable_real?(file_name) => true or false + * + * Returns true if the named file is readable by the real + * user id of this process. + */ + +static VALUE +rb_file_readable_real_p(VALUE obj, VALUE fname) +{ + rb_secure(2); + FilePathValue(fname); + if (access(StringValueCStr(fname), R_OK) < 0) return Qfalse; + return Qtrue; +} + +#ifndef S_IRUGO +# define S_IRUGO (S_IRUSR | S_IRGRP | S_IROTH) +#endif + +#ifndef S_IWUGO +# define S_IWUGO (S_IWUSR | S_IWGRP | S_IWOTH) +#endif + +/* + * call-seq: + * File.world_readable?(file_name) => fixnum or nil + * + * If file_name is readable by others, returns an integer + * representing the file permission bits of file_name. Returns + * nil otherwise. The meaning of the bits is platform + * dependent; on Unix systems, see stat(2). 
+ * + * File.world_readable?("/etc/passwd") # => 420 + * m = File.world_readable?("/etc/passwd") + * sprintf("%o", m) # => "644" + */ + +static VALUE +rb_file_world_readable_p(VALUE obj, VALUE fname) +{ +#ifdef S_IROTH + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qnil; + if ((st.st_mode & (S_IROTH)) == S_IROTH) { + return UINT2NUM(st.st_mode & (S_IRUGO|S_IWUGO|S_IXUGO)); + } +#endif + return Qnil; +} + +/* + * call-seq: + * File.writable?(file_name) => true or false + * + * Returns true if the named file is writable by the effective + * user id of this process. + */ + +static VALUE +rb_file_writable_p(VALUE obj, VALUE fname) +{ + rb_secure(2); + FilePathValue(fname); + if (eaccess(StringValueCStr(fname), W_OK) < 0) return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * File.writable_real?(file_name) => true or false + * + * Returns true if the named file is writable by the real + * user id of this process. + */ + +static VALUE +rb_file_writable_real_p(VALUE obj, VALUE fname) +{ + rb_secure(2); + FilePathValue(fname); + if (access(StringValueCStr(fname), W_OK) < 0) return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * File.world_writable?(file_name) => fixnum or nil + * + * If file_name is writable by others, returns an integer + * representing the file permission bits of file_name. Returns + * nil otherwise. The meaning of the bits is platform + * dependent; on Unix systems, see stat(2). + * + * File.world_writable?("/tmp") #=> 511 + * m = File.world_writable?("/tmp") + * sprintf("%o", m) #=> "777" + */ + +static VALUE +rb_file_world_writable_p(VALUE obj, VALUE fname) +{ +#ifdef S_IWOTH + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qnil; + if ((st.st_mode & (S_IWOTH)) == S_IWOTH) { + return UINT2NUM(st.st_mode & (S_IRUGO|S_IWUGO|S_IXUGO)); + } +#endif + return Qnil; +} + +/* + * call-seq: + * File.executable?(file_name) => true or false + * + * Returns true if the named file is executable by the effective + * user id of this process. 
+ */ + +static VALUE +rb_file_executable_p(VALUE obj, VALUE fname) +{ + rb_secure(2); + FilePathValue(fname); + if (eaccess(StringValueCStr(fname), X_OK) < 0) return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * File.executable_real?(file_name) => true or false + * + * Returns true if the named file is executable by the real + * user id of this process. + */ + +static VALUE +rb_file_executable_real_p(VALUE obj, VALUE fname) +{ + rb_secure(2); + FilePathValue(fname); + if (access(StringValueCStr(fname), X_OK) < 0) return Qfalse; + return Qtrue; +} + +#ifndef S_ISREG +# define S_ISREG(m) ((m & S_IFMT) == S_IFREG) +#endif + +/* + * call-seq: + * File.file?(file_name) => true or false + * + * Returns true if the named file exists and is a + * regular file. + */ + +static VALUE +rb_file_file_p(VALUE obj, VALUE fname) +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (S_ISREG(st.st_mode)) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * File.zero?(file_name) => true or false + * + * Returns true if the named file exists and has + * a zero size. + */ + +static VALUE +rb_file_zero_p(VALUE obj, VALUE fname) +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (st.st_size == 0) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * File.size?(file_name) => Integer or nil + * + * Returns +nil+ if +file_name+ doesn't exist or has zero size, the size of the + * file otherwise. + */ + +static VALUE +rb_file_size_p(VALUE obj, VALUE fname) +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qnil; + if (st.st_size == 0) return Qnil; + return OFFT2NUM(st.st_size); +} + +/* + * call-seq: + * File.owned?(file_name) => true or false + * + * Returns true if the named file exists and the + * effective used id of the calling process is the owner of + * the file. 
+ */ + +static VALUE +rb_file_owned_p(VALUE obj, VALUE fname) +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (st.st_uid == geteuid()) return Qtrue; + return Qfalse; +} + +static VALUE +rb_file_rowned_p(VALUE obj, VALUE fname) +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (st.st_uid == getuid()) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * File.grpowned?(file_name) => true or false + * + * Returns true if the named file exists and the + * effective group id of the calling process is the owner of + * the file. Returns false on Windows. + */ + +static VALUE +rb_file_grpowned_p(VALUE obj, VALUE fname) +{ +#ifndef _WIN32 + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (group_member(st.st_gid)) return Qtrue; +#endif + return Qfalse; +} + +#if defined(S_ISUID) || defined(S_ISGID) || defined(S_ISVTX) +static VALUE +check3rdbyte(VALUE fname, int mode) +{ + struct stat st; + + rb_secure(2); + FilePathValue(fname); + if (stat(StringValueCStr(fname), &st) < 0) return Qfalse; + if (st.st_mode & mode) return Qtrue; + return Qfalse; +} +#endif + +/* + * call-seq: + * File.setuid?(file_name) => true or false + * + * Returns true if the named file has the setuid bit set. + */ + +static VALUE +rb_file_suid_p(VALUE obj, VALUE fname) +{ +#ifdef S_ISUID + return check3rdbyte(fname, S_ISUID); +#else + return Qfalse; +#endif +} + +/* + * call-seq: + * File.setgid?(file_name) => true or false + * + * Returns true if the named file has the setgid bit set. + */ + +static VALUE +rb_file_sgid_p(VALUE obj, VALUE fname) +{ +#ifdef S_ISGID + return check3rdbyte(fname, S_ISGID); +#else + return Qfalse; +#endif +} + +/* + * call-seq: + * File.sticky?(file_name) => true or false + * + * Returns true if the named file has the sticky bit set. 
+ */ + +static VALUE +rb_file_sticky_p(VALUE obj, VALUE fname) +{ +#ifdef S_ISVTX + return check3rdbyte(fname, S_ISVTX); +#else + return Qnil; +#endif +} + +/* + * call-seq: + * File.identical?(file_1, file_2) => true or false + * + * Returns true if the named files are identical. + * + * open("a", "w") {} + * p File.identical?("a", "a") #=> true + * p File.identical?("a", "./a") #=> true + * File.link("a", "b") + * p File.identical?("a", "b") #=> true + * File.symlink("a", "c") + * p File.identical?("a", "c") #=> true + * open("d", "w") {} + * p File.identical?("a", "d") #=> false + */ + +static VALUE +rb_file_identical_p(VALUE obj, VALUE fname1, VALUE fname2) +{ +#ifndef DOSISH + struct stat st1, st2; + + if (rb_stat(fname1, &st1) < 0) return Qfalse; + if (rb_stat(fname2, &st2) < 0) return Qfalse; + if (st1.st_dev != st2.st_dev) return Qfalse; + if (st1.st_ino != st2.st_ino) return Qfalse; +#else +#ifdef _WIN32 + BY_HANDLE_FILE_INFORMATION st1, st2; + HANDLE f1 = 0, f2 = 0; +#endif + + rb_secure(2); +#ifdef _WIN32 + f1 = w32_io_info(&fname1, &st1); + if (f1 == INVALID_HANDLE_VALUE) return Qfalse; + f2 = w32_io_info(&fname2, &st2); + if (f1) CloseHandle(f1); + if (f2 == INVALID_HANDLE_VALUE) return Qfalse; + if (f2) CloseHandle(f2); + + if (st1.dwVolumeSerialNumber == st2.dwVolumeSerialNumber && + st1.nFileIndexHigh == st2.nFileIndexHigh && + st1.nFileIndexLow == st2.nFileIndexLow) + return Qtrue; + if (!f1 || !f2) return Qfalse; + if (rb_w32_iswin95()) return Qfalse; +#else + FilePathValue(fname1); + fname1 = rb_str_new4(fname1); + FilePathValue(fname2); + if (access(RSTRING_PTR(fname1), 0)) return Qfalse; + if (access(RSTRING_PTR(fname2), 0)) return Qfalse; +#endif + fname1 = rb_file_expand_path(fname1, Qnil); + fname2 = rb_file_expand_path(fname2, Qnil); + if (RSTRING_LEN(fname1) != RSTRING_LEN(fname2)) return Qfalse; + if (rb_memcicmp(RSTRING_PTR(fname1), RSTRING_PTR(fname2), RSTRING_LEN(fname1))) + return Qfalse; +#endif + return Qtrue; +} + +/* + * call-seq: 
+ * File.size(file_name) => integer + * + * Returns the size of file_name. + */ + +static VALUE +rb_file_s_size(VALUE klass, VALUE fname) +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) { + FilePathValue(fname); + rb_sys_fail(RSTRING_PTR(fname)); + } + return OFFT2NUM(st.st_size); +} + +static VALUE +rb_file_ftype(const struct stat *st) +{ + const char *t; + + if (S_ISREG(st->st_mode)) { + t = "file"; + } + else if (S_ISDIR(st->st_mode)) { + t = "directory"; + } + else if (S_ISCHR(st->st_mode)) { + t = "characterSpecial"; + } +#ifdef S_ISBLK + else if (S_ISBLK(st->st_mode)) { + t = "blockSpecial"; + } +#endif +#ifdef S_ISFIFO + else if (S_ISFIFO(st->st_mode)) { + t = "fifo"; + } +#endif +#ifdef S_ISLNK + else if (S_ISLNK(st->st_mode)) { + t = "link"; + } +#endif +#ifdef S_ISSOCK + else if (S_ISSOCK(st->st_mode)) { + t = "socket"; + } +#endif + else { + t = "unknown"; + } + + return rb_usascii_str_new2(t); +} + +/* + * call-seq: + * File.ftype(file_name) => string + * + * Identifies the type of the named file; the return string is one of + * ``file'', ``directory'', + * ``characterSpecial'', ``blockSpecial'', + * ``fifo'', ``link'', + * ``socket'', or ``unknown''. + * + * File.ftype("testfile") #=> "file" + * File.ftype("/dev/tty") #=> "characterSpecial" + * File.ftype("/tmp/.X11-unix/X0") #=> "socket" + */ + +static VALUE +rb_file_s_ftype(VALUE klass, VALUE fname) +{ + struct stat st; + + rb_secure(2); + FilePathValue(fname); + if (lstat(StringValueCStr(fname), &st) == -1) { + rb_sys_fail(RSTRING_PTR(fname)); + } + + return rb_file_ftype(&st); +} + +/* + * call-seq: + * File.atime(file_name) => time + * + * Returns the last access time for the named file as a Time object). 
+ * + * File.atime("testfile") #=> Wed Apr 09 08:51:48 CDT 2003 + * + */ + +static VALUE +rb_file_s_atime(VALUE klass, VALUE fname) +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) { + FilePathValue(fname); + rb_sys_fail(RSTRING_PTR(fname)); + } + return stat_atime(&st); +} + +/* + * call-seq: + * file.atime => time + * + * Returns the last access time (a Time object) + * for file, or epoch if file has not been accessed. + * + * File.new("testfile").atime #=> Wed Dec 31 18:00:00 CST 1969 + * + */ + +static VALUE +rb_file_atime(VALUE obj) +{ + rb_io_t *fptr; + struct stat st; + + GetOpenFile(obj, fptr); + if (fstat(fptr->fd, &st) == -1) { + rb_sys_fail_path(fptr->pathv); + } + return stat_atime(&st); +} + +/* + * call-seq: + * File.mtime(file_name) => time + * + * Returns the modification time for the named file as a Time object. + * + * File.mtime("testfile") #=> Tue Apr 08 12:58:04 CDT 2003 + * + */ + +static VALUE +rb_file_s_mtime(VALUE klass, VALUE fname) +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) { + FilePathValue(fname); + rb_sys_fail(RSTRING_PTR(fname)); + } + return stat_mtime(&st); +} + +/* + * call-seq: + * file.mtime -> time + * + * Returns the modification time for file. + * + * File.new("testfile").mtime #=> Wed Apr 09 08:53:14 CDT 2003 + * + */ + +static VALUE +rb_file_mtime(VALUE obj) +{ + rb_io_t *fptr; + struct stat st; + + GetOpenFile(obj, fptr); + if (fstat(fptr->fd, &st) == -1) { + rb_sys_fail_path(fptr->pathv); + } + return stat_mtime(&st); +} + +/* + * call-seq: + * File.ctime(file_name) => time + * + * Returns the change time for the named file (the time at which + * directory information about the file was changed, not the file + * itself). 
+ * + * File.ctime("testfile") #=> Wed Apr 09 08:53:13 CDT 2003 + * + */ + +static VALUE +rb_file_s_ctime(VALUE klass, VALUE fname) +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) { + FilePathValue(fname); + rb_sys_fail(RSTRING_PTR(fname)); + } + return stat_ctime(&st); +} + +/* + * call-seq: + * file.ctime -> time + * + * Returns the change time for file (that is, the time directory + * information about the file was changed, not the file itself). + * + * File.new("testfile").ctime #=> Wed Apr 09 08:53:14 CDT 2003 + * + */ + +static VALUE +rb_file_ctime(VALUE obj) +{ + rb_io_t *fptr; + struct stat st; + + GetOpenFile(obj, fptr); + if (fstat(fptr->fd, &st) == -1) { + rb_sys_fail_path(fptr->pathv); + } + return stat_ctime(&st); +} + +static void +chmod_internal(const char *path, void *mode) +{ + if (chmod(path, *(int *)mode) < 0) + rb_sys_fail(path); +} + +/* + * call-seq: + * File.chmod(mode_int, file_name, ... ) -> integer + * + * Changes permission bits on the named file(s) to the bit pattern + * represented by mode_int. Actual effects are operating system + * dependent (see the beginning of this section). On Unix systems, see + * chmod(2) for details. Returns the number of files + * processed. + * + * File.chmod(0644, "testfile", "out") #=> 2 + */ + +static VALUE +rb_file_s_chmod(int argc, VALUE *argv) +{ + VALUE vmode; + VALUE rest; + int mode; + long n; + + rb_secure(2); + rb_scan_args(argc, argv, "1*", &vmode, &rest); + mode = NUM2INT(vmode); + + n = apply2files(chmod_internal, rest, &mode); + return LONG2FIX(n); +} + +/* + * call-seq: + * file.chmod(mode_int) => 0 + * + * Changes permission bits on file to the bit pattern + * represented by mode_int. Actual effects are platform + * dependent; on Unix systems, see chmod(2) for details. + * Follows symbolic links. Also see File#lchmod. 
+ * + * f = File.new("out", "w"); + * f.chmod(0644) #=> 0 + */ + +static VALUE +rb_file_chmod(VALUE obj, VALUE vmode) +{ + rb_io_t *fptr; + int mode; + + rb_secure(2); + mode = NUM2INT(vmode); + + GetOpenFile(obj, fptr); +#ifdef HAVE_FCHMOD + if (fchmod(fptr->fd, mode) == -1) + rb_sys_fail_path(fptr->pathv); +#else + if (NIL_P(fptr->pathv)) return Qnil; + if (chmod(RSTRING_PTR(fptr->pathv), mode) == -1) + rb_sys_fail_path(fptr->pathv); +#endif + + return INT2FIX(0); +} + +#if defined(HAVE_LCHMOD) +static void +lchmod_internal(const char *path, void *mode) +{ + if (lchmod(path, (int)(VALUE)mode) < 0) + rb_sys_fail(path); +} + +/* + * call-seq: + * File.lchmod(mode_int, file_name, ...) => integer + * + * Equivalent to File::chmod, but does not follow symbolic + * links (so it will change the permissions associated with the link, + * not the file referenced by the link). Often not available. + * + */ + +static VALUE +rb_file_s_lchmod(int argc, VALUE *argv) +{ + VALUE vmode; + VALUE rest; + long mode, n; + + rb_secure(2); + rb_scan_args(argc, argv, "1*", &vmode, &rest); + mode = NUM2INT(vmode); + + n = apply2files(lchmod_internal, rest, (void *)(long)mode); + return LONG2FIX(n); +} +#else +static VALUE +rb_file_s_lchmod(int argc, VALUE *argv) +{ + rb_notimplement(); + return Qnil; /* not reached */ +} +#endif + +struct chown_args { + rb_uid_t owner; + rb_gid_t group; +}; + +static void +chown_internal(const char *path, void *arg) +{ + struct chown_args *args = arg; + if (chown(path, args->owner, args->group) < 0) + rb_sys_fail(path); +} + +/* + * call-seq: + * File.chown(owner_int, group_int, file_name,... ) -> integer + * + * Changes the owner and group of the named file(s) to the given + * numeric owner and group id's. Only a process with superuser + * privileges may change the owner of a file. The current owner of a + * file may change the file's group to any group to which the owner + * belongs. A nil or -1 owner or group id is ignored. 
+ * Returns the number of files processed. + * + * File.chown(nil, 100, "testfile") + * + */ + +static VALUE +rb_file_s_chown(int argc, VALUE *argv) +{ + VALUE o, g, rest; + struct chown_args arg; + long n; + + rb_secure(2); + rb_scan_args(argc, argv, "2*", &o, &g, &rest); + if (NIL_P(o)) { + arg.owner = -1; + } + else { + arg.owner = NUM2UIDT(o); + } + if (NIL_P(g)) { + arg.group = -1; + } + else { + arg.group = NUM2GIDT(g); + } + + n = apply2files(chown_internal, rest, &arg); + return LONG2FIX(n); +} + +/* + * call-seq: + * file.chown(owner_int, group_int ) => 0 + * + * Changes the owner and group of file to the given numeric + * owner and group id's. Only a process with superuser privileges may + * change the owner of a file. The current owner of a file may change + * the file's group to any group to which the owner belongs. A + * nil or -1 owner or group id is ignored. Follows + * symbolic links. See also File#lchown. + * + * File.new("testfile").chown(502, 1000) + * + */ + +static VALUE +rb_file_chown(VALUE obj, VALUE owner, VALUE group) +{ + rb_io_t *fptr; + int o, g; + + rb_secure(2); + o = NIL_P(owner) ? -1 : NUM2INT(owner); + g = NIL_P(group) ? -1 : NUM2INT(group); + GetOpenFile(obj, fptr); +#if defined(__CYGWIN32__) || defined(_WIN32) || defined(__EMX__) + if (NIL_P(fptr->pathv)) return Qnil; + if (chown(RSTRING_PTR(fptr->pathv), o, g) == -1) + rb_sys_fail_path(fptr->pathv); +#else + if (fchown(fptr->fd, o, g) == -1) + rb_sys_fail_path(fptr->pathv); +#endif + + return INT2FIX(0); +} + +#if defined(HAVE_LCHOWN) && !defined(__CHECKER__) +static void +lchown_internal(const char *path, void *arg) +{ + struct chown_args *args = arg; + if (lchown(path, args->owner, args->group) < 0) + rb_sys_fail(path); +} + +/* + * call-seq: + * file.lchown(owner_int, group_int, file_name,..) => integer + * + * Equivalent to File::chown, but does not follow symbolic + * links (so it will change the owner associated with the link, not the + * file referenced by the link). 
Often not available. Returns number + * of files in the argument list. + * + */ + +static VALUE +rb_file_s_lchown(int argc, VALUE *argv) +{ + VALUE o, g, rest; + struct chown_args arg; + long n; + + rb_secure(2); + rb_scan_args(argc, argv, "2*", &o, &g, &rest); + if (NIL_P(o)) { + arg.owner = -1; + } + else { + arg.owner = NUM2UIDT(o); + } + if (NIL_P(g)) { + arg.group = -1; + } + else { + arg.group = NUM2GIDT(g); + } + + n = apply2files(lchown_internal, rest, &arg); + return LONG2FIX(n); +} +#else +static VALUE +rb_file_s_lchown(int argc, VALUE *argv) +{ + rb_notimplement(); +} +#endif + +struct timespec rb_time_timespec(VALUE time); + +#if defined(HAVE_UTIMES) + +static void +utime_internal(const char *path, void *arg) +{ + struct timespec *tsp = arg; + struct timeval tvbuf[2], *tvp = arg; + +#ifdef HAVE_UTIMENSAT + static int try_utimensat = 1; + + if (try_utimensat) { + struct timespec *tsp = arg; + if (utimensat(AT_FDCWD, path, tsp, 0) < 0) { + if (errno == ENOSYS) { + try_utimensat = 0; + goto no_utimensat; + } + rb_sys_fail(path); + } + return; + } +no_utimensat: +#endif + + if (tsp) { + tvbuf[0].tv_sec = tsp[0].tv_sec; + tvbuf[0].tv_usec = tsp[0].tv_nsec / 1000; + tvbuf[1].tv_sec = tsp[1].tv_sec; + tvbuf[1].tv_usec = tsp[1].tv_nsec / 1000; + tvp = tvbuf; + } + if (utimes(path, tvp) < 0) + rb_sys_fail(path); +} + +#else + +#if !defined HAVE_UTIME_H && !defined HAVE_SYS_UTIME_H +struct utimbuf { + long actime; + long modtime; +}; +#endif + +static void +utime_internal(const char *path, void *arg) +{ + struct timespec *tsp = arg; + struct utimbuf utbuf, *utp = NULL; + if (tsp) { + utbuf.actime = tsp[0].tv_sec; + utbuf.modtime = tsp[1].tv_sec; + utp = &utbuf; + } + if (utime(path, utp) < 0) + rb_sys_fail(path); +} + +#endif + +/* + * call-seq: + * File.utime(atime, mtime, file_name,...) => integer + * + * Sets the access and modification times of each + * named file to the first two arguments. Returns + * the number of file names in the argument list. 
+ */ + +static VALUE +rb_file_s_utime(int argc, VALUE *argv) +{ + VALUE atime, mtime, rest; + struct timespec tss[2], *tsp = NULL; + long n; + + rb_secure(2); + rb_scan_args(argc, argv, "2*", &atime, &mtime, &rest); + + if (!NIL_P(atime) || !NIL_P(mtime)) { + tsp = tss; + tsp[0] = rb_time_timespec(atime); + tsp[1] = rb_time_timespec(mtime); + } + + n = apply2files(utime_internal, rest, tsp); + return LONG2FIX(n); +} + +NORETURN(static void sys_fail2(VALUE,VALUE)); +static void +sys_fail2(VALUE s1, VALUE s2) +{ + char *buf; +#ifdef MAX_PATH + const int max_pathlen = MAX_PATH; +#else + const int max_pathlen = MAXPATHLEN; +#endif + const char *e1, *e2; + int len = 5; + int l1 = RSTRING_LEN(s1), l2 = RSTRING_LEN(s2); + + e1 = e2 = ""; + if (l1 > max_pathlen) { + l1 = max_pathlen - 3; + e1 = "..."; + len += 3; + } + if (l2 > max_pathlen) { + l2 = max_pathlen - 3; + e2 = "..."; + len += 3; + } + len += l1 + l2; + buf = ALLOCA_N(char, len); + snprintf(buf, len, "(%.*s%s, %.*s%s)", + l1, RSTRING_PTR(s1), e1, + l2, RSTRING_PTR(s2), e2); + rb_sys_fail(buf); +} + +/* + * call-seq: + * File.link(old_name, new_name) => 0 + * + * Creates a new name for an existing file using a hard link. Will not + * overwrite new_name if it already exists (raising a subclass + * of SystemCallError). Not available on all platforms. + * + * File.link("testfile", ".testfile") #=> 0 + * IO.readlines(".testfile")[0] #=> "This is line one\n" + */ + +static VALUE +rb_file_s_link(VALUE klass, VALUE from, VALUE to) +{ +#ifdef HAVE_LINK + rb_secure(2); + FilePathValue(from); + FilePathValue(to); + + if (link(StringValueCStr(from), StringValueCStr(to)) < 0) { + sys_fail2(from, to); + } + return INT2FIX(0); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + +/* + * call-seq: + * File.symlink(old_name, new_name) => 0 + * + * Creates a symbolic link called new_name for the existing file + * old_name. Raises a NotImplemented exception on + * platforms that do not support symbolic links. 
+ * + * File.symlink("testfile", "link2test") #=> 0 + * + */ + +static VALUE +rb_file_s_symlink(VALUE klass, VALUE from, VALUE to) +{ +#ifdef HAVE_SYMLINK + rb_secure(2); + FilePathValue(from); + FilePathValue(to); + + if (symlink(StringValueCStr(from), StringValueCStr(to)) < 0) { + sys_fail2(from, to); + } + return INT2FIX(0); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + +/* + * call-seq: + * File.readlink(link_name) -> file_name + * + * Returns the name of the file referenced by the given link. + * Not available on all platforms. + * + * File.symlink("testfile", "link2test") #=> 0 + * File.readlink("link2test") #=> "testfile" + */ + +static VALUE +rb_file_s_readlink(VALUE klass, VALUE path) +{ +#ifdef HAVE_READLINK + char *buf; + int size = 100; + int rv; + VALUE v; + + rb_secure(2); + FilePathValue(path); + buf = xmalloc(size); + while ((rv = readlink(RSTRING_PTR(path), buf, size)) == size +#ifdef _AIX + || (rv < 0 && errno == ERANGE) /* quirky behavior of GPFS */ +#endif + ) { + size *= 2; + buf = xrealloc(buf, size); + } + if (rv < 0) { + xfree(buf); + rb_sys_fail_path(path); + } + v = rb_tainted_str_new(buf, rv); + xfree(buf); + + return v; +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + +static void +unlink_internal(const char *path, void *arg) +{ + if (unlink(path) < 0) + rb_sys_fail(path); +} + +/* + * call-seq: + * File.delete(file_name, ...) => integer + * File.unlink(file_name, ...) => integer + * + * Deletes the named files, returning the number of names + * passed as arguments. Raises an exception on any error. + * See also Dir::rmdir. + */ + +static VALUE +rb_file_s_unlink(VALUE klass, VALUE args) +{ + long n; + + rb_secure(2); + n = apply2files(unlink_internal, args, 0); + return LONG2FIX(n); +} + +/* + * call-seq: + * File.rename(old_name, new_name) => 0 + * + * Renames the given file to the new name. Raises a + * SystemCallError if the file cannot be renamed. 
+ * + * File.rename("afile", "afile.bak") #=> 0 + */ + +static VALUE +rb_file_s_rename(VALUE klass, VALUE from, VALUE to) +{ + const char *src, *dst; + + rb_secure(2); + FilePathValue(from); + FilePathValue(to); + src = StringValueCStr(from); + dst = StringValueCStr(to); +#if defined __CYGWIN__ + errno = 0; +#endif + if (rename(src, dst) < 0) { +#if defined DOSISH && !defined _WIN32 + switch (errno) { + case EEXIST: +#if defined (__EMX__) + case EACCES: +#endif + if (chmod(dst, 0666) == 0 && + unlink(dst) == 0 && + rename(src, dst) == 0) + return INT2FIX(0); + } +#endif + sys_fail2(from, to); + } + + return INT2FIX(0); +} + +/* + * call-seq: + * File.umask() => integer + * File.umask(integer) => integer + * + * Returns the current umask value for this process. If the optional + * argument is given, set the umask to that value and return the + * previous value. Umask values are subtracted from the + * default permissions, so a umask of 0222 would make a + * file read-only for everyone. + * + * File.umask(0006) #=> 18 + * File.umask #=> 6 + */ + +static VALUE +rb_file_s_umask(int argc, VALUE *argv) +{ + int omask = 0; + + rb_secure(2); + if (argc == 0) { + omask = umask(0); + umask(omask); + } + else if (argc == 1) { + omask = umask(NUM2INT(argv[0])); + } + else { + rb_raise(rb_eArgError, "wrong number of arguments"); + } + return INT2FIX(omask); +} + +#ifdef __CYGWIN__ +#undef DOSISH +#endif +#if defined __CYGWIN__ || defined DOSISH +#define DOSISH_UNC +#define DOSISH_DRIVE_LETTER +#define isdirsep(x) ((x) == '/' || (x) == '\\') +#else +#define isdirsep(x) ((x) == '/') +#endif + +#if defined _WIN32 || defined __CYGWIN__ +#define USE_NTFS 1 +#else +#define USE_NTFS 0 +#endif + +#if USE_NTFS +#define istrailinggarbage(x) ((x) == '.' || (x) == ' ') +#else +#define istrailinggarbage(x) 0 +#endif + +#ifndef CharNext /* defined as CharNext[AW] on Windows. 
*/ +# define CharNext(p) ((p) + 1) +#endif + +#ifdef DOSISH_DRIVE_LETTER +static inline int +has_drive_letter(const char *buf) +{ + if (ISALPHA(buf[0]) && buf[1] == ':') { + return 1; + } + else { + return 0; + } +} + +static char* +getcwdofdrv(int drv) +{ + char drive[4]; + char *drvcwd, *oldcwd; + + drive[0] = drv; + drive[1] = ':'; + drive[2] = '\0'; + + /* the only way that I know to get the current directory + of a particular drive is to change chdir() to that drive, + so save the old cwd before chdir() + */ + oldcwd = my_getcwd(); + if (chdir(drive) == 0) { + drvcwd = my_getcwd(); + chdir(oldcwd); + xfree(oldcwd); + } + else { + /* perhaps the drive is not exist. we return only drive letter */ + drvcwd = strdup(drive); + } + return drvcwd; +} +#endif + +static inline char * +skiproot(const char *path) +{ +#ifdef DOSISH_DRIVE_LETTER + if (has_drive_letter(path)) path += 2; +#endif + while (isdirsep(*path)) path++; + return (char *)path; +} + +#define nextdirsep rb_path_next +char * +rb_path_next(const char *s) +{ + while (*s && !isdirsep(*s)) { + s = CharNext(s); + } + return (char *)s; +} + +#if defined(DOSISH_UNC) || defined(DOSISH_DRIVE_LETTER) +#define skipprefix rb_path_skip_prefix +#else +#define skipprefix(path) (path) +#endif +char * +rb_path_skip_prefix(const char *path) +{ +#if defined(DOSISH_UNC) || defined(DOSISH_DRIVE_LETTER) +#ifdef DOSISH_UNC + if (isdirsep(path[0]) && isdirsep(path[1])) { + path += 2; + while (isdirsep(*path)) path++; + if (*(path = nextdirsep(path)) && path[1] && !isdirsep(path[1])) + path = nextdirsep(path + 1); + return (char *)path; + } +#endif +#ifdef DOSISH_DRIVE_LETTER + if (has_drive_letter(path)) + return (char *)(path + 2); +#endif +#endif + return (char *)path; +} + +#define strrdirsep rb_path_last_separator +char * +rb_path_last_separator(const char *path) +{ + char *last = NULL; + while (*path) { + if (isdirsep(*path)) { + const char *tmp = path++; + while (isdirsep(*path)) path++; + if (!*path) break; + last = 
(char *)tmp; + } + else { + path = CharNext(path); + } + } + return last; +} + +static char * +chompdirsep(const char *path) +{ + while (*path) { + if (isdirsep(*path)) { + const char *last = path++; + while (isdirsep(*path)) path++; + if (!*path) return (char *)last; + } + else { + path = CharNext(path); + } + } + return (char *)path; +} + +char * +rb_path_end(const char *path) +{ + if (isdirsep(*path)) path++; + return chompdirsep(path); +} + +#if USE_NTFS +static char * +ntfs_tail(const char *path) +{ + while (*path == '.') path++; + while (*path && *path != ':') { + if (istrailinggarbage(*path)) { + const char *last = path++; + while (istrailinggarbage(*path)) path++; + if (!*path || *path == ':') return (char *)last; + } + else if (isdirsep(*path)) { + const char *last = path++; + while (isdirsep(*path)) path++; + if (!*path) return (char *)last; + if (*path == ':') path++; + } + else { + path = CharNext(path); + } + } + return (char *)path; +} +#endif + +#define BUFCHECK(cond) do {\ + long bdiff = p - buf;\ + if (cond) {\ + do {buflen *= 2;} while (cond);\ + rb_str_resize(result, buflen);\ + buf = RSTRING_PTR(result);\ + p = buf + bdiff;\ + pend = buf + buflen;\ + }\ +} while (0) + +#define BUFINIT() (\ + p = buf = RSTRING_PTR(result),\ + buflen = RSTRING_LEN(result),\ + pend = p + buflen) + +#define SET_EXTERNAL_ENCODING() (\ + (void)(extenc || (extenc = rb_default_external_encoding())),\ + rb_enc_associate(result, extenc)) + +static int is_absolute_path(const char*); + +static VALUE +file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result) +{ + const char *s, *b; + char *buf, *p, *pend, *root; + long buflen, dirlen; + int tainted; + rb_encoding *extenc = 0; + + FilePathValue(fname); + s = StringValuePtr(fname); + BUFINIT(); + tainted = OBJ_TAINTED(fname); + + if (s[0] == '~' && abs_mode == 0) { /* execute only if NOT absolute_path() */ + if (isdirsep(s[1]) || s[1] == '\0') { + const char *dir = getenv("HOME"); + + if (!dir) { + 
rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `%s'", s); + } + dirlen = strlen(dir); + BUFCHECK(dirlen > buflen); + strcpy(buf, dir); +#if defined DOSISH || defined __CYGWIN__ + for (p = buf; *p; p = CharNext(p)) { + if (*p == '\\') { + *p = '/'; + } + } +#else + p = buf + strlen(dir); +#endif + s++; + tainted = 1; + SET_EXTERNAL_ENCODING(); + } + else { +#ifdef HAVE_PWD_H + struct passwd *pwPtr; + s++; +#endif + s = nextdirsep(b = s); + BUFCHECK(bdiff + (s-b) >= buflen); + memcpy(p, b, s-b); + p += s-b; + *p = '\0'; +#ifdef HAVE_PWD_H + pwPtr = getpwnam(buf); + if (!pwPtr) { + endpwent(); + rb_raise(rb_eArgError, "user %s doesn't exist", buf); + } + dirlen = strlen(pwPtr->pw_dir); + BUFCHECK(dirlen > buflen); + strcpy(buf, pwPtr->pw_dir); + p = buf + strlen(pwPtr->pw_dir); + endpwent(); +#endif + } + } +#ifdef DOSISH_DRIVE_LETTER + /* skip drive letter */ + else if (has_drive_letter(s)) { + if (isdirsep(s[2])) { + /* specified drive letter, and full path */ + /* skip drive letter */ + BUFCHECK(bdiff + 2 >= buflen); + memcpy(p, s, 2); + p += 2; + s += 2; + rb_enc_copy(result, fname); + } + else { + /* specified drive, but not full path */ + int same = 0; + if (!NIL_P(dname)) { + file_expand_path(dname, Qnil, abs_mode, result); + BUFINIT(); + if (has_drive_letter(p) && TOLOWER(p[0]) == TOLOWER(s[0])) { + /* ok, same drive */ + same = 1; + } + } + if (!same) { + char *dir = getcwdofdrv(*s); + + tainted = 1; + dirlen = strlen(dir); + BUFCHECK(dirlen > buflen); + strcpy(buf, dir); + xfree(dir); + SET_EXTERNAL_ENCODING(); + } + p = chompdirsep(skiproot(buf)); + s += 2; + } + } +#endif + else if (!is_absolute_path(s)) { + if (!NIL_P(dname)) { + file_expand_path(dname, Qnil, abs_mode, result); + BUFINIT(); + } + else { + char *dir = my_getcwd(); + + tainted = 1; + dirlen = strlen(dir); + BUFCHECK(dirlen > buflen); + strcpy(buf, dir); + xfree(dir); + SET_EXTERNAL_ENCODING(); + } +#if defined DOSISH || defined __CYGWIN__ + if (isdirsep(*s)) { + /* 
specified full path, but not drive letter nor UNC */ + /* we need to get the drive letter or UNC share name */ + p = skipprefix(buf); + } + else +#endif + p = chompdirsep(skiproot(buf)); + } + else { + b = s; + do s++; while (isdirsep(*s)); + p = buf + (s - b); + BUFCHECK(bdiff >= buflen); + memset(buf, '/', p - buf); + rb_enc_copy(result, fname); + } + if (p > buf && p[-1] == '/') + --p; + else { + BUFCHECK(bdiff + 1 >= buflen); + *p = '/'; + } + + BUFCHECK(bdiff + 1 >= buflen); + p[1] = 0; + root = skipprefix(buf); + + b = s; + while (*s) { + switch (*s) { + case '.': + if (b == s++) { /* beginning of path element */ + switch (*s) { + case '\0': + b = s; + break; + case '.': + if (*(s+1) == '\0' || isdirsep(*(s+1))) { + /* We must go back to the parent */ + char *n; + *p = '\0'; + if (!(n = strrdirsep(root))) { + *p = '/'; + } + else { + p = n; + } + b = ++s; + } +#if USE_NTFS + else { + do ++s; while (istrailinggarbage(*s)); + } +#endif + break; + case '/': +#if defined DOSISH || defined __CYGWIN__ + case '\\': +#endif + b = ++s; + break; + default: + /* ordinary path element, beginning don't move */ + break; + } + } +#if USE_NTFS + else { + --s; + case ' ': { + const char *e = s; + while (istrailinggarbage(*s)) s++; + if (!*s) { + s = e; + goto endpath; + } + } + } +#endif + break; + case '/': +#if defined DOSISH || defined __CYGWIN__ + case '\\': +#endif + if (s > b) { + long rootdiff = root - buf; + BUFCHECK(bdiff + (s-b+1) >= buflen); + root = buf + rootdiff; + memcpy(++p, b, s-b); + p += s-b; + *p = '/'; + } + b = ++s; + break; + default: + s = CharNext(s); + break; + } + } + + if (s > b) { +#if USE_NTFS + endpath: + if (s > b + 6 && strncasecmp(s - 6, ":$DATA", 6) == 0) { + /* alias of stream */ + /* get rid of a bug of x64 VC++ */ + if (*(s-7) == ':') s -= 7; /* prime */ + else if (memchr(b, ':', s - 6 - b)) s -= 6; /* alternative */ + } +#endif + BUFCHECK(bdiff + (s-b) >= buflen); + memcpy(++p, b, s-b); + p += s-b; + } + if (p == skiproot(buf) - 1) p++; 
+ +#if USE_NTFS + *p = '\0'; + if ((s = strrdirsep(b = buf)) != 0 && !strpbrk(s, "*?")) { + size_t len; + WIN32_FIND_DATA wfd; +#ifdef __CYGWIN__ + int lnk_added = 0, is_symlink = 0; + struct stat st; + char w32buf[MAXPATHLEN]; + p = (char *)s; + if (lstat(buf, &st) == 0 && S_ISLNK(st.st_mode)) { + is_symlink = 1; + *p = '\0'; + } + if (cygwin_conv_to_win32_path((*buf ? buf : "/"), w32buf) == 0) { + b = w32buf; + } + if (is_symlink && b == w32buf) { + *p = '\\'; + strlcat(w32buf, p, sizeof(w32buf)); + len = strlen(p); + if (len > 4 && STRCASECMP(p + len - 4, ".lnk") != 0) { + lnk_added = 1; + strlcat(w32buf, ".lnk", sizeof(w32buf)); + } + } + *p = '/'; +#endif + HANDLE h = FindFirstFile(b, &wfd); + if (h != INVALID_HANDLE_VALUE) { + FindClose(h); + len = strlen(wfd.cFileName); +#ifdef __CYGWIN__ + if (lnk_added && len > 4 && + STRCASECMP(wfd.cFileName + len - 4, ".lnk") == 0) { + wfd.cFileName[len -= 4] = '\0'; + } +#else + p = (char *)s; +#endif + ++p; + BUFCHECK(bdiff + len >= buflen); + memcpy(p, wfd.cFileName, len + 1); + p += len; + } +#ifdef __CYGWIN__ + else { + p += strlen(p); + } +#endif + } +#endif + + if (tainted) OBJ_TAINT(result); + rb_str_set_len(result, p - buf); + rb_enc_check(fname, result); + return result; +} + +VALUE +rb_file_expand_path(VALUE fname, VALUE dname) +{ + return file_expand_path(fname, dname, 0, rb_usascii_str_new(0, MAXPATHLEN + 2)); +} + +/* + * call-seq: + * File.expand_path(file_name [, dir_string] ) -> abs_file_name + * + * Converts a pathname to an absolute pathname. Relative paths are + * referenced from the current working directory of the process unless + * dir_string is given, in which case it will be used as the + * starting point. The given pathname may start with a + * ``~'', which expands to the process owner's home + * directory (the environment variable HOME must be set + * correctly). ``~user'' expands to the named + * user's home directory. 
+ * + * File.expand_path("~oracle/bin") #=> "/home/oracle/bin" + * File.expand_path("../../bin", "/tmp/x") #=> "/bin" + */ + +VALUE +rb_file_s_expand_path(int argc, VALUE *argv) +{ + VALUE fname, dname; + + if (argc == 1) { + return rb_file_expand_path(argv[0], Qnil); + } + rb_scan_args(argc, argv, "11", &fname, &dname); + + return rb_file_expand_path(fname, dname); +} + +VALUE +rb_file_absolute_path(VALUE fname, VALUE dname) +{ + return file_expand_path(fname, dname, 1, rb_usascii_str_new(0, MAXPATHLEN + 2)); +} + +/* + * call-seq: + * File.absolute_path(file_name [, dir_string] ) -> abs_file_name + * + * Converts a pathname to an absolute pathname. Relative paths are + * referenced from the current working directory of the process unless + * dir_string is given, in which case it will be used as the + * starting point. If the given pathname starts with a ``~'' + * it is NOT expanded, it is treated as a normal directory name. + * + * File.absolute_path("~oracle/bin") #=> "/~oracle/bin" + */ + +VALUE +rb_file_s_absolute_path(int argc, VALUE *argv) +{ + VALUE fname, dname; + + if (argc == 1) { + return rb_file_absolute_path(argv[0], Qnil); + } + rb_scan_args(argc, argv, "11", &fname, &dname); + + return rb_file_absolute_path(fname, dname); +} + +static int +rmext(const char *p, int l1, const char *e) +{ + int l0, l2; + + if (!e) return 0; + + for (l0 = 0; l0 < l1; ++l0) { + if (p[l0] != '.') break; + } + l2 = strlen(e); + if (l2 == 2 && e[1] == '*') { + unsigned char c = *e; + e = p + l1; + do { + if (e <= p + l0) return 0; + } while (*--e != c); + return e - p; + } + if (l1 < l2) return l1; + +#if CASEFOLD_FILESYSTEM +#define fncomp strncasecmp +#else +#define fncomp strncmp +#endif + if (fncomp(p+l1-l2, e, l2) == 0) { + return l1-l2; + } + return 0; +} + +/* + * call-seq: + * File.basename(file_name [, suffix] ) -> base_name + * + * Returns the last component of the filename given in file_name, + * which must be formed using forward slashes (``/'') + * regardless 
of the separator used on the local file system. If + * suffix is given and present at the end of file_name, + * it is removed. + * + * File.basename("/home/gumby/work/ruby.rb") #=> "ruby.rb" + * File.basename("/home/gumby/work/ruby.rb", ".rb") #=> "ruby" + */ + +static VALUE +rb_file_s_basename(int argc, VALUE *argv) +{ + VALUE fname, fext, basename; + const char *name, *p; +#if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC + const char *root; +#endif + int f, n; + + if (rb_scan_args(argc, argv, "11", &fname, &fext) == 2) { + StringValue(fext); + } + FilePathStringValue(fname); + if (RSTRING_LEN(fname) == 0 || !*(name = RSTRING_PTR(fname))) + return rb_str_new_shared(fname); + name = skipprefix(name); +#if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC + root = name; +#endif + while (isdirsep(*name)) + name++; + if (!*name) { + p = name - 1; + f = 1; +#if defined DOSISH_DRIVE_LETTER || defined DOSISH_UNC + if (name != root) { + /* has slashes */ + } +#ifdef DOSISH_DRIVE_LETTER + else if (*p == ':') { + p++; + f = 0; + } +#endif +#ifdef DOSISH_UNC + else { + p = "/"; + } +#endif +#endif + } + else { + if (!(p = strrdirsep(name))) { + p = name; + } + else { + while (isdirsep(*p)) p++; /* skip last / */ + } +#if USE_NTFS + n = ntfs_tail(p) - p; +#else + n = chompdirsep(p) - p; +#endif + if (NIL_P(fext) || !(f = rmext(p, n, StringValueCStr(fext)))) { + f = n; + } + if (f == RSTRING_LEN(fname)) return rb_str_new_shared(fname); + } + basename = rb_str_new(p, f); + rb_enc_copy(basename, fname); + OBJ_INFECT(basename, fname); + return basename; +} + +/* + * call-seq: + * File.dirname(file_name ) -> dir_name + * + * Returns all components of the filename given in file_name + * except the last one. The filename must be formed using forward + * slashes (``/'') regardless of the separator used on the + * local file system. 
+ * + * File.dirname("/home/gumby/work/ruby.rb") #=> "/home/gumby/work" + */ + +static VALUE +rb_file_s_dirname(VALUE klass, VALUE fname) +{ + const char *name, *root, *p; + VALUE dirname; + + FilePathStringValue(fname); + name = StringValueCStr(fname); + root = skiproot(name); +#ifdef DOSISH_UNC + if (root > name + 1 && isdirsep(*name)) + root = skipprefix(name = root - 2); +#else + if (root > name + 1) + name = root - 1; +#endif + p = strrdirsep(root); + if (!p) { + p = root; + } + if (p == name) + return rb_usascii_str_new2("."); +#ifdef DOSISH_DRIVE_LETTER + if (has_drive_letter(name) && isdirsep(*(name + 2))) { + const char *top = skiproot(name + 2); + dirname = rb_str_new(name, 3); + rb_str_cat(dirname, top, p - top); + } + else +#endif + dirname = rb_str_new(name, p - name); +#ifdef DOSISH_DRIVE_LETTER + if (has_drive_letter(name) && root == name + 2 && p - name == 2) + rb_str_cat(dirname, ".", 1); +#endif + rb_enc_copy(dirname, fname); + OBJ_INFECT(dirname, fname); + return dirname; +} + +/* + * call-seq: + * File.extname(path) -> string + * + * Returns the extension (the portion of file name in path + * after the period). + * + * File.extname("test.rb") #=> ".rb" + * File.extname("a/b/d/test.rb") #=> ".rb" + * File.extname("test") #=> "" + * File.extname(".profile") #=> "" + * + */ + +static VALUE +rb_file_s_extname(VALUE klass, VALUE fname) +{ + const char *name, *p, *e; + VALUE extname; + + FilePathStringValue(fname); + name = StringValueCStr(fname); + p = strrdirsep(name); /* get the last path component */ + if (!p) + p = name; + else + name = ++p; + + e = 0; + while (*p && *p == '.') p++; + while (*p) { + if (*p == '.' || istrailinggarbage(*p)) { +#if USE_NTFS + const char *last = p++, *dot = last; + while (istrailinggarbage(*p)) { + if (*p == '.') dot = p; + p++; + } + if (!*p || *p == ':') { + p = last; + break; + } + if (*last == '.' 
|| dot > last) e = dot; + continue; +#else + e = p; /* get the last dot of the last component */ +#endif + } +#if USE_NTFS + else if (*p == ':') { + break; + } +#endif + else if (isdirsep(*p)) + break; + p = CharNext(p); + } + if (!e || e == name || e+1 == p) /* no dot, or the only dot is first or end? */ + return rb_str_new(0, 0); + extname = rb_str_new(e, p - e); /* keep the dot, too! */ + rb_enc_copy(extname, fname); + OBJ_INFECT(extname, fname); + return extname; +} + +/* + * call-seq: + * File.path(path) -> string + * + * Returns the string representation of the path + * + * File.path("/dev/null") #=> "/dev/null" + * File.path(Pathname.new("/tmp")) #=> "/tmp" + * + */ + +static VALUE +rb_file_s_path(VALUE klass, VALUE fname) +{ + return rb_get_path(fname); +} + +/* + * call-seq: + * File.split(file_name) => array + * + * Splits the given string into a directory and a file component and + * returns them in a two-element array. See also + * File::dirname and File::basename. + * + * File.split("/home/gumby/.profile") #=> ["/home/gumby", ".profile"] + */ + +static VALUE +rb_file_s_split(VALUE klass, VALUE path) +{ + FilePathStringValue(path); /* get rid of converting twice */ + return rb_assoc_new(rb_file_s_dirname(Qnil, path), rb_file_s_basename(1,&path)); +} + +static VALUE separator; + +static VALUE rb_file_join(VALUE ary, VALUE sep); + +static VALUE +file_inspect_join(VALUE ary, VALUE argp, int recur) +{ + VALUE *arg = (VALUE *)argp; + if (recur) return rb_usascii_str_new2("[...]"); + return rb_file_join(arg[0], arg[1]); +} + +static VALUE +rb_file_join(VALUE ary, VALUE sep) +{ + long len, i; + VALUE result, tmp; + const char *name, *tail; + + if (RARRAY_LEN(ary) == 0) return rb_str_new(0, 0); + + len = 1; + for (i=0; i 0 && !NIL_P(sep)) { + tail = chompdirsep(name); + if (RSTRING_PTR(tmp) && isdirsep(RSTRING_PTR(tmp)[0])) { + rb_str_set_len(result, tail - name); + } + else if (!*tail) { + rb_str_buf_append(result, sep); + } + } + rb_str_buf_append(result, 
tmp); + } + + return result; +} + +/* + * call-seq: + * File.join(string, ...) -> path + * + * Returns a new string formed by joining the strings using + * File::SEPARATOR. + * + * File.join("usr", "mail", "gumby") #=> "usr/mail/gumby" + * + */ + +static VALUE +rb_file_s_join(VALUE klass, VALUE args) +{ + return rb_file_join(args, separator); +} + +/* + * call-seq: + * File.truncate(file_name, integer) => 0 + * + * Truncates the file file_name to be at most integer + * bytes long. Not available on all platforms. + * + * f = File.new("out", "w") + * f.write("1234567890") #=> 10 + * f.close #=> nil + * File.truncate("out", 5) #=> 0 + * File.size("out") #=> 5 + * + */ + +static VALUE +rb_file_s_truncate(VALUE klass, VALUE path, VALUE len) +{ + off_t pos; + + rb_secure(2); + pos = NUM2OFFT(len); + FilePathValue(path); +#ifdef HAVE_TRUNCATE + if (truncate(StringValueCStr(path), pos) < 0) + rb_sys_fail(RSTRING_PTR(path)); +#else +# ifdef HAVE_CHSIZE + { + int tmpfd; + +# ifdef _WIN32 + if ((tmpfd = open(StringValueCStr(path), O_RDWR)) < 0) { + rb_sys_fail(RSTRING_PTR(path)); + } +# else + if ((tmpfd = open(StringValueCStr(path), 0)) < 0) { + rb_sys_fail(RSTRING_PTR(path)); + } +# endif + if (chsize(tmpfd, pos) < 0) { + close(tmpfd); + rb_sys_fail(RSTRING_PTR(path)); + } + close(tmpfd); + } +# else + rb_notimplement(); +# endif +#endif + return INT2FIX(0); +} + +/* + * call-seq: + * file.truncate(integer) => 0 + * + * Truncates file to at most integer bytes. The file + * must be opened for writing. Not available on all platforms. 
+ * + * f = File.new("out", "w") + * f.syswrite("1234567890") #=> 10 + * f.truncate(5) #=> 0 + * f.close() #=> nil + * File.size("out") #=> 5 + */ + +static VALUE +rb_file_truncate(VALUE obj, VALUE len) +{ + rb_io_t *fptr; + off_t pos; + + rb_secure(2); + pos = NUM2OFFT(len); + GetOpenFile(obj, fptr); + if (!(fptr->mode & FMODE_WRITABLE)) { + rb_raise(rb_eIOError, "not opened for writing"); + } + rb_io_flush(obj); +#ifdef HAVE_FTRUNCATE + if (ftruncate(fptr->fd, pos) < 0) + rb_sys_fail_path(fptr->pathv); +#else +# ifdef HAVE_CHSIZE + if (chsize(fptr->fd, pos) < 0) + rb_sys_fail(fptr->pathv); +# else + rb_notimplement(); +# endif +#endif + return INT2FIX(0); +} + +# ifndef LOCK_SH +# define LOCK_SH 1 +# endif +# ifndef LOCK_EX +# define LOCK_EX 2 +# endif +# ifndef LOCK_NB +# define LOCK_NB 4 +# endif +# ifndef LOCK_UN +# define LOCK_UN 8 +# endif + +#ifdef __CYGWIN__ +#include +extern unsigned long __attribute__((stdcall)) GetLastError(void); +#endif + +static VALUE +rb_thread_flock(void *data) +{ +#ifdef __CYGWIN__ + int old_errno = errno; +#endif + int *op = data, ret = flock(op[0], op[1]); + +#ifdef __CYGWIN__ + if (GetLastError() == ERROR_NOT_LOCKED) { + ret = 0; + errno = old_errno; + } +#endif + return (VALUE)ret; +} + +/* + * call-seq: + * file.flock (locking_constant ) => 0 or false + * + * Locks or unlocks a file according to locking_constant (a + * logical or of the values in the table below). + * Returns false if File::LOCK_NB is + * specified and the operation would otherwise have blocked. Not + * available on all platforms. + * + * Locking constants (in class File): + * + * LOCK_EX | Exclusive lock. Only one process may hold an + * | exclusive lock for a given file at a time. + * ----------+------------------------------------------------ + * LOCK_NB | Don't block when locking. May be combined + * | with other lock options using logical or. + * ----------+------------------------------------------------ + * LOCK_SH | Shared lock. 
Multiple processes may each hold a + * | shared lock for a given file at the same time. + * ----------+------------------------------------------------ + * LOCK_UN | Unlock. + * + * Example: + * + * File.new("testfile").flock(File::LOCK_UN) #=> 0 + * + */ + +static VALUE +rb_file_flock(VALUE obj, VALUE operation) +{ +#ifndef __CHECKER__ + rb_io_t *fptr; + int op[2], op1; + + rb_secure(2); + op[1] = op1 = NUM2INT(operation); + GetOpenFile(obj, fptr); + op[0] = fptr->fd; + + if (fptr->mode & FMODE_WRITABLE) { + rb_io_flush(obj); + } + while ((int)rb_thread_blocking_region(rb_thread_flock, op, RUBY_UBF_IO, 0) < 0) { + switch (errno) { + case EAGAIN: + case EACCES: +#if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN + case EWOULDBLOCK: +#endif + if (op1 & LOCK_NB) return Qfalse; + rb_thread_polling(); + rb_io_check_closed(fptr); + continue; + + case EINTR: +#if defined(ERESTART) + case ERESTART: +#endif + break; + + default: + rb_sys_fail_path(fptr->pathv); + } + } +#endif + return INT2FIX(0); +} +#undef flock + +static void +test_check(int n, int argc, VALUE *argv) +{ + int i; + + rb_secure(2); + n+=1; + if (n != argc) rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, n); + for (i=1; i obj + * + * Uses the integer aCmd to perform various tests on + * file1 (first table below) or on file1 and + * file2 (second table). 
+ * + * File tests on a single file: + * + * Test Returns Meaning + * ?A | Time | Last access time for file1 + * ?b | boolean | True if file1 is a block device + * ?c | boolean | True if file1 is a character device + * ?C | Time | Last change time for file1 + * ?d | boolean | True if file1 exists and is a directory + * ?e | boolean | True if file1 exists + * ?f | boolean | True if file1 exists and is a regular file + * ?g | boolean | True if file1 has the \CF{setgid} bit + * | | set (false under NT) + * ?G | boolean | True if file1 exists and has a group + * | | ownership equal to the caller's group + * ?k | boolean | True if file1 exists and has the sticky bit set + * ?l | boolean | True if file1 exists and is a symbolic link + * ?M | Time | Last modification time for file1 + * ?o | boolean | True if file1 exists and is owned by + * | | the caller's effective uid + * ?O | boolean | True if file1 exists and is owned by + * | | the caller's real uid + * ?p | boolean | True if file1 exists and is a fifo + * ?r | boolean | True if file1 is readable by the effective + * | | uid/gid of the caller + * ?R | boolean | True if file is readable by the real + * | | uid/gid of the caller + * ?s | int/nil | If file1 has nonzero size, return the size, + * | | otherwise return nil + * ?S | boolean | True if file1 exists and is a socket + * ?u | boolean | True if file1 has the setuid bit set + * ?w | boolean | True if file1 exists and is writable by + * | | the effective uid/gid + * ?W | boolean | True if file1 exists and is writable by + * | | the real uid/gid + * ?x | boolean | True if file1 exists and is executable by + * | | the effective uid/gid + * ?X | boolean | True if file1 exists and is executable by + * | | the real uid/gid + * ?z | boolean | True if file1 exists and has a zero length + * + * Tests that take two files: + * + * ?- | boolean | True if file1 and file2 are identical + * ?= | boolean | True if the modification times of file1 + * | | and file2 are equal + * 
?< | boolean | True if the modification time of file1 + * | | is prior to that of file2 + * ?> | boolean | True if the modification time of file1 + * | | is after that of file2 + */ + +static VALUE +rb_f_test(int argc, VALUE *argv) +{ + int cmd; + + if (argc == 0) rb_raise(rb_eArgError, "wrong number of arguments"); + cmd = NUM2CHR(argv[0]); + if (cmd == 0) goto unknown; + if (strchr("bcdefgGkloOprRsSuwWxXz", cmd)) { + CHECK(1); + switch (cmd) { + case 'b': + return rb_file_blockdev_p(0, argv[1]); + + case 'c': + return rb_file_chardev_p(0, argv[1]); + + case 'd': + return rb_file_directory_p(0, argv[1]); + + case 'a': + case 'e': + return rb_file_exist_p(0, argv[1]); + + case 'f': + return rb_file_file_p(0, argv[1]); + + case 'g': + return rb_file_sgid_p(0, argv[1]); + + case 'G': + return rb_file_grpowned_p(0, argv[1]); + + case 'k': + return rb_file_sticky_p(0, argv[1]); + + case 'l': + return rb_file_symlink_p(0, argv[1]); + + case 'o': + return rb_file_owned_p(0, argv[1]); + + case 'O': + return rb_file_rowned_p(0, argv[1]); + + case 'p': + return rb_file_pipe_p(0, argv[1]); + + case 'r': + return rb_file_readable_p(0, argv[1]); + + case 'R': + return rb_file_readable_real_p(0, argv[1]); + + case 's': + return rb_file_size_p(0, argv[1]); + + case 'S': + return rb_file_socket_p(0, argv[1]); + + case 'u': + return rb_file_suid_p(0, argv[1]); + + case 'w': + return rb_file_writable_p(0, argv[1]); + + case 'W': + return rb_file_world_writable_p(0, argv[1]); + + case 'x': + return rb_file_executable_p(0, argv[1]); + + case 'X': + return rb_file_executable_real_p(0, argv[1]); + + case 'z': + return rb_file_zero_p(0, argv[1]); + } + } + + if (strchr("MAC", cmd)) { + struct stat st; + VALUE fname = argv[1]; + + CHECK(1); + if (rb_stat(fname, &st) == -1) { + FilePathValue(fname); + rb_sys_fail(RSTRING_PTR(fname)); + } + + switch (cmd) { + case 'A': + return stat_atime(&st); + case 'M': + return stat_mtime(&st); + case 'C': + return stat_ctime(&st); + } + } + + if (cmd 
== '-') { + CHECK(2); + return rb_file_identical_p(0, argv[1], argv[2]); + } + + if (strchr("=<>", cmd)) { + struct stat st1, st2; + + CHECK(2); + if (rb_stat(argv[1], &st1) < 0) return Qfalse; + if (rb_stat(argv[2], &st2) < 0) return Qfalse; + + switch (cmd) { + case '=': + if (st1.st_mtime == st2.st_mtime) return Qtrue; + return Qfalse; + + case '>': + if (st1.st_mtime > st2.st_mtime) return Qtrue; + return Qfalse; + + case '<': + if (st1.st_mtime < st2.st_mtime) return Qtrue; + return Qfalse; + } + } + unknown: + /* unknown command */ + if (ISPRINT(cmd)) { + rb_raise(rb_eArgError, "unknown command ?%c", cmd); + } + else { + rb_raise(rb_eArgError, "unknown command ?\\x%02X", cmd); + } + return Qnil; /* not reached */ +} + + +/* + * Document-class: File::Stat + * + * Objects of class File::Stat encapsulate common status + * information for File objects. The information is + * recorded at the moment the File::Stat object is + * created; changes made to the file after that point will not be + * reflected. File::Stat objects are returned by + * IO#stat, File::stat, + * File#lstat, and File::lstat. Many of these + * methods return platform-specific values, and not all values are + * meaningful on all systems. See also Kernel#test. + */ + +static VALUE +rb_stat_s_alloc(VALUE klass) +{ + return stat_new_0(klass, 0); +} + +/* + * call-seq: + * + * File::Stat.new(file_name) => stat + * + * Create a File::Stat object for the given file name (raising an + * exception if the file doesn't exist). 
+ */ + +static VALUE +rb_stat_init(VALUE obj, VALUE fname) +{ + struct stat st, *nst; + + rb_secure(2); + FilePathValue(fname); + if (stat(StringValueCStr(fname), &st) == -1) { + rb_sys_fail(RSTRING_PTR(fname)); + } + if (DATA_PTR(obj)) { + xfree(DATA_PTR(obj)); + DATA_PTR(obj) = NULL; + } + nst = ALLOC(struct stat); + *nst = st; + DATA_PTR(obj) = nst; + + return Qnil; +} + +/* :nodoc: */ +static VALUE +rb_stat_init_copy(VALUE copy, VALUE orig) +{ + struct stat *nst; + + if (copy == orig) return orig; + rb_check_frozen(copy); + /* need better argument type check */ + if (!rb_obj_is_instance_of(orig, rb_obj_class(copy))) { + rb_raise(rb_eTypeError, "wrong argument class"); + } + if (DATA_PTR(copy)) { + xfree(DATA_PTR(copy)); + DATA_PTR(copy) = 0; + } + if (DATA_PTR(orig)) { + nst = ALLOC(struct stat); + *nst = *(struct stat*)DATA_PTR(orig); + DATA_PTR(copy) = nst; + } + + return copy; +} + +/* + * call-seq: + * stat.ftype => string + * + * Identifies the type of stat. The return string is one of: + * ``file'', ``directory'', + * ``characterSpecial'', ``blockSpecial'', + * ``fifo'', ``link'', + * ``socket'', or ``unknown''. + * + * File.stat("/dev/tty").ftype #=> "characterSpecial" + * + */ + +static VALUE +rb_stat_ftype(VALUE obj) +{ + return rb_file_ftype(get_stat(obj)); +} + +/* + * call-seq: + * stat.directory? => true or false + * + * Returns true if stat is a directory, + * false otherwise. + * + * File.stat("testfile").directory? #=> false + * File.stat(".").directory? #=> true + */ + +static VALUE +rb_stat_d(VALUE obj) +{ + if (S_ISDIR(get_stat(obj)->st_mode)) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * stat.pipe? => true or false + * + * Returns true if the operating system supports pipes and + * stat is a pipe; false otherwise. + */ + +static VALUE +rb_stat_p(VALUE obj) +{ +#ifdef S_IFIFO + if (S_ISFIFO(get_stat(obj)->st_mode)) return Qtrue; + +#endif + return Qfalse; +} + +/* + * call-seq: + * stat.symlink? 
=> true or false + * + * Returns true if stat is a symbolic link, + * false if it isn't or if the operating system doesn't + * support this feature. As File::stat automatically + * follows symbolic links, symlink? will always be + * false for an object returned by + * File::stat. + * + * File.symlink("testfile", "alink") #=> 0 + * File.stat("alink").symlink? #=> false + * File.lstat("alink").symlink? #=> true + * + */ + +static VALUE +rb_stat_l(VALUE obj) +{ +#ifdef S_ISLNK + if (S_ISLNK(get_stat(obj)->st_mode)) return Qtrue; +#endif + return Qfalse; +} + +/* + * call-seq: + * stat.socket? => true or false + * + * Returns true if stat is a socket, + * false if it isn't or if the operating system doesn't + * support this feature. + * + * File.stat("testfile").socket? #=> false + * + */ + +static VALUE +rb_stat_S(VALUE obj) +{ +#ifdef S_ISSOCK + if (S_ISSOCK(get_stat(obj)->st_mode)) return Qtrue; + +#endif + return Qfalse; +} + +/* + * call-seq: + * stat.blockdev? => true or false + * + * Returns true if the file is a block device, + * false if it isn't or if the operating system doesn't + * support this feature. + * + * File.stat("testfile").blockdev? #=> false + * File.stat("/dev/hda1").blockdev? #=> true + * + */ + +static VALUE +rb_stat_b(VALUE obj) +{ +#ifdef S_ISBLK + if (S_ISBLK(get_stat(obj)->st_mode)) return Qtrue; + +#endif + return Qfalse; +} + +/* + * call-seq: + * stat.chardev? => true or false + * + * Returns true if the file is a character device, + * false if it isn't or if the operating system doesn't + * support this feature. + * + * File.stat("/dev/tty").chardev? #=> true + * + */ + +static VALUE +rb_stat_c(VALUE obj) +{ + if (S_ISCHR(get_stat(obj)->st_mode)) return Qtrue; + + return Qfalse; +} + +/* + * call-seq: + * stat.owned? => true or false + * + * Returns true if the effective user id of the process is + * the same as the owner of stat. + * + * File.stat("testfile").owned? #=> true + * File.stat("/etc/passwd").owned? 
#=> false + * + */ + +static VALUE +rb_stat_owned(VALUE obj) +{ + if (get_stat(obj)->st_uid == geteuid()) return Qtrue; + return Qfalse; +} + +static VALUE +rb_stat_rowned(VALUE obj) +{ + if (get_stat(obj)->st_uid == getuid()) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * stat.grpowned? => true or false + * + * Returns true if the effective group id of the process is the same as + * the group id of stat. On Windows NT, returns false. + * + * File.stat("testfile").grpowned? #=> true + * File.stat("/etc/passwd").grpowned? #=> false + * + */ + +static VALUE +rb_stat_grpowned(VALUE obj) +{ +#ifndef _WIN32 + if (group_member(get_stat(obj)->st_gid)) return Qtrue; +#endif + return Qfalse; +} + +/* + * call-seq: + * stat.readable? => true or false + * + * Returns true if stat is readable by the + * effective user id of this process. + * + * File.stat("testfile").readable? #=> true + * + */ + +static VALUE +rb_stat_r(VALUE obj) +{ + struct stat *st = get_stat(obj); + +#ifdef USE_GETEUID + if (geteuid() == 0) return Qtrue; +#endif +#ifdef S_IRUSR + if (rb_stat_owned(obj)) + return st->st_mode & S_IRUSR ? Qtrue : Qfalse; +#endif +#ifdef S_IRGRP + if (rb_stat_grpowned(obj)) + return st->st_mode & S_IRGRP ? Qtrue : Qfalse; +#endif +#ifdef S_IROTH + if (!(st->st_mode & S_IROTH)) return Qfalse; +#endif + return Qtrue; +} + +/* + * call-seq: + * stat.readable_real? -> true or false + * + * Returns true if stat is readable by the real + * user id of this process. + * + * File.stat("testfile").readable_real? #=> true + * + */ + +static VALUE +rb_stat_R(VALUE obj) +{ + struct stat *st = get_stat(obj); + +#ifdef USE_GETEUID + if (getuid() == 0) return Qtrue; +#endif +#ifdef S_IRUSR + if (rb_stat_rowned(obj)) + return st->st_mode & S_IRUSR ? Qtrue : Qfalse; +#endif +#ifdef S_IRGRP + if (group_member(get_stat(obj)->st_gid)) + return st->st_mode & S_IRGRP ? 
Qtrue : Qfalse; +#endif +#ifdef S_IROTH + if (!(st->st_mode & S_IROTH)) return Qfalse; +#endif + return Qtrue; +} + +/* + * call-seq: + * stat.world_readable? => fixnum or nil + * + * If stat is readable by others, returns an integer + * representing the file permission bits of stat. Returns + * nil otherwise. The meaning of the bits is platform + * dependent; on Unix systems, see stat(2). + * + * m = File.stat("/etc/passwd").world_readable? # => 420 + * sprintf("%o", m) # => "644" + */ + +static VALUE +rb_stat_wr(VALUE obj) +{ +#ifdef S_IROTH + if ((get_stat(obj)->st_mode & (S_IROTH)) == S_IROTH) { + return UINT2NUM(get_stat(obj)->st_mode & (S_IRUGO|S_IWUGO|S_IXUGO)); + } + else { + return Qnil; + } +#endif +} + +/* + * call-seq: + * stat.writable? -> true or false + * + * Returns true if stat is writable by the + * effective user id of this process. + * + * File.stat("testfile").writable? #=> true + * + */ + +static VALUE +rb_stat_w(VALUE obj) +{ + struct stat *st = get_stat(obj); + +#ifdef USE_GETEUID + if (geteuid() == 0) return Qtrue; +#endif +#ifdef S_IWUSR + if (rb_stat_owned(obj)) + return st->st_mode & S_IWUSR ? Qtrue : Qfalse; +#endif +#ifdef S_IWGRP + if (rb_stat_grpowned(obj)) + return st->st_mode & S_IWGRP ? Qtrue : Qfalse; +#endif +#ifdef S_IWOTH + if (!(st->st_mode & S_IWOTH)) return Qfalse; +#endif + return Qtrue; +} + +/* + * call-seq: + * stat.writable_real? -> true or false + * + * Returns true if stat is writable by the real + * user id of this process. + * + * File.stat("testfile").writable_real? #=> true + * + */ + +static VALUE +rb_stat_W(VALUE obj) +{ + struct stat *st = get_stat(obj); + +#ifdef USE_GETEUID + if (getuid() == 0) return Qtrue; +#endif +#ifdef S_IWUSR + if (rb_stat_rowned(obj)) + return st->st_mode & S_IWUSR ? Qtrue : Qfalse; +#endif +#ifdef S_IWGRP + if (group_member(get_stat(obj)->st_gid)) + return st->st_mode & S_IWGRP ? 
Qtrue : Qfalse; +#endif +#ifdef S_IWOTH + if (!(st->st_mode & S_IWOTH)) return Qfalse; +#endif + return Qtrue; +} + +/* + * call-seq: + * stat.world_writable? => fixnum or nil + * + * If stat is writable by others, returns an integer + * representing the file permission bits of stat. Returns + * nil otherwise. The meaning of the bits is platform + * dependent; on Unix systems, see stat(2). + * + * m = File.stat("/tmp").world_writable? # => 511 + * sprintf("%o", m) # => "777" + */ + +static VALUE +rb_stat_ww(VALUE obj) +{ +#ifdef S_IROTH + if ((get_stat(obj)->st_mode & (S_IWOTH)) == S_IWOTH) { + return UINT2NUM(get_stat(obj)->st_mode & (S_IRUGO|S_IWUGO|S_IXUGO)); + } + else { + return Qnil; + } +#endif +} + +/* + * call-seq: + * stat.executable? => true or false + * + * Returns true if stat is executable or if the + * operating system doesn't distinguish executable files from + * nonexecutable files. The tests are made using the effective owner of + * the process. + * + * File.stat("testfile").executable? #=> false + * + */ + +static VALUE +rb_stat_x(VALUE obj) +{ + struct stat *st = get_stat(obj); + +#ifdef USE_GETEUID + if (geteuid() == 0) { + return st->st_mode & S_IXUGO ? Qtrue : Qfalse; + } +#endif +#ifdef S_IXUSR + if (rb_stat_owned(obj)) + return st->st_mode & S_IXUSR ? Qtrue : Qfalse; +#endif +#ifdef S_IXGRP + if (rb_stat_grpowned(obj)) + return st->st_mode & S_IXGRP ? Qtrue : Qfalse; +#endif +#ifdef S_IXOTH + if (!(st->st_mode & S_IXOTH)) return Qfalse; +#endif + return Qtrue; +} + +/* + * call-seq: + * stat.executable_real? => true or false + * + * Same as executable?, but tests using the real owner of + * the process. + */ + +static VALUE +rb_stat_X(VALUE obj) +{ + struct stat *st = get_stat(obj); + +#ifdef USE_GETEUID + if (getuid() == 0) { + return st->st_mode & S_IXUGO ? Qtrue : Qfalse; + } +#endif +#ifdef S_IXUSR + if (rb_stat_rowned(obj)) + return st->st_mode & S_IXUSR ? 
Qtrue : Qfalse; +#endif +#ifdef S_IXGRP + if (group_member(get_stat(obj)->st_gid)) + return st->st_mode & S_IXGRP ? Qtrue : Qfalse; +#endif +#ifdef S_IXOTH + if (!(st->st_mode & S_IXOTH)) return Qfalse; +#endif + return Qtrue; +} + +/* + * call-seq: + * stat.file? => true or false + * + * Returns true if stat is a regular file (not + * a device file, pipe, socket, etc.). + * + * File.stat("testfile").file? #=> true + * + */ + +static VALUE +rb_stat_f(VALUE obj) +{ + if (S_ISREG(get_stat(obj)->st_mode)) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * stat.zero? => true or false + * + * Returns true if stat is a zero-length file; + * false otherwise. + * + * File.stat("testfile").zero? #=> false + * + */ + +static VALUE +rb_stat_z(VALUE obj) +{ + if (get_stat(obj)->st_size == 0) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * state.size => integer + * + * Returns the size of stat in bytes. + * + * File.stat("testfile").size #=> 66 + * + */ + +static VALUE +rb_stat_s(VALUE obj) +{ + off_t size = get_stat(obj)->st_size; + + if (size == 0) return Qnil; + return OFFT2NUM(size); +} + +/* + * call-seq: + * stat.setuid? => true or false + * + * Returns true if stat has the set-user-id + * permission bit set, false if it doesn't or if the + * operating system doesn't support this feature. + * + * File.stat("/bin/su").setuid? #=> true + */ + +static VALUE +rb_stat_suid(VALUE obj) +{ +#ifdef S_ISUID + if (get_stat(obj)->st_mode & S_ISUID) return Qtrue; +#endif + return Qfalse; +} + +/* + * call-seq: + * stat.setgid? => true or false + * + * Returns true if stat has the set-group-id + * permission bit set, false if it doesn't or if the + * operating system doesn't support this feature. + * + * File.stat("/usr/sbin/lpc").setgid? #=> true + * + */ + +static VALUE +rb_stat_sgid(VALUE obj) +{ +#ifdef S_ISGID + if (get_stat(obj)->st_mode & S_ISGID) return Qtrue; +#endif + return Qfalse; +} + +/* + * call-seq: + * stat.sticky? 
/*
 * Returns 1 when +path+ is absolute for the current platform, 0 otherwise.
 *
 * Which forms count as absolute is decided at compile time:
 *   - DOSISH_DRIVE_LETTER: a drive letter followed by a directory separator
 *   - DOSISH_UNC:          two leading directory separators
 *   - non-DOSISH builds:   a leading '/'
 */
static int
is_absolute_path(const char *path)
{
    int absolute = 0;

#ifdef DOSISH_DRIVE_LETTER
    if (!absolute && has_drive_letter(path) && isdirsep(path[2])) {
        absolute = 1;
    }
#endif
#ifdef DOSISH_UNC
    if (!absolute && isdirsep(path[0]) && isdirsep(path[1])) {
        absolute = 1;
    }
#endif
#ifndef DOSISH
    if (!absolute && path[0] == '/') {
        absolute = 1;
    }
#endif
    return absolute;
}
/*
 * Returns 1 when +path+ can be opened for reading and (except on DOSISH
 * platforms, where the check is skipped) refers to a regular file;
 * returns 0 otherwise.
 */
static int
file_load_ok(const char *path)
{
    int ret = 1;
    int mode = O_RDONLY;
    int fd;

    /*
     * A plain blocking open(2) on a FIFO waits until a writer shows up,
     * even though such a path is rejected below as "not a regular file".
     * Ask for a non-blocking open where the platform offers one; the
     * descriptor is closed before returning, so the flag never leaks.
     */
#if defined O_NONBLOCK
    mode |= O_NONBLOCK;
#elif defined O_NDELAY
    mode |= O_NDELAY;
#endif

    fd = open(path, mode);
    if (fd == -1) return 0;
#if !defined DOSISH
    {
        struct stat st;
        /* Directories, devices, pipes, sockets, ... are not loadable. */
        if (fstat(fd, &st) || !S_ISREG(st.st_mode)) {
            ret = 0;
        }
    }
#endif
    (void)close(fd);
    return ret;
}
/*
 * Resolves +path+ to a loadable file.
 *
 * Returns a string VALUE naming the file on success and 0 when no
 * loadable file is found (or when rb_get_load_path() yields nothing).
 * Raises SecurityError, depending on rb_safe_level(), when the path is
 * tainted, fails fpath_check(), or is non-absolute at safe level >= 4.
 */
VALUE
rb_find_file(VALUE path)
{
    VALUE tmp, load_path;
    const char *f = StringValueCStr(path);

    /* "~..." is expanded first; the expanded string is frozen. */
    if (f[0] == '~') {
        path = rb_file_expand_path(path, Qnil);
        if (rb_safe_level() >= 1 && OBJ_TAINTED(path)) {
            rb_raise(rb_eSecurityError, "loading from unsafe path %s", f);
        }
        OBJ_FREEZE(path);
        f = StringValueCStr(path);
    }

    /* Absolute and "./" / "../" paths bypass the load path entirely. */
    if (is_absolute_path(f) || is_explicit_relative(f)) {
        if (rb_safe_level() >= 1 && !fpath_check(f)) {
            rb_raise(rb_eSecurityError, "loading from unsafe file %s", f);
        }
        if (!file_load_ok(f)) return 0;
        /* Explicit-relative paths are returned in expanded form. */
        if (!is_absolute_path(f)) path = rb_file_expand_path(path, Qnil);
        return path;
    }

    if (rb_safe_level() >= 4) {
        rb_raise(rb_eSecurityError, "loading from non-absolute path %s", f);
    }

    /* Try each non-empty load path entry in order; first loadable hit wins. */
    RB_GC_GUARD(load_path) = rb_get_load_path();
    if (load_path) {
        long i;

        /* Scratch string that receives each expanded candidate path. */
        tmp = rb_str_tmp_new(MAXPATHLEN + 2);
        for (i = 0; i < RARRAY_LEN(load_path); i++) {
            VALUE str = RARRAY_PTR(load_path)[i];
            FilePathValue(str);
            if (RSTRING_LEN(str) > 0) {
                file_expand_path(path, str, 0, tmp);
                f = RSTRING_PTR(tmp);
                if (file_load_ok(f)) goto found;
            }
        }
        return 0;
      found:
        /* Give the scratch string the class of +path+ and freeze it. */
        RBASIC(tmp)->klass = rb_obj_class(path);
        OBJ_FREEZE(tmp);
    }
    else {
        return 0;               /* no path, no load */
    }

    /* f still points into tmp here, i.e. at the file that was found. */
    if (rb_safe_level() >= 1 && !fpath_check(f)) {
        rb_raise(rb_eSecurityError, "loading from unsafe file %s", f);
    }

    return tmp;
}
/*
 * Defines the FileTest module, the File class (a subclass of IO), the
 * File::Constants module and the File::Stat class, and registers their
 * methods and constants.
 */
void
Init_File(void)
{
    rb_mFileTest = rb_define_module("FileTest");
    rb_cFile = rb_define_class("File", rb_cIO);

    /*
     * File tests: define_filetest_function() registers each one both as
     * a FileTest module function and as a File singleton method.
     */
    define_filetest_function("directory?", rb_file_directory_p, 1);
    define_filetest_function("exist?", rb_file_exist_p, 1);
    define_filetest_function("exists?", rb_file_exist_p, 1);
    define_filetest_function("readable?", rb_file_readable_p, 1);
    define_filetest_function("readable_real?", rb_file_readable_real_p, 1);
    define_filetest_function("world_readable?", rb_file_world_readable_p, 1);
    define_filetest_function("writable?", rb_file_writable_p, 1);
    define_filetest_function("writable_real?", rb_file_writable_real_p, 1);
    define_filetest_function("world_writable?", rb_file_world_writable_p, 1);
    define_filetest_function("executable?", rb_file_executable_p, 1);
    define_filetest_function("executable_real?", rb_file_executable_real_p, 1);
    define_filetest_function("file?", rb_file_file_p, 1);
    define_filetest_function("zero?", rb_file_zero_p, 1);
    define_filetest_function("size?", rb_file_size_p, 1);
    define_filetest_function("size", rb_file_s_size, 1);
    define_filetest_function("owned?", rb_file_owned_p, 1);
    define_filetest_function("grpowned?", rb_file_grpowned_p, 1);

    define_filetest_function("pipe?", rb_file_pipe_p, 1);
    define_filetest_function("symlink?", rb_file_symlink_p, 1);
    define_filetest_function("socket?", rb_file_socket_p, 1);

    define_filetest_function("blockdev?", rb_file_blockdev_p, 1);
    define_filetest_function("chardev?", rb_file_chardev_p, 1);

    define_filetest_function("setuid?", rb_file_suid_p, 1);
    define_filetest_function("setgid?", rb_file_sgid_p, 1);
    define_filetest_function("sticky?", rb_file_sticky_p, 1);

    define_filetest_function("identical?", rb_file_identical_p, 2);

    /* File singleton (class-level) methods. */
    rb_define_singleton_method(rb_cFile, "stat", rb_file_s_stat, 1);
    rb_define_singleton_method(rb_cFile, "lstat", rb_file_s_lstat, 1);
    rb_define_singleton_method(rb_cFile, "ftype", rb_file_s_ftype, 1);

    rb_define_singleton_method(rb_cFile, "atime", rb_file_s_atime, 1);
    rb_define_singleton_method(rb_cFile, "mtime", rb_file_s_mtime, 1);
    rb_define_singleton_method(rb_cFile, "ctime", rb_file_s_ctime, 1);

    rb_define_singleton_method(rb_cFile, "utime", rb_file_s_utime, -1);
    rb_define_singleton_method(rb_cFile, "chmod", rb_file_s_chmod, -1);
    rb_define_singleton_method(rb_cFile, "chown", rb_file_s_chown, -1);
    rb_define_singleton_method(rb_cFile, "lchmod", rb_file_s_lchmod, -1);
    rb_define_singleton_method(rb_cFile, "lchown", rb_file_s_lchown, -1);

    rb_define_singleton_method(rb_cFile, "link", rb_file_s_link, 2);
    rb_define_singleton_method(rb_cFile, "symlink", rb_file_s_symlink, 2);
    rb_define_singleton_method(rb_cFile, "readlink", rb_file_s_readlink, 1);

    /* "unlink" and "delete" share one implementation. */
    rb_define_singleton_method(rb_cFile, "unlink", rb_file_s_unlink, -2);
    rb_define_singleton_method(rb_cFile, "delete", rb_file_s_unlink, -2);
    rb_define_singleton_method(rb_cFile, "rename", rb_file_s_rename, 2);
    rb_define_singleton_method(rb_cFile, "umask", rb_file_s_umask, -1);
    rb_define_singleton_method(rb_cFile, "truncate", rb_file_s_truncate, 2);
    rb_define_singleton_method(rb_cFile, "expand_path", rb_file_s_expand_path, -1);
    rb_define_singleton_method(rb_cFile, "absolute_path", rb_file_s_absolute_path, -1);
    rb_define_singleton_method(rb_cFile, "basename", rb_file_s_basename, -1);
    rb_define_singleton_method(rb_cFile, "dirname", rb_file_s_dirname, 1);
    rb_define_singleton_method(rb_cFile, "extname", rb_file_s_extname, 1);
    rb_define_singleton_method(rb_cFile, "path", rb_file_s_path, 1);

    /* One frozen "/" string backs both Separator and SEPARATOR. */
    separator = rb_obj_freeze(rb_usascii_str_new2("/"));
    rb_define_const(rb_cFile, "Separator", separator);
    rb_define_const(rb_cFile, "SEPARATOR", separator);
    rb_define_singleton_method(rb_cFile, "split", rb_file_s_split, 1);
    rb_define_singleton_method(rb_cFile, "join", rb_file_s_join, -2);

    /* ALT_SEPARATOR is "\" on DOSISH builds and nil everywhere else. */
#ifdef DOSISH
    rb_define_const(rb_cFile, "ALT_SEPARATOR", rb_obj_freeze(rb_usascii_str_new2("\\")));
#else
    rb_define_const(rb_cFile, "ALT_SEPARATOR", Qnil);
#endif
    rb_define_const(rb_cFile, "PATH_SEPARATOR", rb_obj_freeze(rb_str_new2(PATH_SEP)));

    /* Instance methods. */
    rb_define_method(rb_cIO, "stat", rb_io_stat, 0); /* this is IO's method */
    rb_define_method(rb_cFile, "lstat", rb_file_lstat, 0);

    rb_define_method(rb_cFile, "atime", rb_file_atime, 0);
    rb_define_method(rb_cFile, "mtime", rb_file_mtime, 0);
    rb_define_method(rb_cFile, "ctime", rb_file_ctime, 0);

    rb_define_method(rb_cFile, "chmod", rb_file_chmod, 1);
    rb_define_method(rb_cFile, "chown", rb_file_chown, 2);
    rb_define_method(rb_cFile, "truncate", rb_file_truncate, 1);

    rb_define_method(rb_cFile, "flock", rb_file_flock, 1);

    /*
     * File::Constants is included into IO; rb_file_const() defines each
     * constant on rb_mFConst, so it must be called after this assignment.
     */
    rb_mFConst = rb_define_module_under(rb_cFile, "Constants");
    rb_include_module(rb_cIO, rb_mFConst);
    rb_file_const("LOCK_SH", INT2FIX(LOCK_SH));
    rb_file_const("LOCK_EX", INT2FIX(LOCK_EX));
    rb_file_const("LOCK_UN", INT2FIX(LOCK_UN));
    rb_file_const("LOCK_NB", INT2FIX(LOCK_NB));

    rb_define_method(rb_cFile, "path", rb_file_path, 0);
    rb_define_method(rb_cFile, "to_path", rb_file_path, 0);
    rb_define_global_function("test", rb_f_test, -1);

    /* File::Stat: a Comparable class ordered by rb_stat_cmp. */
    rb_cStat = rb_define_class_under(rb_cFile, "Stat", rb_cObject);
    rb_define_alloc_func(rb_cStat, rb_stat_s_alloc);
    rb_define_method(rb_cStat, "initialize", rb_stat_init, 1);
    rb_define_method(rb_cStat, "initialize_copy", rb_stat_init_copy, 1);

    rb_include_module(rb_cStat, rb_mComparable);

    rb_define_method(rb_cStat, "<=>", rb_stat_cmp, 1);

    /* File::Stat field accessors. */
    rb_define_method(rb_cStat, "dev", rb_stat_dev, 0);
    rb_define_method(rb_cStat, "dev_major", rb_stat_dev_major, 0);
    rb_define_method(rb_cStat, "dev_minor", rb_stat_dev_minor, 0);
    rb_define_method(rb_cStat, "ino", rb_stat_ino, 0);
    rb_define_method(rb_cStat, "mode", rb_stat_mode, 0);
    rb_define_method(rb_cStat, "nlink", rb_stat_nlink, 0);
    rb_define_method(rb_cStat, "uid", rb_stat_uid, 0);
    rb_define_method(rb_cStat, "gid", rb_stat_gid, 0);
    rb_define_method(rb_cStat, "rdev", rb_stat_rdev, 0);
    rb_define_method(rb_cStat, "rdev_major", rb_stat_rdev_major, 0);
    rb_define_method(rb_cStat, "rdev_minor", rb_stat_rdev_minor, 0);
    rb_define_method(rb_cStat, "size", rb_stat_size, 0);
    rb_define_method(rb_cStat, "blksize", rb_stat_blksize, 0);
    rb_define_method(rb_cStat, "blocks", rb_stat_blocks, 0);
    rb_define_method(rb_cStat, "atime", rb_stat_atime, 0);
    rb_define_method(rb_cStat, "mtime", rb_stat_mtime, 0);
    rb_define_method(rb_cStat, "ctime", rb_stat_ctime, 0);

    rb_define_method(rb_cStat, "inspect", rb_stat_inspect, 0);

    rb_define_method(rb_cStat, "ftype", rb_stat_ftype, 0);

    /* File::Stat predicates. */
    rb_define_method(rb_cStat, "directory?", rb_stat_d, 0);
    rb_define_method(rb_cStat, "readable?", rb_stat_r, 0);
    rb_define_method(rb_cStat, "readable_real?", rb_stat_R, 0);
    rb_define_method(rb_cStat, "world_readable?", rb_stat_wr, 0);
    rb_define_method(rb_cStat, "writable?", rb_stat_w, 0);
    rb_define_method(rb_cStat, "writable_real?", rb_stat_W, 0);
    rb_define_method(rb_cStat, "world_writable?", rb_stat_ww, 0);
    rb_define_method(rb_cStat, "executable?", rb_stat_x, 0);
    rb_define_method(rb_cStat, "executable_real?", rb_stat_X, 0);
    rb_define_method(rb_cStat, "file?", rb_stat_f, 0);
    rb_define_method(rb_cStat, "zero?", rb_stat_z, 0);
    rb_define_method(rb_cStat, "size?", rb_stat_s, 0);
    rb_define_method(rb_cStat, "owned?", rb_stat_owned, 0);
    rb_define_method(rb_cStat, "grpowned?", rb_stat_grpowned, 0);

    rb_define_method(rb_cStat, "pipe?", rb_stat_p, 0);
    rb_define_method(rb_cStat, "symlink?", rb_stat_l, 0);
    rb_define_method(rb_cStat, "socket?", rb_stat_S, 0);

    rb_define_method(rb_cStat, "blockdev?", rb_stat_b, 0);
    rb_define_method(rb_cStat, "chardev?", rb_stat_c, 0);

    rb_define_method(rb_cStat, "setuid?", rb_stat_suid, 0);
    rb_define_method(rb_cStat, "setgid?", rb_stat_sgid, 0);
    rb_define_method(rb_cStat, "sticky?", rb_stat_sticky, 0);
}
mode 100644 index 0000000..05804a1 --- /dev/null +++ b/gc.c @@ -0,0 +1,2925 @@ +/********************************************************************** + + gc.c - + + $Author: yugui $ + created at: Tue Oct 5 09:44:46 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. + Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/st.h" +#include "ruby/re.h" +#include "ruby/io.h" +#include "ruby/util.h" +#include "eval_intern.h" +#include "vm_core.h" +#include "gc.h" +#include +#include +#include + +#ifdef HAVE_SYS_TIME_H +#include +#endif + +#ifdef HAVE_SYS_RESOURCE_H +#include +#endif + +#if defined _WIN32 || defined __CYGWIN__ +#include +#endif + +#ifdef HAVE_VALGRIND_MEMCHECK_H +# include +# ifndef VALGRIND_MAKE_MEM_DEFINED +# define VALGRIND_MAKE_MEM_DEFINED(p, n) VALGRIND_MAKE_READABLE(p, n) +# endif +# ifndef VALGRIND_MAKE_MEM_UNDEFINED +# define VALGRIND_MAKE_MEM_UNDEFINED(p, n) VALGRIND_MAKE_WRITABLE(p, n) +# endif +#else +# define VALGRIND_MAKE_MEM_DEFINED(p, n) /* empty */ +# define VALGRIND_MAKE_MEM_UNDEFINED(p, n) /* empty */ +#endif + +int rb_io_fptr_finalize(struct rb_io_t*); + +#define rb_setjmp(env) RUBY_SETJMP(env) +#define rb_jmp_buf rb_jmpbuf_t + +/* Make alloca work the best possible way. 
/*
 * Returns the user CPU time consumed by the current process, in seconds.
 *
 * Uses getrusage(2) where RUSAGE_SELF is available and GetProcessTimes()
 * on _WIN32. Returns 0.0 when neither facility exists or when the
 * underlying call fails.
 */
static double
getrusage_time(void)
{
#ifdef RUSAGE_SELF
    struct rusage usage;
    struct timeval time;

    /* On failure `usage` is not filled in; do not read it. */
    if (getrusage(RUSAGE_SELF, &usage) != 0) {
        return 0.0;
    }
    time = usage.ru_utime;
    return time.tv_sec + time.tv_usec * 1e-6;
#elif defined _WIN32
    FILETIME creation_time, exit_time, kernel_time, user_time;
    ULARGE_INTEGER ui;
    LONG_LONG q;
    double t;

    if (GetProcessTimes(GetCurrentProcess(),
                        &creation_time, &exit_time, &kernel_time, &user_time) == 0)
    {
        return 0.0;
    }
    memcpy(&ui, &user_time, sizeof(FILETIME));
    q = ui.QuadPart / 10L;              /* 100ns units -> microseconds */
    t = (DWORD)(q % 1000000L) * 1e-6;   /* fractional seconds */
    q /= 1000000L;                      /* whole seconds */
#ifdef __GNUC__
    t += q;
#else
    /* Convert the 64-bit second count in two halves (high bits, low 16). */
    t += (double)(DWORD)(q >> 16) * (1 << 16);
    /* 0xFFFFU is the value of the former ~(~0 << 16), without shifting
     * a negative int. */
    t += (DWORD)q & 0xFFFFU;
#endif
    return t;
#else
    return 0.0;
#endif
}
/*
 * Ends the timing of one GC run. Does nothing unless profiling is on
 * (objspace->profile.run). Otherwise it turns gc_time from a start
 * timestamp into an elapsed time (clamped at 0), stores it in
 * profile.record[count].gc_time and advances profile.count.
 *
 * Expects `objspace`, `gc_time` and `count` to be in scope at the point
 * of expansion; INIT_GC_PROF_PARAMS declares the latter two.
 */
#define GC_PROF_TIMER_STOP do {\
        if (objspace->profile.run) {\
            gc_time = getrusage_time() - gc_time;\
            if (gc_time < 0) gc_time = 0;\
            objspace->profile.record[count].gc_time = gc_time;\
            objspace->profile.count++;\
        }\
    } while(0)
size_t count = objspace->profile.count;\ + objspace->profile.record[count].heap_use_slots = heaps_used;\ + objspace->profile.record[count].heap_live_objects = live;\ + objspace->profile.record[count].heap_free_objects = freed;\ + objspace->profile.record[count].heap_total_objects = heaps_used * HEAP_OBJ_LIMIT;\ + objspace->profile.record[count].have_finalize = final_list ? Qtrue : Qfalse;\ + objspace->profile.record[count].heap_use_size = live * sizeof(RVALUE);\ + objspace->profile.record[count].heap_total_size = heaps_used * (HEAP_OBJ_LIMIT * sizeof(RVALUE));\ + }\ + } while(0) + +#else +#define INIT_GC_PROF_PARAMS double gc_time = 0;\ + size_t count = objspace->profile.count +#define GC_PROF_MARK_TIMER_START +#define GC_PROF_MARK_TIMER_STOP +#define GC_PROF_SWEEP_TIMER_START +#define GC_PROF_SWEEP_TIMER_STOP +#define GC_PROF_SET_MALLOC_INFO +#define GC_PROF_SET_HEAP_INFO do {\ + if (objspace->profile.run) {\ + size_t count = objspace->profile.count;\ + objspace->profile.record[count].heap_total_objects = heaps_used * HEAP_OBJ_LIMIT;\ + objspace->profile.record[count].heap_use_size = live * sizeof(RVALUE);\ + objspace->profile.record[count].heap_total_size = heaps_used * HEAP_SIZE;\ + }\ + } while(0) +#endif + + +#if defined(_MSC_VER) || defined(__BORLANDC__) || defined(__CYGWIN__) +#pragma pack(push, 1) /* magic for reducing sizeof(RVALUE): 24 -> 20 */ +#endif + +typedef struct RVALUE { + union { + struct { + VALUE flags; /* always 0 for freed obj */ + struct RVALUE *next; + } free; + struct RBasic basic; + struct RObject object; + struct RClass klass; + struct RFloat flonum; + struct RString string; + struct RArray array; + struct RRegexp regexp; + struct RHash hash; + struct RData data; + struct RStruct rstruct; + struct RBignum bignum; + struct RFile file; + struct RNode node; + struct RMatch match; + struct RRational rational; + struct RComplex complex; + } as; +#ifdef GC_DEBUG + const char *file; + int line; +#endif +} RVALUE; + +#if defined(_MSC_VER) || 
/*
 * All state of one object space (heap plus allocator bookkeeping).
 * Most members are reached through the short macro aliases defined after
 * this struct (malloc_limit, heaps, freelist, dont_gc, ...), each of
 * which expects a local variable named `objspace`.
 */
typedef struct rb_objspace {
    struct {
        size_t limit;           /* vm_xmalloc runs GC once increase + request exceeds this */
        size_t increase;        /* grows by the request size on every xmalloc/xrealloc */
#if CALC_EXACT_MALLOC_SIZE
        size_t allocated_size;  /* bytes currently allocated, incl. per-block size header */
        size_t allocations;     /* number of live allocations */
#endif
    } malloc_params;
    struct {
        size_t increment;       /* alias heaps_inc */
        struct heaps_slot *ptr; /* alias heaps: array (re)sized by allocate_heaps() */
        size_t length;          /* alias heaps_length: set by allocate_heaps() to the array's entry count */
        size_t used;            /* alias heaps_used */
        RVALUE *freelist;       /* alias freelist */
        RVALUE *range[2];       /* [0] alias lomem, [1] alias himem */
        RVALUE *freed;          /* alias heaps_freed */
    } heap;
    struct {
        int dont_gc;            /* set by rb_gc_disable(), cleared by rb_gc_enable() */
        int during_gc;          /* cleared by allocate_heaps() before raising NoMemoryError */
    } flags;
    struct {
        st_table *table;        /* alias finalizer_table */
        RVALUE *deferred;       /* alias deferred_final_list */
    } final;
    struct {
        VALUE buffer[MARK_STACK_MAX];   /* alias mark_stack */
        VALUE *ptr;                     /* alias mark_stack_ptr */
        int overflow;                   /* alias mark_stack_overflow */
    } markstack;
    struct {
        int run;                    /* profiling switch (GC::Profiler.enable / disable) */
        gc_profile_record *record;  /* lazily allocated; grown in steps of 1000 entries */
        size_t count;               /* number of records filled so far */
        size_t size;                /* capacity of `record`, in entries */
        double invoke_time;         /* subtracted from the GC start time to get gc_invoke_time */
    } profile;
    struct gc_list *global_list;    /* alias global_List: addresses added by rb_gc_register_address() */
    unsigned int count;             /* NOTE(review): presumably a GC run counter -- not shown here, confirm */
    int gc_stress;                  /* alias ruby_gc_stress: GC before every allocation when set */
} rb_objspace_t;
#if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
/*
 * Allocates and initialises a fresh object space: every member is zeroed,
 * then the malloc limit is set to GC_MALLOC_LIMIT and the GC stress flag
 * to ruby_initial_gc_stress.
 *
 * Never returns NULL. If the allocation fails the process exits with the
 * same "[FATAL]" message ruby_memerror() prints when there is no Ruby
 * thread; before this check a failed malloc() was passed to memset().
 */
rb_objspace_t *
rb_objspace_alloc(void)
{
    /* calloc() gives the same all-zero object as malloc() + memset(). */
    rb_objspace_t *objspace = calloc(1, sizeof(*objspace));

    if (!objspace) {
        fprintf(stderr, "[FATAL] failed to allocate memory\n");
        exit(EXIT_FAILURE);
    }
    malloc_limit = GC_MALLOC_LIMIT;
    ruby_gc_stress = ruby_initial_gc_stress;

    return objspace;
}
#endif
exit(EXIT_FAILURE); + } + if (rb_thread_raised_p(th, RAISED_NOMEMORY)) { + rb_thread_raised_clear(th); + GET_THREAD()->errinfo = nomem_error; + JUMP_TAG(TAG_RAISE); + } + rb_thread_raised_set(th, RAISED_NOMEMORY); + rb_exc_raise(nomem_error); +} + +/* + * call-seq: + * GC.stress => true or false + * + * returns current status of GC stress mode. + */ + +static VALUE +gc_stress_get(VALUE self) +{ + rb_objspace_t *objspace = &rb_objspace; + return ruby_gc_stress ? Qtrue : Qfalse; +} + +/* + * call-seq: + * GC.stress = bool => bool + * + * updates GC stress mode. + * + * When GC.stress = true, GC is invoked for all GC opportunity: + * all memory and object allocation. + * + * Since it makes Ruby very slow, it is only for debugging. + */ + +static VALUE +gc_stress_set(VALUE self, VALUE bool) +{ + rb_objspace_t *objspace = &rb_objspace; + rb_secure(2); + ruby_gc_stress = RTEST(bool); + return bool; +} + +/* + * call-seq: + * GC::Profiler.enable? => true or false + * + * returns current status of GC profile mode. + */ + +static VALUE +gc_profile_enable_get(VALUE self) +{ + rb_objspace_t *objspace = &rb_objspace; + return objspace->profile.run; +} + +/* + * call-seq: + * GC::Profiler.enable => nil + * + * updates GC profile mode. + * start profiler for GC. + * + */ + +static VALUE +gc_profile_enable(void) +{ + rb_objspace_t *objspace = &rb_objspace; + + objspace->profile.run = Qtrue; + return Qnil; +} + +/* + * call-seq: + * GC::Profiler.disable => nil + * + * updates GC profile mode. + * stop profiler for GC. + * + */ + +static VALUE +gc_profile_disable(void) +{ + rb_objspace_t *objspace = &rb_objspace; + + objspace->profile.run = Qfalse; + return Qnil; +} + +/* + * call-seq: + * GC::Profiler.clear => nil + * + * clear before profile data. 
+ * + */ + +static VALUE +gc_profile_clear(void) +{ + rb_objspace_t *objspace = &rb_objspace; + MEMZERO(objspace->profile.record, gc_profile_record, objspace->profile.size); + objspace->profile.count = 0; + return Qnil; +} + +static void * +negative_size_allocation_error_with_gvl(void *ptr) +{ + rb_raise(rb_eNoMemError, "%s", (const char *)ptr); + return 0; /* should not be reached */ +} + +static void +negative_size_allocation_error(const char *msg) +{ + if (ruby_thread_has_gvl_p()) { + rb_raise(rb_eNoMemError, "%s", msg); + } + else { + if (ruby_native_thread_p()) { + rb_thread_call_with_gvl(negative_size_allocation_error_with_gvl, (void *)msg); + } + else { + fprintf(stderr, "[FATAL] %s\n", msg); + exit(EXIT_FAILURE); + } + } +} + +static void * +gc_with_gvl(void *ptr) +{ + return (void *)(VALUE)garbage_collect((rb_objspace_t *)ptr); +} + +static int +garbage_collect_with_gvl(rb_objspace_t *objspace) +{ + if (ruby_thread_has_gvl_p()) { + return garbage_collect(objspace); + } + else { + if (ruby_native_thread_p()) { + return (int)rb_thread_call_with_gvl(gc_with_gvl, (void *)objspace); + } + else { + /* no ruby thread */ + fprintf(stderr, "[FATAL] failed to allocate memory\n"); + exit(EXIT_FAILURE); + } + } +} + +static void * +vm_xmalloc(rb_objspace_t *objspace, size_t size) +{ + void *mem; + + if (size < 0) { + negative_size_allocation_error("negative allocation size (or too big)"); + } + if (size == 0) size = 1; + +#if CALC_EXACT_MALLOC_SIZE + size += sizeof(size_t); +#endif + + if ((ruby_gc_stress && !ruby_disable_gc_stress) || + (malloc_increase+size) > malloc_limit) { + garbage_collect_with_gvl(objspace); + } + mem = malloc(size); + if (!mem) { + if (garbage_collect_with_gvl(objspace)) { + mem = malloc(size); + } + if (!mem) { + ruby_memerror(); + } + } + malloc_increase += size; + +#if CALC_EXACT_MALLOC_SIZE + objspace->malloc_params.allocated_size += size; + objspace->malloc_params.allocations++; + ((size_t *)mem)[0] = size; + mem = (size_t *)mem + 1; 
+#endif + + return mem; +} + +static void * +vm_xrealloc(rb_objspace_t *objspace, void *ptr, size_t size) +{ + void *mem; + + if (size < 0) { + negative_size_allocation_error("negative re-allocation size"); + } + if (!ptr) return ruby_xmalloc(size); + if (size == 0) size = 1; + if (ruby_gc_stress && !ruby_disable_gc_stress) + garbage_collect_with_gvl(objspace); + +#if CALC_EXACT_MALLOC_SIZE + size += sizeof(size_t); + objspace->malloc_params.allocated_size -= size; + ptr = (size_t *)ptr - 1; +#endif + + mem = realloc(ptr, size); + if (!mem) { + if (garbage_collect_with_gvl(objspace)) { + mem = realloc(ptr, size); + } + if (!mem) { + ruby_memerror(); + } + } + malloc_increase += size; + +#if CALC_EXACT_MALLOC_SIZE + objspace->malloc_params.allocated_size += size; + ((size_t *)mem)[0] = size; + mem = (size_t *)mem + 1; +#endif + + return mem; +} + +static void +vm_xfree(rb_objspace_t *objspace, void *ptr) +{ +#if CALC_EXACT_MALLOC_SIZE + size_t size; + ptr = ((size_t *)ptr) - 1; + size = ((size_t*)ptr)[0]; + objspace->malloc_params.allocated_size -= size; + objspace->malloc_params.allocations--; +#endif + + free(ptr); +} + +void * +ruby_xmalloc(size_t size) +{ + return vm_xmalloc(&rb_objspace, size); +} + +void * +ruby_xmalloc2(size_t n, size_t size) +{ + size_t len = size * n; + if (n != 0 && size != len / n) { + rb_raise(rb_eArgError, "malloc: possible integer overflow"); + } + return vm_xmalloc(&rb_objspace, len); +} + +void * +ruby_xcalloc(size_t n, size_t size) +{ + void *mem = ruby_xmalloc2(n, size); + memset(mem, 0, n * size); + + return mem; +} + +void * +ruby_xrealloc(void *ptr, size_t size) +{ + return vm_xrealloc(&rb_objspace, ptr, size); +} + +void * +ruby_xrealloc2(void *ptr, size_t n, size_t size) +{ + size_t len = size * n; + if (n != 0 && size != len / n) { + rb_raise(rb_eArgError, "realloc: possible integer overflow"); + } + return ruby_xrealloc(ptr, len); +} + +void +ruby_xfree(void *x) +{ + if (x) + vm_xfree(&rb_objspace, x); +} + + +/* + * 
call-seq: + * GC.enable => true or false + * + * Enables garbage collection, returning true if garbage + * collection was previously disabled. + * + * GC.disable #=> false + * GC.enable #=> true + * GC.enable #=> false + * + */ + +VALUE +rb_gc_enable(void) +{ + rb_objspace_t *objspace = &rb_objspace; + int old = dont_gc; + + dont_gc = Qfalse; + return old; +} + +/* + * call-seq: + * GC.disable => true or false + * + * Disables garbage collection, returning true if garbage + * collection was already disabled. + * + * GC.disable #=> false + * GC.disable #=> true + * + */ + +VALUE +rb_gc_disable(void) +{ + rb_objspace_t *objspace = &rb_objspace; + int old = dont_gc; + + dont_gc = Qtrue; + return old; +} + +VALUE rb_mGC; + +void +rb_gc_register_mark_object(VALUE obj) +{ + VALUE ary = GET_THREAD()->vm->mark_object_ary; + rb_ary_push(ary, obj); +} + +void +rb_gc_register_address(VALUE *addr) +{ + rb_objspace_t *objspace = &rb_objspace; + struct gc_list *tmp; + + tmp = ALLOC(struct gc_list); + tmp->next = global_List; + tmp->varptr = addr; + global_List = tmp; +} + +void +rb_gc_unregister_address(VALUE *addr) +{ + rb_objspace_t *objspace = &rb_objspace; + struct gc_list *tmp = global_List; + + if (tmp->varptr == addr) { + global_List = tmp->next; + xfree(tmp); + return; + } + while (tmp->next) { + if (tmp->next->varptr == addr) { + struct gc_list *t = tmp->next; + + tmp->next = tmp->next->next; + xfree(t); + break; + } + tmp = tmp->next; + } +} + + +static void +allocate_heaps(rb_objspace_t *objspace, size_t next_heaps_length) +{ + struct heaps_slot *p; + size_t size; + + size = next_heaps_length*sizeof(struct heaps_slot); + + if (heaps_used > 0) { + p = (struct heaps_slot *)realloc(heaps, size); + if (p) heaps = p; + } + else { + p = heaps = (struct heaps_slot *)malloc(size); + } + + if (p == 0) { + during_gc = 0; + rb_memerror(); + } + heaps_length = next_heaps_length; +} + +static void +assign_heap_slot(rb_objspace_t *objspace) +{ + RVALUE *p, *pend, *membase; + size_t 
hi, lo, mid; + int objs; + + objs = HEAP_OBJ_LIMIT; + p = (RVALUE*)malloc(HEAP_SIZE); + + if (p == 0) { + during_gc = 0; + rb_memerror(); + } + + membase = p; + if ((VALUE)p % sizeof(RVALUE) != 0) { + p = (RVALUE*)((VALUE)p + sizeof(RVALUE) - ((VALUE)p % sizeof(RVALUE))); + if ((HEAP_SIZE - HEAP_OBJ_LIMIT * sizeof(RVALUE)) < ((char*)p - (char*)membase)) { + objs--; + } + } + + lo = 0; + hi = heaps_used; + while (lo < hi) { + register RVALUE *mid_membase; + mid = (lo + hi) / 2; + mid_membase = heaps[mid].membase; + if (mid_membase < membase) { + lo = mid + 1; + } + else if (mid_membase > membase) { + hi = mid; + } + else { + rb_bug("same heap slot is allocated: %p at %"PRIuVALUE, (void *)membase, (VALUE)mid); + } + } + if (hi < heaps_used) { + MEMMOVE(&heaps[hi+1], &heaps[hi], struct heaps_slot, heaps_used - hi); + } + heaps[hi].membase = membase; + heaps[hi].slot = p; + heaps[hi].limit = objs; + pend = p + objs; + if (lomem == 0 || lomem > p) lomem = p; + if (himem < pend) himem = pend; + heaps_used++; + + while (p < pend) { + p->as.free.flags = 0; + p->as.free.next = freelist; + freelist = p; + p++; + } +} + +static void +init_heap(rb_objspace_t *objspace) +{ + size_t add, i; + + add = HEAP_MIN_SLOTS / HEAP_OBJ_LIMIT; + + if ((heaps_used + add) > heaps_length) { + allocate_heaps(objspace, heaps_used + add); + } + + for (i = 0; i < add; i++) { + assign_heap_slot(objspace); + } + heaps_inc = 0; + objspace->profile.invoke_time = getrusage_time(); +} + + +static void +set_heaps_increment(rb_objspace_t *objspace) +{ + size_t next_heaps_length = heaps_used * 1.8; + heaps_inc = next_heaps_length - heaps_used; + + if (next_heaps_length > heaps_length) { + allocate_heaps(objspace, next_heaps_length); + } +} + +static int +heaps_increment(rb_objspace_t *objspace) +{ + if (heaps_inc > 0) { + assign_heap_slot(objspace); + heaps_inc--; + return Qtrue; + } + return Qfalse; +} + +#define RANY(o) ((RVALUE*)(o)) + +static VALUE +rb_newobj_from_heap(rb_objspace_t *objspace) +{ + 
VALUE obj; + + if ((ruby_gc_stress && !ruby_disable_gc_stress) || !freelist) { + if (!heaps_increment(objspace) && !garbage_collect(objspace)) { + during_gc = 0; + rb_memerror(); + } + } + + obj = (VALUE)freelist; + freelist = freelist->as.free.next; + + MEMZERO((void*)obj, RVALUE, 1); +#ifdef GC_DEBUG + RANY(obj)->file = rb_sourcefile(); + RANY(obj)->line = rb_sourceline(); +#endif + + return obj; +} + +#if USE_VALUE_CACHE +static VALUE +rb_fill_value_cache(rb_thread_t *th) +{ + rb_objspace_t *objspace = &rb_objspace; + int i; + VALUE rv; + + /* LOCK */ + for (i=0; ivalue_cache[i] = v; + RBASIC(v)->flags = FL_MARK; + } + th->value_cache_ptr = &th->value_cache[0]; + rv = rb_newobj_from_heap(objspace); + /* UNLOCK */ + return rv; +} +#endif + +int +rb_during_gc(void) +{ + rb_objspace_t *objspace = &rb_objspace; + return during_gc; +} + +VALUE +rb_newobj(void) +{ +#if USE_VALUE_CACHE || (defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE) + rb_thread_t *th = GET_THREAD(); +#endif +#if USE_VALUE_CACHE + VALUE v = *th->value_cache_ptr; +#endif +#if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE + rb_objspace_t *objspace = th->vm->objspace; +#else + rb_objspace_t *objspace = &rb_objspace; +#endif + + if (during_gc) { + dont_gc = 1; + during_gc = 0; + rb_bug("object allocation during garbage collection phase"); + } + +#if USE_VALUE_CACHE + if (v) { + RBASIC(v)->flags = 0; + th->value_cache_ptr++; + } + else { + v = rb_fill_value_cache(th); + } + +#if defined(GC_DEBUG) + printf("cache index: %d, v: %p, th: %p\n", + th->value_cache_ptr - th->value_cache, v, th); +#endif + return v; +#else + return rb_newobj_from_heap(objspace); +#endif +} + +NODE* +rb_node_newnode(enum node_type type, VALUE a0, VALUE a1, VALUE a2) +{ + NODE *n = (NODE*)rb_newobj(); + + n->flags |= T_NODE; + nd_set_type(n, type); + + n->u1.value = a0; + n->u2.value = a1; + n->u3.value = a2; + + return n; +} + +VALUE +rb_data_object_alloc(VALUE klass, void *datap, RUBY_DATA_FUNC dmark, RUBY_DATA_FUNC 
dfree) +{ + NEWOBJ(data, struct RData); + if (klass) Check_Type(klass, T_CLASS); + OBJSETUP(data, klass, T_DATA); + data->data = datap; + data->dfree = dfree; + data->dmark = dmark; + + return (VALUE)data; +} + +#ifdef __ia64 +#define SET_STACK_END (SET_MACHINE_STACK_END(&th->machine_stack_end), th->machine_register_stack_end = rb_ia64_bsp()) +#else +#define SET_STACK_END SET_MACHINE_STACK_END(&th->machine_stack_end) +#endif + +#define STACK_START (th->machine_stack_start) +#define STACK_END (th->machine_stack_end) +#define STACK_LEVEL_MAX (th->machine_stack_maxsize/sizeof(VALUE)) + +#if STACK_GROW_DIRECTION < 0 +# define STACK_LENGTH (STACK_START - STACK_END) +#elif STACK_GROW_DIRECTION > 0 +# define STACK_LENGTH (STACK_END - STACK_START + 1) +#else +# define STACK_LENGTH ((STACK_END < STACK_START) ? STACK_START - STACK_END\ + : STACK_END - STACK_START + 1) +#endif +#if !STACK_GROW_DIRECTION +int ruby_stack_grow_direction; +int +ruby_get_stack_grow_direction(VALUE *addr) +{ + VALUE *end; + SET_MACHINE_STACK_END(&end); + + if (end > addr) return ruby_stack_grow_direction = 1; + return ruby_stack_grow_direction = -1; +} +#endif + +#define GC_WATER_MARK 512 + +size_t +ruby_stack_length(VALUE **p) +{ + rb_thread_t *th = GET_THREAD(); + SET_STACK_END; + if (p) *p = STACK_UPPER(STACK_END, STACK_START, STACK_END); + return STACK_LENGTH; +} + +static int +stack_check(void) +{ + int ret; + rb_thread_t *th = GET_THREAD(); + SET_STACK_END; + ret = STACK_LENGTH > STACK_LEVEL_MAX - GC_WATER_MARK; +#ifdef __ia64 + if (!ret) { + ret = (VALUE*)rb_ia64_bsp() - th->machine_register_stack_start > + th->machine_register_stack_maxsize/sizeof(VALUE) - GC_WATER_MARK; + } +#endif + return ret; +} + +int +ruby_stack_check(void) +{ +#if defined(POSIX_SIGNAL) && defined(SIGSEGV) && defined(HAVE_SIGALTSTACK) + return 0; +#else + return stack_check(); +#endif +} + +static void +init_mark_stack(rb_objspace_t *objspace) +{ + mark_stack_overflow = 0; + mark_stack_ptr = mark_stack; +} + +#define 
MARK_STACK_EMPTY (mark_stack_ptr == mark_stack) + +static void gc_mark(rb_objspace_t *objspace, VALUE ptr, int lev); +static void gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev); + +static void +gc_mark_all(rb_objspace_t *objspace) +{ + RVALUE *p, *pend; + size_t i; + + init_mark_stack(objspace); + for (i = 0; i < heaps_used; i++) { + p = heaps[i].slot; pend = p + heaps[i].limit; + while (p < pend) { + if ((p->as.basic.flags & FL_MARK) && + (p->as.basic.flags != FL_MARK)) { + gc_mark_children(objspace, (VALUE)p, 0); + } + p++; + } + } +} + +static void +gc_mark_rest(rb_objspace_t *objspace) +{ + VALUE tmp_arry[MARK_STACK_MAX]; + VALUE *p; + + p = (mark_stack_ptr - mark_stack) + tmp_arry; + MEMCPY(tmp_arry, mark_stack, VALUE, p - tmp_arry); + + init_mark_stack(objspace); + while (p != tmp_arry) { + p--; + gc_mark_children(objspace, *p, 0); + } +} + +static inline int +is_pointer_to_heap(rb_objspace_t *objspace, void *ptr) +{ + register RVALUE *p = RANY(ptr); + register struct heaps_slot *heap; + register size_t hi, lo, mid; + + if (p < lomem || p > himem) return Qfalse; + if ((VALUE)p % sizeof(RVALUE) != 0) return Qfalse; + + /* check if p looks like a pointer using bsearch*/ + lo = 0; + hi = heaps_used; + while (lo < hi) { + mid = (lo + hi) / 2; + heap = &heaps[mid]; + if (heap->slot <= p) { + if (p < heap->slot + heap->limit) + return Qtrue; + lo = mid + 1; + } + else { + hi = mid; + } + } + return Qfalse; +} + +static void +mark_locations_array(rb_objspace_t *objspace, register VALUE *x, register long n) +{ + VALUE v; + while (n--) { + v = *x; + VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v)); + if (is_pointer_to_heap(objspace, (void *)v)) { + gc_mark(objspace, v, 0); + } + x++; + } +} + +static void +gc_mark_locations(rb_objspace_t *objspace, VALUE *start, VALUE *end) +{ + long n; + + if (end <= start) return; + n = end - start; + mark_locations_array(objspace, start, n); +} + +void +rb_gc_mark_locations(VALUE *start, VALUE *end) +{ + 
gc_mark_locations(&rb_objspace, start, end); +} + +#define rb_gc_mark_locations(start, end) gc_mark_locations(objspace, start, end) + +struct mark_tbl_arg { + rb_objspace_t *objspace; + int lev; +}; + +static int +mark_entry(ID key, VALUE value, st_data_t data) +{ + struct mark_tbl_arg *arg = (void*)data; + gc_mark(arg->objspace, value, arg->lev); + return ST_CONTINUE; +} + +static void +mark_tbl(rb_objspace_t *objspace, st_table *tbl, int lev) +{ + struct mark_tbl_arg arg; + if (!tbl) return; + arg.objspace = objspace; + arg.lev = lev; + st_foreach(tbl, mark_entry, (st_data_t)&arg); +} + +void +rb_mark_tbl(st_table *tbl) +{ + mark_tbl(&rb_objspace, tbl, 0); +} + +static int +mark_key(VALUE key, VALUE value, st_data_t data) +{ + struct mark_tbl_arg *arg = (void*)data; + gc_mark(arg->objspace, key, arg->lev); + return ST_CONTINUE; +} + +static void +mark_set(rb_objspace_t *objspace, st_table *tbl, int lev) +{ + struct mark_tbl_arg arg; + if (!tbl) return; + arg.objspace = objspace; + arg.lev = lev; + st_foreach(tbl, mark_key, (st_data_t)&arg); +} + +void +rb_mark_set(st_table *tbl) +{ + mark_set(&rb_objspace, tbl, 0); +} + +static int +mark_keyvalue(VALUE key, VALUE value, st_data_t data) +{ + struct mark_tbl_arg *arg = (void*)data; + gc_mark(arg->objspace, key, arg->lev); + gc_mark(arg->objspace, value, arg->lev); + return ST_CONTINUE; +} + +static void +mark_hash(rb_objspace_t *objspace, st_table *tbl, int lev) +{ + struct mark_tbl_arg arg; + if (!tbl) return; + arg.objspace = objspace; + arg.lev = lev; + st_foreach(tbl, mark_keyvalue, (st_data_t)&arg); +} + +void +rb_mark_hash(st_table *tbl) +{ + mark_hash(&rb_objspace, tbl, 0); +} + +void +rb_gc_mark_maybe(VALUE obj) +{ + if (is_pointer_to_heap(&rb_objspace, (void *)obj)) { + gc_mark(&rb_objspace, obj, 0); + } +} + +#define GC_LEVEL_MAX 250 + +static void +gc_mark(rb_objspace_t *objspace, VALUE ptr, int lev) +{ + register RVALUE *obj; + + obj = RANY(ptr); + if (rb_special_const_p(ptr)) return; /* special const 
not marked */ + if (obj->as.basic.flags == 0) return; /* free cell */ + if (obj->as.basic.flags & FL_MARK) return; /* already marked */ + obj->as.basic.flags |= FL_MARK; + + if (lev > GC_LEVEL_MAX || (lev == 0 && stack_check())) { + if (!mark_stack_overflow) { + if (mark_stack_ptr - mark_stack < MARK_STACK_MAX) { + *mark_stack_ptr = ptr; + mark_stack_ptr++; + } + else { + mark_stack_overflow = 1; + } + } + return; + } + gc_mark_children(objspace, ptr, lev+1); +} + +void +rb_gc_mark(VALUE ptr) +{ + gc_mark(&rb_objspace, ptr, 0); +} + +static void +gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev) +{ + register RVALUE *obj = RANY(ptr); + + goto marking; /* skip */ + + again: + obj = RANY(ptr); + if (rb_special_const_p(ptr)) return; /* special const not marked */ + if (obj->as.basic.flags == 0) return; /* free cell */ + if (obj->as.basic.flags & FL_MARK) return; /* already marked */ + obj->as.basic.flags |= FL_MARK; + + marking: + if (FL_TEST(obj, FL_EXIVAR)) { + rb_mark_generic_ivar(ptr); + } + + switch (BUILTIN_TYPE(obj)) { + case T_NIL: + case T_FIXNUM: + rb_bug("rb_gc_mark() called for broken object"); + break; + + case T_NODE: + switch (nd_type(obj)) { + case NODE_IF: /* 1,2,3 */ + case NODE_FOR: + case NODE_ITER: + case NODE_WHEN: + case NODE_MASGN: + case NODE_RESCUE: + case NODE_RESBODY: + case NODE_CLASS: + case NODE_BLOCK_PASS: + gc_mark(objspace, (VALUE)obj->as.node.u2.node, lev); + /* fall through */ + case NODE_BLOCK: /* 1,3 */ + case NODE_OPTBLOCK: + case NODE_ARRAY: + case NODE_DSTR: + case NODE_DXSTR: + case NODE_DREGX: + case NODE_DREGX_ONCE: + case NODE_ENSURE: + case NODE_CALL: + case NODE_DEFS: + case NODE_OP_ASGN1: + case NODE_ARGS: + gc_mark(objspace, (VALUE)obj->as.node.u1.node, lev); + /* fall through */ + case NODE_SUPER: /* 3 */ + case NODE_FCALL: + case NODE_DEFN: + case NODE_ARGS_AUX: + ptr = (VALUE)obj->as.node.u3.node; + goto again; + + case NODE_METHOD: /* 1,2 */ + case NODE_WHILE: + case NODE_UNTIL: + case NODE_AND: + case 
NODE_OR: + case NODE_CASE: + case NODE_SCLASS: + case NODE_DOT2: + case NODE_DOT3: + case NODE_FLIP2: + case NODE_FLIP3: + case NODE_MATCH2: + case NODE_MATCH3: + case NODE_OP_ASGN_OR: + case NODE_OP_ASGN_AND: + case NODE_MODULE: + case NODE_ALIAS: + case NODE_VALIAS: + case NODE_ARGSCAT: + gc_mark(objspace, (VALUE)obj->as.node.u1.node, lev); + /* fall through */ + case NODE_FBODY: /* 2 */ + case NODE_GASGN: + case NODE_LASGN: + case NODE_DASGN: + case NODE_DASGN_CURR: + case NODE_IASGN: + case NODE_IASGN2: + case NODE_CVASGN: + case NODE_COLON3: + case NODE_OPT_N: + case NODE_EVSTR: + case NODE_UNDEF: + case NODE_POSTEXE: + ptr = (VALUE)obj->as.node.u2.node; + goto again; + + case NODE_HASH: /* 1 */ + case NODE_LIT: + case NODE_STR: + case NODE_XSTR: + case NODE_DEFINED: + case NODE_MATCH: + case NODE_RETURN: + case NODE_BREAK: + case NODE_NEXT: + case NODE_YIELD: + case NODE_COLON2: + case NODE_SPLAT: + case NODE_TO_ARY: + ptr = (VALUE)obj->as.node.u1.node; + goto again; + + case NODE_SCOPE: /* 2,3 */ + case NODE_CDECL: + case NODE_OPT_ARG: + gc_mark(objspace, (VALUE)obj->as.node.u3.node, lev); + ptr = (VALUE)obj->as.node.u2.node; + goto again; + + case NODE_ZARRAY: /* - */ + case NODE_ZSUPER: + case NODE_CFUNC: + case NODE_VCALL: + case NODE_GVAR: + case NODE_LVAR: + case NODE_DVAR: + case NODE_IVAR: + case NODE_CVAR: + case NODE_NTH_REF: + case NODE_BACK_REF: + case NODE_REDO: + case NODE_RETRY: + case NODE_SELF: + case NODE_NIL: + case NODE_TRUE: + case NODE_FALSE: + case NODE_ERRINFO: + case NODE_ATTRSET: + case NODE_BLOCK_ARG: + break; + case NODE_ALLOCA: + mark_locations_array(objspace, + (VALUE*)obj->as.node.u1.value, + obj->as.node.u3.cnt); + ptr = (VALUE)obj->as.node.u2.node; + goto again; + + default: /* unlisted NODE */ + if (is_pointer_to_heap(objspace, obj->as.node.u1.node)) { + gc_mark(objspace, (VALUE)obj->as.node.u1.node, lev); + } + if (is_pointer_to_heap(objspace, obj->as.node.u2.node)) { + gc_mark(objspace, (VALUE)obj->as.node.u2.node, lev); + 
} + if (is_pointer_to_heap(objspace, obj->as.node.u3.node)) { + gc_mark(objspace, (VALUE)obj->as.node.u3.node, lev); + } + } + return; /* no need to mark class. */ + } + + gc_mark(objspace, obj->as.basic.klass, lev); + switch (BUILTIN_TYPE(obj)) { + case T_ICLASS: + case T_CLASS: + case T_MODULE: + mark_tbl(objspace, RCLASS_M_TBL(obj), lev); + mark_tbl(objspace, RCLASS_IV_TBL(obj), lev); + ptr = RCLASS_SUPER(obj); + goto again; + + case T_ARRAY: + if (FL_TEST(obj, ELTS_SHARED)) { + ptr = obj->as.array.as.heap.aux.shared; + goto again; + } + else { + long i, len = RARRAY_LEN(obj); + VALUE *ptr = RARRAY_PTR(obj); + for (i=0; i < len; i++) { + gc_mark(objspace, *ptr++, lev); + } + } + break; + + case T_HASH: + mark_hash(objspace, obj->as.hash.ntbl, lev); + ptr = obj->as.hash.ifnone; + goto again; + + case T_STRING: +#define STR_ASSOC FL_USER3 /* copied from string.c */ + if (FL_TEST(obj, RSTRING_NOEMBED) && FL_ANY(obj, ELTS_SHARED|STR_ASSOC)) { + ptr = obj->as.string.as.heap.aux.shared; + goto again; + } + break; + + case T_DATA: + if (obj->as.data.dmark) (*obj->as.data.dmark)(DATA_PTR(obj)); + break; + + case T_OBJECT: + { + long i, len = ROBJECT_NUMIV(obj); + VALUE *ptr = ROBJECT_IVPTR(obj); + for (i = 0; i < len; i++) { + gc_mark(objspace, *ptr++, lev); + } + } + break; + + case T_FILE: + if (obj->as.file.fptr) { + gc_mark(objspace, obj->as.file.fptr->pathv, lev); + gc_mark(objspace, obj->as.file.fptr->tied_io_for_writing, lev); + gc_mark(objspace, obj->as.file.fptr->writeconv_asciicompat, lev); + gc_mark(objspace, obj->as.file.fptr->writeconv_pre_ecopts, lev); + gc_mark(objspace, obj->as.file.fptr->encs.ecopts, lev); + gc_mark(objspace, obj->as.file.fptr->write_lock, lev); + } + break; + + case T_REGEXP: + gc_mark(objspace, obj->as.regexp.src, lev); + break; + + case T_FLOAT: + case T_BIGNUM: + case T_ZOMBIE: + break; + + case T_MATCH: + gc_mark(objspace, obj->as.match.regexp, lev); + if (obj->as.match.str) { + ptr = obj->as.match.str; + goto again; + } + break; + 
+ case T_RATIONAL: + gc_mark(objspace, obj->as.rational.num, lev); + gc_mark(objspace, obj->as.rational.den, lev); + break; + + case T_COMPLEX: + gc_mark(objspace, obj->as.complex.real, lev); + gc_mark(objspace, obj->as.complex.imag, lev); + break; + + case T_STRUCT: + { + long len = RSTRUCT_LEN(obj); + VALUE *ptr = RSTRUCT_PTR(obj); + + while (len--) { + gc_mark(objspace, *ptr++, lev); + } + } + break; + + default: + rb_bug("rb_gc_mark(): unknown data type 0x%lx(%p) %s", + BUILTIN_TYPE(obj), (void *)obj, + is_pointer_to_heap(objspace, obj) ? "corrupted object" : "non object"); + } +} + +static int obj_free(rb_objspace_t *, VALUE); + +static inline void +add_freelist(rb_objspace_t *objspace, RVALUE *p) +{ + VALGRIND_MAKE_MEM_UNDEFINED((void*)p, sizeof(RVALUE)); + p->as.free.flags = 0; + p->as.free.next = freelist; + freelist = p; +} + +static void +finalize_list(rb_objspace_t *objspace, RVALUE *p) +{ + while (p) { + RVALUE *tmp = p->as.free.next; + run_final(objspace, (VALUE)p); + if (!FL_TEST(p, FL_SINGLETON)) { /* not freeing page */ + add_freelist(objspace, p); + } + else { + struct heaps_slot *slot = (struct heaps_slot *)RDATA(p)->dmark; + slot->limit--; + } + p = tmp; + } +} + +static void +free_unused_heaps(rb_objspace_t *objspace) +{ + size_t i, j; + RVALUE *last = 0; + + for (i = j = 1; j < heaps_used; i++) { + if (heaps[i].limit == 0) { + if (!last) { + last = heaps[i].membase; + } + else { + free(heaps[i].membase); + } + heaps_used--; + } + else { + if (i != j) { + heaps[j] = heaps[i]; + } + j++; + } + } + if (last) { + if (last < heaps_freed) { + free(heaps_freed); + heaps_freed = last; + } + else { + free(last); + } + } +} + +static void +gc_sweep(rb_objspace_t *objspace) +{ + RVALUE *p, *pend, *final_list; + size_t freed = 0; + size_t i; + size_t live = 0, free_min = 0, do_heap_free = 0; + + do_heap_free = (heaps_used * HEAP_OBJ_LIMIT) * 0.65; + free_min = (heaps_used * HEAP_OBJ_LIMIT) * 0.2; + + if (free_min < FREE_MIN) { + do_heap_free = heaps_used * 
HEAP_OBJ_LIMIT; + free_min = FREE_MIN; + } + + freelist = 0; + final_list = deferred_final_list; + deferred_final_list = 0; + for (i = 0; i < heaps_used; i++) { + int free_num = 0, final_num = 0; + RVALUE *free = freelist; + RVALUE *final = final_list; + int deferred; + + p = heaps[i].slot; pend = p + heaps[i].limit; + while (p < pend) { + if (!(p->as.basic.flags & FL_MARK)) { + if (p->as.basic.flags && + ((deferred = obj_free(objspace, (VALUE)p)) || + ((FL_TEST(p, FL_FINALIZE)) && need_call_final))) { + if (!deferred) { + p->as.free.flags = T_ZOMBIE; + RDATA(p)->dfree = 0; + } + p->as.free.flags |= FL_MARK; + p->as.free.next = final_list; + final_list = p; + final_num++; + } + else { + add_freelist(objspace, p); + free_num++; + } + } + else if (BUILTIN_TYPE(p) == T_ZOMBIE) { + /* objects to be finalized */ + /* do nothing remain marked */ + } + else { + RBASIC(p)->flags &= ~FL_MARK; + live++; + } + p++; + } + if (final_num + free_num == heaps[i].limit && freed > do_heap_free) { + RVALUE *pp; + + for (pp = final_list; pp != final; pp = pp->as.free.next) { + RDATA(pp)->dmark = (void *)&heaps[i]; + pp->as.free.flags |= FL_SINGLETON; /* freeing page mark */ + } + heaps[i].limit = final_num; + + freelist = free; /* cancel this page from freelist */ + } + else { + freed += free_num; + } + } + GC_PROF_SET_MALLOC_INFO; + if (malloc_increase > malloc_limit) { + malloc_limit += (malloc_increase - malloc_limit) * (double)live / (live + freed); + if (malloc_limit < GC_MALLOC_LIMIT) malloc_limit = GC_MALLOC_LIMIT; + } + malloc_increase = 0; + if (freed < free_min) { + set_heaps_increment(objspace); + heaps_increment(objspace); + } + during_gc = 0; + + /* clear finalization list */ + if (final_list) { + GC_PROF_SET_HEAP_INFO; + deferred_final_list = final_list; + RUBY_VM_SET_FINALIZER_INTERRUPT(GET_THREAD()); + } + else{ + free_unused_heaps(objspace); + GC_PROF_SET_HEAP_INFO; + } +} + +void +rb_gc_force_recycle(VALUE p) +{ + rb_objspace_t *objspace = &rb_objspace; + 
add_freelist(objspace, (RVALUE *)p); +} + +static inline void +make_deferred(RVALUE *p) +{ + p->as.basic.flags = (p->as.basic.flags & ~T_MASK) | T_ZOMBIE; +} + +static inline void +make_io_deferred(RVALUE *p) +{ + rb_io_t *fptr = p->as.file.fptr; + make_deferred(p); + p->as.data.dfree = (void (*)(void*))rb_io_fptr_finalize; + p->as.data.data = fptr; +} + +static int +obj_free(rb_objspace_t *objspace, VALUE obj) +{ + switch (BUILTIN_TYPE(obj)) { + case T_NIL: + case T_FIXNUM: + case T_TRUE: + case T_FALSE: + rb_bug("obj_free() called for broken object"); + break; + } + + if (FL_TEST(obj, FL_EXIVAR)) { + rb_free_generic_ivar((VALUE)obj); + FL_UNSET(obj, FL_EXIVAR); + } + + switch (BUILTIN_TYPE(obj)) { + case T_OBJECT: + if (!(RANY(obj)->as.basic.flags & ROBJECT_EMBED) && + RANY(obj)->as.object.as.heap.ivptr) { + xfree(RANY(obj)->as.object.as.heap.ivptr); + } + break; + case T_MODULE: + case T_CLASS: + rb_clear_cache_by_class((VALUE)obj); + st_free_table(RCLASS_M_TBL(obj)); + if (RCLASS_IV_TBL(obj)) { + st_free_table(RCLASS_IV_TBL(obj)); + } + if (RCLASS_IV_INDEX_TBL(obj)) { + st_free_table(RCLASS_IV_INDEX_TBL(obj)); + } + xfree(RANY(obj)->as.klass.ptr); + break; + case T_STRING: + rb_str_free(obj); + break; + case T_ARRAY: + rb_ary_free(obj); + break; + case T_HASH: + if (RANY(obj)->as.hash.ntbl) { + st_free_table(RANY(obj)->as.hash.ntbl); + } + break; + case T_REGEXP: + if (RANY(obj)->as.regexp.ptr) { + onig_free(RANY(obj)->as.regexp.ptr); + } + break; + case T_DATA: + if (DATA_PTR(obj)) { + if ((long)RANY(obj)->as.data.dfree == -1) { + xfree(DATA_PTR(obj)); + } + else if (RANY(obj)->as.data.dfree) { + make_deferred(RANY(obj)); + return 1; + } + } + break; + case T_MATCH: + if (RANY(obj)->as.match.rmatch) { + struct rmatch *rm = RANY(obj)->as.match.rmatch; + onig_region_free(&rm->regs, 0); + if (rm->char_offset) + xfree(rm->char_offset); + xfree(rm); + } + break; + case T_FILE: + if (RANY(obj)->as.file.fptr) { + make_io_deferred(RANY(obj)); + return 1; + } + break; 
+ case T_RATIONAL: + case T_COMPLEX: + break; + case T_ICLASS: + /* iClass shares table with the module */ + break; + + case T_FLOAT: + break; + + case T_BIGNUM: + if (!(RBASIC(obj)->flags & RBIGNUM_EMBED_FLAG) && RBIGNUM_DIGITS(obj)) { + xfree(RBIGNUM_DIGITS(obj)); + } + break; + case T_NODE: + switch (nd_type(obj)) { + case NODE_SCOPE: + if (RANY(obj)->as.node.u1.tbl) { + xfree(RANY(obj)->as.node.u1.tbl); + } + break; + case NODE_ALLOCA: + xfree(RANY(obj)->as.node.u1.node); + break; + } + break; /* no need to free iv_tbl */ + + case T_STRUCT: + if ((RBASIC(obj)->flags & RSTRUCT_EMBED_LEN_MASK) == 0 && + RANY(obj)->as.rstruct.as.heap.ptr) { + xfree(RANY(obj)->as.rstruct.as.heap.ptr); + } + break; + + default: + rb_bug("gc_sweep(): unknown data type 0x%lx(%p)", + BUILTIN_TYPE(obj), (void*)obj); + } + + return 0; +} + +#define GC_NOTIFY 0 + +void rb_vm_mark(void *ptr); + +static void +mark_current_machine_context(rb_objspace_t *objspace, rb_thread_t *th) +{ + rb_jmp_buf save_regs_gc_mark; + VALUE *stack_start, *stack_end; + + SET_STACK_END; +#if STACK_GROW_DIRECTION < 0 + stack_start = th->machine_stack_end; + stack_end = th->machine_stack_start; +#elif STACK_GROW_DIRECTION > 0 + stack_start = th->machine_stack_start; + stack_end = th->machine_stack_end + 1; +#else + if (th->machine_stack_end < th->machine_stack_start) { + stack_start = th->machine_stack_end; + stack_end = th->machine_stack_start; + } + else { + stack_start = th->machine_stack_start; + stack_end = th->machine_stack_end + 1; + } +#endif + + FLUSH_REGISTER_WINDOWS; + /* This assumes that all registers are saved into the jmp_buf (and stack) */ + rb_setjmp(save_regs_gc_mark); + mark_locations_array(objspace, + (VALUE*)save_regs_gc_mark, + sizeof(save_regs_gc_mark) / sizeof(VALUE)); + + rb_gc_mark_locations(stack_start, stack_end); +#ifdef __ia64 + rb_gc_mark_locations(th->machine_register_stack_start, th->machine_register_stack_end); +#endif +#if defined(__mc68000__) + 
mark_locations_array((VALUE*)((char*)STACK_END + 2), + (STACK_START - STACK_END)); +#endif +} + +void rb_gc_mark_encodings(void); + +static int +garbage_collect(rb_objspace_t *objspace) +{ + struct gc_list *list; + rb_thread_t *th = GET_THREAD(); + INIT_GC_PROF_PARAMS; + + if (GC_NOTIFY) printf("start garbage_collect()\n"); + + if (!heaps) { + return Qfalse; + } + + if (dont_gc || during_gc) { + if (!freelist) { + if (!heaps_increment(objspace)) { + set_heaps_increment(objspace); + heaps_increment(objspace); + } + } + return Qtrue; + } + during_gc++; + objspace->count++; + + GC_PROF_TIMER_START; + GC_PROF_MARK_TIMER_START; + SET_STACK_END; + + init_mark_stack(objspace); + + th->vm->self ? rb_gc_mark(th->vm->self) : rb_vm_mark(th->vm); + + if (finalizer_table) { + mark_tbl(objspace, finalizer_table, 0); + } + + mark_current_machine_context(objspace, th); + + rb_gc_mark_threads(); + rb_gc_mark_symbols(); + rb_gc_mark_encodings(); + + /* mark protected global variables */ + for (list = global_List; list; list = list->next) { + rb_gc_mark_maybe(*list->varptr); + } + rb_mark_end_proc(); + rb_gc_mark_global_tbl(); + + mark_tbl(objspace, rb_class_tbl, 0); + + /* mark generic instance variables for special constants */ + rb_mark_generic_ivar_tbl(); + + rb_gc_mark_parser(); + + /* gc_mark objects whose marking are not completed*/ + while (!MARK_STACK_EMPTY) { + if (mark_stack_overflow) { + gc_mark_all(objspace); + } + else { + gc_mark_rest(objspace); + } + } + GC_PROF_MARK_TIMER_STOP; + + GC_PROF_SWEEP_TIMER_START; + gc_sweep(objspace); + GC_PROF_SWEEP_TIMER_STOP; + + GC_PROF_TIMER_STOP; + if (GC_NOTIFY) printf("end garbage_collect()\n"); + return Qtrue; +} + +int +rb_garbage_collect(void) +{ + return garbage_collect(&rb_objspace); +} + +void +rb_gc_mark_machine_stack(rb_thread_t *th) +{ + rb_objspace_t *objspace = &rb_objspace; +#if STACK_GROW_DIRECTION < 0 + rb_gc_mark_locations(th->machine_stack_end, th->machine_stack_start); +#elif STACK_GROW_DIRECTION > 0 + 
rb_gc_mark_locations(th->machine_stack_start, th->machine_stack_end); +#else + if (th->machine_stack_start < th->machine_stack_end) { + rb_gc_mark_locations(th->machine_stack_start, th->machine_stack_end); + } + else { + rb_gc_mark_locations(th->machine_stack_end, th->machine_stack_start); + } +#endif +#ifdef __ia64 + rb_gc_mark_locations(th->machine_register_stack_start, th->machine_register_stack_end); +#endif +} + + +/* + * call-seq: + * GC.start => nil + * gc.garbage_collect => nil + * ObjectSpace.garbage_collect => nil + * + * Initiates garbage collection, unless manually disabled. + * + */ + +VALUE +rb_gc_start(void) +{ + rb_gc(); + return Qnil; +} + +#undef Init_stack + +void +Init_stack(VALUE *addr) +{ + ruby_init_stack(addr); +} + +/* + * Document-class: ObjectSpace + * + * The ObjectSpace module contains a number of routines + * that interact with the garbage collection facility and allow you to + * traverse all living objects with an iterator. + * + * ObjectSpace also provides support for object + * finalizers, procs that will be called when a specific object is + * about to be destroyed by garbage collection. 
+ * + * include ObjectSpace + * + * + * a = "A" + * b = "B" + * c = "C" + * + * + * define_finalizer(a, proc {|id| puts "Finalizer one on #{id}" }) + * define_finalizer(a, proc {|id| puts "Finalizer two on #{id}" }) + * define_finalizer(b, proc {|id| puts "Finalizer three on #{id}" }) + * + * produces: + * + * Finalizer three on 537763470 + * Finalizer one on 537763480 + * Finalizer two on 537763480 + * + */ + +void +Init_heap(void) +{ + init_heap(&rb_objspace); +} + +static VALUE +os_obj_of(rb_objspace_t *objspace, VALUE of) +{ + size_t i; + size_t n = 0; + RVALUE *membase = 0; + RVALUE *p, *pend; + volatile VALUE v; + + rb_garbage_collect(); + i = 0; + while (i < heaps_used) { + while (0 < i && (uintptr_t)membase < (uintptr_t)heaps[i-1].membase) + i--; + while (i < heaps_used && (uintptr_t)heaps[i].membase <= (uintptr_t)membase ) + i++; + if (heaps_used <= i) + break; + membase = heaps[i].membase; + + p = heaps[i].slot; pend = p + heaps[i].limit; + for (;p < pend; p++) { + if (p->as.basic.flags) { + switch (BUILTIN_TYPE(p)) { + case T_NONE: + case T_ICLASS: + case T_NODE: + case T_ZOMBIE: + continue; + case T_CLASS: + if (FL_TEST(p, FL_SINGLETON)) continue; + default: + if (!p->as.basic.klass) continue; + v = (VALUE)p; + if (!of || rb_obj_is_kind_of(v, of)) { + rb_yield(v); + n++; + } + } + } + } + } + + return SIZET2NUM(n); +} + +/* + * call-seq: + * ObjectSpace.each_object([module]) {|obj| ... } => fixnum + * + * Calls the block once for each living, nonimmediate object in this + * Ruby process. If module is specified, calls the block + * for only those classes or modules that match (or are a subclass of) + * module. Returns the number of objects found. Immediate + * objects (Fixnums, Symbols + * true, false, and nil) are + * never returned. In the example below, each_object + * returns both the numbers we defined and several constants defined in + * the Math module. 
+ * + * a = 102.7 + * b = 95 # Won't be returned + * c = 12345678987654321 + * count = ObjectSpace.each_object(Numeric) {|x| p x } + * puts "Total count: #{count}" + * + * produces: + * + * 12345678987654321 + * 102.7 + * 2.71828182845905 + * 3.14159265358979 + * 2.22044604925031e-16 + * 1.7976931348623157e+308 + * 2.2250738585072e-308 + * Total count: 7 + * + */ + +static VALUE +os_each_obj(int argc, VALUE *argv, VALUE os) +{ + VALUE of; + + rb_secure(4); + if (argc == 0) { + of = 0; + } + else { + rb_scan_args(argc, argv, "01", &of); + } + RETURN_ENUMERATOR(os, 1, &of); + return os_obj_of(&rb_objspace, of); +} + +/* + * call-seq: + * ObjectSpace.undefine_finalizer(obj) + * + * Removes all finalizers for obj. + * + */ + +static VALUE +undefine_final(VALUE os, VALUE obj) +{ + rb_objspace_t *objspace = &rb_objspace; + if (finalizer_table) { + st_delete(finalizer_table, (st_data_t*)&obj, 0); + } + return obj; +} + +/* + * call-seq: + * ObjectSpace.define_finalizer(obj, aProc=proc()) + * + * Adds aProc as a finalizer, to be called after obj + * was destroyed. 
+ * + */ + +static VALUE +define_final(int argc, VALUE *argv, VALUE os) +{ + rb_objspace_t *objspace = &rb_objspace; + VALUE obj, block, table; + + rb_scan_args(argc, argv, "11", &obj, &block); + if (argc == 1) { + block = rb_block_proc(); + } + else if (!rb_respond_to(block, rb_intern("call"))) { + rb_raise(rb_eArgError, "wrong type argument %s (should be callable)", + rb_obj_classname(block)); + } + if (!FL_ABLE(obj)) { + rb_raise(rb_eArgError, "cannot define finalizer for %s", + rb_obj_classname(obj)); + } + RBASIC(obj)->flags |= FL_FINALIZE; + + block = rb_ary_new3(2, INT2FIX(rb_safe_level()), block); + OBJ_FREEZE(block); + + if (!finalizer_table) { + finalizer_table = st_init_numtable(); + } + if (st_lookup(finalizer_table, obj, &table)) { + rb_ary_push(table, block); + } + else { + table = rb_ary_new3(1, block); + RBASIC(table)->klass = 0; + st_add_direct(finalizer_table, obj, table); + } + return block; +} + +void +rb_gc_copy_finalizer(VALUE dest, VALUE obj) +{ + rb_objspace_t *objspace = &rb_objspace; + VALUE table; + + if (!finalizer_table) return; + if (!FL_TEST(obj, FL_FINALIZE)) return; + if (st_lookup(finalizer_table, obj, &table)) { + st_insert(finalizer_table, dest, table); + } + FL_SET(dest, FL_FINALIZE); +} + +static VALUE +run_single_final(VALUE arg) +{ + VALUE *args = (VALUE *)arg; + rb_eval_cmd(args[0], args[1], (int)args[2]); + return Qnil; +} + +static void +run_final(rb_objspace_t *objspace, VALUE obj) +{ + long i; + int status; + VALUE args[3], table, objid; + + objid = rb_obj_id(obj); /* make obj into id */ + RBASIC(obj)->klass = 0; + + if (RDATA(obj)->dfree) { + (*RDATA(obj)->dfree)(DATA_PTR(obj)); + } + + if (finalizer_table && + st_delete(finalizer_table, (st_data_t*)&obj, &table)) { + args[1] = 0; + args[2] = (VALUE)rb_safe_level(); + if (!args[1] && RARRAY_LEN(table) > 0) { + args[1] = rb_obj_freeze(rb_ary_new3(1, objid)); + } + for (i=0; ias.basic.flags & FL_FINALIZE) { + if (BUILTIN_TYPE(p) != T_ZOMBIE) { + p->as.free.flags = FL_MARK 
| T_ZOMBIE; /* remain marked */ + RDATA(p)->dfree = 0; + } + p->as.free.next = *final_list; + *final_list = p; + return ST_CONTINUE; + } + else { + return ST_DELETE; + } +} + +void +rb_gc_call_finalizer_at_exit(void) +{ + rb_objspace_t *objspace = &rb_objspace; + RVALUE *p, *pend; + RVALUE *final_list = 0; + size_t i; + + /* run finalizers */ + if (finalizer_table) { + finalize_deferred(objspace); + while (finalizer_table->num_entries > 0) { + st_foreach(finalizer_table, chain_finalized_object, + (st_data_t)&final_list); + if (!(p = final_list)) break; + do { + final_list = p->as.free.next; + run_final(objspace, (VALUE)p); + } while ((p = final_list) != 0); + } + st_free_table(finalizer_table); + finalizer_table = 0; + } + /* finalizers are part of garbage collection */ + during_gc++; + /* run data object's finalizers */ + for (i = 0; i < heaps_used; i++) { + p = heaps[i].slot; pend = p + heaps[i].limit; + while (p < pend) { + if (BUILTIN_TYPE(p) == T_DATA && + DATA_PTR(p) && RANY(p)->as.data.dfree && + RANY(p)->as.basic.klass != rb_cThread && RANY(p)->as.basic.klass != rb_cMutex) { + p->as.free.flags = 0; + if ((long)RANY(p)->as.data.dfree == -1) { + xfree(DATA_PTR(p)); + } + else if (RANY(p)->as.data.dfree) { + make_deferred(RANY(p)); + RANY(p)->as.free.next = final_list; + final_list = p; + } + } + else if (BUILTIN_TYPE(p) == T_FILE) { + if (RANY(p)->as.file.fptr) { + make_io_deferred(RANY(p)); + RANY(p)->as.free.next = final_list; + final_list = p; + } + } + p++; + } + } + during_gc = 0; + if (final_list) { + finalize_list(objspace, final_list); + } +} + +void +rb_gc(void) +{ + rb_objspace_t *objspace = &rb_objspace; + garbage_collect(objspace); + gc_finalize_deferred(objspace); +} + +/* + * call-seq: + * ObjectSpace._id2ref(object_id) -> an_object + * + * Converts an object id to a reference to the object. May not be + * called on an object id passed as a parameter to a finalizer. 
+ * + * s = "I am a string" #=> "I am a string" + * r = ObjectSpace._id2ref(s.object_id) #=> "I am a string" + * r == s #=> true + * + */ + +static VALUE +id2ref(VALUE obj, VALUE objid) +{ +#if SIZEOF_LONG == SIZEOF_VOIDP +#define NUM2PTR(x) NUM2ULONG(x) +#elif SIZEOF_LONG_LONG == SIZEOF_VOIDP +#define NUM2PTR(x) NUM2ULL(x) +#endif /* NUM2PTR converts with whichever integer type is pointer sized; NOTE(review): it stays undefined when neither size matches */ + rb_objspace_t *objspace = &rb_objspace; + VALUE ptr; + void *p0; + + rb_secure(4); + ptr = NUM2PTR(objid); + p0 = (void *)ptr; /* raw value, kept only for the error messages */ + + if (ptr == Qtrue) return Qtrue; /* for special constants and fixnums the id number is the VALUE itself */ + if (ptr == Qfalse) return Qfalse; + if (ptr == Qnil) return Qnil; + if (FIXNUM_P(ptr)) return (VALUE)ptr; + ptr = objid ^ FIXNUM_FLAG; /* unset FIXNUM_FLAG */ + + if ((ptr % sizeof(RVALUE)) == (4 << 2)) { /* remainder 4 << 2 marks a symbol id, as produced by rb_obj_id() */ + ID symid = ptr / sizeof(RVALUE); + if (rb_id2name(symid) == 0) + rb_raise(rb_eRangeError, "%p is not symbol id value", p0); + return ID2SYM(symid); + } + + if (!is_pointer_to_heap(objspace, (void *)ptr) || + BUILTIN_TYPE(ptr) > T_FIXNUM || BUILTIN_TYPE(ptr) == T_ICLASS) { /* rejected: fails is_pointer_to_heap() or has a type that is not accepted here */ + rb_raise(rb_eRangeError, "%p is not id value", p0); + } + if (BUILTIN_TYPE(ptr) == 0 || RBASIC(ptr)->klass == 0) { /* type 0 or klass 0 is reported as a recycled slot */ + rb_raise(rb_eRangeError, "%p is recycled object", p0); + } + return (VALUE)ptr; +} + +/* + * Document-method: __id__ + * Document-method: object_id + * + * call-seq: + * obj.__id__ => fixnum + * obj.object_id => fixnum + * + * Returns an integer identifier for obj. The same number will + * be returned on all calls to id for a given object, and + * no two active objects will share an id. + * Object#object_id is a different concept from the + * :name notation, which returns the symbol id of + * name. Replaces the deprecated Object#id. + */ + +/* + * call-seq: + * obj.hash => fixnum + * + * Generates a Fixnum hash value for this object. This + * function must have the property that a.eql?(b) implies + * a.hash == b.hash. The hash value is used by class + * Hash. Any hash value that exceeds the capacity of a + * Fixnum will be truncated before being used.
+ */ + +VALUE +rb_obj_id(VALUE obj) +{ + /* + * 32-bit VALUE space + * MSB ------------------------ LSB + * false 00000000000000000000000000000000 + * true 00000000000000000000000000000010 + * nil 00000000000000000000000000000100 + * undef 00000000000000000000000000000110 + * symbol ssssssssssssssssssssssss00001110 + * object oooooooooooooooooooooooooooooo00 = 0 (mod sizeof(RVALUE)) + * fixnum fffffffffffffffffffffffffffffff1 + * + * object_id space + * LSB + * false 00000000000000000000000000000000 + * true 00000000000000000000000000000010 + * nil 00000000000000000000000000000100 + * undef 00000000000000000000000000000110 + * symbol 000SSSSSSSSSSSSSSSSSSSSSSSSSSS0 S...S % A = 4 (S...S = s...s * A + 4) + * object oooooooooooooooooooooooooooooo0 o...o % A = 0 + * fixnum fffffffffffffffffffffffffffffff1 bignum if required + * + * where A = sizeof(RVALUE)/4 + * + * sizeof(RVALUE) is + * 20 if 32-bit, double is 4-byte aligned + * 24 if 32-bit, double is 8-byte aligned + * 40 if 64-bit + */ + if (TYPE(obj) == T_SYMBOL) { /* symbols: scale the ID by sizeof(RVALUE) and add 4 << 2 so id2ref() can recognise them by remainder */ + return (SYM2ID(obj) * sizeof(RVALUE) + (4 << 2)) | FIXNUM_FLAG; + } + if (SPECIAL_CONST_P(obj)) { /* remaining special constants: the VALUE bits themselves become the number */ + return LONG2NUM((SIGNED_VALUE)obj); + } + return (VALUE)((SIGNED_VALUE)obj|FIXNUM_FLAG); /* heap object: its address with FIXNUM_FLAG set, returned directly as a VALUE */ +} + +static int +set_zero(st_data_t key, st_data_t val, st_data_t arg) +{ /* st_foreach callback used by count_objects: stores 0 under key in the hash passed as arg; val is unused */ + VALUE k = (VALUE)key; + VALUE hash = (VALUE)arg; + rb_hash_aset(hash, k, INT2FIX(0)); + return ST_CONTINUE; /* keep iterating */ +} + +/* + * call-seq: + * ObjectSpace.count_objects([result_hash]) -> hash + * + * Counts objects for each type. + * + * It returns a hash as: + * {:TOTAL=>10000, :FREE=>3011, :T_OBJECT=>6, :T_CLASS=>404, ...} + * + * If the optional argument, result_hash, is given, + * it is overwritten and returned. + * This is intended to avoid probe effect. + * + * The contents of the returned hash is implementation defined. + * It may be changed in future. + * + * This method is not expected to work except C Ruby.
+ * + */ + +static VALUE +count_objects(int argc, VALUE *argv, VALUE os) +{ + rb_objspace_t *objspace = &rb_objspace; + size_t counts[T_MASK+1]; /* one tally per BUILTIN_TYPE value */ + size_t freed = 0; /* slots whose flags word is 0 */ + size_t total = 0; /* sum of the limit of every heap */ + size_t i; + VALUE hash; + + if (rb_scan_args(argc, argv, "01", &hash) == 1) { /* a result hash was supplied: it must be a Hash */ + if (TYPE(hash) != T_HASH) + rb_raise(rb_eTypeError, "non-hash given"); + } + + for (i = 0; i <= T_MASK; i++) { + counts[i] = 0; + } + + for (i = 0; i < heaps_used; i++) { /* tally every slot of every heap */ + RVALUE *p, *pend; + + p = heaps[i].slot; pend = p + heaps[i].limit; + for (;p < pend; p++) { + if (p->as.basic.flags) { + counts[BUILTIN_TYPE(p)]++; + } + else { + freed++; /* a zero flags word is counted as a free slot */ + } + } + total += heaps[i].limit; + } + + if (hash == Qnil) { /* NOTE(review): presumably rb_scan_args leaves an omitted optional argument as nil - confirm */ + hash = rb_hash_new(); + } + else if (!RHASH_EMPTY_P(hash)) { + st_foreach(RHASH_TBL(hash), set_zero, hash); /* reset every existing entry to 0 before refilling */ + } + rb_hash_aset(hash, ID2SYM(rb_intern("TOTAL")), SIZET2NUM(total)); + rb_hash_aset(hash, ID2SYM(rb_intern("FREE")), SIZET2NUM(freed)); + + for (i = 0; i <= T_MASK; i++) { + VALUE type; + switch (i) { +#define COUNT_TYPE(t) case t: type = ID2SYM(rb_intern(#t)); break; /* keys a known type by the Symbol of its constant name */ + COUNT_TYPE(T_NONE); + COUNT_TYPE(T_OBJECT); + COUNT_TYPE(T_CLASS); + COUNT_TYPE(T_MODULE); + COUNT_TYPE(T_FLOAT); + COUNT_TYPE(T_STRING); + COUNT_TYPE(T_REGEXP); + COUNT_TYPE(T_ARRAY); + COUNT_TYPE(T_HASH); + COUNT_TYPE(T_STRUCT); + COUNT_TYPE(T_BIGNUM); + COUNT_TYPE(T_FILE); + COUNT_TYPE(T_DATA); + COUNT_TYPE(T_MATCH); + COUNT_TYPE(T_COMPLEX); + COUNT_TYPE(T_RATIONAL); + COUNT_TYPE(T_NIL); + COUNT_TYPE(T_TRUE); + COUNT_TYPE(T_FALSE); + COUNT_TYPE(T_SYMBOL); + COUNT_TYPE(T_FIXNUM); + COUNT_TYPE(T_UNDEF); + COUNT_TYPE(T_NODE); + COUNT_TYPE(T_ICLASS); + COUNT_TYPE(T_ZOMBIE); +#undef COUNT_TYPE + default: type = INT2NUM(i); break; /* type numbers without a name above are keyed by the integer itself */ + } + if (counts[i]) + rb_hash_aset(hash, type, SIZET2NUM(counts[i])); /* only non-zero tallies are stored */ + } + + return hash; +} + +/* + * call-seq: + * GC.count -> Integer + * + * The number of times GC occured. + * + * It returns the number of times GC occured since the process started.
+ * + */ + +static VALUE +gc_count(VALUE self) +{ + return UINT2NUM((&rb_objspace)->count); +} + +#if CALC_EXACT_MALLOC_SIZE +/* + * call-seq: + * GC.malloc_allocated_size -> Integer + * + * The allocated size by malloc(). + * + * It returns the allocated size by malloc(). + */ + +static VALUE +gc_malloc_allocated_size(VALUE self) +{ + return UINT2NUM((&rb_objspace)->malloc_params.allocated_size); +} + +/* + * call-seq: + * GC.malloc_allocations -> Integer + * + * The number of allocated memory object by malloc(). + * + * It returns the number of allocated memory object by malloc(). + */ + +static VALUE +gc_malloc_allocations(VALUE self) +{ + return UINT2NUM((&rb_objspace)->malloc_params.allocations); +} +#endif + +static VALUE +gc_profile_record_get(void) +{ + VALUE prof; + VALUE gc_profile = rb_ary_new(); + size_t i; + rb_objspace_t *objspace = (&rb_objspace); + + if (!objspace->profile.run) { + return Qnil; + } + + for (i =0; i < objspace->profile.count; i++) { + prof = rb_hash_new(); + rb_hash_aset(prof, ID2SYM(rb_intern("GC_TIME")), DBL2NUM(objspace->profile.record[i].gc_time)); + rb_hash_aset(prof, ID2SYM(rb_intern("GC_INVOKE_TIME")), DBL2NUM(objspace->profile.record[i].gc_invoke_time)); + rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_USE_SIZE")), rb_uint2inum(objspace->profile.record[i].heap_use_size)); + rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_TOTAL_SIZE")), rb_uint2inum(objspace->profile.record[i].heap_total_size)); + rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_TOTAL_OBJECTS")), rb_uint2inum(objspace->profile.record[i].heap_total_objects)); +#if GC_PROFILE_MORE_DETAIL + rb_hash_aset(prof, ID2SYM(rb_intern("GC_MARK_TIME")), DBL2NUM(objspace->profile.record[i].gc_mark_time)); + rb_hash_aset(prof, ID2SYM(rb_intern("GC_SWEEP_TIME")), DBL2NUM(objspace->profile.record[i].gc_sweep_time)); + rb_hash_aset(prof, ID2SYM(rb_intern("ALLOCATE_INCREASE")), rb_uint2inum(objspace->profile.record[i].allocate_increase)); + rb_hash_aset(prof, ID2SYM(rb_intern("ALLOCATE_LIMIT")), 
rb_uint2inum(objspace->profile.record[i].allocate_limit)); + rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_USE_SLOTS")), rb_uint2inum(objspace->profile.record[i].heap_use_slots)); + rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_LIVE_OBJECTS")), rb_uint2inum(objspace->profile.record[i].heap_live_objects)); + rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_FREE_OBJECTS")), rb_uint2inum(objspace->profile.record[i].heap_free_objects)); + rb_hash_aset(prof, ID2SYM(rb_intern("HAVE_FINALIZE")), objspace->profile.record[i].have_finalize); +#endif + rb_ary_push(gc_profile, prof); + } + + return gc_profile; +} + +/* + * call-seq: + * GC::Profiler.result -> string + * + * Report profile data to string. + * + * It returns a string as: + * GC 1 invokes. + * Index Invoke Time(sec) Use Size(byte) Total Size(byte) Total Object GC time(ms) + * 1 0.012 159240 212940 10647 0.00000000000001530000 + */ + +static VALUE +gc_profile_result(void) +{ + rb_objspace_t *objspace = &rb_objspace; + VALUE record; + VALUE result; + int i; + + record = gc_profile_record_get(); + if (objspace->profile.run && objspace->profile.count) { + result = rb_sprintf("GC %d invokes.\n", NUM2INT(gc_count(0))); + rb_str_cat2(result, "Index Invoke Time(sec) Use Size(byte) Total Size(byte) Total Object GC Time(ms)\n"); + for (i = 0; i < (int)RARRAY_LEN(record); i++) { + VALUE r = RARRAY_PTR(record)[i]; + rb_str_catf(result, "%5d %19.3f %20d %20d %20d %30.20f\n", + i+1, NUM2DBL(rb_hash_aref(r, ID2SYM(rb_intern("GC_INVOKE_TIME")))), + NUM2INT(rb_hash_aref(r, ID2SYM(rb_intern("HEAP_USE_SIZE")))), + NUM2INT(rb_hash_aref(r, ID2SYM(rb_intern("HEAP_TOTAL_SIZE")))), + NUM2INT(rb_hash_aref(r, ID2SYM(rb_intern("HEAP_TOTAL_OBJECTS")))), + NUM2DBL(rb_hash_aref(r, ID2SYM(rb_intern("GC_TIME"))))*1000); + } +#if GC_PROFILE_MORE_DETAIL + rb_str_cat2(result, "\n\n"); + rb_str_cat2(result, "More detail.\n"); + rb_str_cat2(result, "Index Allocate Increase Allocate Limit Use Slot Have Finalize Mark Time(ms) Sweep Time(ms)\n"); + for (i = 0; i < 
(int)RARRAY_LEN(record); i++) { + VALUE r = RARRAY_PTR(record)[i]; + rb_str_catf(result, "%5d %17d %17d %9d %14s %25.20f %25.20f\n", + i+1, NUM2INT(rb_hash_aref(r, ID2SYM(rb_intern("ALLOCATE_INCREASE")))), + NUM2INT(rb_hash_aref(r, ID2SYM(rb_intern("ALLOCATE_LIMIT")))), + NUM2INT(rb_hash_aref(r, ID2SYM(rb_intern("HEAP_USE_SLOTS")))), + rb_hash_aref(r, ID2SYM(rb_intern("HAVE_FINALIZE")))? "true" : "false", + NUM2DBL(rb_hash_aref(r, ID2SYM(rb_intern("GC_MARK_TIME"))))*1000, + NUM2DBL(rb_hash_aref(r, ID2SYM(rb_intern("GC_SWEEP_TIME"))))*1000); + } +#endif + } + else { + result = rb_str_new2(""); + } + return result; +} + + +/* + * call-seq: + * GC::Profiler.report + * + * GC::Profiler.result display + * + */ + +static VALUE +gc_profile_report(int argc, VALUE *argv, VALUE self) +{ + VALUE out; + + if (argc == 0) { + out = rb_stdout; + } + else { + rb_scan_args(argc, argv, "01", &out); + } + rb_io_write(out, gc_profile_result()); + + return Qnil; +} + + +/* + * The GC module provides an interface to Ruby's mark and + * sweep garbage collection mechanism. Some of the underlying methods + * are also available via the ObjectSpace module. 
+ */ + +void +Init_GC(void) +{ + VALUE rb_mObSpace; + VALUE rb_mProfiler; + + rb_mGC = rb_define_module("GC"); + rb_define_singleton_method(rb_mGC, "start", rb_gc_start, 0); + rb_define_singleton_method(rb_mGC, "enable", rb_gc_enable, 0); + rb_define_singleton_method(rb_mGC, "disable", rb_gc_disable, 0); + rb_define_singleton_method(rb_mGC, "stress", gc_stress_get, 0); + rb_define_singleton_method(rb_mGC, "stress=", gc_stress_set, 1); + rb_define_singleton_method(rb_mGC, "count", gc_count, 0); + rb_define_method(rb_mGC, "garbage_collect", rb_gc_start, 0); + + rb_mProfiler = rb_define_module_under(rb_mGC, "Profiler"); + rb_define_singleton_method(rb_mProfiler, "enabled?", gc_profile_enable_get, 0); + rb_define_singleton_method(rb_mProfiler, "enable", gc_profile_enable, 0); + rb_define_singleton_method(rb_mProfiler, "disable", gc_profile_disable, 0); + rb_define_singleton_method(rb_mProfiler, "clear", gc_profile_clear, 0); + rb_define_singleton_method(rb_mProfiler, "result", gc_profile_result, 0); + rb_define_singleton_method(rb_mProfiler, "report", gc_profile_report, -1); + + rb_mObSpace = rb_define_module("ObjectSpace"); + rb_define_module_function(rb_mObSpace, "each_object", os_each_obj, -1); + rb_define_module_function(rb_mObSpace, "garbage_collect", rb_gc_start, 0); + + rb_define_module_function(rb_mObSpace, "define_finalizer", define_final, -1); + rb_define_module_function(rb_mObSpace, "undefine_finalizer", undefine_final, 1); + + rb_define_module_function(rb_mObSpace, "_id2ref", id2ref, 1); + + nomem_error = rb_exc_new3(rb_eNoMemError, + rb_obj_freeze(rb_str_new2("failed to allocate memory"))); + OBJ_TAINT(nomem_error); + OBJ_FREEZE(nomem_error); + + rb_define_method(rb_mKernel, "hash", rb_obj_id, 0); + rb_define_method(rb_mKernel, "__id__", rb_obj_id, 0); + rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0); + + rb_define_module_function(rb_mObSpace, "count_objects", count_objects, -1); + +#if CALC_EXACT_MALLOC_SIZE + rb_define_singleton_method(rb_mGC, 
"malloc_allocated_size", gc_malloc_allocated_size, 0); + rb_define_singleton_method(rb_mGC, "malloc_allocations", gc_malloc_allocations, 0); +#endif +} diff --git a/gc.h b/gc.h new file mode 100644 index 0000000..29fd6b4 --- /dev/null +++ b/gc.h @@ -0,0 +1,75 @@ + +#ifndef RUBY_GC_H +#define RUBY_GC_H 1 + +#if defined(__i386) && defined(__GNUC__) +#define SET_MACHINE_STACK_END(p) __asm__("mov %%esp, %0" : "=r" (*p)) +#else +NOINLINE(void rb_gc_set_stack_end(VALUE **stack_end_p)); +#define SET_MACHINE_STACK_END(p) rb_gc_set_stack_end(p) +#define USE_CONSERVATIVE_STACK_END +#endif + +/* for GC debug */ + +#ifndef RUBY_MARK_FREE_DEBUG +#define RUBY_MARK_FREE_DEBUG 0 +#endif + +#if RUBY_MARK_FREE_DEBUG +extern int ruby_gc_debug_indent; + +static void +rb_gc_debug_indent(void) +{ + printf("%*s", ruby_gc_debug_indent, ""); +} + +static void +rb_gc_debug_body(char *mode, char *msg, int st, void *ptr) +{ + if (st == 0) { + ruby_gc_debug_indent--; + } + rb_gc_debug_indent(); + printf("%s: %s %s (%p)\n", mode, st ? 
"->" : "<-", msg, ptr); + + if (st) { + ruby_gc_debug_indent++; + } + + fflush(stdout); +} + +#define RUBY_MARK_ENTER(msg) rb_gc_debug_body("mark", msg, 1, ptr) +#define RUBY_MARK_LEAVE(msg) rb_gc_debug_body("mark", msg, 0, ptr) +#define RUBY_FREE_ENTER(msg) rb_gc_debug_body("free", msg, 1, ptr) +#define RUBY_FREE_LEAVE(msg) rb_gc_debug_body("free", msg, 0, ptr) +#define RUBY_GC_INFO rb_gc_debug_indent(); printf + +#else +#define RUBY_MARK_ENTER(msg) +#define RUBY_MARK_LEAVE(msg) +#define RUBY_FREE_ENTER(msg) +#define RUBY_FREE_LEAVE(msg) +#define RUBY_GC_INFO if(0)printf +#endif + +#define RUBY_MARK_UNLESS_NULL(ptr) if(RTEST(ptr)){rb_gc_mark(ptr);} +#define RUBY_FREE_UNLESS_NULL(ptr) if(ptr){ruby_xfree(ptr);} + +#if STACK_GROW_DIRECTION > 0 +# define STACK_UPPER(x, a, b) a +#elif STACK_GROW_DIRECTION < 0 +# define STACK_UPPER(x, a, b) b +#else +RUBY_EXTERN int ruby_stack_grow_direction; +int ruby_get_stack_grow_direction(VALUE *addr); +# define stack_growup_p(x) ( \ + (ruby_stack_grow_direction ? \ + ruby_stack_grow_direction : \ + ruby_get_stack_grow_direction(x)) > 0) +# define STACK_UPPER(x, a, b) (stack_growup_p(x) ? a : b) +#endif + +#endif /* RUBY_GC_H */ diff --git a/gem_prelude.rb b/gem_prelude.rb new file mode 100644 index 0000000..b5da35e --- /dev/null +++ b/gem_prelude.rb @@ -0,0 +1,364 @@ +# depends on: array.rb dir.rb env.rb file.rb hash.rb module.rb regexp.rb +# vim: filetype=ruby +# THIS FILE WAS AUTOGENERATED, DO NOT EDIT + +# NOTICE: Ruby is during initialization here. +# * Encoding.default_external does not reflects -E. +# * Should not expect Encoding.default_internal. +# * Locale encoding is available. 
+if defined?(Gem) then + + module Kernel + + def gem(gem_name, *version_requirements) + Gem.push_gem_version_on_load_path(gem_name, *version_requirements) + end + + end + + module Gem + + class LoadError < ::LoadError + end + + ConfigMap = { + :sitedir => RbConfig::CONFIG["sitedir"], + :ruby_version => RbConfig::CONFIG["ruby_version"], + :libdir => RbConfig::CONFIG["libdir"], + :sitelibdir => RbConfig::CONFIG["sitelibdir"], + :arch => RbConfig::CONFIG["arch"], + :bindir => RbConfig::CONFIG["bindir"], + :EXEEXT => RbConfig::CONFIG["EXEEXT"], + :RUBY_SO_NAME => RbConfig::CONFIG["RUBY_SO_NAME"], + :ruby_install_name => RbConfig::CONFIG["ruby_install_name"] + } + + def self.dir + @gem_home ||= nil + set_home(ENV['GEM_HOME'] || default_dir) unless @gem_home + @gem_home + end + + def self.path + @gem_path ||= nil + unless @gem_path + paths = [ENV['GEM_PATH'] || default_path] + paths << APPLE_GEM_HOME if defined? APPLE_GEM_HOME + set_paths(paths.compact.join(File::PATH_SEPARATOR)) + end + @gem_path + end + + def self.post_install(&hook) + @post_install_hooks << hook + end + + def self.post_uninstall(&hook) + @post_uninstall_hooks << hook + end + + def self.pre_install(&hook) + @pre_install_hooks << hook + end + + def self.pre_uninstall(&hook) + @pre_uninstall_hooks << hook + end + + def self.set_home(home) + @gem_home = home + ensure_gem_subdirectories(@gem_home) + end + + def self.set_paths(gpaths) + if gpaths + @gem_path = gpaths.split(File::PATH_SEPARATOR) + @gem_path << Gem.dir + else + @gem_path = [Gem.dir] + end + @gem_path.uniq! 
+ @gem_path.each do |gp| ensure_gem_subdirectories(gp) end + end + + def self.ensure_gem_subdirectories(path) + end + + def self.user_home + @user_home ||= File.expand_path("~") + rescue + if File::ALT_SEPARATOR then + "C:/" + else + "/" + end + end + + + @post_install_hooks ||= [] + @post_uninstall_hooks ||= [] + @pre_uninstall_hooks ||= [] + @pre_install_hooks ||= [] + + ## + # An Array of the default sources that come with RubyGems + + def self.default_sources + %w[http://gems.rubyforge.org/] + end + + ## + # Default home directory path to be used if an alternate value is not + # specified in the environment + + def self.default_dir + if defined? RUBY_FRAMEWORK_VERSION then + File.join File.dirname(ConfigMap[:sitedir]), 'Gems', + ConfigMap[:ruby_version] + elsif RUBY_VERSION > '1.9' then + File.join(ConfigMap[:libdir], ConfigMap[:ruby_install_name], 'gems', + ConfigMap[:ruby_version]) + else + File.join(ConfigMap[:libdir], ruby_engine, 'gems', + ConfigMap[:ruby_version]) + end + end + + ## + # Path for gems in the user's home directory + + def self.user_dir + File.join(Gem.user_home, '.gem', ruby_engine, + ConfigMap[:ruby_version]) + end + + ## + # Default gem load path + + def self.default_path + [user_dir, default_dir] + end + + ## + # Deduce Ruby's --program-prefix and --program-suffix from its install name + + def self.default_exec_format + baseruby = ConfigMap[:BASERUBY] || 'ruby' + ConfigMap[:RUBY_INSTALL_NAME].sub(baseruby, '%s') rescue '%s' + end + + ## + # The default directory for binaries + + def self.default_bindir + if defined? 
RUBY_FRAMEWORK_VERSION then # mac framework support + '/usr/bin' + else # generic install + ConfigMap[:bindir] + end + end + + ## + # The default system-wide source info cache directory + + def self.default_system_source_cache_dir + File.join Gem.dir, 'source_cache' + end + + ## + # The default user-specific source info cache directory + + def self.default_user_source_cache_dir + File.join Gem.user_home, '.gem', 'source_cache' + end + + ## + # A wrapper around RUBY_ENGINE const that may not be defined + + def self.ruby_engine + if defined? RUBY_ENGINE then + RUBY_ENGINE + else + 'ruby' + end + end + + + + # Methods before this line will be removed when QuickLoader is replaced + # with the real RubyGems + + GEM_PRELUDE_METHODS = Gem.methods(false) + + begin + verbose, debug = $VERBOSE, $DEBUG + $DEBUG = $VERBOSE = nil + + begin + require 'rubygems/defaults/operating_system' + rescue ::LoadError + end + + if defined?(RUBY_ENGINE) then + begin + require "rubygems/defaults/#{RUBY_ENGINE}" + rescue ::LoadError + end + end + ensure + $VERBOSE, $DEBUG = verbose, debug + end + + module QuickLoader + + @loaded_full_rubygems_library = false + + def self.load_full_rubygems_library + return if @loaded_full_rubygems_library + + @loaded_full_rubygems_library = true + + class << Gem + Gem::GEM_PRELUDE_METHODS.each do |method_name| + undef_method method_name + end + end + + Kernel.module_eval do + undef_method :gem if method_defined? :gem + end + + $".delete File.join(Gem::ConfigMap[:libdir], + Gem::ConfigMap[:ruby_install_name], + Gem::ConfigMap[:ruby_version], 'rubygems.rb') + + require 'rubygems' + end + + GemPaths = {} + GemVersions = {} + + def push_gem_version_on_load_path(gem_name, *version_requirements) + if version_requirements.empty? 
+ unless GemPaths.has_key?(gem_name) + raise Gem::LoadError.new("Could not find RubyGem #{gem_name} (>= 0)\n") + end + + # highest version gems already active + return false + else + if version_requirements.length > 1 + QuickLoader.load_full_rubygems_library + return gem(gem_name, *version_requirements) + end + + requirement, version = version_requirements[0].split + requirement.strip! + + if loaded_version = GemVersions[gem_name] + case requirement + when ">", ">=" + if (loaded_version <=> Gem.calculate_integers_for_gem_version(version)) >= 0 + return false + end + when "~>" + required_version = Gem.calculate_integers_for_gem_version(version) + if (loaded_version[0] == required_version[0]) + return false + end + end + end + + QuickLoader.load_full_rubygems_library + gem(gem_name, *version_requirements) + end + end + + def calculate_integers_for_gem_version(gem_version) + numbers = gem_version.split(".").collect {|n| n.to_i} + numbers.pop while numbers.last == 0 + numbers << 0 if numbers.empty? + numbers + end + + def push_all_highest_version_gems_on_load_path + Gem.path.each do |path| + gems_directory = File.join(path, "gems") + if File.exist?(gems_directory) + Dir.entries(gems_directory).each do |gem_directory_name| + next if gem_directory_name == "." || gem_directory_name == ".." + dash = gem_directory_name.rindex("-") + next if dash.nil? 
+ gem_name = gem_directory_name[0...dash] + current_version = GemVersions[gem_name] + new_version = calculate_integers_for_gem_version(gem_directory_name[dash+1..-1]) + if current_version + if (current_version <=> new_version) == -1 + GemVersions[gem_name] = new_version + GemPaths[gem_name] = File.join(gems_directory, gem_directory_name) + end + else + GemVersions[gem_name] = new_version + GemPaths[gem_name] = File.join(gems_directory, gem_directory_name) + end + end + end + end + + require_paths = [] + + GemPaths.each_value do |path| + if File.exist?(file = File.join(path, ".require_paths")) then + paths = File.read(file).split.map do |require_path| + File.join path, require_path + end + + require_paths.concat paths + else + require_paths << file if File.exist?(file = File.join(path, "bin")) + require_paths << file if File.exist?(file = File.join(path, "lib")) + end + end + + # "tag" the first require_path inserted into the $LOAD_PATH to enable + # indexing correctly with rubygems proper when it inserts an explicitly + # gem version + unless require_paths.empty? 
+ require_paths.first.instance_variable_set(:@gem_prelude_index, true) + end + # gem directories must come after -I and ENV['RUBYLIB'] + $:[$:.index(ConfigMap[:sitelibdir]),0] = require_paths + end + + def const_missing(constant) + QuickLoader.load_full_rubygems_library + if Gem.const_defined?(constant) + Gem.const_get(constant) + else + super + end + end + + def method_missing(method, *args, &block) + QuickLoader.load_full_rubygems_library + super unless Gem.respond_to?(method) + Gem.send(method, *args, &block) + end + end + + extend QuickLoader + + end + + begin + Gem.push_all_highest_version_gems_on_load_path + $" << File.join(Gem::ConfigMap[:libdir], Gem::ConfigMap[:ruby_install_name], + Gem::ConfigMap[:ruby_version], "rubygems.rb") + rescue Exception => e + puts "Error loading gem paths on load path in gem_prelude" + puts e + puts e.backtrace.join("\n") + end + +end + diff --git a/golf_prelude.rb b/golf_prelude.rb new file mode 100644 index 0000000..4363b49 --- /dev/null +++ b/golf_prelude.rb @@ -0,0 +1,114 @@ +class Object + @@golf_hash = {} + def method_missing m, *a, &b + t = @@golf_hash[ [m,self.class] ] ||= matching_methods(m)[0] + if t && b + __send__(t, *a) {|*args| + b.binding.eval("proc{|golf_matchdata| $~ = golf_matchdata }").call($~) if $~ + b.call(*args) + } + else + t ? 
__send__(t, *a, &b) : super + end + end + + def matching_methods(s='', m=callable_methods) + r=/^#{s.to_s.gsub(/./){"(.*?)"+Regexp.escape($&)}}/ + m.grep(r).sort_by do |i| + i.to_s.match(r).captures.map(&:size) << i + end + end + + def self.const_missing c + t = @@golf_hash[ [c,self.class] ] ||= matching_methods(c,constants)[0] + t and return const_get(t) + raise NameError, "uninitialized constant #{c}", caller(1) + end + + def shortest_abbreviation(s='', m=callable_methods) + s=s.to_s + our_case = (?A..?Z)===s[0] + if m.index(s.to_sym) + 1.upto(s.size){|z|s.scan(/./).combination(z).map{|trial| + next unless ((?A..?Z)===trial[0]) == our_case + trial*='' + return trial if matching_methods(trial,m)[0].to_s==s + }} + else + nil + end + end + + def callable_methods + self.class == Object ? methods + private_methods : methods + end + + private + + def h(a='H', b='w', c='!') + puts "#{a}ello, #{b}orld#{c}" + end + + alias say puts + + def do_while + 0 while yield + end + + def do_until + 0 until yield + end +end + +class Array + alias old_to_s to_s + alias to_s join +end + +class FalseClass + alias old_to_s to_s + def to_s + "" + end +end + +class Integer + alias each times + include Enumerable +end + +class String + alias / split + + def to_a + split('') + end + + (Array.instance_methods-instance_methods-[:to_ary,:transpose,:flatten,:flatten!,:compact,:compact!,:assoc,:rassoc]).each{|meth| + eval" + def #{meth}(*args, &block) + a=to_a + result = a.#{meth}(*args, &block) + replace(a.join) + if result.class == Array + Integer===result[0] ? 
result.pack('c*') : result.join + elsif result.class == Enumerator + result.map(&:join).to_enum + else + result + end + end" + } +end + +class Enumerator + alias old_to_s to_s + (Array.instance_methods-instance_methods-[:replace]+[:to_s]).each{|meth| + eval" + def #{meth}(*args, &block) + to_a.#{meth}(*args, &block) + end" + } + alias old_inspect inspect + alias inspect old_to_s +end diff --git a/goruby.c b/goruby.c new file mode 100644 index 0000000..7cd3a22 --- /dev/null +++ b/goruby.c @@ -0,0 +1,24 @@ +void Init_golf(void); +#define ruby_run_node goruby_run_node +#include "main.c" +#undef ruby_run_node + +RUBY_EXTERN int ruby_run_node(void*); +RUBY_EXTERN void ruby_init_ext(const char *name, void (*init)(void)); + +static VALUE +init_golf(VALUE arg) +{ + ruby_init_ext("golf", Init_golf); + return arg; +} + +int +goruby_run_node(void *arg) +{ + int state; + if (NIL_P(rb_protect(init_golf, Qtrue, &state))) { + return state == EXIT_SUCCESS ? EXIT_FAILURE : state; + } + return ruby_run_node(arg); +} diff --git a/hash.c b/hash.c new file mode 100644 index 0000000..a805d1a --- /dev/null +++ b/hash.c @@ -0,0 +1,2709 @@ +/********************************************************************** + + hash.c - + + $Author: yugui $ + created at: Mon Nov 22 18:51:18 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/st.h" +#include "ruby/util.h" + +#ifdef __APPLE__ +#include +#endif + +static VALUE rb_hash_s_try_convert(VALUE, VALUE); + +#define HASH_DELETED FL_USER1 +#define HASH_PROC_DEFAULT FL_USER2 + +VALUE +rb_hash_freeze(VALUE hash) +{ + return rb_obj_freeze(hash); +} + +VALUE rb_cHash; + +static VALUE envtbl; +static ID id_hash, id_yield, id_default; + +static int +rb_any_cmp(VALUE a, VALUE b) +{ + if (a == b) return 0; + if (FIXNUM_P(a) && FIXNUM_P(b)) { + return a != b; + } + if (TYPE(a) == T_STRING && RBASIC(a)->klass == rb_cString && + TYPE(b) == T_STRING && RBASIC(b)->klass == rb_cString) { + return rb_str_hash_cmp(a, b); + } + if (a == Qundef || b == Qundef) return -1; + if (SYMBOL_P(a) && SYMBOL_P(b)) { + return a != b; + } + + return !rb_eql(a, b); +} + +VALUE +rb_hash(VALUE obj) +{ + VALUE hval = rb_funcall(obj, id_hash, 0); + retry: + switch (TYPE(hval)) { + case T_FIXNUM: + return hval; + + case T_BIGNUM: + return LONG2FIX(((long*)(RBIGNUM_DIGITS(hval)))[0]); + + default: + hval = rb_to_int(hval); + goto retry; + } +} + +static int +rb_any_hash(VALUE a) +{ + VALUE hval; + int hnum; + + switch (TYPE(a)) { + case T_FIXNUM: + case T_SYMBOL: + hnum = (int)a; + break; + + case T_STRING: + hnum = rb_str_hash(a); + break; + + default: + hval = rb_hash(a); + hnum = (int)FIX2LONG(hval); + } + hnum <<= 1; + return RSHIFT(hnum, 1); +} + +static const struct st_hash_type objhash = { + rb_any_cmp, + rb_any_hash, +}; + +static const struct st_hash_type identhash = { + st_numcmp, + st_numhash, +}; + +typedef int st_foreach_func(st_data_t, st_data_t, st_data_t); + +struct foreach_safe_arg { + st_table *tbl; + st_foreach_func *func; + st_data_t arg; +}; + +static int +foreach_safe_i(st_data_t key, st_data_t value, struct foreach_safe_arg *arg) +{ + int status; + + if (key == Qundef) return 
ST_CONTINUE; + status = (*arg->func)(key, value, arg->arg); + if (status == ST_CONTINUE) { + return ST_CHECK; + } + return status; +} + +void +st_foreach_safe(st_table *table, int (*func)(ANYARGS), st_data_t a) +{ + struct foreach_safe_arg arg; + + arg.tbl = table; + arg.func = (st_foreach_func *)func; + arg.arg = a; + if (st_foreach(table, foreach_safe_i, (st_data_t)&arg)) { + rb_raise(rb_eRuntimeError, "hash modified during iteration"); + } +} + +typedef int rb_foreach_func(VALUE, VALUE, VALUE); + +struct hash_foreach_arg { + VALUE hash; + rb_foreach_func *func; + VALUE arg; +}; + +static int +hash_foreach_iter(VALUE key, VALUE value, struct hash_foreach_arg *arg) +{ + int status; + st_table *tbl; + + tbl = RHASH(arg->hash)->ntbl; + if (key == Qundef) return ST_CONTINUE; + status = (*arg->func)(key, value, arg->arg); + if (RHASH(arg->hash)->ntbl != tbl) { + rb_raise(rb_eRuntimeError, "rehash occurred during iteration"); + } + switch (status) { + case ST_DELETE: + st_delete_safe(tbl, (st_data_t*)&key, 0, Qundef); + FL_SET(arg->hash, HASH_DELETED); + case ST_CONTINUE: + break; + case ST_STOP: + return ST_STOP; + } + return ST_CHECK; +} + +static VALUE +hash_foreach_ensure(VALUE hash) +{ + RHASH(hash)->iter_lev--; + + if (RHASH(hash)->iter_lev == 0) { + if (FL_TEST(hash, HASH_DELETED)) { + st_cleanup_safe(RHASH(hash)->ntbl, Qundef); + FL_UNSET(hash, HASH_DELETED); + } + } + return 0; +} + +static VALUE +hash_foreach_call(struct hash_foreach_arg *arg) +{ + if (st_foreach(RHASH(arg->hash)->ntbl, hash_foreach_iter, (st_data_t)arg)) { + rb_raise(rb_eRuntimeError, "hash modified during iteration"); + } + return Qnil; +} + +void +rb_hash_foreach(VALUE hash, int (*func)(ANYARGS), VALUE farg) +{ + struct hash_foreach_arg arg; + + if (!RHASH(hash)->ntbl) + return; + RHASH(hash)->iter_lev++; + arg.hash = hash; + arg.func = (rb_foreach_func *)func; + arg.arg = farg; + rb_ensure(hash_foreach_call, (VALUE)&arg, hash_foreach_ensure, hash); +} + +static VALUE +hash_alloc(VALUE 
klass) +{ + NEWOBJ(hash, struct RHash); + OBJSETUP(hash, klass, T_HASH); + + hash->ifnone = Qnil; + + return (VALUE)hash; +} + +VALUE +rb_hash_new(void) +{ + return hash_alloc(rb_cHash); +} + +VALUE +rb_hash_dup(VALUE hash) +{ + NEWOBJ(ret, struct RHash); + DUPSETUP(ret, hash); + + if (!RHASH_EMPTY_P(hash)) + ret->ntbl = st_copy(RHASH(hash)->ntbl); + if (FL_TEST(hash, HASH_PROC_DEFAULT)) { + FL_SET(ret, HASH_PROC_DEFAULT); + } + ret->ifnone = RHASH(hash)->ifnone; + return (VALUE)ret; +} + +static void +rb_hash_modify_check(VALUE hash) +{ + if (OBJ_FROZEN(hash)) rb_error_frozen("hash"); + if (!OBJ_UNTRUSTED(hash) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't modify hash"); +} + +struct st_table * +rb_hash_tbl(VALUE hash) +{ + if (!RHASH(hash)->ntbl) { + RHASH(hash)->ntbl = st_init_table(&objhash); + } + return RHASH(hash)->ntbl; +} + +static void +rb_hash_modify(VALUE hash) +{ + rb_hash_modify_check(hash); + rb_hash_tbl(hash); +} + +/* + * call-seq: + * Hash.new => hash + * Hash.new(obj) => aHash + * Hash.new {|hash, key| block } => aHash + * + * Returns a new, empty hash. If this hash is subsequently accessed by + * a key that doesn't correspond to a hash entry, the value returned + * depends on the style of new used to create the hash. In + * the first form, the access returns nil. If + * obj is specified, this single object will be used for + * all default values. If a block is specified, it will be + * called with the hash object and the key, and should return the + * default value. It is the block's responsibility to store the value + * in the hash if required. + * + * h = Hash.new("Go Fish") + * h["a"] = 100 + * h["b"] = 200 + * h["a"] #=> 100 + * h["c"] #=> "Go Fish" + * # The following alters the single default object + * h["c"].upcase! 
#=> "GO FISH" + * h["d"] #=> "GO FISH" + * h.keys #=> ["a", "b"] + * + * # While this creates a new default object each time + * h = Hash.new { |hash, key| hash[key] = "Go Fish: #{key}" } + * h["c"] #=> "Go Fish: c" + * h["c"].upcase! #=> "GO FISH: C" + * h["d"] #=> "Go Fish: d" + * h.keys #=> ["c", "d"] + * + */ + +static VALUE +rb_hash_initialize(int argc, VALUE *argv, VALUE hash) +{ + VALUE ifnone; + + rb_hash_modify(hash); + if (rb_block_given_p()) { + if (argc > 0) { + rb_raise(rb_eArgError, "wrong number of arguments"); + } + RHASH(hash)->ifnone = rb_block_proc(); + FL_SET(hash, HASH_PROC_DEFAULT); + } + else { + rb_scan_args(argc, argv, "01", &ifnone); + RHASH(hash)->ifnone = ifnone; + } + + return hash; +} + +/* + * call-seq: + * Hash[ [key =>|, value]* ] => hash + * + * Creates a new hash populated with the given objects. Equivalent to + * the literal { key, value, ... }. Keys and + * values occur in pairs, so there must be an even number of arguments. + * + * Hash["a", 100, "b", 200] #=> {"a"=>100, "b"=>200} + * Hash["a" => 100, "b" => 200] #=> {"a"=>100, "b"=>200} + * { "a" => 100, "b" => 200 } #=> {"a"=>100, "b"=>200} + */ + +static VALUE +rb_hash_s_create(int argc, VALUE *argv, VALUE klass) +{ + VALUE hash, tmp; + int i; + + if (argc == 1) { + tmp = rb_hash_s_try_convert(Qnil, argv[0]); + if (!NIL_P(tmp)) { + hash = hash_alloc(klass); + if (RHASH(tmp)->ntbl) { + RHASH(hash)->ntbl = st_copy(RHASH(tmp)->ntbl); + } + return hash; + } + + tmp = rb_check_array_type(argv[0]); + if (!NIL_P(tmp)) { + long i; + + hash = hash_alloc(klass); + for (i = 0; i < RARRAY_LEN(tmp); ++i) { + VALUE v = rb_check_array_type(RARRAY_PTR(tmp)[i]); + VALUE key, val = Qnil; + + if (NIL_P(v)) continue; + switch (RARRAY_LEN(v)) { + case 2: + val = RARRAY_PTR(v)[1]; + case 1: + key = RARRAY_PTR(v)[0]; + rb_hash_aset(hash, key, val); + } + } + return hash; + } + } + if (argc % 2 != 0) { + rb_raise(rb_eArgError, "odd number of arguments for Hash"); + } + + hash = hash_alloc(klass); + 
for (i=0; i<argc; i+=2) { + rb_hash_aset(hash, argv[i], argv[i + 1]); + } + + return hash; +} + +static VALUE +to_hash(VALUE hash) +{ + return rb_convert_type(hash, T_HASH, "Hash", "to_hash"); +} + +/* + * call-seq: + * Hash.try_convert(obj) -> hash or nil + * + * Try to convert obj into a hash, using to_hash method. + * Returns converted hash or nil if obj cannot be converted + * for any reason. + * + * Hash.try_convert({1=>2}) # => {1=>2} + * Hash.try_convert("1=>2") # => nil + */ +static VALUE +rb_hash_s_try_convert(VALUE dummy, VALUE hash) +{ + return rb_check_convert_type(hash, T_HASH, "Hash", "to_hash"); +} + +static int +rb_hash_rehash_i(VALUE key, VALUE value, st_table *tbl) +{ + if (key != Qundef) st_insert(tbl, key, value); + return ST_CONTINUE; +} + +/* + * call-seq: + * hsh.rehash -> hsh + * + * Rebuilds the hash based on the current hash values for each key. If + * values of key objects have changed since they were inserted, this + * method will reindex hsh. If Hash#rehash is + * called while an iterator is traversing the hash, a + * RuntimeError will be raised in the iterator. + * + * a = [ "a", "b" ] + * c = [ "c", "d" ] + * h = { a => 100, c => 300 } + * h[a] #=> 100 + * a[0] = "z" + * h[a] #=> nil + * h.rehash #=> {["z", "b"]=>100, ["c", "d"]=>300} + * h[a] #=> 100 + */ + +static VALUE +rb_hash_rehash(VALUE hash) +{ + st_table *tbl; + + if (RHASH(hash)->iter_lev > 0) { + rb_raise(rb_eRuntimeError, "rehash during iteration"); + } + rb_hash_modify_check(hash); + if (!RHASH(hash)->ntbl) + return hash; + tbl = st_init_table_with_size(RHASH(hash)->ntbl->type, RHASH(hash)->ntbl->num_entries); + rb_hash_foreach(hash, rb_hash_rehash_i, (st_data_t)tbl); + st_free_table(RHASH(hash)->ntbl); + RHASH(hash)->ntbl = tbl; + + return hash; +} + +/* + * call-seq: + * hsh[key] => value + * + * Element Reference---Retrieves the value object corresponding + * to the key object. If not found, returns a default value (see + * Hash::new for details).
+ * + * h = { "a" => 100, "b" => 200 } + * h["a"] #=> 100 + * h["c"] #=> nil + * + */ + +VALUE +rb_hash_aref(VALUE hash, VALUE key) +{ + VALUE val; + + if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) { + return rb_funcall(hash, id_default, 1, key); + } + return val; +} + +VALUE +rb_hash_lookup2(VALUE hash, VALUE key, VALUE def) +{ + VALUE val; + + if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) { + return def; /* without Hash#default */ + } + return val; +} + +VALUE +rb_hash_lookup(VALUE hash, VALUE key) +{ + return rb_hash_lookup2(hash, key, Qnil); +} + +/* + * call-seq: + * hsh.fetch(key [, default] ) => obj + * hsh.fetch(key) {| key | block } => obj + * + * Returns a value from the hash for the given key. If the key can't be + * found, there are several options: With no other arguments, it will + * raise a KeyError exception; if default is + * given, then that will be returned; if the optional code block is + * specified, then that will be run and its result returned. + * + * h = { "a" => 100, "b" => 200 } + * h.fetch("a") #=> 100 + * h.fetch("z", "go fish") #=> "go fish" + * h.fetch("z") { |el| "go fish, #{el}"} #=> "go fish, z" + * + * The following example shows that an exception is raised if the key + * is not found and a default value is not supplied.
+ * + * h = { "a" => 100, "b" => 200 } + * h.fetch("z") + * + * produces: + * + * prog.rb:2:in `fetch': key not found (KeyError) + * from prog.rb:2 + * + */ + +static VALUE +rb_hash_fetch_m(int argc, VALUE *argv, VALUE hash) +{ + VALUE key, if_none; + VALUE val; + long block_given; + + rb_scan_args(argc, argv, "11", &key, &if_none); + + block_given = rb_block_given_p(); + if (block_given && argc == 2) { + rb_warn("block supersedes default value argument"); + } + if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) { + if (block_given) return rb_yield(key); + if (argc == 1) { + rb_raise(rb_eKeyError, "key not found"); + } + return if_none; + } + return val; +} + +VALUE +rb_hash_fetch(VALUE hash, VALUE key) +{ + return rb_hash_fetch_m(1, &key, hash); +} + +/* + * call-seq: + * hsh.default(key=nil) => obj + * + * Returns the default value, the value that would be returned by + * hsh[key] if key did not exist in hsh. + * See also Hash::new and Hash#default=. + * + * h = Hash.new #=> {} + * h.default #=> nil + * h.default(2) #=> nil + * + * h = Hash.new("cat") #=> {} + * h.default #=> "cat" + * h.default(2) #=> "cat" + * + * h = Hash.new {|h,k| h[k] = k.to_i*10} #=> {} + * h.default #=> nil + * h.default(2) #=> 20 + */ + +static VALUE +rb_hash_default(int argc, VALUE *argv, VALUE hash) +{ + VALUE key; + + rb_scan_args(argc, argv, "01", &key); + if (FL_TEST(hash, HASH_PROC_DEFAULT)) { + if (argc == 0) return Qnil; + return rb_funcall(RHASH(hash)->ifnone, id_yield, 2, hash, key); + } + return RHASH(hash)->ifnone; +} + +/* + * call-seq: + * hsh.default = obj => obj + * + * Sets the default value, the value returned for a key that does not + * exist in the hash. It is not possible to set the default to a + * Proc that will be executed on each key lookup. + * + * h = { "a" => 100, "b" => 200 } + * h.default = "Go fish" + * h["a"] #=> 100 + * h["z"] #=> "Go fish" + * # This doesn't do what you might hope...
+ * h.default = proc do |hash, key| + * hash[key] = key + key + * end + * h[2] #=> #<Proc:0x401b3948@-:6> + * h["cat"] #=> #<Proc:0x401b3948@-:6> + */ + +static VALUE +rb_hash_set_default(VALUE hash, VALUE ifnone) +{ + rb_hash_modify(hash); + RHASH(hash)->ifnone = ifnone; + FL_UNSET(hash, HASH_PROC_DEFAULT); + return ifnone; +} + +/* + * call-seq: + * hsh.default_proc -> anObject + * + * If Hash::new was invoked with a block, return that + * block, otherwise return nil. + * + * h = Hash.new {|h,k| h[k] = k*k } #=> {} + * p = h.default_proc #=> #<Proc:0x401b3d08@-:1> + * a = [] #=> [] + * p.call(a, 2) + * a #=> [nil, nil, 4] + */ + + +static VALUE +rb_hash_default_proc(VALUE hash) +{ + if (FL_TEST(hash, HASH_PROC_DEFAULT)) { + return RHASH(hash)->ifnone; + } + return Qnil; +} + +VALUE rb_obj_is_proc(VALUE proc); + +/* + * call-seq: + * hsh.default_proc = proc_obj => proc_obj + * + * Sets the default proc to be executed on each key lookup. + * + * h.default_proc = proc do |hash, key| + * hash[key] = key + key + * end + * h[2] #=> 4 + * h["cat"] #=> "catcat" + */ + +static VALUE +rb_hash_set_default_proc(VALUE hash, VALUE proc) +{ + VALUE b; + + rb_hash_modify(hash); + b = rb_check_convert_type(proc, T_DATA, "Proc", "to_proc"); + if (NIL_P(b) || !rb_obj_is_proc(b)) { + rb_raise(rb_eTypeError, + "wrong default_proc type %s (expected Proc)", + rb_obj_classname(proc)); + } + proc = b; + RHASH(hash)->ifnone = proc; + FL_SET(hash, HASH_PROC_DEFAULT); + return proc; +} + +static int +key_i(VALUE key, VALUE value, VALUE *args) +{ + if (rb_equal(value, args[0])) { + args[1] = key; + return ST_STOP; + } + return ST_CONTINUE; +} + +/* + * call-seq: + * hsh.key(value) => key + * + * Returns the key for a given value. If not found, returns nil.
+ * + * h = { "a" => 100, "b" => 200 } + * h.key(200) #=> "b" + * h.key(999) #=> nil + * + */ + +static VALUE +rb_hash_key(VALUE hash, VALUE value) +{ + VALUE args[2]; + + args[0] = value; + args[1] = Qnil; + + rb_hash_foreach(hash, key_i, (st_data_t)args); + + return args[1]; +} + +/* :nodoc: */ +static VALUE +rb_hash_index(VALUE hash, VALUE value) +{ + rb_warn("Hash#index is deprecated; use Hash#key"); + return rb_hash_key(hash, value); +} + +static VALUE +rb_hash_delete_key(VALUE hash, VALUE key) +{ + st_data_t ktmp = (st_data_t)key, val; + + if (!RHASH(hash)->ntbl) + return Qundef; + if (RHASH(hash)->iter_lev > 0) { + if (st_delete_safe(RHASH(hash)->ntbl, &ktmp, &val, Qundef)) { + FL_SET(hash, HASH_DELETED); + return (VALUE)val; + } + } + else if (st_delete(RHASH(hash)->ntbl, &ktmp, &val)) + return (VALUE)val; + return Qundef; +} + +/* + * call-seq: + * hsh.delete(key) => value + * hsh.delete(key) {| key | block } => value + * + * Deletes the key-value pair from hsh whose key is + * equal to key and returns its value. If the key is not found, returns + * nil. If the optional code block is given and the + * key is not found, pass in the key and return the result of + * block.
+ * + * h = { "a" => 100, "b" => 200 } + * h.delete("a") #=> 100 + * h.delete("z") #=> nil + * h.delete("z") { |el| "#{el} not found" } #=> "z not found" + * + */ + +VALUE +rb_hash_delete(VALUE hash, VALUE key) +{ + VALUE val; + + rb_hash_modify(hash); + val = rb_hash_delete_key(hash, key); + if (val != Qundef) return val; + if (rb_block_given_p()) { + return rb_yield(key); + } + return Qnil; +} + +struct shift_var { + VALUE key; + VALUE val; +}; + +static int +shift_i(VALUE key, VALUE value, struct shift_var *var) +{ + if (key == Qundef) return ST_CONTINUE; + if (var->key != Qundef) return ST_STOP; + var->key = key; + var->val = value; + return ST_DELETE; +} + +static int +shift_i_safe(VALUE key, VALUE value, struct shift_var *var) +{ + if (key == Qundef) return ST_CONTINUE; + var->key = key; + var->val = value; + return ST_STOP; +} + +/* + * call-seq: + * hsh.shift -> anArray or obj + * + * Removes a key-value pair from hsh and returns it as the + * two-item array [ key, value ], or + * the hash's default value if the hash is empty. + * + * h = { 1 => "a", 2 => "b", 3 => "c" } + * h.shift #=> [1, "a"] + * h #=> {2=>"b", 3=>"c"} + */ + +static VALUE +rb_hash_shift(VALUE hash) +{ + struct shift_var var; + + rb_hash_modify(hash); + var.key = Qundef; + rb_hash_foreach(hash, RHASH(hash)->iter_lev > 0 ? 
shift_i_safe : shift_i, + (st_data_t)&var); + + if (var.key != Qundef) { + if (RHASH(hash)->iter_lev > 0) { + rb_hash_delete_key(hash, var.key); + } + return rb_assoc_new(var.key, var.val); + } + else if (FL_TEST(hash, HASH_PROC_DEFAULT)) { + return rb_funcall(RHASH(hash)->ifnone, id_yield, 2, hash, Qnil); + } + else { + return RHASH(hash)->ifnone; + } +} + +static int +delete_if_i(VALUE key, VALUE value, VALUE hash) +{ + if (key == Qundef) return ST_CONTINUE; + if (RTEST(rb_yield_values(2, key, value))) { + rb_hash_delete_key(hash, key); + } + return ST_CONTINUE; +} + +/* + * call-seq: + * hsh.delete_if {| key, value | block } -> hsh + * + * Deletes every key-value pair from hsh for which block + * evaluates to true. + * + * h = { "a" => 100, "b" => 200, "c" => 300 } + * h.delete_if {|key, value| key >= "b" } #=> {"a"=>100} + * + */ + +VALUE +rb_hash_delete_if(VALUE hash) +{ + RETURN_ENUMERATOR(hash, 0, 0); + rb_hash_modify(hash); + rb_hash_foreach(hash, delete_if_i, hash); + return hash; +} + +/* + * call-seq: + * hsh.reject! {| key, value | block } -> hsh or nil + * + * Equivalent to Hash#delete_if, but returns + * nil if no changes were made. + */ + +VALUE +rb_hash_reject_bang(VALUE hash) +{ + int n; + + RETURN_ENUMERATOR(hash, 0, 0); + if (!RHASH(hash)->ntbl) + return Qnil; + n = RHASH(hash)->ntbl->num_entries; + rb_hash_delete_if(hash); + if (n == RHASH(hash)->ntbl->num_entries) return Qnil; + return hash; +} + +/* + * call-seq: + * hsh.reject {| key, value | block } -> a_hash + * + * Same as Hash#delete_if, but works on (and returns) a + * copy of the hsh. Equivalent to + * hsh.dup.delete_if. + * + */ + +static VALUE +rb_hash_reject(VALUE hash) +{ + return rb_hash_delete_if(rb_obj_dup(hash)); +} + +/* + * call-seq: + * hsh.values_at(key, ...) => array + * + * Return an array containing the values associated with the given keys. + * Also see Hash.select. 
+ * + * h = { "cat" => "feline", "dog" => "canine", "cow" => "bovine" } + * h.values_at("cow", "cat") #=> ["bovine", "feline"] + */ + +VALUE +rb_hash_values_at(int argc, VALUE *argv, VALUE hash) +{ + VALUE result = rb_ary_new2(argc); + long i; + + for (i=0; i<argc; i++) { + rb_ary_push(result, rb_hash_aref(hash, argv[i])); + } + return result; +} + +static int +select_i(VALUE key, VALUE value, VALUE result) +{ + if (key == Qundef) return ST_CONTINUE; + if (RTEST(rb_yield_values(2, key, value))) + rb_hash_aset(result, key, value); + return ST_CONTINUE; +} + +/* + * call-seq: + * hsh.select {|key, value| block} => a_hash + * + * Returns a new hash consisting of entries for which the block returns true. + * + * h = { "a" => 100, "b" => 200, "c" => 300 } + * h.select {|k,v| k > "a"} #=> {"b" => 200, "c" => 300} + * h.select {|k,v| v < 200} #=> {"a" => 100} + */ + +VALUE +rb_hash_select(VALUE hash) +{ + VALUE result; + + RETURN_ENUMERATOR(hash, 0, 0); + result = rb_hash_new(); + rb_hash_foreach(hash, select_i, result); + return result; +} + +static int +clear_i(VALUE key, VALUE value, VALUE dummy) +{ + return ST_DELETE; +} + +/* + * call-seq: + * hsh.clear -> hsh + * + * Removes all key-value pairs from hsh. + * + * h = { "a" => 100, "b" => 200 } #=> {"a"=>100, "b"=>200} + * h.clear #=> {} + * + */ + +static VALUE +rb_hash_clear(VALUE hash) +{ + rb_hash_modify_check(hash); + if (!RHASH(hash)->ntbl) + return hash; + if (RHASH(hash)->ntbl->num_entries > 0) { + if (RHASH(hash)->iter_lev > 0) + rb_hash_foreach(hash, clear_i, 0); + else + st_clear(RHASH(hash)->ntbl); + } + + return hash; +} + +/* + * call-seq: + * hsh[key] = value => value + * hsh.store(key, value) => value + * + * Element Assignment---Associates the value given by + * value with the key given by key. + * key should not have its value changed while it is in + * use as a key (a String passed as a key will be + * duplicated and frozen).
+ * + * h = { "a" => 100, "b" => 200 } + * h["a"] = 9 + * h["c"] = 4 + * h #=> {"a"=>9, "b"=>200, "c"=>4} + * + */ + +VALUE +rb_hash_aset(VALUE hash, VALUE key, VALUE val) +{ + rb_hash_modify(hash); + if (RHASH(hash)->ntbl->type == &identhash || + TYPE(key) != T_STRING || st_lookup(RHASH(hash)->ntbl, key, 0)) { + st_insert(RHASH(hash)->ntbl, key, val); + } + else { + st_add_direct(RHASH(hash)->ntbl, rb_str_new4(key), val); + } + return val; +} + +static int +replace_i(VALUE key, VALUE val, VALUE hash) +{ + if (key != Qundef) { + rb_hash_aset(hash, key, val); + } + + return ST_CONTINUE; +} + +/* + * call-seq: + * hsh.replace(other_hash) -> hsh + * + * Replaces the contents of hsh with the contents of + * other_hash. + * + * h = { "a" => 100, "b" => 200 } + * h.replace({ "c" => 300, "d" => 400 }) #=> {"c"=>300, "d"=>400} + * + */ + +static VALUE +rb_hash_replace(VALUE hash, VALUE hash2) +{ + hash2 = to_hash(hash2); + if (hash == hash2) return hash; + rb_hash_clear(hash); + rb_hash_foreach(hash2, replace_i, hash); + RHASH(hash)->ifnone = RHASH(hash2)->ifnone; + if (FL_TEST(hash2, HASH_PROC_DEFAULT)) { + FL_SET(hash, HASH_PROC_DEFAULT); + } + else { + FL_UNSET(hash, HASH_PROC_DEFAULT); + } + + return hash; +} + +/* + * call-seq: + * hsh.length => fixnum + * hsh.size => fixnum + * + * Returns the number of key-value pairs in the hash. + * + * h = { "d" => 100, "a" => 200, "v" => 300, "e" => 400 } + * h.length #=> 4 + * h.delete("a") #=> 200 + * h.length #=> 3 + */ + +static VALUE +rb_hash_size(VALUE hash) +{ + if (!RHASH(hash)->ntbl) + return INT2FIX(0); + return INT2FIX(RHASH(hash)->ntbl->num_entries); +} + + +/* + * call-seq: + * hsh.empty? => true or false + * + * Returns true if hsh contains no key-value pairs. + * + * {}.empty? #=> true + * + */ + +static VALUE +rb_hash_empty_p(VALUE hash) +{ + return RHASH_EMPTY_P(hash) ? 
Qtrue : Qfalse; +} + +static int +each_value_i(VALUE key, VALUE value) +{ + if (key == Qundef) return ST_CONTINUE; + rb_yield(value); + return ST_CONTINUE; +} + +/* + * call-seq: + * hsh.each_value {| value | block } -> hsh + * + * Calls block once for each key in hsh, passing the + * value as a parameter. + * + * h = { "a" => 100, "b" => 200 } + * h.each_value {|value| puts value } + * + * produces: + * + * 100 + * 200 + */ + +static VALUE +rb_hash_each_value(VALUE hash) +{ + RETURN_ENUMERATOR(hash, 0, 0); + rb_hash_foreach(hash, each_value_i, 0); + return hash; +} + +static int +each_key_i(VALUE key, VALUE value) +{ + if (key == Qundef) return ST_CONTINUE; + rb_yield(key); + return ST_CONTINUE; +} + +/* + * call-seq: + * hsh.each_key {| key | block } -> hsh + * + * Calls block once for each key in hsh, passing the key + * as a parameter. + * + * h = { "a" => 100, "b" => 200 } + * h.each_key {|key| puts key } + * + * produces: + * + * a + * b + */ +static VALUE +rb_hash_each_key(VALUE hash) +{ + RETURN_ENUMERATOR(hash, 0, 0); + rb_hash_foreach(hash, each_key_i, 0); + return hash; +} + +static int +each_pair_i(VALUE key, VALUE value) +{ + if (key == Qundef) return ST_CONTINUE; + rb_yield(rb_assoc_new(key, value)); + return ST_CONTINUE; +} + +/* + * call-seq: + * hsh.each {| key, value | block } -> hsh + * hsh.each_pair {| key, value | block } -> hsh + * + * Calls block once for each key in hsh, passing the key-value + * pair as parameters. 
+ * + * h = { "a" => 100, "b" => 200 } + * h.each {|key, value| puts "#{key} is #{value}" } + * + * produces: + * + * a is 100 + * b is 200 + * + */ + +static VALUE +rb_hash_each_pair(VALUE hash) +{ + RETURN_ENUMERATOR(hash, 0, 0); + rb_hash_foreach(hash, each_pair_i, 0); + return hash; +} + +static int +to_a_i(VALUE key, VALUE value, VALUE ary) +{ + if (key == Qundef) return ST_CONTINUE; + rb_ary_push(ary, rb_assoc_new(key, value)); + return ST_CONTINUE; +} + +/* + * call-seq: + * hsh.to_a -> array + * + * Converts hsh to a nested array of [ key, + * value ] arrays. + * + * h = { "c" => 300, "a" => 100, "d" => 400, "c" => 300 } + * h.to_a #=> [["c", 300], ["a", 100], ["d", 400]] + */ + +static VALUE +rb_hash_to_a(VALUE hash) +{ + VALUE ary; + + ary = rb_ary_new(); + rb_hash_foreach(hash, to_a_i, ary); + OBJ_INFECT(ary, hash); + + return ary; +} + +static int +inspect_i(VALUE key, VALUE value, VALUE str) +{ + VALUE str2; + + if (key == Qundef) return ST_CONTINUE; + if (RSTRING_LEN(str) > 1) { + rb_str_cat2(str, ", "); + } + str2 = rb_inspect(key); + rb_str_buf_append(str, str2); + OBJ_INFECT(str, str2); + rb_str_buf_cat2(str, "=>"); + str2 = rb_inspect(value); + rb_str_buf_append(str, str2); + OBJ_INFECT(str, str2); + + return ST_CONTINUE; +} + +static VALUE +inspect_hash(VALUE hash, VALUE dummy, int recur) +{ + VALUE str; + + if (recur) return rb_usascii_str_new2("{...}"); + str = rb_str_buf_new2("{"); + rb_hash_foreach(hash, inspect_i, str); + rb_str_buf_cat2(str, "}"); + OBJ_INFECT(str, hash); + + return str; +} + +/* + * call-seq: + * hsh.to_s => string + * hsh.inspect => string + * + * Return the contents of this hash as a string. 
+ * + * h = { "c" => 300, "a" => 100, "d" => 400, "c" => 300 } + * h.to_s #=> "{\"c\"=>300, \"a\"=>100, \"d\"=>400}" + */ + +static VALUE +rb_hash_inspect(VALUE hash) +{ + if (RHASH_EMPTY_P(hash)) + return rb_usascii_str_new2("{}"); + return rb_exec_recursive(inspect_hash, hash, 0); +} + +/* + * call-seq: + * hsh.to_hash => hsh + * + * Returns self. + */ + +static VALUE +rb_hash_to_hash(VALUE hash) +{ + return hash; +} + +static int +keys_i(VALUE key, VALUE value, VALUE ary) +{ + if (key == Qundef) return ST_CONTINUE; + rb_ary_push(ary, key); + return ST_CONTINUE; +} + +/* + * call-seq: + * hsh.keys => array + * + * Returns a new array populated with the keys from this hash. See also + * Hash#values. + * + * h = { "a" => 100, "b" => 200, "c" => 300, "d" => 400 } + * h.keys #=> ["a", "b", "c", "d"] + * + */ + +static VALUE +rb_hash_keys(VALUE hash) +{ + VALUE ary; + + ary = rb_ary_new(); + rb_hash_foreach(hash, keys_i, ary); + + return ary; +} + +static int +values_i(VALUE key, VALUE value, VALUE ary) +{ + if (key == Qundef) return ST_CONTINUE; + rb_ary_push(ary, value); + return ST_CONTINUE; +} + +/* + * call-seq: + * hsh.values => array + * + * Returns a new array populated with the values from hsh. See + * also Hash#keys. + * + * h = { "a" => 100, "b" => 200, "c" => 300 } + * h.values #=> [100, 200, 300] + * + */ + +static VALUE +rb_hash_values(VALUE hash) +{ + VALUE ary; + + ary = rb_ary_new(); + rb_hash_foreach(hash, values_i, ary); + + return ary; +} + +/* + * call-seq: + * hsh.has_key?(key) => true or false + * hsh.include?(key) => true or false + * hsh.key?(key) => true or false + * hsh.member?(key) => true or false + * + * Returns true if the given key is present in hsh. 
+ * + * h = { "a" => 100, "b" => 200 } + * h.has_key?("a") #=> true + * h.has_key?("z") #=> false + * + */ + +static VALUE +rb_hash_has_key(VALUE hash, VALUE key) +{ + if (!RHASH(hash)->ntbl) + return Qfalse; + if (st_lookup(RHASH(hash)->ntbl, key, 0)) { + return Qtrue; + } + return Qfalse; +} + +static int +rb_hash_search_value(VALUE key, VALUE value, VALUE *data) +{ + if (key == Qundef) return ST_CONTINUE; + if (rb_equal(value, data[1])) { + data[0] = Qtrue; + return ST_STOP; + } + return ST_CONTINUE; +} + +/* + * call-seq: + * hsh.has_value?(value) => true or false + * hsh.value?(value) => true or false + * + * Returns true if the given value is present for some key + * in hsh. + * + * h = { "a" => 100, "b" => 200 } + * h.has_value?(100) #=> true + * h.has_value?(999) #=> false + */ + +static VALUE +rb_hash_has_value(VALUE hash, VALUE val) +{ + VALUE data[2]; + + data[0] = Qfalse; + data[1] = val; + rb_hash_foreach(hash, rb_hash_search_value, (st_data_t)data); + return data[0]; +} + +struct equal_data { + VALUE result; + st_table *tbl; + int eql; +}; + +static int +eql_i(VALUE key, VALUE val1, struct equal_data *data) +{ + VALUE val2; + + if (key == Qundef) return ST_CONTINUE; + if (!st_lookup(data->tbl, key, &val2)) { + data->result = Qfalse; + return ST_STOP; + } + if (!(data->eql ? rb_eql(val1, val2) : rb_equal(val1, val2))) { + data->result = Qfalse; + return ST_STOP; + } + return ST_CONTINUE; +} + +static VALUE +recursive_eql(VALUE hash, VALUE dt, int recur) +{ + struct equal_data *data; + + if (recur) return Qtrue; /* Subtle! 
*/ + data = (struct equal_data*)dt; + data->result = Qtrue; + rb_hash_foreach(hash, eql_i, (st_data_t)data); + + return data->result; +} + +static VALUE +hash_equal(VALUE hash1, VALUE hash2, int eql) +{ + struct equal_data data; + + if (hash1 == hash2) return Qtrue; + if (TYPE(hash2) != T_HASH) { + if (!rb_respond_to(hash2, rb_intern("to_hash"))) { + return Qfalse; + } + if (eql) + return rb_eql(hash2, hash1); + else + return rb_equal(hash2, hash1); + } + if (RHASH_SIZE(hash1) != RHASH_SIZE(hash2)) + return Qfalse; + if (!RHASH(hash1)->ntbl || !RHASH(hash2)->ntbl) + return Qtrue; + if (RHASH(hash1)->ntbl->type != RHASH(hash2)->ntbl->type) + return Qfalse; +#if 0 + if (!(rb_equal(RHASH(hash1)->ifnone, RHASH(hash2)->ifnone) && + FL_TEST(hash1, HASH_PROC_DEFAULT) == FL_TEST(hash2, HASH_PROC_DEFAULT))) + return Qfalse; +#endif + + data.tbl = RHASH(hash2)->ntbl; + data.eql = eql; + return rb_exec_recursive_paired(recursive_eql, hash1, hash2, (VALUE)&data); +} + +/* + * call-seq: + * hsh == other_hash => true or false + * + * Equality---Two hashes are equal if they each contain the same number + * of keys and if each key-value pair is equal to (according to + * Object#==) the corresponding elements in the other + * hash. + * + * h1 = { "a" => 1, "c" => 2 } + * h2 = { 7 => 35, "c" => 2, "a" => 1 } + * h3 = { "a" => 1, "c" => 2, 7 => 35 } + * h4 = { "a" => 1, "d" => 2, "f" => 35 } + * h1 == h2 #=> false + * h2 == h3 #=> true + * h3 == h4 #=> false + * + */ + +static VALUE +rb_hash_equal(VALUE hash1, VALUE hash2) +{ + return hash_equal(hash1, hash2, Qfalse); +} + +/* + * call-seq: + * hash.eql?(other) -> true or false + * + * Returns true if hash and other are + * both hashes with the same content. 
+ */ + +static VALUE +rb_hash_eql(VALUE hash1, VALUE hash2) +{ + return hash_equal(hash1, hash2, Qtrue); +} + +static int +hash_i(VALUE key, VALUE val, int *hval) +{ + if (key == Qundef) return ST_CONTINUE; + *hval ^= rb_hash(key); + *hval ^= rb_hash(val); + return ST_CONTINUE; +} + +static VALUE +recursive_hash(VALUE hash, VALUE dummy, int recur) +{ + int hval; + + if (recur) { + return LONG2FIX(0); + } + if (!RHASH(hash)->ntbl) + return LONG2FIX(0); + hval = RHASH(hash)->ntbl->num_entries; + rb_hash_foreach(hash, hash_i, (st_data_t)&hval); + return INT2FIX(hval); +} + +/* + * call-seq: + * hsh.hash -> fixnum + * + * Compute a hash-code for this hash. Two hashes with the same content + * will have the same hash code (and will compare using eql?). + */ + +static VALUE +rb_hash_hash(VALUE hash) +{ + return rb_exec_recursive(recursive_hash, hash, 0); +} + +static int +rb_hash_invert_i(VALUE key, VALUE value, VALUE hash) +{ + if (key == Qundef) return ST_CONTINUE; + rb_hash_aset(hash, value, key); + return ST_CONTINUE; +} + +/* + * call-seq: + * hsh.invert -> aHash + * + * Returns a new hash created by using hsh's values as keys, and + * the keys as values. 
+ * + * h = { "n" => 100, "m" => 100, "y" => 300, "d" => 200, "a" => 0 } + * h.invert #=> {0=>"a", 100=>"m", 200=>"d", 300=>"y"} + * + */ + +static VALUE +rb_hash_invert(VALUE hash) +{ + VALUE h = rb_hash_new(); + + rb_hash_foreach(hash, rb_hash_invert_i, h); + return h; +} + +static int +rb_hash_update_i(VALUE key, VALUE value, VALUE hash) +{ + if (key == Qundef) return ST_CONTINUE; + rb_hash_aset(hash, key, value); + return ST_CONTINUE; +} + +static int +rb_hash_update_block_i(VALUE key, VALUE value, VALUE hash) +{ + if (key == Qundef) return ST_CONTINUE; + if (rb_hash_has_key(hash, key)) { + value = rb_yield_values(3, key, rb_hash_aref(hash, key), value); + } + rb_hash_aset(hash, key, value); + return ST_CONTINUE; +} + +/* + * call-seq: + * hsh.merge!(other_hash) => hsh + * hsh.update(other_hash) => hsh + * hsh.merge!(other_hash){|key, oldval, newval| block} => hsh + * hsh.update(other_hash){|key, oldval, newval| block} => hsh + * + * Adds the contents of other_hash to hsh. If no + * block is specified entries with duplicate keys are overwritten + * with the values from other_hash, otherwise the value + * of each duplicate key is determined by calling the block with + * the key, its value in hsh and its value in other_hash. 
+ * + * h1 = { "a" => 100, "b" => 200 } + * h2 = { "b" => 254, "c" => 300 } + * h1.merge!(h2) #=> {"a"=>100, "b"=>254, "c"=>300} + * + * h1 = { "a" => 100, "b" => 200 } + * h2 = { "b" => 254, "c" => 300 } + * h1.merge!(h2) { |key, v1, v2| v1 } + * #=> {"a"=>100, "b"=>200, "c"=>300} + */ + +static VALUE +rb_hash_update(VALUE hash1, VALUE hash2) +{ + hash2 = to_hash(hash2); + if (rb_block_given_p()) { + rb_hash_foreach(hash2, rb_hash_update_block_i, hash1); + } + else { + rb_hash_foreach(hash2, rb_hash_update_i, hash1); + } + return hash1; +} + +/* + * call-seq: + * hsh.merge(other_hash) -> a_hash + * hsh.merge(other_hash){|key, oldval, newval| block} -> a_hash + * + * Returns a new hash containing the contents of other_hash and + * the contents of hsh, overwriting entries in hsh with + * duplicate keys with those from other_hash. + * + * h1 = { "a" => 100, "b" => 200 } + * h2 = { "b" => 254, "c" => 300 } + * h1.merge(h2) #=> {"a"=>100, "b"=>254, "c"=>300} + * h1 #=> {"a"=>100, "b"=>200} + * + */ + +static VALUE +rb_hash_merge(VALUE hash1, VALUE hash2) +{ + return rb_hash_update(rb_obj_dup(hash1), hash2); +} + +static int +assoc_i(VALUE key, VALUE val, VALUE *args) +{ + if (key == Qundef) return ST_CONTINUE; + if (RTEST(rb_equal(args[0], key))) { + args[1] = rb_assoc_new(key, val); + return ST_STOP; + } + return ST_CONTINUE; +} + +/* + * call-seq: + * hash.assoc(obj) -> an_array or nil + * + * Searches through the hash comparing _obj_ with the key using ==. + * Returns the key-value pair (two elements array) or +nil+ + * if no match is found. See Array#assoc. 
+ * + * h = {"colors" => ["red", "blue", "green"], + * "letters" => ["a", "b", "c" ]} + * h.assoc("letters") #=> ["letters", ["a", "b", "c"]] + * h.assoc("foo") #=> nil + */ + +VALUE +rb_hash_assoc(VALUE hash, VALUE obj) +{ + VALUE args[2]; + + args[0] = obj; + args[1] = Qnil; + rb_hash_foreach(hash, assoc_i, (st_data_t)args); + return args[1]; +} + +static int +rassoc_i(VALUE key, VALUE val, VALUE *args) +{ + if (key == Qundef) return ST_CONTINUE; + if (RTEST(rb_equal(args[0], val))) { + args[1] = rb_assoc_new(key, val); + return ST_STOP; + } + return ST_CONTINUE; +} + +/* + * call-seq: + * hash.rassoc(key) -> an_array or nil + * + * Searches through the hash comparing _obj_ with the value using ==. + * Returns the first key-value pair (two elements array) that matches. See + * also Array#rassoc. + * + * a = {1=> "one", 2 => "two", 3 => "three", "ii" => "two"} + * a.rassoc("two") #=> [2, "two"] + * a.rassoc("four") #=> nil + */ + +VALUE +rb_hash_rassoc(VALUE hash, VALUE obj) +{ + VALUE args[2]; + + args[0] = obj; + args[1] = Qnil; + rb_hash_foreach(hash, rassoc_i, (st_data_t)args); + return args[1]; +} + +/* + * call-seq: + * hash.flatten -> an_array + * hash.flatten(level) -> an_array + * + * Returns a new array that is a one-dimensional flattening of this + * hash. That is, for every key or value that is an array, extract + * its elements into the new array. Unlike Array#flatten, this + * method does not flatten recursively by default. If the optional + * level argument determines the level of recursion to flatten. 
+ * + * a = {1=> "one", 2 => [2,"two"], 3 => "three"} + * a.flatten # => [1, "one", 2, [2, "two"], 3, "three"] + * a.flatten(2) # => [1, "one", 2, 2, "two", 3, "three"] + */ + +static VALUE +rb_hash_flatten(int argc, VALUE *argv, VALUE hash) +{ + VALUE ary, tmp; + + ary = rb_hash_to_a(hash); + if (argc == 0) { + argc = 1; + tmp = INT2FIX(1); + argv = &tmp; + } + rb_funcall2(ary, rb_intern("flatten!"), argc, argv); + return ary; +} + +/* + * call-seq: + * hsh.compare_by_identity => hsh + * + * Makes hsh to compare its keys by their identity, i.e. it + * will consider exact same objects as same keys. + * + * h1 = { "a" => 100, "b" => 200, :c => "c" } + * h1["a"] #=> 100 + * h1.compare_by_identity + * h1.compare_by_identity? #=> true + * h1["a"] #=> nil # different objects. + * h1[:c] #=> "c" # same symbols are all same. + * + */ + +static VALUE +rb_hash_compare_by_id(VALUE hash) +{ + rb_hash_modify(hash); + RHASH(hash)->ntbl->type = &identhash; + rb_hash_rehash(hash); + return hash; +} + +/* + * call-seq: + * hsh.compare_by_identity? => true or false + * + * Returns true if hsh will compare its keys by + * their identity. Also see Hash#compare_by_identity. 
+ * + */ + +static VALUE +rb_hash_compare_by_id_p(VALUE hash) +{ + if (!RHASH(hash)->ntbl) + return Qfalse; + if (RHASH(hash)->ntbl->type == &identhash) { + return Qtrue; + } + return Qfalse; +} + +static int path_tainted = -1; + +static char **origenviron; +#ifdef _WIN32 +#define GET_ENVIRON(e) (e = rb_w32_get_environ()) +#define FREE_ENVIRON(e) rb_w32_free_environ(e) +static char **my_environ; +#undef environ +#define environ my_environ +#elif defined(__APPLE__) +#undef environ +#define environ (*_NSGetEnviron()) +#define GET_ENVIRON(e) (e) +#define FREE_ENVIRON(e) +#else +extern char **environ; +#define GET_ENVIRON(e) (e) +#define FREE_ENVIRON(e) +#endif +#ifdef ENV_IGNORECASE +#define ENVMATCH(s1, s2) (STRCASECMP(s1, s2) == 0) +#define ENVNMATCH(s1, s2, n) (STRNCASECMP(s1, s2, n) == 0) +#else +#define ENVMATCH(n1, n2) (strcmp(n1, n2) == 0) +#define ENVNMATCH(s1, s2, n) (memcmp(s1, s2, n) == 0) +#endif + +static VALUE +env_str_new(const char *ptr, long len) +{ + VALUE str = rb_tainted_str_new(ptr, len); + + rb_obj_freeze(str); + return str; +} + +static VALUE +env_str_new2(const char *ptr) +{ + if (!ptr) return Qnil; + return env_str_new(ptr, strlen(ptr)); +} + +static VALUE +env_delete(VALUE obj, VALUE name) +{ + char *nam, *val; + + rb_secure(4); + SafeStringValue(name); + nam = RSTRING_PTR(name); + if (strlen(nam) != RSTRING_LEN(name)) { + rb_raise(rb_eArgError, "bad environment variable name"); + } + val = getenv(nam); + if (val) { + VALUE value = env_str_new2(val); + + ruby_setenv(nam, 0); + if (ENVMATCH(nam, PATH_ENV)) { + path_tainted = 0; + } + return value; + } + return Qnil; +} + +static VALUE +env_delete_m(VALUE obj, VALUE name) +{ + VALUE val; + + val = env_delete(obj, name); + if (NIL_P(val) && rb_block_given_p()) rb_yield(name); + return val; +} + +static VALUE +rb_f_getenv(VALUE obj, VALUE name) +{ + char *nam, *env; + + rb_secure(4); + SafeStringValue(name); + nam = RSTRING_PTR(name); + if (strlen(nam) != RSTRING_LEN(name)) { + 
rb_raise(rb_eArgError, "bad environment variable name"); + } + env = getenv(nam); + if (env) { + if (ENVMATCH(nam, PATH_ENV) && !rb_env_path_tainted()) { + VALUE str = rb_str_new2(env); + + rb_obj_freeze(str); + return str; + } + return env_str_new2(env); + } + return Qnil; +} + +static VALUE +env_fetch(int argc, VALUE *argv) +{ + VALUE key, if_none; + long block_given; + char *nam, *env; + + rb_secure(4); + rb_scan_args(argc, argv, "11", &key, &if_none); + block_given = rb_block_given_p(); + if (block_given && argc == 2) { + rb_warn("block supersedes default value argument"); + } + SafeStringValue(key); + nam = RSTRING_PTR(key); + if (strlen(nam) != RSTRING_LEN(key)) { + rb_raise(rb_eArgError, "bad environment variable name"); + } + env = getenv(nam); + if (!env) { + if (block_given) return rb_yield(key); + if (argc == 1) { + rb_raise(rb_eKeyError, "key not found"); + } + return if_none; + } + if (ENVMATCH(nam, PATH_ENV) && !rb_env_path_tainted()) + return rb_str_new2(env); + return env_str_new2(env); +} + +static void +path_tainted_p(char *path) +{ + path_tainted = rb_path_check(path)?0:1; +} + +int +rb_env_path_tainted(void) +{ + if (path_tainted < 0) { + path_tainted_p(getenv(PATH_ENV)); + } + return path_tainted; +} + +#if !defined(_WIN32) && !(defined(HAVE_SETENV) && defined(HAVE_UNSETENV)) +static int +envix(const char *nam) +{ + register int i, len = strlen(nam); + char **env; + + env = GET_ENVIRON(environ); + for (i = 0; env[i]; i++) { + if (ENVNMATCH(env[i],nam,len) && env[i][len] == '=') + break; /* memcmp must come first to avoid */ + } /* potential SEGV's */ + FREE_ENVIRON(environ); + return i; +} +#endif + +void +ruby_setenv(const char *name, const char *value) +{ +#if defined(_WIN32) + /* The sane way to deal with the environment. + * Has these advantages over putenv() & co.: + * * enables us to store a truly empty value in the + * environment (like in UNIX). + * * we don't have to deal with RTL globals, bugs and leaks. + * * Much faster. 
+ * Why you may want to enable USE_WIN32_RTL_ENV: + * * environ[] and RTL functions will not reflect changes, + * which might be an issue if extensions want to access + * the env. via RTL. This cuts both ways, since RTL will + * not see changes made by extensions that call the Win32 + * functions directly, either. + * GSAR 97-06-07 + * + * REMARK: USE_WIN32_RTL_ENV is already obsoleted since we don't use + * RTL's environ global variable directly yet. + */ + SetEnvironmentVariable(name,value); +#elif defined(HAVE_SETENV) && defined(HAVE_UNSETENV) +#undef setenv +#undef unsetenv + if (value) + setenv(name,value,1); + else + unsetenv(name); +#else /* WIN32 */ + size_t len; + int i=envix(name); /* where does it go? */ + + if (environ == origenviron) { /* need we copy environment? */ + int j; + int max; + char **tmpenv; + + for (max = i; environ[max]; max++) ; + tmpenv = ALLOC_N(char*, max+2); + for (j=0; j= 4) { + rb_raise(rb_eSecurityError, "can't change environment variable"); + } + + if (NIL_P(val)) { + env_delete(obj, nm); + return Qnil; + } + StringValue(nm); + StringValue(val); + name = RSTRING_PTR(nm); + value = RSTRING_PTR(val); + if (strlen(name) != RSTRING_LEN(nm)) + rb_raise(rb_eArgError, "bad environment variable name"); + if (strlen(value) != RSTRING_LEN(val)) + rb_raise(rb_eArgError, "bad environment variable value"); + + ruby_setenv(name, value); + if (ENVMATCH(name, PATH_ENV)) { + if (OBJ_TAINTED(val)) { + /* already tainted, no check */ + path_tainted = 1; + return val; + } + else { + path_tainted_p(value); + } + } + return val; +} + +static VALUE +env_keys(void) +{ + char **env; + VALUE ary; + + rb_secure(4); + ary = rb_ary_new(); + env = GET_ENVIRON(environ); + while (*env) { + char *s = strchr(*env, '='); + if (s) { + rb_ary_push(ary, env_str_new(*env, s-*env)); + } + env++; + } + FREE_ENVIRON(environ); + return ary; +} + +static VALUE +env_each_key(VALUE ehash) +{ + VALUE keys; + long i; + + RETURN_ENUMERATOR(ehash, 0, 0); + keys = env_keys(); /* 
rb_secure(4); */ + for (i=0; i"); + i = rb_inspect(rb_str_new2(s+1)); + rb_str_buf_append(str, i); + } + env++; + } + FREE_ENVIRON(environ); + rb_str_buf_cat2(str, "}"); + OBJ_TAINT(str); + + return str; +} + +static VALUE +env_to_a(void) +{ + char **env; + VALUE ary; + + rb_secure(4); + ary = rb_ary_new(); + env = GET_ENVIRON(environ); + while (*env) { + char *s = strchr(*env, '='); + if (s) { + rb_ary_push(ary, rb_assoc_new(env_str_new(*env, s-*env), + env_str_new2(s+1))); + } + env++; + } + FREE_ENVIRON(environ); + return ary; +} + +static VALUE +env_none(void) +{ + return Qnil; +} + +static VALUE +env_size(void) +{ + int i; + char **env; + + rb_secure(4); + env = GET_ENVIRON(environ); + for(i=0; env[i]; i++) + ; + FREE_ENVIRON(environ); + return INT2FIX(i); +} + +static VALUE +env_empty_p(void) +{ + char **env; + + rb_secure(4); + env = GET_ENVIRON(environ); + if (env[0] == 0) { + FREE_ENVIRON(environ); + return Qtrue; + } + FREE_ENVIRON(environ); + return Qfalse; +} + +static VALUE +env_has_key(VALUE env, VALUE key) +{ + char *s; + + rb_secure(4); + s = StringValuePtr(key); + if (strlen(s) != RSTRING_LEN(key)) + rb_raise(rb_eArgError, "bad environment variable name"); + if (getenv(s)) return Qtrue; + return Qfalse; +} + +static VALUE +env_assoc(VALUE env, VALUE key) +{ + char *s, *e; + + rb_secure(4); + s = StringValuePtr(key); + if (strlen(s) != RSTRING_LEN(key)) + rb_raise(rb_eArgError, "bad environment variable name"); + e = getenv(s); + if (e) return rb_assoc_new(key, rb_tainted_str_new2(e)); + return Qnil; +} + +static VALUE +env_has_value(VALUE dmy, VALUE obj) +{ + char **env; + + rb_secure(4); + obj = rb_check_string_type(obj); + if (NIL_P(obj)) return Qnil; + env = GET_ENVIRON(environ); + while (*env) { + char *s = strchr(*env, '='); + if (s++) { + long len = strlen(s); + if (RSTRING_LEN(obj) == len && strncmp(s, RSTRING_PTR(obj), len) == 0) { + FREE_ENVIRON(environ); + return Qtrue; + } + } + env++; + } + FREE_ENVIRON(environ); + return Qfalse; +} + 
+static VALUE +env_rassoc(VALUE dmy, VALUE obj) +{ + char **env; + + rb_secure(4); + obj = rb_check_string_type(obj); + if (NIL_P(obj)) return Qnil; + env = GET_ENVIRON(environ); + while (*env) { + char *s = strchr(*env, '='); + if (s++) { + long len = strlen(s); + if (RSTRING_LEN(obj) == len && strncmp(s, RSTRING_PTR(obj), len) == 0) { + VALUE result = rb_assoc_new(rb_tainted_str_new(*env, s-*env-1), obj); + FREE_ENVIRON(environ); + return result; + } + } + env++; + } + FREE_ENVIRON(environ); + return Qnil; +} + +static VALUE +env_key(VALUE dmy, VALUE value) +{ + char **env; + VALUE str; + + rb_secure(4); + StringValue(value); + env = GET_ENVIRON(environ); + while (*env) { + char *s = strchr(*env, '='); + if (s++) { + long len = strlen(s); + if (RSTRING_LEN(value) == len && strncmp(s, RSTRING_PTR(value), len) == 0) { + str = env_str_new(*env, s-*env-1); + FREE_ENVIRON(environ); + return str; + } + } + env++; + } + FREE_ENVIRON(environ); + return Qnil; +} + +static VALUE +env_index(VALUE dmy, VALUE value) +{ + rb_warn("ENV.index is deprecated; use ENV.key"); + return env_key(dmy, value); +} + +static VALUE +env_to_hash(void) +{ + char **env; + VALUE hash; + + rb_secure(4); + hash = rb_hash_new(); + env = GET_ENVIRON(environ); + while (*env) { + char *s = strchr(*env, '='); + if (s) { + rb_hash_aset(hash, env_str_new(*env, s-*env), + env_str_new2(s+1)); + } + env++; + } + FREE_ENVIRON(environ); + return hash; +} + +static VALUE +env_reject(void) +{ + return rb_hash_delete_if(env_to_hash()); +} + +static VALUE +env_shift(void) +{ + char **env; + + rb_secure(4); + env = GET_ENVIRON(environ); + if (*env) { + char *s = strchr(*env, '='); + if (s) { + VALUE key = env_str_new(*env, s-*env); + VALUE val = env_str_new2(getenv(RSTRING_PTR(key))); + env_delete(Qnil, key); + return rb_assoc_new(key, val); + } + } + FREE_ENVIRON(environ); + return Qnil; +} + +static VALUE +env_invert(void) +{ + return rb_hash_invert(env_to_hash()); +} + +static int +env_replace_i(VALUE key, 
VALUE val, VALUE keys) +{ + if (key != Qundef) { + env_aset(Qnil, key, val); + if (rb_ary_includes(keys, key)) { + rb_ary_delete(keys, key); + } + } + return ST_CONTINUE; +} + +static VALUE +env_replace(VALUE env, VALUE hash) +{ + volatile VALUE keys; + long i; + + keys = env_keys(); /* rb_secure(4); */ + if (env == hash) return env; + hash = to_hash(hash); + rb_hash_foreach(hash, env_replace_i, keys); + + for (i=0; iHash is a collection of key-value pairs. It is + * similar to an Array, except that indexing is done via + * arbitrary keys of any object type, not an integer index. The order + * in which you traverse a hash by either key or value may seem + * arbitrary, and will generally not be in the insertion order. + * + * Hashes have a default value that is returned when accessing + * keys that do not exist in the hash. By default, that value is + * nil. + * + */ + +void +Init_Hash(void) +{ +#undef rb_intern +#define rb_intern(str) rb_intern_const(str) + + id_hash = rb_intern("hash"); + id_yield = rb_intern("yield"); + id_default = rb_intern("default"); + + rb_cHash = rb_define_class("Hash", rb_cObject); + + rb_include_module(rb_cHash, rb_mEnumerable); + + rb_define_alloc_func(rb_cHash, hash_alloc); + rb_define_singleton_method(rb_cHash, "[]", rb_hash_s_create, -1); + rb_define_singleton_method(rb_cHash, "try_convert", rb_hash_s_try_convert, 1); + rb_define_method(rb_cHash,"initialize", rb_hash_initialize, -1); + rb_define_method(rb_cHash,"initialize_copy", rb_hash_replace, 1); + rb_define_method(rb_cHash,"rehash", rb_hash_rehash, 0); + + rb_define_method(rb_cHash,"to_hash", rb_hash_to_hash, 0); + rb_define_method(rb_cHash,"to_a", rb_hash_to_a, 0); + rb_define_method(rb_cHash,"to_s", rb_hash_inspect, 0); + rb_define_method(rb_cHash,"inspect", rb_hash_inspect, 0); + + rb_define_method(rb_cHash,"==", rb_hash_equal, 1); + rb_define_method(rb_cHash,"[]", rb_hash_aref, 1); + rb_define_method(rb_cHash,"hash", rb_hash_hash, 0); + rb_define_method(rb_cHash,"eql?", 
rb_hash_eql, 1); + rb_define_method(rb_cHash,"fetch", rb_hash_fetch_m, -1); + rb_define_method(rb_cHash,"[]=", rb_hash_aset, 2); + rb_define_method(rb_cHash,"store", rb_hash_aset, 2); + rb_define_method(rb_cHash,"default", rb_hash_default, -1); + rb_define_method(rb_cHash,"default=", rb_hash_set_default, 1); + rb_define_method(rb_cHash,"default_proc", rb_hash_default_proc, 0); + rb_define_method(rb_cHash,"default_proc=", rb_hash_set_default_proc, 1); + rb_define_method(rb_cHash,"key", rb_hash_key, 1); + rb_define_method(rb_cHash,"index", rb_hash_index, 1); + rb_define_method(rb_cHash,"size", rb_hash_size, 0); + rb_define_method(rb_cHash,"length", rb_hash_size, 0); + rb_define_method(rb_cHash,"empty?", rb_hash_empty_p, 0); + + rb_define_method(rb_cHash,"each_value", rb_hash_each_value, 0); + rb_define_method(rb_cHash,"each_key", rb_hash_each_key, 0); + rb_define_method(rb_cHash,"each_pair", rb_hash_each_pair, 0); + rb_define_method(rb_cHash,"each", rb_hash_each_pair, 0); + + rb_define_method(rb_cHash,"keys", rb_hash_keys, 0); + rb_define_method(rb_cHash,"values", rb_hash_values, 0); + rb_define_method(rb_cHash,"values_at", rb_hash_values_at, -1); + + rb_define_method(rb_cHash,"shift", rb_hash_shift, 0); + rb_define_method(rb_cHash,"delete", rb_hash_delete, 1); + rb_define_method(rb_cHash,"delete_if", rb_hash_delete_if, 0); + rb_define_method(rb_cHash,"select", rb_hash_select, 0); + rb_define_method(rb_cHash,"reject", rb_hash_reject, 0); + rb_define_method(rb_cHash,"reject!", rb_hash_reject_bang, 0); + rb_define_method(rb_cHash,"clear", rb_hash_clear, 0); + rb_define_method(rb_cHash,"invert", rb_hash_invert, 0); + rb_define_method(rb_cHash,"update", rb_hash_update, 1); + rb_define_method(rb_cHash,"replace", rb_hash_replace, 1); + rb_define_method(rb_cHash,"merge!", rb_hash_update, 1); + rb_define_method(rb_cHash,"merge", rb_hash_merge, 1); + rb_define_method(rb_cHash, "assoc", rb_hash_assoc, 1); + rb_define_method(rb_cHash, "rassoc", rb_hash_rassoc, 1); + 
rb_define_method(rb_cHash, "flatten", rb_hash_flatten, -1); + + rb_define_method(rb_cHash,"include?", rb_hash_has_key, 1); + rb_define_method(rb_cHash,"member?", rb_hash_has_key, 1); + rb_define_method(rb_cHash,"has_key?", rb_hash_has_key, 1); + rb_define_method(rb_cHash,"has_value?", rb_hash_has_value, 1); + rb_define_method(rb_cHash,"key?", rb_hash_has_key, 1); + rb_define_method(rb_cHash,"value?", rb_hash_has_value, 1); + + rb_define_method(rb_cHash,"compare_by_identity", rb_hash_compare_by_id, 0); + rb_define_method(rb_cHash,"compare_by_identity?", rb_hash_compare_by_id_p, 0); + + origenviron = environ; + envtbl = rb_obj_alloc(rb_cObject); + rb_extend_object(envtbl, rb_mEnumerable); + + rb_define_singleton_method(envtbl,"[]", rb_f_getenv, 1); + rb_define_singleton_method(envtbl,"fetch", env_fetch, -1); + rb_define_singleton_method(envtbl,"[]=", env_aset, 2); + rb_define_singleton_method(envtbl,"store", env_aset, 2); + rb_define_singleton_method(envtbl,"each", env_each_pair, 0); + rb_define_singleton_method(envtbl,"each_pair", env_each_pair, 0); + rb_define_singleton_method(envtbl,"each_key", env_each_key, 0); + rb_define_singleton_method(envtbl,"each_value", env_each_value, 0); + rb_define_singleton_method(envtbl,"delete", env_delete_m, 1); + rb_define_singleton_method(envtbl,"delete_if", env_delete_if, 0); + rb_define_singleton_method(envtbl,"clear", rb_env_clear, 0); + rb_define_singleton_method(envtbl,"reject", env_reject, 0); + rb_define_singleton_method(envtbl,"reject!", env_reject_bang, 0); + rb_define_singleton_method(envtbl,"select", env_select, 0); + rb_define_singleton_method(envtbl,"shift", env_shift, 0); + rb_define_singleton_method(envtbl,"invert", env_invert, 0); + rb_define_singleton_method(envtbl,"replace", env_replace, 1); + rb_define_singleton_method(envtbl,"update", env_update, 1); + rb_define_singleton_method(envtbl,"inspect", env_inspect, 0); + rb_define_singleton_method(envtbl,"rehash", env_none, 0); + 
rb_define_singleton_method(envtbl,"to_a", env_to_a, 0); + rb_define_singleton_method(envtbl,"to_s", env_to_s, 0); + rb_define_singleton_method(envtbl,"key", env_key, 1); + rb_define_singleton_method(envtbl,"index", env_index, 1); + rb_define_singleton_method(envtbl,"size", env_size, 0); + rb_define_singleton_method(envtbl,"length", env_size, 0); + rb_define_singleton_method(envtbl,"empty?", env_empty_p, 0); + rb_define_singleton_method(envtbl,"keys", env_keys, 0); + rb_define_singleton_method(envtbl,"values", env_values, 0); + rb_define_singleton_method(envtbl,"values_at", env_values_at, -1); + rb_define_singleton_method(envtbl,"include?", env_has_key, 1); + rb_define_singleton_method(envtbl,"member?", env_has_key, 1); + rb_define_singleton_method(envtbl,"has_key?", env_has_key, 1); + rb_define_singleton_method(envtbl,"has_value?", env_has_value, 1); + rb_define_singleton_method(envtbl,"key?", env_has_key, 1); + rb_define_singleton_method(envtbl,"value?", env_has_value, 1); + rb_define_singleton_method(envtbl,"to_hash", env_to_hash, 0); + rb_define_singleton_method(envtbl,"assoc", env_assoc, 1); + rb_define_singleton_method(envtbl,"rassoc", env_rassoc, 1); + + rb_define_global_const("ENV", envtbl); +} diff --git a/ia64.s b/ia64.s new file mode 100644 index 0000000..92415b4 --- /dev/null +++ b/ia64.s @@ -0,0 +1,42 @@ +// rb_ia64_flushrs and rb_ia64_bsp is written in IA64 assembly language +// because Intel Compiler for IA64 doesn't support inline assembly. +// +// This file is based on following C program compiled by gcc. +// +// void rb_ia64_flushrs(void) { __builtin_ia64_flushrs(); } +// void *rb_ia64_bsp(void) { return __builtin_ia64_bsp(); } +// +// Note that rb_ia64_flushrs and rb_ia64_bsp works in its own stack frame. +// It's because BSP is updated by br.call/brl.call (not alloc instruction). +// So rb_ia64_flushrs flushes stack frames including caller's one. +// rb_ia64_bsp returns the address next to caller's register stack frame. 
+// +// See also +// Intel Itanium Architecture Software Developer's Manual +// Volume 2: System Architecture. +// + .file "ia64.c" + .text + .align 16 + .global rb_ia64_flushrs# + .proc rb_ia64_flushrs# +rb_ia64_flushrs: + .prologue + .body + flushrs + ;; + nop.i 0 + br.ret.sptk.many b0 + .endp rb_ia64_flushrs# + .align 16 + .global rb_ia64_bsp# + .proc rb_ia64_bsp# +rb_ia64_bsp: + .prologue + .body + nop.m 0 + ;; + mov r8 = ar.bsp + br.ret.sptk.many b0 + .endp rb_ia64_bsp# + .ident "GCC: (GNU) 3.3.5 (Debian 1:3.3.5-13)" diff --git a/id.c b/id.c new file mode 100644 index 0000000..7ce211a --- /dev/null +++ b/id.c @@ -0,0 +1,50 @@ +/********************************************************************** + + id.c - + + $Author: nobu $ + created at: Thu Jul 12 04:37:51 2007 + + Copyright (C) 2004-2007 Koichi Sasada + +**********************************************************************/ + +#include "ruby/ruby.h" + +#include "id.h" + +static void +Init_id(void) +{ +#undef rb_intern +#define rb_intern(str) rb_intern_const(str) + rb_encoding *enc = rb_usascii_encoding(); + + REGISTER_SYMID(idNULL, ""); + REGISTER_SYMID(idIFUNC, ""), + REGISTER_SYMID(idCFUNC, ""), + REGISTER_SYMID(idRespond_to, "respond_to?"), + REGISTER_SYMID(idThrowState, "#__ThrowState__"), + + REGISTER_SYMID(id_core_set_method_alias, "core#set_method_alias"), + REGISTER_SYMID(id_core_set_variable_alias, "core#set_variable_alias"), + REGISTER_SYMID(id_core_undef_method, "core#undef_method"), + REGISTER_SYMID(id_core_define_method, "core#define_method"), + REGISTER_SYMID(id_core_define_singleton_method, "core#define_singleton_method"), + REGISTER_SYMID(id_core_set_postexe, "core#set_postexe"), + + REGISTER_SYMID(idEach, "each"); + REGISTER_SYMID(idLength, "length"); + REGISTER_SYMID(idLambda, "lambda"); + REGISTER_SYMID(idIntern, "intern"); + REGISTER_SYMID(idGets, "gets"); + REGISTER_SYMID(idSucc, "succ"); + REGISTER_SYMID(idMethodMissing, "method_missing"); +#if SUPPORT_JOKE + 
REGISTER_SYMID(idBitblt, "bitblt"); + REGISTER_SYMID(idAnswer, "the_answer_to_life_the_universe_and_everything"); +#endif + REGISTER_SYMID(idSend, "send"); + REGISTER_SYMID(id__send__, "__send__"); + REGISTER_SYMID(idInitialize, "initialize"); +} diff --git a/id.h b/id.h new file mode 100644 index 0000000..12499d0 --- /dev/null +++ b/id.h @@ -0,0 +1,163 @@ +/* DO NOT EDIT THIS FILE DIRECTLY */ +/********************************************************************** + + id.h - + + $Author: nobu $ + created at: Sun Oct 19 21:12:51 2008 + + Copyright (C) 2007 Koichi Sasada + +**********************************************************************/ + +#ifndef RUBY_ID_H +#define RUBY_ID_H + +#define ID_SCOPE_SHIFT 3 +#define ID_SCOPE_MASK 0x07 +#define ID_LOCAL 0x00 +#define ID_INSTANCE 0x01 +#define ID_GLOBAL 0x03 +#define ID_ATTRSET 0x04 +#define ID_CONST 0x05 +#define ID_CLASS 0x06 +#define ID_JUNK 0x07 +#define ID_INTERNAL ID_JUNK + +#ifdef USE_PARSE_H +#include "parse.h" +#endif + +#define symIFUNC ID2SYM(idIFUNC) +#define symCFUNC ID2SYM(idCFUNC) + +#if !defined tLAST_TOKEN && defined YYTOKENTYPE +#define tLAST_TOKEN tLAST_TOKEN +#endif + +enum ruby_method_ids { +#ifndef tLAST_TOKEN + tUPLUS = 321, + tUMINUS = 322, + tPOW = 323, + tCMP = 324, + tEQ = 325, + tEQQ = 326, + tNEQ = 327, + tGEQ = 328, + tLEQ = 329, + tANDOP = 330, + tOROP = 331, + tMATCH = 332, + tNMATCH = 333, + tDOT2 = 334, + tDOT3 = 335, + tAREF = 336, + tASET = 337, + tLSHFT = 338, + tRSHFT = 339, + tLAMBDA = 352, + idNULL = 365, + idRespond_to = 366, + idIFUNC = 367, + idCFUNC = 368, + idThrowState = 369, + id_core_set_method_alias = 370, + id_core_set_variable_alias = 371, + id_core_undef_method = 372, + id_core_define_method = 373, + id_core_define_singleton_method = 374, + id_core_set_postexe = 375, + tLAST_TOKEN = 376, +#endif + idPLUS = '+', + idMINUS = '-', + idMULT = '*', + idDIV = '/', + idMOD = '%', + idLT = '<', + idLTLT = tLSHFT, + idLE = tLEQ, + idGT = '>', + idGE = tGEQ, + idEq = tEQ, 
+ idEqq = tEQQ, + idNeq = tNEQ, + idNot = '!', + idBackquote = '`', + idEqTilde = tMATCH, + idAREF = tAREF, + idASET = tASET, + idLAST_TOKEN = tLAST_TOKEN >> ID_SCOPE_SHIFT, + tIntern, + tMethodMissing, + tLength, + tGets, + tSucc, + tEach, + tLambda, + tSend, + t__send__, + tInitialize, +#if SUPPORT_JOKE + tBitblt, + tAnswer, +#endif + tLAST_ID, +#define TOKEN2ID(n) id##n = ((t##n< +#ifdef __cplusplus +# ifndef HAVE_PROTOTYPES +# define HAVE_PROTOTYPES 1 +# endif +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +#undef _ +#ifdef HAVE_PROTOTYPES +# define _(args) args +#else +# define _(args) () +#endif + +#undef __ +#ifdef HAVE_STDARG_PROTOTYPES +# define __(args) args +#else +# define __(args) () +#endif + +#ifdef __cplusplus +#define ANYARGS ... +#else +#define ANYARGS +#endif + +#define xmalloc ruby_xmalloc +#define xmalloc2 ruby_xmalloc2 +#define xcalloc ruby_xcalloc +#define xrealloc ruby_xrealloc +#define xrealloc2 ruby_xrealloc2 +#define xfree ruby_xfree + +void *xmalloc(size_t); +void *xmalloc2(size_t,size_t); +void *xcalloc(size_t,size_t); +void *xrealloc(void*,size_t); +void *xrealloc2(void*,size_t,size_t); +void xfree(void*); + +#define STRINGIZE(expr) STRINGIZE0(expr) +#ifndef STRINGIZE0 +#define STRINGIZE0(expr) #expr +#endif + +#if SIZEOF_LONG_LONG > 0 +# define LONG_LONG long long +#elif SIZEOF___INT64 > 0 +# define HAVE_LONG_LONG 1 +# define LONG_LONG __int64 +# undef SIZEOF_LONG_LONG +# define SIZEOF_LONG_LONG SIZEOF___INT64 +#endif + +#if SIZEOF_INT*2 <= SIZEOF_LONG_LONG +# define BDIGIT unsigned int +# define SIZEOF_BDIGITS SIZEOF_INT +# define BDIGIT_DBL unsigned LONG_LONG +# define BDIGIT_DBL_SIGNED LONG_LONG +#elif SIZEOF_INT*2 <= SIZEOF_LONG +# define BDIGIT unsigned int +# define SIZEOF_BDIGITS SIZEOF_INT +# define BDIGIT_DBL unsigned long +# define BDIGIT_DBL_SIGNED long +#elif SIZEOF_SHORT*2 <= SIZEOF_LONG +# define BDIGIT unsigned short +# define SIZEOF_BDIGITS SIZEOF_SHORT +# define BDIGIT_DBL 
unsigned long +# define BDIGIT_DBL_SIGNED long +#else +# define BDIGIT unsigned short +# define SIZEOF_BDIGITS (SIZEOF_LONG/2) +# define BDIGIT_DBL unsigned long +# define BDIGIT_DBL_SIGNED long +#endif + +#ifdef __CYGWIN__ +#undef _WIN32 +#endif + +#if defined(_WIN32) || defined(__EMX__) +#define DOSISH 1 +# define DOSISH_DRIVE_LETTER +#endif + +#if defined(__NeXT__) || defined(__APPLE__) +/* Do not trust WORDS_BIGENDIAN from configure since -arch compiler flag may + result in a different endian. Instead trust __BIG_ENDIAN__ and + __LITTLE_ENDIAN__ which are set correctly by -arch. */ +#undef WORDS_BIGENDIAN +#ifdef __BIG_ENDIAN__ +#define WORDS_BIGENDIAN +#endif +#endif + +#ifdef __NeXT__ +/* NextStep, OpenStep, Rhapsody */ +#ifndef S_IRUSR +#define S_IRUSR 0000400 /* read permission, owner */ +#endif +#ifndef S_IRGRP +#define S_IRGRP 0000040 /* read permission, group */ +#endif +#ifndef S_IROTH +#define S_IROTH 0000004 /* read permission, other */ +#endif +#ifndef S_IWUSR +#define S_IWUSR 0000200 /* write permission, owner */ +#endif +#ifndef S_IWGRP +#define S_IWGRP 0000020 /* write permission, group */ +#endif +#ifndef S_IWOTH +#define S_IWOTH 0000002 /* write permission, other */ +#endif +#ifndef S_IXUSR +#define S_IXUSR 0000100 /* execute/search permission, owner */ +#endif +#ifndef S_IXGRP +#define S_IXGRP 0000010 /* execute/search permission, group */ +#endif +#ifndef S_IXOTH +#define S_IXOTH 0000001 /* execute/search permission, other */ +#endif +#ifndef S_IRWXU +#define S_IRWXU 0000700 /* read, write, execute permissions, owner */ +#endif +#ifndef S_IRWXG +#define S_IRWXG 0000070 /* read, write, execute permissions, group */ +#endif +#ifndef S_IRWXO +#define S_IRWXO 0000007 /* read, write, execute permissions, other */ +#endif +#ifndef S_ISBLK +#define S_ISBLK(mode) (((mode) & (0170000)) == (0060000)) +#endif +#ifndef S_ISCHR +#define S_ISCHR(mode) (((mode) & (0170000)) == (0020000)) +#endif +#ifndef S_ISDIR +#define S_ISDIR(mode) (((mode) & (0170000)) 
== (0040000)) +#endif +#ifndef S_ISFIFO +#define S_ISFIFO(mode) (((mode) & (0170000)) == (0010000)) +#endif +#ifndef S_ISREG +#define S_ISREG(mode) (((mode) & (0170000)) == (0100000)) +#endif +#ifndef __APPLE__ +/* NextStep, OpenStep (but not Rhapsody) */ +#ifndef GETPGRP_VOID +#define GETPGRP_VOID 1 +#endif +#ifndef WNOHANG +#define WNOHANG 01 +#endif +#ifndef WUNTRACED +#define WUNTRACED 02 +#endif +#ifndef X_OK +#define X_OK 1 +#endif +#endif /* __APPLE__ */ +#endif /* NeXT */ + +#ifdef _WIN32 +#include "ruby/win32.h" +#endif + +#if defined(__BEOS__) && !defined(__HAIKU__) && !defined(BONE) +#include /* intern.h needs fd_set definition */ +#endif + +#ifdef RUBY_EXPORT +#undef RUBY_EXTERN +#endif + +#ifndef RUBY_EXTERN +#define RUBY_EXTERN extern +#endif + +#ifndef EXTERN +#define EXTERN RUBY_EXTERN /* deprecated */ +#endif + +#ifndef RUBY_MBCHAR_MAXSIZE +#define RUBY_MBCHAR_MAXSIZE INT_MAX + /* MB_CUR_MAX will not work well in C locale */ +#endif + +#if defined(sparc) || defined(__sparc__) +static inline void +flush_register_windows(void) +{ + asm +#ifdef __GNUC__ + volatile +#endif +# if defined(__sparc_v9__) || defined(__sparcv9) || defined(__arch64__) + ("flushw") +# else + ("ta 0x03") +# endif /* trap always to flush register windows if we are on a Sparc system */ + ; +} +# define FLUSH_REGISTER_WINDOWS flush_register_windows() +#elif defined(__ia64) +void *rb_ia64_bsp(void); +void rb_ia64_flushrs(void); +# define FLUSH_REGISTER_WINDOWS rb_ia64_flushrs() +#else +# define FLUSH_REGISTER_WINDOWS ((void)0) +#endif + +#if defined(DOSISH) +#define PATH_SEP ";" +#else +#define PATH_SEP ":" +#endif +#define PATH_SEP_CHAR PATH_SEP[0] + +#define PATH_ENV "PATH" + +#if defined(DOSISH) && !defined(__EMX__) +#define ENV_IGNORECASE +#endif + +#ifndef CASEFOLD_FILESYSTEM +# if defined DOSISH +# define CASEFOLD_FILESYSTEM 1 +# else +# define CASEFOLD_FILESYSTEM 0 +# endif +#endif + +#ifndef DLEXT_MAXLEN +#define DLEXT_MAXLEN 4 +#endif + +#ifndef RUBY_PLATFORM +#define 
RUBY_PLATFORM "unknown-unknown" +#endif + +#ifndef RUBY_ALIAS_FUNCTION +#define RUBY_ALIAS_FUNCTION(old_prot, new_name, args) \ + VALUE old_prot {return new_name args;} +#endif + +#if defined(__cplusplus) +#if 0 +{ /* satisfy cc-mode */ +#endif +} /* extern "C" { */ +#endif + +#endif /* RUBY_DEFINES_H */ diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h new file mode 100644 index 0000000..6a808a5 --- /dev/null +++ b/include/ruby/encoding.h @@ -0,0 +1,297 @@ +/********************************************************************** + + encoding.h - + + $Author: matz $ + created at: Thu May 24 11:49:41 JST 2007 + + Copyright (C) 2007 Yukihiro Matsumoto + +**********************************************************************/ + +#ifndef RUBY_ENCODING_H +#define RUBY_ENCODING_H 1 + +#include +#include "ruby/oniguruma.h" + +#define ENCODING_INLINE_MAX 1023 +#define ENCODING_SHIFT (FL_USHIFT+10) +#define ENCODING_MASK (ENCODING_INLINE_MAX<flags &= ~ENCODING_MASK;\ + RBASIC(obj)->flags |= (i) << ENCODING_SHIFT;\ +} while (0) +#define ENCODING_SET(obj,i) do {\ + VALUE rb_encoding_set_obj = (obj); \ + int encoding_set_enc_index = (i); \ + if (encoding_set_enc_index < ENCODING_INLINE_MAX) \ + ENCODING_SET_INLINED(rb_encoding_set_obj, encoding_set_enc_index); \ + else \ + rb_enc_set_index(rb_encoding_set_obj, encoding_set_enc_index); \ +} while (0) + +#define ENCODING_GET_INLINED(obj) ((RBASIC(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT) +#define ENCODING_GET(obj) \ + (ENCODING_GET_INLINED(obj) != ENCODING_INLINE_MAX ? 
\ + ENCODING_GET_INLINED(obj) : \ + rb_enc_get_index(obj)) + +#define ENCODING_IS_ASCII8BIT(obj) (ENCODING_GET_INLINED(obj) == 0) + +#define ENCODING_MAXNAMELEN 42 + +#define ENC_CODERANGE_MASK (FL_USER8|FL_USER9) +#define ENC_CODERANGE_UNKNOWN 0 +#define ENC_CODERANGE_7BIT FL_USER8 +#define ENC_CODERANGE_VALID FL_USER9 +#define ENC_CODERANGE_BROKEN (FL_USER8|FL_USER9) +#define ENC_CODERANGE(obj) (RBASIC(obj)->flags & ENC_CODERANGE_MASK) +#define ENC_CODERANGE_ASCIIONLY(obj) (ENC_CODERANGE(obj) == ENC_CODERANGE_7BIT) +#define ENC_CODERANGE_SET(obj,cr) (RBASIC(obj)->flags = \ + (RBASIC(obj)->flags & ~ENC_CODERANGE_MASK) | (cr)) +#define ENC_CODERANGE_CLEAR(obj) ENC_CODERANGE_SET(obj,0) + +/* assumed ASCII compatibility */ +#define ENC_CODERANGE_AND(a, b) \ + (a == ENC_CODERANGE_7BIT ? b : \ + a == ENC_CODERANGE_VALID ? (b == ENC_CODERANGE_7BIT ? ENC_CODERANGE_VALID : b) : \ + ENC_CODERANGE_UNKNOWN) + +#define ENCODING_CODERANGE_SET(obj, encindex, cr) \ + do { \ + VALUE rb_encoding_coderange_obj = (obj); \ + ENCODING_SET(rb_encoding_coderange_obj, (encindex)); \ + ENC_CODERANGE_SET(rb_encoding_coderange_obj, (cr)); \ + } while (0) + +typedef OnigEncodingType rb_encoding; + +int rb_enc_replicate(const char *, rb_encoding *); +int rb_define_dummy_encoding(const char *); +#define rb_enc_to_index(enc) ((enc) ? 
ENC_TO_ENCINDEX(enc) : 0) +int rb_enc_get_index(VALUE obj); +void rb_enc_set_index(VALUE obj, int encindex); +int rb_enc_find_index(const char *name); +int rb_to_encoding_index(VALUE); +rb_encoding* rb_to_encoding(VALUE); +rb_encoding* rb_enc_get(VALUE); +rb_encoding* rb_enc_compatible(VALUE,VALUE); +rb_encoding* rb_enc_check(VALUE,VALUE); +VALUE rb_enc_associate_index(VALUE, int); +VALUE rb_enc_associate(VALUE, rb_encoding*); +void rb_enc_copy(VALUE dst, VALUE src); + +VALUE rb_enc_str_new(const char*, long, rb_encoding*); +VALUE rb_enc_reg_new(const char*, long, rb_encoding*, int); +PRINTF_ARGS(VALUE rb_enc_sprintf(rb_encoding *, const char*, ...), 2, 3); +VALUE rb_enc_vsprintf(rb_encoding *, const char*, va_list); +long rb_enc_strlen(const char*, const char*, rb_encoding*); +char* rb_enc_nth(const char*, const char*, int, rb_encoding*); +VALUE rb_obj_encoding(VALUE); +VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc); + +VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *); +VALUE rb_str_export_to_enc(VALUE, rb_encoding *); +VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to); + +/* index -> rb_encoding */ +rb_encoding* rb_enc_from_index(int idx); + +/* name -> rb_encoding */ +rb_encoding * rb_enc_find(const char *name); + +/* rb_encoding * -> name */ +#define rb_enc_name(enc) (enc)->name + +/* rb_encoding * -> minlen/maxlen */ +#define rb_enc_mbminlen(enc) (enc)->min_enc_len +#define rb_enc_mbmaxlen(enc) (enc)->max_enc_len + +/* -> mbclen (no error notification: 0 < ret <= e-p, no exception) */ +int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc); + +/* -> chlen, invalid or needmore */ +int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc); +#define MBCLEN_CHARFOUND_P(ret) ONIGENC_MBCLEN_CHARFOUND_P(ret) +#define MBCLEN_CHARFOUND_LEN(ret) ONIGENC_MBCLEN_CHARFOUND_LEN(ret) +#define MBCLEN_INVALID_P(ret) ONIGENC_MBCLEN_INVALID_P(ret) +#define 
MBCLEN_NEEDMORE_P(ret) ONIGENC_MBCLEN_NEEDMORE_P(ret) +#define MBCLEN_NEEDMORE_LEN(ret) ONIGENC_MBCLEN_NEEDMORE_LEN(ret) + +/* -> 0x00..0x7f, -1 */ +int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc); + +/* -> code or raise exception */ +unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc); +#define rb_enc_mbc_to_codepoint(p, e, enc) ONIGENC_MBC_TO_CODE(enc,(UChar*)(p),(UChar*)(e)) + +/* -> codelen>0 or raise exception */ +int rb_enc_codelen(int code, rb_encoding *enc); + +/* code,ptr,encoding -> write buf */ +#define rb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC(enc,c,(UChar*)(buf)) + +/* start, ptr, end, encoding -> prev_char */ +#define rb_enc_prev_char(s,p,e,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e)) +/* start, ptr, end, encoding -> next_char */ +#define rb_enc_left_char_head(s,p,e,enc) (char *)onigenc_get_left_adjust_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e)) +#define rb_enc_right_char_head(s,p,e,enc) (char *)onigenc_get_right_adjust_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e)) + +/* ptr, ptr, encoding -> newline_or_not */ +#define rb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE(enc,(UChar*)(p),(UChar*)(end)) + +#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t) +#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c) +#define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA(enc,c) +#define rb_enc_islower(c,enc) ONIGENC_IS_CODE_LOWER(enc,c) +#define rb_enc_isupper(c,enc) ONIGENC_IS_CODE_UPPER(enc,c) +#define rb_enc_ispunct(c,enc) ONIGENC_IS_CODE_PUNCT(enc,c) +#define rb_enc_isalnum(c,enc) ONIGENC_IS_CODE_ALNUM(enc,c) +#define rb_enc_isprint(c,enc) ONIGENC_IS_CODE_PRINT(enc,c) +#define rb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE(enc,c) +#define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT(enc,c) + +#define rb_enc_asciicompat(enc) (rb_enc_mbminlen(enc)==1 && !rb_enc_dummy_p(enc)) + +int rb_enc_casefold(char *to, const char *p, const char *e, rb_encoding 
*enc); +int rb_enc_toupper(int c, rb_encoding *enc); +int rb_enc_tolower(int c, rb_encoding *enc); +ID rb_intern3(const char*, long, rb_encoding*); +ID rb_interned_id_p(const char *, long, rb_encoding *); +int rb_enc_symname_p(const char*, rb_encoding*); +int rb_enc_str_coderange(VALUE); +long rb_str_coderange_scan_restartable(const char*, const char*, rb_encoding*, int*); +int rb_enc_str_asciionly_p(VALUE); +#define rb_enc_str_asciicompat_p(str) rb_enc_asciicompat(rb_enc_get(str)) +VALUE rb_enc_from_encoding(rb_encoding *enc); +rb_encoding *rb_ascii8bit_encoding(void); +rb_encoding *rb_utf8_encoding(void); +rb_encoding *rb_usascii_encoding(void); +rb_encoding *rb_locale_encoding(void); +rb_encoding *rb_filesystem_encoding(void); +rb_encoding *rb_default_external_encoding(void); +rb_encoding *rb_default_internal_encoding(void); +int rb_ascii8bit_encindex(void); +int rb_utf8_encindex(void); +int rb_usascii_encindex(void); +VALUE rb_enc_default_external(void); +VALUE rb_enc_default_internal(void); +void rb_enc_set_default_external(VALUE encoding); +void rb_enc_set_default_internal(VALUE encoding); +VALUE rb_locale_charmap(VALUE klass); +long rb_memsearch(const void*,long,const void*,long,rb_encoding*); + +RUBY_EXTERN VALUE rb_cEncoding; +#define enc_initialized_p(enc) ((enc)->ruby_encoding_index != ENC_UNINITIALIZED) +#define ENC_DUMMY_FLAG (1<<24) +#define ENC_INDEX_MASK (~(~0U<<24)) + +#define ENC_TO_ENCINDEX(enc) ((enc)->ruby_encoding_index & ENC_INDEX_MASK) +#define ENC_FROM_ENCINDEX(idx) (RARRAY_PTR(rb_encoding_list)[idx]) +#define ENC_FROM_ENCODING(enc) ENC_FROM_ENCINDEX(ENC_TO_ENCINDEX(enc)) + +#define ENC_DUMMY_P(enc) ((enc)->ruby_encoding_index & ENC_DUMMY_FLAG) +#define ENC_SET_DUMMY(enc) ((enc)->ruby_encoding_index |= ENC_DUMMY_FLAG) + +static inline int +rb_enc_dummy_p(rb_encoding *enc) +{ + return ENC_DUMMY_P(enc) != 0; +} + +/* econv stuff */ + +typedef enum { + econv_invalid_byte_sequence, + econv_undefined_conversion, + econv_destination_buffer_full, 
+ econv_source_buffer_empty, + econv_finished, + econv_after_output, + econv_incomplete_input +} rb_econv_result_t; + +typedef struct rb_econv_t rb_econv_t; + +VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts); +int rb_econv_has_convpath_p(const char* from_encoding, const char* to_encoding); + +int rb_econv_prepare_opts(VALUE opthash, VALUE *ecopts); + +rb_econv_t *rb_econv_open(const char *source_encoding, const char *destination_encoding, int ecflags); +rb_econv_t *rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts); + +rb_econv_result_t rb_econv_convert(rb_econv_t *ec, + const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, + unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, + int flags); +void rb_econv_close(rb_econv_t *ec); + +/* result: 0:success -1:failure */ +int rb_econv_set_replacement(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname); + +/* result: 0:success -1:failure */ +int rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name); +int rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name); + +VALUE rb_econv_open_exc(const char *senc, const char *denc, int ecflags); + +/* result: 0:success -1:failure */ +int rb_econv_insert_output(rb_econv_t *ec, + const unsigned char *str, size_t len, const char *str_encoding); + +/* encoding that rb_econv_insert_output doesn't need conversion */ +const char *rb_econv_encoding_to_insert_output(rb_econv_t *ec); + +/* raise an error if the last rb_econv_convert is error */ +void rb_econv_check_error(rb_econv_t *ec); + +/* returns an exception object or nil */ +VALUE rb_econv_make_exception(rb_econv_t *ec); + +int rb_econv_putbackable(rb_econv_t *ec); +void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n); + +/* returns the corresponding ASCII compatible encoding for encname, + * or NULL if encname is not ASCII incompatible 
encoding. */ +const char *rb_econv_asciicompat_encoding(const char *encname); + +VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags); +VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags); +VALUE rb_econv_str_append(rb_econv_t *ec, VALUE src, VALUE dst, int flags); +VALUE rb_econv_substr_append(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, VALUE dst, int flags); + +void rb_econv_binmode(rb_econv_t *ec); + +/* flags for rb_econv_open */ + +#define ECONV_ERROR_HANDLER_MASK 0x000000ff + +#define ECONV_INVALID_MASK 0x0000000f +#define ECONV_INVALID_REPLACE 0x00000002 + +#define ECONV_UNDEF_MASK 0x000000f0 +#define ECONV_UNDEF_REPLACE 0x00000020 +#define ECONV_UNDEF_HEX_CHARREF 0x00000030 + +#define ECONV_DECORATOR_MASK 0x0000ff00 + +#define ECONV_UNIVERSAL_NEWLINE_DECORATOR 0x00000100 +#define ECONV_CRLF_NEWLINE_DECORATOR 0x00001000 +#define ECONV_CR_NEWLINE_DECORATOR 0x00002000 +#define ECONV_XML_TEXT_DECORATOR 0x00004000 +#define ECONV_XML_ATTR_CONTENT_DECORATOR 0x00008000 + +#define ECONV_STATEFUL_DECORATOR_MASK 0x00f00000 +#define ECONV_XML_ATTR_QUOTE_DECORATOR 0x00100000 + +/* end of flags for rb_econv_open */ + +/* flags for rb_econv_convert */ +#define ECONV_PARTIAL_INPUT 0x00010000 +#define ECONV_AFTER_OUTPUT 0x00020000 +/* end of flags for rb_econv_convert */ + +#endif /* RUBY_ENCODING_H */ diff --git a/include/ruby/intern.h b/include/ruby/intern.h new file mode 100644 index 0000000..9399712 --- /dev/null +++ b/include/ruby/intern.h @@ -0,0 +1,800 @@ +/********************************************************************** + + intern.h - + + $Author: yugui $ + created at: Thu Jun 10 14:22:17 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#ifndef RUBY_INTERN_H +#define RUBY_INTERN_H 1 + +#if defined(__cplusplus) +extern "C" { +#if 0 +} /* satisfy cc-mode */ +#endif +#endif + +#ifdef HAVE_STDARG_PROTOTYPES +# include +#else +# include +#endif +#include "ruby/st.h" + +/* + * Functions and variables that are used by more than one source file of + * the kernel. + */ + +#define ID_ALLOCATOR 1 + +/* array.c */ +void rb_mem_clear(register VALUE*, register long); +VALUE rb_assoc_new(VALUE, VALUE); +VALUE rb_check_array_type(VALUE); +VALUE rb_ary_new(void); +VALUE rb_ary_new2(long); +VALUE rb_ary_new3(long,...); +VALUE rb_ary_new4(long, const VALUE *); +VALUE rb_ary_tmp_new(long); +void rb_ary_free(VALUE); +VALUE rb_ary_freeze(VALUE); +VALUE rb_ary_aref(int, VALUE*, VALUE); +VALUE rb_ary_subseq(VALUE, long, long); +void rb_ary_store(VALUE, long, VALUE); +VALUE rb_ary_dup(VALUE); +VALUE rb_ary_to_ary(VALUE); +VALUE rb_ary_to_s(VALUE); +VALUE rb_ary_push(VALUE, VALUE); +VALUE rb_ary_pop(VALUE); +VALUE rb_ary_shift(VALUE); +VALUE rb_ary_unshift(VALUE, VALUE); +VALUE rb_ary_entry(VALUE, long); +VALUE rb_ary_each(VALUE); +VALUE rb_ary_join(VALUE, VALUE); +VALUE rb_ary_print_on(VALUE, VALUE); +VALUE rb_ary_reverse(VALUE); +VALUE rb_ary_sort(VALUE); +VALUE rb_ary_sort_bang(VALUE); +VALUE rb_ary_delete(VALUE, VALUE); +VALUE rb_ary_delete_at(VALUE, long); +VALUE rb_ary_clear(VALUE); +VALUE rb_ary_plus(VALUE, VALUE); +VALUE rb_ary_concat(VALUE, VALUE); +VALUE rb_ary_assoc(VALUE, VALUE); +VALUE rb_ary_rassoc(VALUE, VALUE); +VALUE rb_ary_includes(VALUE, VALUE); +VALUE rb_ary_cmp(VALUE, VALUE); +VALUE rb_ary_replace(VALUE copy, VALUE orig); +VALUE rb_get_values_at(VALUE, long, int, VALUE*, VALUE(*)(VALUE,long)); +/* bignum.c */ +VALUE rb_big_clone(VALUE); +void rb_big_2comp(VALUE); +VALUE rb_big_norm(VALUE); +void rb_big_resize(VALUE big, long len); +VALUE rb_uint2big(VALUE); 
+VALUE rb_int2big(SIGNED_VALUE); +VALUE rb_uint2inum(VALUE); +VALUE rb_int2inum(SIGNED_VALUE); +VALUE rb_cstr_to_inum(const char*, int, int); +VALUE rb_str_to_inum(VALUE, int, int); +VALUE rb_cstr2inum(const char*, int); +VALUE rb_str2inum(VALUE, int); +VALUE rb_big2str(VALUE, int); +VALUE rb_big2str0(VALUE, int, int); +SIGNED_VALUE rb_big2long(VALUE); +#define rb_big2int(x) rb_big2long(x) +VALUE rb_big2ulong(VALUE); +#define rb_big2uint(x) rb_big2ulong(x) +#if HAVE_LONG_LONG +VALUE rb_ll2inum(LONG_LONG); +VALUE rb_ull2inum(unsigned LONG_LONG); +LONG_LONG rb_big2ll(VALUE); +unsigned LONG_LONG rb_big2ull(VALUE); +#endif /* HAVE_LONG_LONG */ +void rb_quad_pack(char*,VALUE); +VALUE rb_quad_unpack(const char*,int); +int rb_uv_to_utf8(char[6],unsigned long); +VALUE rb_dbl2big(double); +double rb_big2dbl(VALUE); +VALUE rb_big_cmp(VALUE, VALUE); +VALUE rb_big_eq(VALUE, VALUE); +VALUE rb_big_plus(VALUE, VALUE); +VALUE rb_big_minus(VALUE, VALUE); +VALUE rb_big_mul(VALUE, VALUE); +VALUE rb_big_div(VALUE, VALUE); +VALUE rb_big_modulo(VALUE, VALUE); +VALUE rb_big_divmod(VALUE, VALUE); +VALUE rb_big_pow(VALUE, VALUE); +VALUE rb_big_and(VALUE, VALUE); +VALUE rb_big_or(VALUE, VALUE); +VALUE rb_big_xor(VALUE, VALUE); +VALUE rb_big_lshift(VALUE, VALUE); +VALUE rb_big_rshift(VALUE, VALUE); +/* rational.c */ +VALUE rb_rational_raw(VALUE, VALUE); +#define rb_rational_raw1(x) rb_rational_raw(x, INT2FIX(1)) +#define rb_rational_raw2(x,y) rb_rational_raw(x, y) +VALUE rb_rational_new(VALUE, VALUE); +#define rb_rational_new1(x) rb_rational_new(x, INT2FIX(1)) +#define rb_rational_new2(x,y) rb_rational_new(x, y) +VALUE rb_Rational(VALUE, VALUE); +#define rb_Rational1(x) rb_Rational(x, INT2FIX(1)) +#define rb_Rational2(x,y) rb_Rational(x, y) +/* complex.c */ +VALUE rb_complex_raw(VALUE, VALUE); +#define rb_complex_raw1(x) rb_complex_raw(x, INT2FIX(0)) +#define rb_complex_raw2(x,y) rb_complex_raw(x, y) +VALUE rb_complex_new(VALUE, VALUE); +#define rb_complex_new1(x) rb_complex_new(x, 
INT2FIX(0)) +#define rb_complex_new2(x,y) rb_complex_new(x, y) +VALUE rb_complex_polar(VALUE, VALUE); +VALUE rb_Complex(VALUE, VALUE); +#define rb_Complex1(x) rb_Complex(x, INT2FIX(0)) +#define rb_Complex2(x,y) rb_Complex(x, y) +/* class.c */ +VALUE rb_class_boot(VALUE); +VALUE rb_class_new(VALUE); +VALUE rb_mod_init_copy(VALUE, VALUE); +VALUE rb_class_init_copy(VALUE, VALUE); +VALUE rb_singleton_class_clone(VALUE); +void rb_singleton_class_attached(VALUE,VALUE); +VALUE rb_make_metaclass(VALUE, VALUE); +void rb_check_inheritable(VALUE); +VALUE rb_class_inherited(VALUE, VALUE); +VALUE rb_define_class_id(ID, VALUE); +VALUE rb_define_class_id_under(VALUE, ID, VALUE); +VALUE rb_module_new(void); +VALUE rb_define_module_id(ID); +VALUE rb_define_module_id_under(VALUE, ID); +VALUE rb_mod_included_modules(VALUE); +VALUE rb_mod_include_p(VALUE, VALUE); +VALUE rb_mod_ancestors(VALUE); +VALUE rb_class_instance_methods(int, VALUE*, VALUE); +VALUE rb_class_public_instance_methods(int, VALUE*, VALUE); +VALUE rb_class_protected_instance_methods(int, VALUE*, VALUE); +VALUE rb_class_private_instance_methods(int, VALUE*, VALUE); +VALUE rb_obj_singleton_methods(int, VALUE*, VALUE); +void rb_define_method_id(VALUE, ID, VALUE (*)(ANYARGS), int); +void rb_frozen_class_p(VALUE); +void rb_undef(VALUE, ID); +void rb_define_protected_method(VALUE, const char*, VALUE (*)(ANYARGS), int); +void rb_define_private_method(VALUE, const char*, VALUE (*)(ANYARGS), int); +void rb_define_singleton_method(VALUE, const char*, VALUE(*)(ANYARGS), int); +VALUE rb_singleton_class(VALUE); +/* compar.c */ +int rb_cmpint(VALUE, VALUE, VALUE); +NORETURN(void rb_cmperr(VALUE, VALUE)); +/* cont.c */ +VALUE rb_fiber_new(VALUE (*)(ANYARGS), VALUE); +VALUE rb_fiber_resume(VALUE fib, int argc, VALUE *args); +VALUE rb_fiber_yield(int argc, VALUE *args); +VALUE rb_fiber_current(void); +VALUE rb_fiber_alive_p(VALUE); +/* enum.c */ +/* enumerator.c */ +VALUE rb_enumeratorize(VALUE, VALUE, int, VALUE *); +#define 
RETURN_ENUMERATOR(obj, argc, argv) do { \ + if (!rb_block_given_p()) \ + return rb_enumeratorize(obj, ID2SYM(rb_frame_this_func()), \ + argc, argv); \ + } while (0) +/* error.c */ +VALUE rb_exc_new(VALUE, const char*, long); +VALUE rb_exc_new2(VALUE, const char*); +VALUE rb_exc_new3(VALUE, VALUE); +PRINTF_ARGS(NORETURN(void rb_loaderror(const char*, ...)), 1, 2); +PRINTF_ARGS(NORETURN(void rb_name_error(ID, const char*, ...)), 2, 3); +NORETURN(void rb_invalid_str(const char*, const char*)); +PRINTF_ARGS(void rb_compile_error(const char*, int, const char*, ...), 3, 4); +PRINTF_ARGS(void rb_compile_error_append(const char*, ...), 1, 2); +NORETURN(void rb_load_fail(const char*)); +NORETURN(void rb_error_frozen(const char*)); +void rb_check_frozen(VALUE); +/* eval.c */ +int rb_sourceline(void); +const char *rb_sourcefile(void); + +#if defined(NFDBITS) && defined(HAVE_RB_FD_INIT) +typedef struct { + int maxfd; + fd_set *fdset; +} rb_fdset_t; + +void rb_fd_init(volatile rb_fdset_t *); +void rb_fd_term(rb_fdset_t *); +void rb_fd_zero(rb_fdset_t *); +void rb_fd_set(int, rb_fdset_t *); +void rb_fd_clr(int, rb_fdset_t *); +int rb_fd_isset(int, const rb_fdset_t *); +void rb_fd_copy(rb_fdset_t *, const fd_set *, int); +int rb_fd_select(int, rb_fdset_t *, rb_fdset_t *, rb_fdset_t *, struct timeval *); + +#define rb_fd_ptr(f) ((f)->fdset) +#define rb_fd_max(f) ((f)->maxfd) + +#elif defined(_WIN32) + +typedef struct { + int capa; + fd_set *fdset; +} rb_fdset_t; + +void rb_fd_init(volatile rb_fdset_t *); +void rb_fd_term(rb_fdset_t *); +#define rb_fd_zero(f) ((f)->fdset->fd_count = 0) +void rb_fd_set(int, rb_fdset_t *); +#define rb_fd_clr(n, f) rb_w32_fdclr(n, (f)->fdset) +#define rb_fd_isset(n, f) rb_w32_fdisset(n, (f)->fdset) +#define rb_fd_select(n, rfds, wfds, efds, timeout) rb_w32_select(n, (rfds) ? ((rb_fdset_t*)rfds)->fdset : NULL, (wfds) ? ((rb_fdset_t*)wfds)->fdset : NULL, (efds) ? 
((rb_fdset_t*)efds)->fdset: NULL, timeout) + +#define rb_fd_ptr(f) ((f)->fdset) +#define rb_fd_max(f) ((f)->fdset->fd_count) + +#else + +typedef fd_set rb_fdset_t; +#define rb_fd_zero(f) FD_ZERO(f) +#define rb_fd_set(n, f) FD_SET(n, f) +#define rb_fd_clr(n, f) FD_CLR(n, f) +#define rb_fd_isset(n, f) FD_ISSET(n, f) +#define rb_fd_copy(d, s, n) (*(d) = *(s)) +#define rb_fd_ptr(f) (f) +#define rb_fd_init(f) FD_ZERO(f) +#define rb_fd_term(f) (void)(f) +#define rb_fd_max(f) FD_SETSIZE +#define rb_fd_select(n, rfds, wfds, efds, timeout) select(n, rfds, wfds, efds, timeout) + +#endif + +NORETURN(void rb_exc_raise(VALUE)); +NORETURN(void rb_exc_fatal(VALUE)); +VALUE rb_f_exit(int,VALUE*); +VALUE rb_f_abort(int,VALUE*); +void rb_remove_method(VALUE, const char*); +void rb_remove_method_id(VALUE, ID); +#define rb_disable_super(klass, name) ((void)0) +#define rb_enable_super(klass, name) ((void)0) +#define HAVE_RB_DEFINE_ALLOC_FUNC 1 +typedef VALUE (*rb_alloc_func_t)(VALUE); +void rb_define_alloc_func(VALUE, rb_alloc_func_t); +void rb_undef_alloc_func(VALUE); +rb_alloc_func_t rb_get_alloc_func(VALUE); +void rb_clear_cache(void); +void rb_clear_cache_by_class(VALUE); +void rb_alias(VALUE, ID, ID); +void rb_attr(VALUE,ID,int,int,int); +int rb_method_boundp(VALUE, ID, int); +int rb_method_basic_definition_p(VALUE, ID); +VALUE rb_eval_cmd(VALUE, VALUE, int); +int rb_obj_respond_to(VALUE, ID, int); +int rb_respond_to(VALUE, ID); +void rb_interrupt(void); +VALUE rb_apply(VALUE, ID, VALUE); +void rb_backtrace(void); +ID rb_frame_this_func(void); +VALUE rb_obj_instance_eval(int, VALUE*, VALUE); +VALUE rb_obj_instance_exec(int, VALUE*, VALUE); +VALUE rb_mod_module_eval(int, VALUE*, VALUE); +VALUE rb_mod_module_exec(int, VALUE*, VALUE); +void rb_load(VALUE, int); +void rb_load_protect(VALUE, int, int*); +NORETURN(void rb_jump_tag(int)); +int rb_provided(const char*); +int rb_feature_provided(const char *, const char **); +void rb_provide(const char*); +VALUE rb_f_require(VALUE, VALUE); 
+VALUE rb_require_safe(VALUE, int); /* eval.c section, continued: prototypes only; each section comment names the source file that presumably holds the definitions */
+void rb_obj_call_init(VALUE, int, VALUE*);
+VALUE rb_class_new_instance(int, VALUE*, VALUE);
+VALUE rb_block_proc(void);
+VALUE rb_f_lambda(void);
+VALUE rb_proc_new(VALUE (*)(ANYARGS/* VALUE yieldarg[, VALUE procarg] */), VALUE);
+VALUE rb_proc_call(VALUE, VALUE);
+VALUE rb_proc_call_with_block(VALUE, int argc, VALUE *argv, VALUE);
+int rb_proc_arity(VALUE);
+VALUE rb_binding_new(void);
+VALUE rb_obj_method(VALUE, VALUE);
+VALUE rb_method_call(int, VALUE*, VALUE);
+int rb_mod_method_arity(VALUE, ID);
+int rb_obj_method_arity(VALUE, ID);
+VALUE rb_protect(VALUE (*)(VALUE), VALUE, int*);
+void rb_set_end_proc(void (*)(VALUE), VALUE);
+void rb_mark_end_proc(void);
+void rb_exec_end_proc(void);
+void Init_jump(void);
+void ruby_finalize(void);
+NORETURN(void ruby_stop(int)); /* wrapped in NORETURN(); the macro is defined elsewhere — presumably marks the function as never returning */
+int ruby_cleanup(int);
+void rb_gc_mark_threads(void);
+void rb_thread_schedule(void);
+void rb_thread_wait_fd(int);
+int rb_thread_fd_writable(int);
+void rb_thread_fd_close(int);
+int rb_thread_alone(void);
+void rb_thread_polling(void);
+void rb_thread_sleep(int);
+void rb_thread_sleep_forever(void);
+VALUE rb_thread_stop(void);
+VALUE rb_thread_wakeup(VALUE);
+VALUE rb_thread_run(VALUE);
+VALUE rb_thread_kill(VALUE);
+VALUE rb_thread_create(VALUE (*)(ANYARGS), void*);
+void rb_thread_signal_raise(void *, int);
+void rb_thread_signal_exit(void *);
+int rb_thread_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); /* same parameter list shape as select(): count, three fd_set pointers, timeout */
+void rb_thread_wait_for(struct timeval); /* takes the struct timeval by value, not by pointer */
+VALUE rb_thread_current(void);
+VALUE rb_thread_main(void);
+VALUE rb_thread_local_aref(VALUE, ID);
+VALUE rb_thread_local_aset(VALUE, ID, VALUE);
+void rb_thread_atfork(void);
+void rb_thread_atfork_before_exec(void);
+VALUE rb_exec_recursive(VALUE(*)(VALUE, VALUE, int),VALUE,VALUE);
+VALUE rb_exec_recursive_paired(VALUE(*)(VALUE, VALUE, int),VALUE,VALUE,VALUE);
+/* file.c */
+VALUE rb_file_s_expand_path(int, VALUE *);
+VALUE rb_file_expand_path(VALUE, VALUE);
+VALUE rb_file_s_absolute_path(int, VALUE *);
+VALUE rb_file_absolute_path(VALUE, VALUE);
+void rb_file_const(const char*, VALUE);
+int rb_find_file_ext(VALUE*, const char* const*);
+VALUE rb_find_file(VALUE);
+char *rb_path_next(const char *);
+char *rb_path_skip_prefix(const char *);
+char *rb_path_last_separator(const char *);
+char *rb_path_end(const char *);
+VALUE rb_file_directory_p(VALUE,VALUE);
+/* gc.c */
+void ruby_set_stack_size(size_t);
+NORETURN(void rb_memerror(void));
+int ruby_stack_check(void);
+size_t ruby_stack_length(VALUE**);
+int rb_during_gc(void);
+void rb_gc_mark_locations(VALUE*, VALUE*);
+void rb_mark_tbl(struct st_table*);
+void rb_mark_set(struct st_table*);
+void rb_mark_hash(struct st_table*);
+void rb_gc_mark_maybe(VALUE);
+void rb_gc_mark(VALUE);
+void rb_gc_force_recycle(VALUE);
+void rb_gc(void);
+void rb_gc_copy_finalizer(VALUE,VALUE);
+void rb_gc_finalize_deferred(void);
+void rb_gc_call_finalizer_at_exit(void);
+VALUE rb_gc_enable(void);
+VALUE rb_gc_disable(void);
+VALUE rb_gc_start(void);
+/* hash.c */
+void st_foreach_safe(struct st_table *, int (*)(ANYARGS), st_data_t);
+void rb_hash_foreach(VALUE, int (*)(ANYARGS), VALUE);
+VALUE rb_hash(VALUE);
+VALUE rb_hash_new(void);
+VALUE rb_hash_dup(VALUE);
+VALUE rb_hash_freeze(VALUE);
+VALUE rb_hash_aref(VALUE, VALUE);
+VALUE rb_hash_lookup(VALUE, VALUE);
+VALUE rb_hash_lookup2(VALUE, VALUE, VALUE);
+VALUE rb_hash_fetch(VALUE, VALUE);
+VALUE rb_hash_aset(VALUE, VALUE, VALUE);
+VALUE rb_hash_delete_if(VALUE);
+VALUE rb_hash_delete(VALUE,VALUE);
+struct st_table *rb_hash_tbl(VALUE);
+int rb_path_check(const char*);
+int rb_env_path_tainted(void);
+VALUE rb_env_clear(void);
+/* io.c */
+#define rb_defout rb_stdout /* alias: every use of rb_defout expands to rb_stdout */
+RUBY_EXTERN VALUE rb_fs;
+RUBY_EXTERN VALUE rb_output_fs;
+RUBY_EXTERN VALUE rb_rs;
+RUBY_EXTERN VALUE rb_default_rs;
+RUBY_EXTERN VALUE rb_output_rs;
+VALUE rb_io_write(VALUE, VALUE);
+VALUE rb_io_gets(VALUE);
+VALUE rb_io_getbyte(VALUE);
+VALUE rb_io_ungetc(VALUE, VALUE);
+VALUE rb_io_ungetbyte(VALUE, VALUE);
+VALUE rb_io_close(VALUE);
+VALUE rb_io_flush(VALUE);
+VALUE rb_io_eof(VALUE);
+VALUE rb_io_binmode(VALUE);
+VALUE rb_io_ascii8bit_binmode(VALUE);
+VALUE rb_io_addstr(VALUE, VALUE);
+VALUE rb_io_printf(int, VALUE*, VALUE);
+VALUE rb_io_print(int, VALUE*, VALUE);
+VALUE rb_io_puts(int, VALUE*, VALUE);
+VALUE rb_io_fdopen(int, int, const char*);
+VALUE rb_file_open(const char*, const char*);
+VALUE rb_file_open_str(VALUE, const char*);
+VALUE rb_gets(void);
+void rb_write_error(const char*);
+void rb_write_error2(const char*, long);
+void rb_close_before_exec(int lowfd, int maxhint, VALUE noclose_fds);
+int rb_pipe(int *pipes);
+/* marshal.c */
+VALUE rb_marshal_dump(VALUE, VALUE);
+VALUE rb_marshal_load(VALUE);
+void rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE));
+/* numeric.c */
+void rb_num_zerodiv(void);
+#define RB_NUM_COERCE_FUNCS_NEED_OPID 1 /* feature-test macro; presumably signals that the rb_num_coerce_* functions below take an operator ID as third argument — TODO confirm */
+VALUE rb_num_coerce_bin(VALUE, VALUE, ID);
+VALUE rb_num_coerce_cmp(VALUE, VALUE, ID);
+VALUE rb_num_coerce_relop(VALUE, VALUE, ID);
+VALUE rb_float_new(double);
+VALUE rb_num2fix(VALUE);
+VALUE rb_fix2str(VALUE, int);
+VALUE rb_dbl_cmp(double, double);
+/* object.c */
+int rb_eql(VALUE, VALUE);
+VALUE rb_any_to_s(VALUE);
+VALUE rb_inspect(VALUE);
+VALUE rb_obj_is_instance_of(VALUE, VALUE);
+VALUE rb_obj_is_kind_of(VALUE, VALUE);
+VALUE rb_obj_alloc(VALUE);
+VALUE rb_obj_clone(VALUE);
+VALUE rb_obj_dup(VALUE);
+VALUE rb_obj_init_copy(VALUE,VALUE);
+VALUE rb_obj_taint(VALUE);
+VALUE rb_obj_tainted(VALUE);
+VALUE rb_obj_untaint(VALUE);
+VALUE rb_obj_untrust(VALUE);
+VALUE rb_obj_untrusted(VALUE);
+VALUE rb_obj_trust(VALUE);
+VALUE rb_obj_freeze(VALUE);
+VALUE rb_obj_frozen_p(VALUE);
+VALUE rb_obj_id(VALUE);
+VALUE rb_obj_class(VALUE);
+VALUE rb_class_real(VALUE);
+VALUE rb_class_inherited_p(VALUE, VALUE);
+VALUE rb_convert_type(VALUE,int,const char*,const char*);
+VALUE rb_check_convert_type(VALUE,int,const char*,const char*);
+VALUE rb_check_to_integer(VALUE, const char *);
+VALUE rb_to_int(VALUE);
+VALUE rb_Integer(VALUE);
+VALUE rb_to_float(VALUE);
+VALUE rb_Float(VALUE);
+VALUE rb_String(VALUE);
+VALUE rb_Array(VALUE);
+double rb_cstr_to_dbl(const char*, int);
+double rb_str_to_dbl(VALUE, int);
+/* parse.y */
+RUBY_EXTERN int ruby_sourceline;
+RUBY_EXTERN char *ruby_sourcefile;
+ID rb_id_attrset(ID);
+void rb_gc_mark_parser(void);
+int rb_is_const_id(ID);
+int rb_is_instance_id(ID);
+int rb_is_class_id(ID);
+int rb_is_local_id(ID);
+int rb_is_junk_id(ID);
+int rb_symname_p(const char*);
+int rb_sym_interned_p(VALUE);
+void rb_gc_mark_symbols(void);
+VALUE rb_backref_get(void);
+void rb_backref_set(VALUE);
+VALUE rb_lastline_get(void);
+void rb_lastline_set(VALUE);
+VALUE rb_sym_all_symbols(void);
+/* process.c */
+void rb_last_status_set(int status, rb_pid_t pid);
+VALUE rb_last_status_get(void);
+struct rb_exec_arg { /* passed by pointer to the rb_exec_arg_* / rb_run_exec_options / rb_exec functions declared below */
+    int argc;
+    VALUE *argv;
+    const char *prog;
+    VALUE options;
+    VALUE redirect_fds;
+};
+int rb_proc_exec_n(int, VALUE*, const char*);
+int rb_proc_exec(const char*);
+VALUE rb_exec_arg_init(int argc, VALUE *argv, int accept_shell, struct rb_exec_arg *e);
+int rb_exec_arg_addopt(struct rb_exec_arg *e, VALUE key, VALUE val);
+void rb_exec_arg_fixup(struct rb_exec_arg *e);
+int rb_run_exec_options(const struct rb_exec_arg *e, struct rb_exec_arg *s);
+int rb_exec(const struct rb_exec_arg*);
+rb_pid_t rb_fork(int*, int (*)(void*), void*, VALUE);
+VALUE rb_f_exec(int,VALUE*);
+rb_pid_t rb_waitpid(rb_pid_t pid, int *status, int flags);
+void rb_syswait(rb_pid_t pid);
+rb_pid_t rb_spawn(int, VALUE*);
+VALUE rb_proc_times(VALUE);
+VALUE rb_detach_process(rb_pid_t pid);
+/* range.c */
+VALUE rb_range_new(VALUE, VALUE, int);
+VALUE rb_range_beg_len(VALUE, long*, long*, long, int);
+/* random.c */
+unsigned long rb_genrand_int32(void);
+double rb_genrand_real(void);
+void rb_reset_random_seed(void);
+/* re.c */
+#define rb_memcmp memcmp /* alias: rb_memcmp expands to the C library memcmp */
+int rb_memcicmp(const void*,const void*,long);
+void rb_match_busy(VALUE); +VALUE rb_reg_nth_defined(int, VALUE); +VALUE rb_reg_nth_match(int, VALUE); +VALUE rb_reg_last_match(VALUE); +VALUE rb_reg_match_pre(VALUE); +VALUE rb_reg_match_post(VALUE); +VALUE rb_reg_match_last(VALUE); +#define HAVE_RB_REG_NEW_STR 1 +VALUE rb_reg_new_str(VALUE, int); +VALUE rb_reg_new(const char *, long, int); +VALUE rb_reg_match(VALUE, VALUE); +VALUE rb_reg_match2(VALUE); +int rb_reg_options(VALUE); +void rb_set_kcode(const char*); +const char* rb_get_kcode(void); +/* ruby.c */ +#define rb_argv rb_get_argv() +RUBY_EXTERN VALUE rb_argv0; +VALUE rb_get_argv(void); +void *rb_load_file(const char*); +void ruby_script(const char*); +void ruby_prog_init(void); +void ruby_set_argv(int, char**); +void *ruby_process_options(int, char**); +void ruby_init_loadpath(void); +void ruby_incpush(const char*); +/* signal.c */ +VALUE rb_f_kill(int, VALUE*); +void rb_gc_mark_trap_list(void); +#ifdef POSIX_SIGNAL +#define posix_signal ruby_posix_signal +RETSIGTYPE (*posix_signal(int, RETSIGTYPE (*)(int)))(int); +#endif +void ruby_sig_finalize(void); +void rb_trap_exit(void); +void rb_trap_exec(void); +const char *ruby_signal_name(int); +void ruby_default_signal(int); +/* sprintf.c */ +VALUE rb_f_sprintf(int, const VALUE*); +PRINTF_ARGS(VALUE rb_sprintf(const char*, ...), 1, 2); +VALUE rb_vsprintf(const char*, va_list); +PRINTF_ARGS(VALUE rb_str_catf(VALUE, const char*, ...), 2, 3); +VALUE rb_str_vcatf(VALUE, const char*, va_list); +VALUE rb_str_format(int, const VALUE *, VALUE); +/* string.c */ +VALUE rb_str_new(const char*, long); +VALUE rb_str_new_cstr(const char*); +VALUE rb_str_new2(const char*); +VALUE rb_str_new_shared(VALUE); +VALUE rb_str_new3(VALUE); +VALUE rb_str_new_frozen(VALUE); +VALUE rb_str_new4(VALUE); +VALUE rb_str_new_with_class(VALUE, const char*, long); +VALUE rb_str_new5(VALUE, const char*, long); +VALUE rb_tainted_str_new_cstr(const char*); +VALUE rb_tainted_str_new(const char*, long); +VALUE rb_tainted_str_new2(const char*); 
+VALUE rb_external_str_new(const char*, long); +VALUE rb_external_str_new_cstr(const char*); +VALUE rb_locale_str_new(const char*, long); +VALUE rb_locale_str_new_cstr(const char*); +VALUE rb_str_buf_new(long); +VALUE rb_str_buf_new_cstr(const char*); +VALUE rb_str_buf_new2(const char*); +VALUE rb_str_tmp_new(long); +VALUE rb_usascii_str_new(const char*, long); +VALUE rb_usascii_str_new_cstr(const char*); +VALUE rb_usascii_str_new2(const char*); +void rb_str_free(VALUE); +void rb_str_shared_replace(VALUE, VALUE); +VALUE rb_str_buf_append(VALUE, VALUE); +VALUE rb_str_buf_cat(VALUE, const char*, long); +VALUE rb_str_buf_cat2(VALUE, const char*); +VALUE rb_str_buf_cat_ascii(VALUE, const char*); +VALUE rb_obj_as_string(VALUE); +VALUE rb_check_string_type(VALUE); +VALUE rb_str_dup(VALUE); +VALUE rb_str_locktmp(VALUE); +VALUE rb_str_unlocktmp(VALUE); +VALUE rb_str_dup_frozen(VALUE); +#define rb_str_dup_frozen rb_str_new_frozen +VALUE rb_str_plus(VALUE, VALUE); +VALUE rb_str_times(VALUE, VALUE); +long rb_str_sublen(VALUE, long); +VALUE rb_str_substr(VALUE, long, long); +VALUE rb_str_subseq(VALUE, long, long); +void rb_str_modify(VALUE); +VALUE rb_str_freeze(VALUE); +void rb_str_set_len(VALUE, long); +VALUE rb_str_resize(VALUE, long); +VALUE rb_str_cat(VALUE, const char*, long); +VALUE rb_str_cat2(VALUE, const char*); +VALUE rb_str_append(VALUE, VALUE); +VALUE rb_str_concat(VALUE, VALUE); +int rb_memhash(const void *ptr, long len); +int rb_str_hash(VALUE); +int rb_str_hash_cmp(VALUE,VALUE); +int rb_str_comparable(VALUE, VALUE); +int rb_str_cmp(VALUE, VALUE); +VALUE rb_str_equal(VALUE str1, VALUE str2); +VALUE rb_str_drop_bytes(VALUE, long); +void rb_str_update(VALUE, long, long, VALUE); +VALUE rb_str_replace(VALUE, VALUE); +VALUE rb_str_inspect(VALUE); +VALUE rb_str_dump(VALUE); +VALUE rb_str_split(VALUE, const char*); +void rb_str_associate(VALUE, VALUE); +VALUE rb_str_associated(VALUE); +void rb_str_setter(VALUE, ID, VALUE*); +VALUE rb_str_intern(VALUE); +VALUE 
rb_sym_to_s(VALUE); +VALUE rb_str_length(VALUE); +long rb_str_offset(VALUE, long); +size_t rb_str_capacity(VALUE); +#if defined __GNUC__ +#define rb_str_new_cstr(str) __extension__ ( \ +{ \ + (__builtin_constant_p(str)) ? \ + rb_str_new(str, strlen(str)) : \ + rb_str_new_cstr(str); \ +}) +#define rb_tainted_str_new_cstr(str) __extension__ ( \ +{ \ + (__builtin_constant_p(str)) ? \ + rb_tainted_str_new(str, strlen(str)) : \ + rb_tainted_str_new_cstr(str); \ +}) +#define rb_usascii_str_new_cstr(str) __extension__ ( \ +{ \ + (__builtin_constant_p(str)) ? \ + rb_usascii_str_new(str, strlen(str)) : \ + rb_usascii_str_new_cstr(str); \ +}) +#define rb_external_str_new_cstr(str) __extension__ ( \ +{ \ + (__builtin_constant_p(str)) ? \ + rb_external_str_new(str, strlen(str)) : \ + rb_external_str_new_cstr(str); \ +}) +#define rb_locale_str_new_cstr(str) __extension__ ( \ +{ \ + (__builtin_constant_p(str)) ? \ + rb_locale_str_new(str, strlen(str)) : \ + rb_locale_str_new_cstr(str); \ +}) +#define rb_str_buf_new_cstr(str) __extension__ ( \ +{ \ + (__builtin_constant_p(str)) ? \ + rb_str_buf_cat(rb_str_buf_new(strlen(str)), \ + str, strlen(str)) : \ + rb_str_buf_new_cstr(str); \ +}) +#define rb_str_buf_cat2(str, ptr) __extension__ ( \ +{ \ + (__builtin_constant_p(ptr)) ? \ + rb_str_buf_cat(str, ptr, strlen(ptr)) : \ + rb_str_buf_cat2(str, ptr); \ +}) +#define rb_str_cat2(str, ptr) __extension__ ( \ +{ \ + (__builtin_constant_p(ptr)) ? 
\ + rb_str_cat(str, ptr, strlen(ptr)) : \ + rb_str_cat2(str, ptr); \ +}) +#endif +#define rb_str_new2 rb_str_new_cstr +#define rb_str_new3 rb_str_new_shared +#define rb_str_new4 rb_str_new_frozen +#define rb_str_new5 rb_str_new_with_class +#define rb_tainted_str_new2 rb_tainted_str_new_cstr +#define rb_str_buf_new2 rb_str_buf_new_cstr +#define rb_usascii_str_new2 rb_usascii_str_new_cstr +/* struct.c */ +VALUE rb_struct_new(VALUE, ...); +VALUE rb_struct_define(const char*, ...); +VALUE rb_struct_alloc(VALUE, VALUE); +VALUE rb_struct_initialize(VALUE, VALUE); +VALUE rb_struct_aref(VALUE, VALUE); +VALUE rb_struct_aset(VALUE, VALUE, VALUE); +VALUE rb_struct_getmember(VALUE, ID); +VALUE rb_struct_iv_get(VALUE, const char*); +VALUE rb_struct_s_members(VALUE); +VALUE rb_struct_members(VALUE); +VALUE rb_struct_alloc_noinit(VALUE); +VALUE rb_struct_define_without_accessor(const char *, VALUE, rb_alloc_func_t, ...); +/* thread.c */ +typedef void rb_unblock_function_t(void *); +typedef VALUE rb_blocking_function_t(void *); +void rb_thread_check_ints(void); +int rb_thread_interrupted(VALUE thval); +VALUE rb_thread_blocking_region(rb_blocking_function_t *func, void *data1, + rb_unblock_function_t *ubf, void *data2); +#define RUBY_UBF_IO ((rb_unblock_function_t *)-1) +#define RUBY_UBF_PROCESS ((rb_unblock_function_t *)-1) +VALUE rb_mutex_new(void); +VALUE rb_mutex_locked_p(VALUE mutex); +VALUE rb_mutex_try_lock(VALUE mutex); +VALUE rb_mutex_lock(VALUE mutex); +VALUE rb_mutex_unlock(VALUE mutex); +VALUE rb_mutex_sleep(VALUE self, VALUE timeout); +VALUE rb_mutex_synchronize(VALUE mutex, VALUE (*func)(VALUE arg), VALUE arg); +VALUE rb_barrier_new(void); +VALUE rb_barrier_wait(VALUE self); +VALUE rb_barrier_release(VALUE self); +VALUE rb_barrier_destroy(VALUE self); +/* time.c */ +VALUE rb_time_new(time_t, long); +VALUE rb_time_nano_new(time_t, long); +/* variable.c */ +VALUE rb_mod_name(VALUE); +VALUE rb_class_path(VALUE); +void rb_set_class_path(VALUE, VALUE, const char*); +void 
rb_set_class_path_string(VALUE, VALUE, VALUE); +VALUE rb_path_to_class(VALUE); +VALUE rb_path2class(const char*); +void rb_name_class(VALUE, ID); +VALUE rb_class_name(VALUE); +void rb_autoload(VALUE, ID, const char*); +VALUE rb_autoload_load(VALUE, ID); +VALUE rb_autoload_p(VALUE, ID); +void rb_gc_mark_global_tbl(void); +VALUE rb_f_trace_var(int, VALUE*); +VALUE rb_f_untrace_var(int, VALUE*); +VALUE rb_f_global_variables(void); +void rb_alias_variable(ID, ID); +struct st_table* rb_generic_ivar_table(VALUE); +void rb_copy_generic_ivar(VALUE,VALUE); +void rb_mark_generic_ivar(VALUE); +void rb_mark_generic_ivar_tbl(void); +void rb_free_generic_ivar(VALUE); +VALUE rb_ivar_get(VALUE, ID); +VALUE rb_ivar_set(VALUE, ID, VALUE); +VALUE rb_ivar_defined(VALUE, ID); +void rb_ivar_foreach(VALUE, int (*)(ANYARGS), st_data_t); +VALUE rb_iv_set(VALUE, const char*, VALUE); +VALUE rb_iv_get(VALUE, const char*); +VALUE rb_attr_get(VALUE, ID); +VALUE rb_obj_instance_variables(VALUE); +VALUE rb_obj_remove_instance_variable(VALUE, VALUE); +void *rb_mod_const_at(VALUE, void*); +void *rb_mod_const_of(VALUE, void*); +VALUE rb_const_list(void*); +VALUE rb_mod_constants(int, VALUE *, VALUE); +VALUE rb_mod_remove_const(VALUE, VALUE); +int rb_const_defined(VALUE, ID); +int rb_const_defined_at(VALUE, ID); +int rb_const_defined_from(VALUE, ID); +VALUE rb_const_get(VALUE, ID); +VALUE rb_const_get_at(VALUE, ID); +VALUE rb_const_get_from(VALUE, ID); +void rb_const_set(VALUE, ID, VALUE); +VALUE rb_mod_const_missing(VALUE,VALUE); +VALUE rb_cvar_defined(VALUE, ID); +void rb_cvar_set(VALUE, ID, VALUE); +VALUE rb_cvar_get(VALUE, ID); +void rb_cv_set(VALUE, const char*, VALUE); +VALUE rb_cv_get(VALUE, const char*); +void rb_define_class_variable(VALUE, const char*, VALUE); +VALUE rb_mod_class_variables(VALUE); +VALUE rb_mod_remove_cvar(VALUE, VALUE); +/* version.c */ +void ruby_show_version(void); +void ruby_show_copyright(void); + +ID rb_frame_callee(void); +VALUE rb_str_succ(VALUE); +VALUE 
rb_time_succ(VALUE); +#define Init_stack(addr) ruby_init_stack(addr) +void rb_frame_pop(void); +int rb_frame_method_id_and_class(ID *idp, VALUE *klassp); + +#if defined(__cplusplus) +#if 0 +{ /* satisfy cc-mode */ +#endif +} /* extern "C" { */ +#endif + +#endif /* RUBY_INTERN_H */ diff --git a/include/ruby/io.h b/include/ruby/io.h new file mode 100644 index 0000000..193f812 --- /dev/null +++ b/include/ruby/io.h @@ -0,0 +1,175 @@ +/********************************************************************** + + rubyio.h - + + $Author: yugui $ + created at: Fri Nov 12 16:47:09 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#ifndef RUBY_IO_H +#define RUBY_IO_H 1 + +#if defined(__cplusplus) +extern "C" { +#if 0 +} /* satisfy cc-mode */ +#endif +#endif + +#include <stdio.h> +#include <errno.h> +#include "ruby/encoding.h" + +#if defined(HAVE_STDIO_EXT_H) +#include <stdio_ext.h> +#endif + +typedef struct rb_io_t { + int fd; /* file descriptor */ + FILE *stdio_file; /* stdio ptr for read/write if available */ + int mode; /* mode flags: FMODE_XXXs */ + rb_pid_t pid; /* child's pid (for pipes) */ + int lineno; /* number of lines read */ + VALUE pathv; /* pathname for file */ + void (*finalize)(struct rb_io_t*,int); /* finalize proc */ + + char *wbuf; /* wbuf_off + wbuf_len <= wbuf_capa */ + int wbuf_off; + int wbuf_len; + int wbuf_capa; + + char *rbuf; /* rbuf_off + rbuf_len <= rbuf_capa */ + int rbuf_off; + int rbuf_len; + int rbuf_capa; + + VALUE tied_io_for_writing; + + /* + * enc enc2 read action write action + * NULL NULL force_encoding(default_external) write the byte sequence of str + * e1 NULL force_encoding(e1) convert str.encoding to e1 + * e1 e2 convert from e2 to e1 convert str.encoding to e2 + */ + struct rb_io_enc_t { + rb_encoding *enc; + rb_encoding *enc2; + int ecflags; + VALUE ecopts; + } encs; + + rb_econv_t *readconv; + char *cbuf; /* cbuf_off + cbuf_len <= cbuf_capa */ + int cbuf_off; + int cbuf_len; + int 
cbuf_capa; + + rb_econv_t *writeconv; + VALUE writeconv_asciicompat; + int writeconv_pre_ecflags; + VALUE writeconv_pre_ecopts; + int writeconv_initialized; + + VALUE write_lock; +} rb_io_t; + +#define HAVE_RB_IO_T 1 + +#define FMODE_READABLE 0x00000001 +#define FMODE_WRITABLE 0x00000002 +#define FMODE_READWRITE (FMODE_READABLE|FMODE_WRITABLE) +#define FMODE_BINMODE 0x00000004 +#define FMODE_SYNC 0x00000008 +#define FMODE_TTY 0x00000010 +#define FMODE_DUPLEX 0x00000020 +#define FMODE_APPEND 0x00000040 +#define FMODE_CREATE 0x00000080 +/* #define FMODE_NOREVLOOKUP 0x00000100 */ +#define FMODE_WSPLIT 0x00000200 +#define FMODE_WSPLIT_INITIALIZED 0x00000400 +#define FMODE_TRUNC 0x00000800 +#define FMODE_TEXTMODE 0x00001000 +#define FMODE_EOF 0x00002000 +/* #define FMODE_PREP 0x00010000 */ + +#define GetOpenFile(obj,fp) rb_io_check_closed((fp) = RFILE(rb_io_taint_check(obj))->fptr) + +#define MakeOpenFile(obj, fp) do {\ + if (RFILE(obj)->fptr) {\ + rb_io_close(obj);\ + free(RFILE(obj)->fptr);\ + RFILE(obj)->fptr = 0;\ + }\ + fp = 0;\ + fp = RFILE(obj)->fptr = ALLOC(rb_io_t);\ + fp->fd = -1;\ + fp->stdio_file = NULL;\ + fp->mode = 0;\ + fp->pid = 0;\ + fp->lineno = 0;\ + fp->pathv = Qnil;\ + fp->finalize = 0;\ + fp->wbuf = NULL;\ + fp->wbuf_off = 0;\ + fp->wbuf_len = 0;\ + fp->wbuf_capa = 0;\ + fp->rbuf = NULL;\ + fp->rbuf_off = 0;\ + fp->rbuf_len = 0;\ + fp->rbuf_capa = 0;\ + fp->readconv = NULL;\ + fp->cbuf = NULL;\ + fp->cbuf_off = 0;\ + fp->cbuf_len = 0;\ + fp->cbuf_capa = 0;\ + fp->writeconv = NULL;\ + fp->writeconv_asciicompat = Qnil;\ + fp->writeconv_pre_ecflags = 0;\ + fp->writeconv_pre_ecopts = Qnil;\ + fp->writeconv_initialized = 0;\ + fp->tied_io_for_writing = 0;\ + fp->encs.enc = NULL;\ + fp->encs.enc2 = NULL;\ + fp->encs.ecflags = 0;\ + fp->encs.ecopts = Qnil;\ + fp->write_lock = 0;\ +} while (0) + +FILE *rb_io_stdio_file(rb_io_t *fptr); + +FILE *rb_fdopen(int, const char*); +int rb_io_modestr_fmode(const char *modestr); +int rb_io_modestr_oflags(const char 
*modestr); +int rb_io_oflags_fmode(int oflags); +void rb_io_check_writable(rb_io_t*); +void rb_io_check_readable(rb_io_t*); +int rb_io_fptr_finalize(rb_io_t*); +void rb_io_synchronized(rb_io_t*); +void rb_io_check_initialized(rb_io_t*); +void rb_io_check_closed(rb_io_t*); +int rb_io_wait_readable(int); +int rb_io_wait_writable(int); +void rb_io_set_nonblock(rb_io_t *fptr); + +/* compatibility for ruby 1.8 and older */ +#define rb_io_mode_flags(modestr) rb_io_modestr_fmode(modestr) +#define rb_io_modenum_flags(oflags) rb_io_oflags_fmode(oflags) + +VALUE rb_io_taint_check(VALUE); +NORETURN(void rb_eof_error(void)); + +void rb_io_read_check(rb_io_t*); +int rb_io_read_pending(rb_io_t*); +void rb_read_check(FILE*); + +#if defined(__cplusplus) +#if 0 +{ /* satisfy cc-mode */ +#endif +} /* extern "C" { */ +#endif + +#endif /* RUBY_IO_H */ diff --git a/include/ruby/missing.h b/include/ruby/missing.h new file mode 100644 index 0000000..77f57d7 --- /dev/null +++ b/include/ruby/missing.h @@ -0,0 +1,172 @@ +/************************************************ + + missing.h - prototype for *.c in ./missing, and + for missing timeval struct + + $Author: yugui $ + created at: Sat May 11 23:46:03 JST 2002 + +************************************************/ + +#ifndef RUBY_MISSING_H +#define RUBY_MISSING_H 1 + +#if defined(__cplusplus) +extern "C" { +#if 0 +} /* satisfy cc-mode */ +#endif +#endif + +#if defined(HAVE_SYS_TIME_H) +# include <sys/time.h> +#elif !defined(_WIN32) +# define time_t long +struct timeval { + time_t tv_sec; /* seconds */ + long tv_usec; /* microseconds */ +}; +#endif +#if defined(HAVE_SYS_TYPES_H) +# include <sys/types.h> +#endif + +#if !defined(HAVE_STRUCT_TIMESPEC) +struct timespec { + time_t tv_sec; /* seconds */ + long tv_nsec; /* nanoseconds */ +}; +#endif + +#ifndef RUBY_EXTERN +#define RUBY_EXTERN extern +#endif + +#ifndef HAVE_ACOSH +RUBY_EXTERN double acosh(double); +RUBY_EXTERN double asinh(double); +RUBY_EXTERN double atanh(double); +#endif + +#ifndef HAVE_CRYPT +RUBY_EXTERN 
char *crypt(const char *, const char *); +#endif + +#ifndef HAVE_DUP2 +RUBY_EXTERN int dup2(int, int); +#endif + +#ifndef HAVE_EACCESS +RUBY_EXTERN int eaccess(const char*, int); +#endif + +#ifndef HAVE_FINITE +RUBY_EXTERN int finite(double); +#endif + +#ifndef HAVE_FLOCK +RUBY_EXTERN int flock(int, int); +#endif + +/* +#ifndef HAVE_FREXP +RUBY_EXTERN double frexp(double, int *); +#endif +*/ + +#ifndef HAVE_HYPOT +RUBY_EXTERN double hypot(double, double); +#endif + +#ifndef HAVE_ERF +RUBY_EXTERN double erf(double); +RUBY_EXTERN double erfc(double); +#endif + +#ifndef HAVE_TGAMMA +RUBY_EXTERN double tgamma(double); +#endif + +#ifndef HAVE_LGAMMA_R +RUBY_EXTERN double lgamma_r(double, int *); +#endif + +#ifndef HAVE_CBRT +RUBY_EXTERN double cbrt(double); +#endif + +#ifndef isinf +# ifndef HAVE_ISINF +# if defined(HAVE_FINITE) && defined(HAVE_ISNAN) +# define isinf(x) (!finite(x) && !isnan(x)) +# else +RUBY_EXTERN int isinf(double); +# endif +# endif +#endif + +#ifndef HAVE_ISNAN +RUBY_EXTERN int isnan(double); +#endif + +/* +#ifndef HAVE_MEMCMP +RUBY_EXTERN int memcmp(const void *, const void *, size_t); +#endif +*/ + +#ifndef HAVE_MEMMOVE +RUBY_EXTERN void *memmove(void *, const void *, size_t); +#endif + +/* +#ifndef HAVE_MODF +RUBY_EXTERN double modf(double, double *); +#endif +*/ + +#ifndef HAVE_STRCHR +RUBY_EXTERN char *strchr(const char *, int); +RUBY_EXTERN char *strrchr(const char *, int); +#endif + +#ifndef HAVE_STRERROR +RUBY_EXTERN char *strerror(int); +#endif + +#ifndef HAVE_STRSTR +RUBY_EXTERN char *strstr(const char *, const char *); +#endif + +/* +#ifndef HAVE_STRTOL +RUBY_EXTERN long strtol(const char *, char **, int); +#endif +*/ + +#if defined HAVE_VSNPRINTF || defined HAVE_SNPRINTF +# include <stdarg.h> +#endif +#ifndef HAVE_SNPRINTF +RUBY_EXTERN int snprintf(char *, size_t n, char const *, ...); +#endif +#ifndef HAVE_VSNPRINTF +extern int vsnprintf(char *, size_t n, char const *, va_list); +#endif + +#ifndef HAVE_STRLCPY +RUBY_EXTERN size_t strlcpy(char *, 
const char*, size_t); +#endif + +#ifndef HAVE_STRLCAT +RUBY_EXTERN size_t strlcat(char *, const char*, size_t); +#endif + +#if defined(__cplusplus) +#if 0 +{ /* satisfy cc-mode */ +#endif +} /* extern "C" { */ +#endif + +#endif /* RUBY_MISSING_H */ diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h new file mode 100644 index 0000000..6f8ce23 --- /dev/null +++ b/include/ruby/oniguruma.h @@ -0,0 +1,792 @@ +#ifndef ONIGURUMA_H +#define ONIGURUMA_H +/********************************************************************** + oniguruma.h - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef __cplusplus +extern "C" { +#if 0 +} /* satisfy cc-mode */ +#endif +#endif + +#define ONIGURUMA +#define ONIGURUMA_VERSION_MAJOR 5 +#define ONIGURUMA_VERSION_MINOR 9 +#define ONIGURUMA_VERSION_TEENY 1 + +#ifdef __cplusplus +# ifndef HAVE_PROTOTYPES +# define HAVE_PROTOTYPES 1 +# endif +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */ +#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4 +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +#ifdef HAVE_STDARG_H +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +#ifndef P_ +#if defined(__STDC__) || defined(_WIN32) +# define P_(args) args +#else +# define P_(args) () +#endif +#endif + +#ifndef PV_ +#ifdef HAVE_STDARG_PROTOTYPES +# define PV_(args) args +#else +# define PV_(args) () +#endif +#endif + +#ifndef ONIG_EXTERN +#ifdef RUBY_EXTERN +#define ONIG_EXTERN RUBY_EXTERN +#else +#if defined(_WIN32) && !defined(__GNUC__) +#if defined(EXPORT) || defined(RUBY_EXPORT) +#define ONIG_EXTERN extern __declspec(dllexport) +#else +#define ONIG_EXTERN extern __declspec(dllimport) +#endif +#endif +#endif +#endif + +#ifndef ONIG_EXTERN +#define ONIG_EXTERN extern +#endif + +/* PART: character encoding */ + +#ifndef ONIG_ESCAPE_UCHAR_COLLISION +#define UChar OnigUChar +#endif + +typedef unsigned char OnigUChar; +typedef unsigned 
int OnigCodePoint; +typedef unsigned int OnigCtype; +typedef unsigned int OnigDistance; + +#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0) + +typedef unsigned int OnigCaseFoldType; /* case fold flag */ + +ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag; + +/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */ +/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */ +#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) +#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) + +#define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR +#define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag + + +#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3 +#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13 +/* 13 => Unicode:0x1ffc */ + +/* code range */ +#define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0]) +#define ONIGENC_CODE_RANGE_FROM(range,i) range[((i)*2) + 1] +#define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2] + +typedef struct { + int byte_len; /* argument(original) character(s) byte length */ + int code_len; /* number of code */ + OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN]; +} OnigCaseFoldCodeItem; + +typedef struct { + OnigCodePoint esc; + OnigCodePoint anychar; + OnigCodePoint anytime; + OnigCodePoint zero_or_one_time; + OnigCodePoint one_or_more_time; + OnigCodePoint anychar_anytime; +} OnigMetaCharTableType; + +typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg); + +typedef struct OnigEncodingTypeST { + int (*precise_mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc); + const char* name; + int max_enc_len; + int min_enc_len; + int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc); + OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc); + int (*code_to_mbclen)(OnigCodePoint code, struct OnigEncodingTypeST* enc); + int (*code_to_mbc)(OnigCodePoint code, OnigUChar 
*buf, struct OnigEncodingTypeST* enc); + int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, struct OnigEncodingTypeST* enc); + int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, struct OnigEncodingTypeST* enc); + int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[], struct OnigEncodingTypeST* enc); + int (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end); + int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype, struct OnigEncodingTypeST* enc); + int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], struct OnigEncodingTypeST* enc); + OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc); + int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc); + int ruby_encoding_index; +} OnigEncodingType; + +typedef OnigEncodingType* OnigEncoding; + +ONIG_EXTERN OnigEncodingType OnigEncodingASCII; + +#define ONIG_ENCODING_ASCII (&OnigEncodingASCII) + +#define ONIG_ENCODING_UNDEF ((OnigEncoding )0) + + +/* work size */ +#define ONIGENC_CODE_TO_MBC_MAXLEN 7 +#define ONIGENC_MBC_CASE_FOLD_MAXLEN 18 +/* 18: 6(max-byte) * 3(case-fold chars) */ + +/* character types */ +#define ONIGENC_CTYPE_NEWLINE 0 +#define ONIGENC_CTYPE_ALPHA 1 +#define ONIGENC_CTYPE_BLANK 2 +#define ONIGENC_CTYPE_CNTRL 3 +#define ONIGENC_CTYPE_DIGIT 4 +#define ONIGENC_CTYPE_GRAPH 5 +#define ONIGENC_CTYPE_LOWER 6 +#define ONIGENC_CTYPE_PRINT 7 +#define ONIGENC_CTYPE_PUNCT 8 +#define ONIGENC_CTYPE_SPACE 9 +#define ONIGENC_CTYPE_UPPER 10 +#define ONIGENC_CTYPE_XDIGIT 11 +#define ONIGENC_CTYPE_WORD 12 +#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */ +#define ONIGENC_CTYPE_ASCII 14 +#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII +#define 
ONIGENC_CTYPE_SPECIAL_MASK 128 +#define ONIGENC_CTYPE_S /* [\t\n\v\f\r\s] */ \ + ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_SPACE +#define ONIGENC_CTYPE_D /* [0-9] */ \ + ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_DIGIT +#define ONIGENC_CTYPE_W /* [0-9A-Za-z_] */ \ + ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_WORD +#define ONIGENC_CTYPE_SPECIAL_P(ctype) ((ctype) & ONIGENC_CTYPE_SPECIAL_MASK) + + +#define onig_enc_len(enc,p,e) ONIGENC_MBC_ENC_LEN(enc, p, e) + +#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF) +#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1) +#define ONIGENC_IS_MBC_HEAD(enc,p,e) (ONIGENC_MBC_ENC_LEN(enc,p,e) != 1) +#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128) +#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128) +#define ONIGENC_IS_MBC_WORD(enc,s,end) \ + ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end)) + + +#define ONIGENC_NAME(enc) ((enc)->name) + +#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \ + (enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf,enc) +#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \ + (enc)->is_allowed_reverse_match(s,end,enc) +#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s,end) \ + (enc)->left_adjust_char_head(start, s, end, enc) +#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \ + (enc)->apply_all_case_fold(case_fold_flag,f,arg,enc) +#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \ + (enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs,enc) +#define ONIGENC_STEP_BACK(enc,start,s,end,n) \ + onigenc_step_back((enc),(start),(s),(end),(n)) + +#define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) (n) +#define ONIGENC_MBCLEN_CHARFOUND_P(r) (0 < (r)) +#define ONIGENC_MBCLEN_CHARFOUND_LEN(r) (r) + +#define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1) +#define ONIGENC_MBCLEN_INVALID_P(r) ((r) == -1) + +#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-(n)) +#define ONIGENC_MBCLEN_NEEDMORE_P(r) ((r) < -1) +#define ONIGENC_MBCLEN_NEEDMORE_LEN(r) (-1-(r)) 
+ +#define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc) + +ONIG_EXTERN +int onigenc_mbclen_approximate P_((const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)); + +#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_approximate(p,e,enc) +#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len) +#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc) +#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len) +#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end),enc) +#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end),enc) +#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code,enc) +#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf,enc) +#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \ + (enc)->property_name_to_ctype(enc,p,end) + +#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype,enc) + +#define ONIGENC_IS_CODE_NEWLINE(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE) +#define ONIGENC_IS_CODE_GRAPH(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH) +#define ONIGENC_IS_CODE_PRINT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT) +#define ONIGENC_IS_CODE_ALNUM(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM) +#define ONIGENC_IS_CODE_ALPHA(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA) +#define ONIGENC_IS_CODE_LOWER(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER) +#define ONIGENC_IS_CODE_UPPER(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER) +#define ONIGENC_IS_CODE_CNTRL(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL) +#define ONIGENC_IS_CODE_PUNCT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT) +#define ONIGENC_IS_CODE_SPACE(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE) +#define ONIGENC_IS_CODE_BLANK(enc,code) \ + 
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK) +#define ONIGENC_IS_CODE_DIGIT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT) +#define ONIGENC_IS_CODE_XDIGIT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT) +#define ONIGENC_IS_CODE_WORD(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD) + +#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \ + (enc)->get_ctype_code_range(ctype,sbout,ranges,enc) + +ONIG_EXTERN +OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, int n)); + + +/* encoding API */ +ONIG_EXTERN +int onigenc_init P_((void)); +ONIG_EXTERN +int onigenc_set_default_encoding P_((OnigEncoding enc)); +ONIG_EXTERN +OnigEncoding onigenc_get_default_encoding P_((void)); +ONIG_EXTERN +void onigenc_set_default_caseconv_table P_((const OnigUChar* table)); +ONIG_EXTERN +OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, const OnigUChar** prev)); +ONIG_EXTERN +OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end)); +ONIG_EXTERN +OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end)); +ONIG_EXTERN +OnigUChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end)); +ONIG_EXTERN +int onigenc_strlen P_((OnigEncoding enc, const OnigUChar* p, const OnigUChar* end)); +ONIG_EXTERN +int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p)); +ONIG_EXTERN +int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p)); + + + +/* PART: regular expression */ + +/* config parameters */ +#define ONIG_NREGION 10 +#define ONIG_MAX_BACKREF_NUM 1000 +#define ONIG_MAX_REPEAT_NUM 100000 +#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000 +/* 
constants */ +#define ONIG_MAX_ERROR_MESSAGE_LEN 90 + +typedef unsigned int OnigOptionType; + +#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE + +/* options */ +#define ONIG_OPTION_NONE 0U +#define ONIG_OPTION_IGNORECASE 1U +#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1) +#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1) +#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1) +#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1) +#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1) +#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1) +#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1) +#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1) +/* options (search time) */ +#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1) +#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1) +#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1) +#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */ + +#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt)) +#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) +#define ONIG_IS_OPTION_ON(options,option) ((options) & (option)) + +/* syntax */ +typedef struct { + unsigned int op; + unsigned int op2; + unsigned int behavior; + OnigOptionType options; /* default option */ + OnigMetaCharTableType meta_char_table; +} OnigSyntaxType; + +ONIG_EXTERN const OnigSyntaxType OnigSyntaxASIS; +ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixBasic; +ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixExtended; +ONIG_EXTERN const OnigSyntaxType OnigSyntaxEmacs; +ONIG_EXTERN const OnigSyntaxType OnigSyntaxGrep; +ONIG_EXTERN const OnigSyntaxType OnigSyntaxGnuRegex; +ONIG_EXTERN const OnigSyntaxType OnigSyntaxJava; +ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl; +ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl_NG; +ONIG_EXTERN const OnigSyntaxType OnigSyntaxRuby; + +/* predefined syntaxes (see regsyntax.c) */ 
+#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS) +#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) +#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) +#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) +#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep) +#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex) +#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) +#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) +#define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG) +#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) + +/* default syntax */ +ONIG_EXTERN const OnigSyntaxType* OnigDefaultSyntax; +#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax + +/* syntax (operators) */ +#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0) +#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */ +#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */ +#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3) +#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */ +#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5) +#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */ +#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7) +#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */ +#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */ +#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */ +#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */ +#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */ +#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */ +#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */ +#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */ +#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */ +#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */ +#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */ +#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<. 
\> */ +#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */ +#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */ +#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */ +#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */ +#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */ +#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */ +#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */ +#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */ +#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */ +#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */ +#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */ + +#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */ +#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */ +#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsx),(?-imsx) */ +#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imx), (?-imx) */ +#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */ +#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */ +#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */ +#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?...) */ +#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k */ +#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g, \g */ +#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@..) 
*/ +#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */ +#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */ +#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */ +#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */ +#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */ +#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */ +#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */ +/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */ +#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */ +#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */ + +/* syntax (behavior) */ +#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */ +#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */ +#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */ +#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */ +#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */ +#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */ +#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/ +#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */ +#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */ +#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?)(?) */ +#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */ + +/* syntax (behavior) in char class [...] */ +#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */ +#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. 
*/ +#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22) +#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */ +/* syntax (behavior) warning */ +#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */ +#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */ + +/* meta character specifiers (onig_set_meta_char()) */ +#define ONIG_META_CHAR_ESCAPE 0 +#define ONIG_META_CHAR_ANYCHAR 1 +#define ONIG_META_CHAR_ANYTIME 2 +#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3 +#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4 +#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5 + +#define ONIG_INEFFECTIVE_META_CHAR 0 + +/* error codes */ +#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000) +/* normal return */ +#define ONIG_NORMAL 0 +#define ONIG_MISMATCH -1 +#define ONIG_NO_SUPPORT_CONFIG -2 + +/* internal error */ +#define ONIGERR_MEMORY -5 +#define ONIGERR_TYPE_BUG -6 +#define ONIGERR_PARSER_BUG -11 +#define ONIGERR_STACK_BUG -12 +#define ONIGERR_UNDEFINED_BYTECODE -13 +#define ONIGERR_UNEXPECTED_BYTECODE -14 +#define ONIGERR_MATCH_STACK_LIMIT_OVER -15 +#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21 +#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 +/* general error */ +#define ONIGERR_INVALID_ARGUMENT -30 +/* syntax error */ +#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100 +#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101 +#define ONIGERR_EMPTY_CHAR_CLASS -102 +#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103 +#define ONIGERR_END_PATTERN_AT_ESCAPE -104 +#define ONIGERR_END_PATTERN_AT_META -105 +#define ONIGERR_END_PATTERN_AT_CONTROL -106 +#define ONIGERR_META_CODE_SYNTAX -108 +#define ONIGERR_CONTROL_CODE_SYNTAX -109 +#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110 +#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111 +#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112 +#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113 +#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114 +#define 
ONIGERR_NESTED_REPEAT_OPERATOR -115 +#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116 +#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117 +#define ONIGERR_END_PATTERN_IN_GROUP -118 +#define ONIGERR_UNDEFINED_GROUP_OPTION -119 +#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121 +#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122 +#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123 +/* values error (syntax error) */ +#define ONIGERR_TOO_BIG_NUMBER -200 +#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201 +#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202 +#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203 +#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204 +#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205 +#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206 +#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207 +#define ONIGERR_INVALID_BACKREF -208 +#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209 +#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212 +#define ONIGERR_EMPTY_GROUP_NAME -214 +#define ONIGERR_INVALID_GROUP_NAME -215 +#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216 +#define ONIGERR_UNDEFINED_NAME_REFERENCE -217 +#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218 +#define ONIGERR_MULTIPLEX_DEFINED_NAME -219 +#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220 +#define ONIGERR_NEVER_ENDING_RECURSION -221 +#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222 +#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223 +#define ONIGERR_INVALID_CODE_POINT_VALUE -400 +#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 +#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401 +#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402 +#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403 + +/* errors related to thread */ +#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 + + +/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */ +#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31 +#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \ + ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && 
(r)->list[i]) + +typedef struct OnigCaptureTreeNodeStruct { + int group; /* group number */ + int beg; + int end; + int allocated; + int num_childs; + struct OnigCaptureTreeNodeStruct** childs; +} OnigCaptureTreeNode; + +/* match result region type */ +struct re_registers { + int allocated; + int num_regs; + int* beg; + int* end; + /* extended */ + OnigCaptureTreeNode* history_root; /* capture history tree root */ +}; + +/* capture tree traverse */ +#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1 +#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2 +#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \ + ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST ) + + +#define ONIG_REGION_NOTPOS -1 + +typedef struct re_registers OnigRegion; + +typedef struct { + OnigEncoding enc; + OnigUChar* par; + OnigUChar* par_end; +} OnigErrorInfo; + +typedef struct { + int lower; + int upper; +} OnigRepeatRange; + +typedef void (*OnigWarnFunc) P_((const char* s)); +extern void onig_null_warn P_((const char* s)); +#define ONIG_NULL_WARN onig_null_warn + +#define ONIG_CHAR_TABLE_SIZE 256 + +/* regex_t state */ +#define ONIG_STATE_NORMAL 0 +#define ONIG_STATE_SEARCHING 1 +#define ONIG_STATE_COMPILING -1 +#define ONIG_STATE_MODIFY -2 + +#define ONIG_STATE(reg) \ + ((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state) + +typedef struct re_pattern_buffer { + /* common members of BBuf(bytes-buffer) */ + unsigned char* p; /* compiled pattern */ + unsigned int used; /* used space for p */ + unsigned int alloc; /* allocated space for p */ + + int state; /* normal, searching, compiling */ + int num_mem; /* used memory(...) num counted from 1 */ + int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ + int num_null_check; /* OP_NULL_CHECK_START/END id counter */ + int num_comb_exp_check; /* combination explosion check */ + int num_call; /* number of subexp call */ + unsigned int capture_history; /* (?@...) 
flag (1-31) */ + unsigned int bt_mem_start; /* need backtrack flag */ + unsigned int bt_mem_end; /* need backtrack flag */ + int stack_pop_level; + int repeat_range_alloc; + OnigRepeatRange* repeat_range; + + OnigEncoding enc; + OnigOptionType options; + const OnigSyntaxType* syntax; + OnigCaseFoldType case_fold_flag; + void* name_table; + + /* optimization info (string search, char-map and anchors) */ + int optimize; /* optimize flag */ + int threshold_len; /* search str-length for apply optimize */ + int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ + OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */ + OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */ + int sub_anchor; /* start-anchor for exact or map */ + unsigned char *exact; + unsigned char *exact_end; + unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ + int *int_map; /* BM skip for exact_len > 255 */ + int *int_map_backward; /* BM skip for backward search */ + OnigDistance dmin; /* min-distance of exact or map */ + OnigDistance dmax; /* max-distance of exact or map */ + + /* regex_t link chain */ + struct re_pattern_buffer* chain; /* escape compile-conflict */ +} OnigRegexType; + +typedef OnigRegexType* OnigRegex; + +#ifndef ONIG_ESCAPE_REGEX_T_COLLISION + typedef OnigRegexType regex_t; +#endif + + +typedef struct { + int num_of_elements; + OnigEncoding pattern_enc; + OnigEncoding target_enc; + OnigSyntaxType* syntax; + OnigOptionType option; + OnigCaseFoldType case_fold_flag; +} OnigCompileInfo; + +/* Oniguruma Native API */ +ONIG_EXTERN +int onig_init P_((void)); +ONIG_EXTERN +int onig_error_code_to_str PV_((OnigUChar* s, int err_code, ...)); +ONIG_EXTERN +void onig_set_warn_func P_((OnigWarnFunc f)); +ONIG_EXTERN +void onig_set_verb_warn_func P_((OnigWarnFunc f)); +ONIG_EXTERN +int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, OnigErrorInfo* 
einfo)); +ONIG_EXTERN +int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); +ONIG_EXTERN +void onig_free P_((OnigRegex)); +ONIG_EXTERN +int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); +ONIG_EXTERN +int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); +ONIG_EXTERN +int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option)); +ONIG_EXTERN +int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option)); +ONIG_EXTERN +OnigRegion* onig_region_new P_((void)); +ONIG_EXTERN +void onig_region_init P_((OnigRegion* region)); +ONIG_EXTERN +void onig_region_free P_((OnigRegion* region, int free_self)); +ONIG_EXTERN +void onig_region_copy P_((OnigRegion* to, OnigRegion* from)); +ONIG_EXTERN +void onig_region_clear P_((OnigRegion* region)); +ONIG_EXTERN +int onig_region_resize P_((OnigRegion* region, int n)); +ONIG_EXTERN +int onig_region_set P_((OnigRegion* region, int at, int beg, int end)); +ONIG_EXTERN +int onig_name_to_group_numbers P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums)); +ONIG_EXTERN +int onig_name_to_backref_number P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region)); +ONIG_EXTERN +int onig_foreach_name P_((OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg)); +ONIG_EXTERN +int onig_number_of_names P_((OnigRegex reg)); +ONIG_EXTERN +int onig_number_of_captures P_((OnigRegex reg)); +ONIG_EXTERN +int onig_number_of_capture_histories P_((OnigRegex reg)); 
+ONIG_EXTERN +OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region)); +ONIG_EXTERN +int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg)); +ONIG_EXTERN +int onig_noname_group_capture_is_active P_((OnigRegex reg)); +ONIG_EXTERN +OnigEncoding onig_get_encoding P_((OnigRegex reg)); +ONIG_EXTERN +OnigOptionType onig_get_options P_((OnigRegex reg)); +ONIG_EXTERN +OnigCaseFoldType onig_get_case_fold_flag P_((OnigRegex reg)); +ONIG_EXTERN +const OnigSyntaxType* onig_get_syntax P_((OnigRegex reg)); +ONIG_EXTERN +int onig_set_default_syntax P_((const OnigSyntaxType* syntax)); +ONIG_EXTERN +void onig_copy_syntax P_((OnigSyntaxType* to, const OnigSyntaxType* from)); +ONIG_EXTERN +unsigned int onig_get_syntax_op P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +unsigned int onig_get_syntax_op2 P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +unsigned int onig_get_syntax_behavior P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +OnigOptionType onig_get_syntax_options P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op)); +ONIG_EXTERN +void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2)); +ONIG_EXTERN +void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior)); +ONIG_EXTERN +void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options)); +ONIG_EXTERN +int onig_set_meta_char P_((OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code)); +ONIG_EXTERN +void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from)); +ONIG_EXTERN +OnigCaseFoldType onig_get_default_case_fold_flag P_((void)); +ONIG_EXTERN +int onig_set_default_case_fold_flag P_((OnigCaseFoldType case_fold_flag)); +ONIG_EXTERN +unsigned int onig_get_match_stack_limit_size P_((void)); +ONIG_EXTERN +int onig_set_match_stack_limit_size P_((unsigned int size)); +ONIG_EXTERN +int onig_end P_((void)); +ONIG_EXTERN +const char* 
onig_version P_((void)); +ONIG_EXTERN +const char* onig_copyright P_((void)); + +#ifdef __cplusplus +#if 0 +{ /* satisfy cc-mode */ +#endif +} +#endif + +#endif /* ONIGURUMA_H */ diff --git a/include/ruby/re.h b/include/ruby/re.h new file mode 100644 index 0000000..0972cdc --- /dev/null +++ b/include/ruby/re.h @@ -0,0 +1,66 @@ +/********************************************************************** + + re.h - + + $Author: akr $ + created at: Thu Sep 30 14:18:32 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#ifndef RUBY_RE_H +#define RUBY_RE_H 1 + +#if defined(__cplusplus) +extern "C" { +#if 0 +} /* satisfy cc-mode */ +#endif +#endif + +#include +#include + +#include "ruby/regex.h" + +typedef struct re_pattern_buffer Regexp; + +struct rmatch_offset { + int beg; + int end; +}; + +struct rmatch { + struct re_registers regs; + + int char_offset_updated; + int char_offset_num_allocated; + struct rmatch_offset *char_offset; +}; + +struct RMatch { + struct RBasic basic; + VALUE str; + struct rmatch *rmatch; + VALUE regexp; /* RRegexp */ +}; + +#define RMATCH(obj) (R_CAST(RMatch)(obj)) +#define RMATCH_REGS(obj) (&(R_CAST(RMatch)(obj))->rmatch->regs) + +VALUE rb_reg_regcomp(VALUE); +int rb_reg_search(VALUE, VALUE, int, int); +VALUE rb_reg_regsub(VALUE, VALUE, struct re_registers *, VALUE); +int rb_reg_adjust_startpos(VALUE, VALUE, int, int); +void rb_match_busy(VALUE); +VALUE rb_reg_quote(VALUE); + +#if defined(__cplusplus) +#if 0 +{ /* satisfy cc-mode */ +#endif +} /* extern "C" { */ +#endif + +#endif /* RUBY_RE_H */ diff --git a/include/ruby/regex.h b/include/ruby/regex.h new file mode 100644 index 0000000..d31f85a --- /dev/null +++ b/include/ruby/regex.h @@ -0,0 +1,42 @@ +/********************************************************************** + + regex.h - + + $Author: akr $ + + Copyright (C) 1993-2007 Yukihiro Matsumoto + 
+**********************************************************************/ + +#ifndef ONIGURUMA_REGEX_H +#define ONIGURUMA_REGEX_H 1 + +#if defined(__cplusplus) +extern "C" { +#if 0 +} /* satisfy cc-mode */ +#endif +#endif + +#ifdef RUBY /* RUBY defined: pull the header in through its ruby/ path */ +#include "ruby/oniguruma.h" +#else /* otherwise include it by its bare name */ +#include "oniguruma.h" +#endif + +#ifndef ONIG_RUBY_M17N /* the declarations below exist only when ONIG_RUBY_M17N is not defined */ + +ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; + +#define mbclen(p,e,enc) rb_enc_mbclen((p),(e),(enc)) /* shorthand: forwards all three arguments to rb_enc_mbclen */ + +#endif /* ifndef ONIG_RUBY_M17N */ + +#if defined(__cplusplus) +#if 0 +{ /* satisfy cc-mode */ +#endif +} /* extern "C" { */ +#endif + +#endif /* ONIGURUMA_REGEX_H */ diff --git a/include/ruby/ruby.h b/include/ruby/ruby.h new file mode 100644 index 0000000..215f147 --- /dev/null +++ b/include/ruby/ruby.h @@ -0,0 +1,1221 @@ +/********************************************************************** + + ruby/ruby.h - + + $Author: yugui $ + created at: Thu Jun 10 14:26:32 JST 1993 + + Copyright (C) 1993-2008 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#ifndef RUBY_RUBY_H +#define RUBY_RUBY_H 1 + +#if defined(__cplusplus) +extern "C" { +#if 0 +} /* satisfy cc-mode */ +#endif +#endif + +#ifndef RUBY_LIB +#include "ruby/config.h" +#ifdef RUBY_EXTCONF_H +#include RUBY_EXTCONF_H +#endif +#endif + +#define NORETURN_STYLE_NEW 1 +#ifndef NORETURN +# define NORETURN(x) x +#endif +#ifndef DEPRECATED +# define DEPRECATED(x) x +#endif +#ifndef NOINLINE +# define NOINLINE(x) x +#endif + +#ifdef __GNUC__ +#define PRINTF_ARGS(decl, string_index, first_to_check) \ + decl __attribute__((format(printf, string_index, first_to_check))) +#else +#define PRINTF_ARGS(decl, string_index, first_to_check) decl +#endif + +#ifdef HAVE_STDLIB_H +# include +#endif + +#ifdef HAVE_STRING_H +# include +#else +# include +#endif + +#ifdef HAVE_INTRINSICS_H +# include +#endif + +#ifdef HAVE_INTTYPES_H +# include +#endif + +#include +#include + +#include "defines.h" + +#if defined(HAVE_ALLOCA_H) +#include +#else +# ifdef _AIX +#pragma alloca +# endif +#endif + +#if SIZEOF_LONG == SIZEOF_VOIDP +typedef unsigned long VALUE; +typedef unsigned long ID; +# define SIGNED_VALUE long +# define SIZEOF_VALUE SIZEOF_LONG +# define PRI_VALUE_PREFIX "l" +#elif SIZEOF_LONG_LONG == SIZEOF_VOIDP +typedef unsigned LONG_LONG VALUE; +typedef unsigned LONG_LONG ID; +# define SIGNED_VALUE LONG_LONG +# define LONG_LONG_VALUE 1 +# define SIZEOF_VALUE SIZEOF_LONG_LONG +# define PRI_VALUE_PREFIX "ll" +#else +# error ---->> ruby requires sizeof(void*) == sizeof(long) to be compiled. 
<<---- +#endif +#define PRIdVALUE PRI_VALUE_PREFIX"d" +#define PRIiVALUE PRI_VALUE_PREFIX"i" +#define PRIoVALUE PRI_VALUE_PREFIX"o" +#define PRIuVALUE PRI_VALUE_PREFIX"u" +#define PRIxVALUE PRI_VALUE_PREFIX"x" +#define PRIXVALUE PRI_VALUE_PREFIX"X" + +#if SIZEOF_PTRDIFF_T == SIZEOF_INT +# define PRI_PTRDIFF_PREFIX +#elif SIZEOF_PTRDIFF_T == SIZEOF_LONG +# define PRI_PTRDIFF_PREFIX "l" +#elif SIZEOF_PTRDIFF_T == SIZEOF_LONG_LONG +# define PRI_PTRDIFF_PREFIX "ll" +#endif +#define PRIdPTRDIFF PRI_PTRDIFF_PREFIX"d" +#define PRIiPTRDIFF PRI_PTRDIFF_PREFIX"i" +#define PRIoPTRDIFF PRI_PTRDIFF_PREFIX"o" +#define PRIuPTRDIFF PRI_PTRDIFF_PREFIX"u" +#define PRIxPTRDIFF PRI_PTRDIFF_PREFIX"x" +#define PRIXPTRDIFF PRI_PTRDIFF_PREFIX"X" + +#if SIZEOF_SIZE_T == SIZEOF_INT +# define PRI_SIZE_PREFIX +#elif SIZEOF_SIZE_T == SIZEOF_LONG +# define PRI_SIZE_PREFIX "l" +#elif SIZEOF_SIZE_T == SIZEOF_LONG_LONG +# define PRI_SIZE_PREFIX "ll" +#endif +#define PRIdSIZE PRI_SIZE_PREFIX"d" +#define PRIiSIZE PRI_SIZE_PREFIX"i" +#define PRIoSIZE PRI_SIZE_PREFIX"o" +#define PRIuSIZE PRI_SIZE_PREFIX"u" +#define PRIxSIZE PRI_SIZE_PREFIX"x" +#define PRIXSIZE PRI_SIZE_PREFIX"X" + +#ifdef __STDC__ +# include +#else +# ifndef LONG_MAX +# ifdef HAVE_LIMITS_H +# include +# else + /* assuming 32bit(2's compliment) long */ +# define LONG_MAX 2147483647 +# endif +# endif +# ifndef LONG_MIN +# define LONG_MIN (-LONG_MAX-1) +# endif +# ifndef CHAR_BIT +# define CHAR_BIT 8 +# endif +#endif + +#ifdef HAVE_LONG_LONG +# ifndef LLONG_MAX +# ifdef LONG_LONG_MAX +# define LLONG_MAX LONG_LONG_MAX +# else +# ifdef _I64_MAX +# define LLONG_MAX _I64_MAX +# else + /* assuming 64bit(2's complement) long long */ +# define LLONG_MAX 9223372036854775807LL +# endif +# endif +# endif +# ifndef LLONG_MIN +# ifdef LONG_LONG_MIN +# define LLONG_MIN LONG_LONG_MIN +# else +# ifdef _I64_MIN +# define LLONG_MIN _I64_MIN +# else +# define LLONG_MIN (-LLONG_MAX-1) +# endif +# endif +# endif +#endif + +#define FIXNUM_MAX (LONG_MAX>>1) 
+#define FIXNUM_MIN RSHIFT((long)LONG_MIN,1) + +#define INT2FIX(i) ((VALUE)(((SIGNED_VALUE)(i))<<1 | FIXNUM_FLAG)) /* shift left one bit and set FIXNUM_FLAG */ +#define LONG2FIX(i) INT2FIX(i) +#define rb_fix_new(v) INT2FIX(v) +VALUE rb_int2inum(SIGNED_VALUE); + +#define rb_int_new(v) rb_int2inum(v) +VALUE rb_uint2inum(VALUE); + +#define rb_uint_new(v) rb_uint2inum(v) + +#ifdef HAVE_LONG_LONG +VALUE rb_ll2inum(LONG_LONG); +#define LL2NUM(v) rb_ll2inum(v) +VALUE rb_ull2inum(unsigned LONG_LONG); +#define ULL2NUM(v) rb_ull2inum(v) +#endif + +#if SIZEOF_OFF_T > SIZEOF_LONG && defined(HAVE_LONG_LONG) /* pick the off_t converter by the width of off_t */ +# define OFFT2NUM(v) LL2NUM(v) +#elif SIZEOF_OFF_T == SIZEOF_LONG +# define OFFT2NUM(v) LONG2NUM(v) +#else +# define OFFT2NUM(v) INT2NUM(v) +#endif + +#if SIZEOF_SIZE_T > SIZEOF_LONG && defined(HAVE_LONG_LONG) /* pick the size_t converters by the width of size_t */ +# define SIZET2NUM(v) ULL2NUM(v) +# define SSIZET2NUM(v) LL2NUM(v) +#elif SIZEOF_SIZE_T == SIZEOF_LONG +# define SIZET2NUM(v) ULONG2NUM(v) +# define SSIZET2NUM(v) LONG2NUM(v) +#else +# define SIZET2NUM(v) UINT2NUM(v) +# define SSIZET2NUM(v) INT2NUM(v) +#endif + +#ifndef SSIZE_MAX /* fallback limits, defined only when SSIZE_MAX is not already provided */ +# if SIZEOF_SIZE_T > SIZEOF_LONG && defined(HAVE_LONG_LONG) +# define SSIZE_MAX LLONG_MAX +# define SSIZE_MIN LLONG_MIN +# elif SIZEOF_SIZE_T == SIZEOF_LONG +# define SSIZE_MAX LONG_MAX +# define SSIZE_MIN LONG_MIN +# elif SIZEOF_SIZE_T == SIZEOF_INT +# define SSIZE_MAX INT_MAX +# define SSIZE_MIN INT_MIN +# else +# define SSIZE_MAX SHRT_MAX +# define SSIZE_MIN SHRT_MIN +# endif +#endif + +#ifndef PIDT2NUM /* each pid/uid/gid converter below is a default, used unless already defined */ +#define PIDT2NUM(v) LONG2NUM(v) +#endif +#ifndef NUM2PIDT +#define NUM2PIDT(v) NUM2LONG(v) +#endif +#ifndef UIDT2NUM +#define UIDT2NUM(v) LONG2NUM(v) +#endif +#ifndef NUM2UIDT +#define NUM2UIDT(v) NUM2LONG(v) +#endif +#ifndef GIDT2NUM +#define GIDT2NUM(v) LONG2NUM(v) +#endif +#ifndef NUM2GIDT +#define NUM2GIDT(v) NUM2LONG(v) +#endif + +#define FIX2LONG(x) RSHIFT((SIGNED_VALUE)(x),1) /* argument parenthesized so the signed cast covers the whole expression before the shift */ +#define FIX2ULONG(x) ((((VALUE)(x))>>1)&LONG_MAX) /* shift right one bit, then mask with LONG_MAX */ +#define FIXNUM_P(f) (((SIGNED_VALUE)(f))&FIXNUM_FLAG) /* nonzero when FIXNUM_FLAG is set in f */ +#define POSFIXABLE(f) ((f) < FIXNUM_MAX+1) /* true when f does not exceed FIXNUM_MAX */ 
+#define NEGFIXABLE(f) ((f) >= FIXNUM_MIN) +#define FIXABLE(f) (POSFIXABLE(f) && NEGFIXABLE(f)) + +#define IMMEDIATE_P(x) ((VALUE)(x) & IMMEDIATE_MASK) + +#define SYMBOL_P(x) (((VALUE)(x)&~(~(VALUE)0<flags & T_MASK) + +#define TYPE(x) rb_type((VALUE)(x)) + +#define RB_GC_GUARD(v) (*(volatile VALUE *)&(v)) + +void rb_check_type(VALUE,int); +#define Check_Type(v,t) rb_check_type((VALUE)(v),t) + +VALUE rb_str_to_str(VALUE); +VALUE rb_string_value(volatile VALUE*); +char *rb_string_value_ptr(volatile VALUE*); +char *rb_string_value_cstr(volatile VALUE*); + +#define StringValue(v) rb_string_value(&(v)) +#define StringValuePtr(v) rb_string_value_ptr(&(v)) +#define StringValueCStr(v) rb_string_value_cstr(&(v)) + +void rb_check_safe_obj(VALUE); +void rb_check_safe_str(VALUE); +#define SafeStringValue(v) do {\ + StringValue(v);\ + rb_check_safe_obj(v);\ +} while (0) +/* obsolete macro - use SafeStringValue(v) */ +#define Check_SafeStr(v) rb_check_safe_str((VALUE)(v)) + +VALUE rb_str_export(VALUE); +#define ExportStringValue(v) do {\ + SafeStringValue(v);\ + (v) = rb_str_export(v);\ +} while (0) +VALUE rb_str_export_locale(VALUE); + +VALUE rb_get_path(VALUE); +#define FilePathValue(v) (RB_GC_GUARD(v) = rb_get_path(v)) + +VALUE rb_get_path_no_checksafe(VALUE); +#define FilePathStringValue(v) ((v) = rb_get_path_no_checksafe(v)) + +void rb_secure(int); +int rb_safe_level(void); +void rb_set_safe_level(int); +void rb_set_safe_level_force(int); +void rb_secure_update(VALUE); + +VALUE rb_errinfo(void); +void rb_set_errinfo(VALUE); + +SIGNED_VALUE rb_num2long(VALUE); +VALUE rb_num2ulong(VALUE); +static inline long +NUM2LONG(VALUE x) +{ + return FIXNUM_P(x) ? FIX2LONG(x) : rb_num2long(x); +} +#define NUM2ULONG(x) rb_num2ulong((VALUE)x) +#if SIZEOF_INT < SIZEOF_LONG +long rb_num2int(VALUE); +long rb_fix2int(VALUE); +#define FIX2INT(x) ((int)rb_fix2int((VALUE)x)) +static inline int +NUM2INT(VALUE x) +{ + return FIXNUM_P(x) ? 
FIX2INT(x) : rb_num2int(x); +} +unsigned long rb_num2uint(VALUE); +#define NUM2UINT(x) ((unsigned int)rb_num2uint(x)) +unsigned long rb_fix2uint(VALUE); +#define FIX2UINT(x) ((unsigned int)rb_fix2uint(x)) +#else +#define NUM2INT(x) ((int)NUM2LONG(x)) +#define NUM2UINT(x) ((unsigned int)NUM2ULONG(x)) +#define FIX2INT(x) ((int)FIX2LONG(x)) +#define FIX2UINT(x) ((unsigned int)FIX2ULONG(x)) +#endif + +#ifdef HAVE_LONG_LONG +LONG_LONG rb_num2ll(VALUE); +unsigned LONG_LONG rb_num2ull(VALUE); +static inline LONG_LONG +NUM2LL(VALUE x) +{ + return FIXNUM_P(x) ? FIX2LONG(x) : rb_num2ll(x); +} +# define NUM2ULL(x) rb_num2ull((VALUE)x) +#endif + +#if defined(HAVE_LONG_LONG) && SIZEOF_OFF_T > SIZEOF_LONG +# define NUM2OFFT(x) ((off_t)NUM2LL(x)) +#else +# define NUM2OFFT(x) NUM2LONG(x) +#endif + +#if defined(HAVE_LONG_LONG) && SIZEOF_SIZE_T > SIZEOF_LONG +# define NUM2SIZET(x) ((size_t)NUM2ULL(x)) +# define NUM2SSIZET(x) ((size_t)NUM2LL(x)) +#else +# define NUM2SIZET(x) NUM2ULONG(x) +# define NUM2SSIZET(x) NUM2LONG(x) +#endif + +double rb_num2dbl(VALUE); +#define NUM2DBL(x) rb_num2dbl((VALUE)(x)) + +VALUE rb_uint2big(VALUE); +VALUE rb_int2big(SIGNED_VALUE); + +#if SIZEOF_INT < SIZEOF_VALUE +# define INT2NUM(v) INT2FIX((int)(v)) +# define UINT2NUM(v) LONG2FIX((unsigned int)(v)) +#else +static inline VALUE +INT2NUM(int v) +{ + if (!FIXABLE(v)) + return rb_int2big(v); + return INT2FIX(v); +} + +static inline VALUE +UINT2NUM(unsigned int v) +{ + if (!POSFIXABLE(v)) + return rb_uint2big(v); + return LONG2FIX(v); +} +#endif + +static inline VALUE +LONG2NUM(long v) +{ + if (FIXABLE(v)) return LONG2FIX(v); + return rb_int2big(v); +} + +static inline VALUE +ULONG2NUM(unsigned long v) +{ + if (POSFIXABLE(v)) return LONG2FIX(v); + return rb_uint2big(v); +} + +/* obsolete API - use StringValue() */ +char *rb_str2cstr(VALUE,long*); +/* obsolete API - use StringValuePtr() */ +#define STR2CSTR(x) rb_str2cstr((VALUE)(x),0) + +#define NUM2CHR(x) (((TYPE(x) == T_STRING)&&(RSTRING_LEN(x)>=1))?\ + 
RSTRING_PTR(x)[0]:(char)(NUM2INT(x)&0xff)) +#define CHR2FIX(x) INT2FIX((long)((x)&0xff)) + +VALUE rb_newobj(void); +#define NEWOBJ(obj,type) type *obj = (type*)rb_newobj() +#define OBJSETUP(obj,c,t) do {\ + RBASIC(obj)->flags = (t);\ + RBASIC(obj)->klass = (c);\ + if (rb_safe_level() >= 3) FL_SET(obj, FL_TAINT | FL_UNTRUSTED);\ +} while (0) +#define CLONESETUP(clone,obj) do {\ + OBJSETUP(clone,rb_singleton_class_clone((VALUE)obj),RBASIC(obj)->flags);\ + rb_singleton_class_attached(RBASIC(clone)->klass, (VALUE)clone);\ + if (FL_TEST(obj, FL_EXIVAR)) rb_copy_generic_ivar((VALUE)clone,(VALUE)obj);\ +} while (0) +#define DUPSETUP(dup,obj) do {\ + OBJSETUP(dup,rb_obj_class(obj), (RBASIC(obj)->flags)&(T_MASK|FL_EXIVAR|FL_TAINT|FL_UNTRUSTED)); \ + if (FL_TEST(obj, FL_EXIVAR)) rb_copy_generic_ivar((VALUE)dup,(VALUE)obj);\ +} while (0) + +struct RBasic { + VALUE flags; + VALUE klass; +}; + +#define ROBJECT_EMBED_LEN_MAX 3 +struct RObject { + struct RBasic basic; + union { + struct { + long numiv; + VALUE *ivptr; + struct st_table *iv_index_tbl; /* shortcut for RCLASS_IV_INDEX_TBL(rb_obj_class(obj)) */ + } heap; + VALUE ary[ROBJECT_EMBED_LEN_MAX]; + } as; +}; +#define ROBJECT_EMBED FL_USER1 +#define ROBJECT_NUMIV(o) \ + ((RBASIC(o)->flags & ROBJECT_EMBED) ? \ + ROBJECT_EMBED_LEN_MAX : \ + ROBJECT(o)->as.heap.numiv) +#define ROBJECT_IVPTR(o) \ + ((RBASIC(o)->flags & ROBJECT_EMBED) ? \ + ROBJECT(o)->as.ary : \ + ROBJECT(o)->as.heap.ivptr) +#define ROBJECT_IV_INDEX_TBL(o) \ + ((RBASIC(o)->flags & ROBJECT_EMBED) ? 
\ + RCLASS_IV_INDEX_TBL(rb_obj_class(o)) : \ + ROBJECT(o)->as.heap.iv_index_tbl) + +typedef struct { + VALUE super; + struct st_table *iv_tbl; +} rb_classext_t; + +struct RClass { + struct RBasic basic; + rb_classext_t *ptr; + struct st_table *m_tbl; + struct st_table *iv_index_tbl; +}; +#define RCLASS_IV_TBL(c) (RCLASS(c)->ptr->iv_tbl) +#define RCLASS_M_TBL(c) (RCLASS(c)->m_tbl) +#define RCLASS_SUPER(c) (RCLASS(c)->ptr->super) +#define RCLASS_IV_INDEX_TBL(c) (RCLASS(c)->iv_index_tbl) +#define RMODULE_IV_TBL(m) RCLASS_IV_TBL(m) +#define RMODULE_M_TBL(m) RCLASS_M_TBL(m) +#define RMODULE_SUPER(m) RCLASS_SUPER(m) + +struct RFloat { + struct RBasic basic; + double float_value; +}; +#define RFLOAT_VALUE(v) (RFLOAT(v)->float_value) +#define DBL2NUM(dbl) rb_float_new(dbl) + +#define ELTS_SHARED FL_USER2 + +#define RSTRING_EMBED_LEN_MAX ((sizeof(VALUE)*3)/sizeof(char)-1) +struct RString { + struct RBasic basic; + union { + struct { + long len; + char *ptr; + union { + long capa; + VALUE shared; + } aux; + } heap; + char ary[RSTRING_EMBED_LEN_MAX + 1]; + } as; +}; +#define RSTRING_NOEMBED FL_USER1 +#define RSTRING_EMBED_LEN_MASK (FL_USER2|FL_USER3|FL_USER4|FL_USER5|FL_USER6) +#define RSTRING_EMBED_LEN_SHIFT (FL_USHIFT+2) +#define RSTRING_LEN(str) \ + (!(RBASIC(str)->flags & RSTRING_NOEMBED) ? \ + (long)((RBASIC(str)->flags >> RSTRING_EMBED_LEN_SHIFT) & \ + (RSTRING_EMBED_LEN_MASK >> RSTRING_EMBED_LEN_SHIFT)) : \ + RSTRING(str)->as.heap.len) +#define RSTRING_PTR(str) \ + (!(RBASIC(str)->flags & RSTRING_NOEMBED) ? 
\ + RSTRING(str)->as.ary : \ + RSTRING(str)->as.heap.ptr) +#define RSTRING_END(str) (RSTRING_PTR(str)+RSTRING_LEN(str)) + +#define RARRAY_EMBED_LEN_MAX 3 +struct RArray { + struct RBasic basic; + union { + struct { + long len; + union { + long capa; + VALUE shared; + } aux; + VALUE *ptr; + } heap; + VALUE ary[RARRAY_EMBED_LEN_MAX]; + } as; +}; +#define RARRAY_EMBED_FLAG FL_USER1 +/* FL_USER2 is for ELTS_SHARED */ +#define RARRAY_EMBED_LEN_MASK (FL_USER4|FL_USER3) +#define RARRAY_EMBED_LEN_SHIFT (FL_USHIFT+3) +#define RARRAY_LEN(a) \ + ((RBASIC(a)->flags & RARRAY_EMBED_FLAG) ? \ + (long)((RBASIC(a)->flags >> RARRAY_EMBED_LEN_SHIFT) & \ + (RARRAY_EMBED_LEN_MASK >> RARRAY_EMBED_LEN_SHIFT)) : \ + RARRAY(a)->as.heap.len) +#define RARRAY_PTR(a) \ + ((RBASIC(a)->flags & RARRAY_EMBED_FLAG) ? \ + RARRAY(a)->as.ary : \ + RARRAY(a)->as.heap.ptr) + +struct RRegexp { + struct RBasic basic; + struct re_pattern_buffer *ptr; + VALUE src; + unsigned long usecnt; +}; +#define RREGEXP_SRC(r) RREGEXP(r)->src +#define RREGEXP_SRC_PTR(r) RSTRING_PTR(RREGEXP(r)->src) +#define RREGEXP_SRC_LEN(r) RSTRING_LEN(RREGEXP(r)->src) + +struct RHash { + struct RBasic basic; + struct st_table *ntbl; /* possibly 0 */ + int iter_lev; + VALUE ifnone; +}; +/* RHASH_TBL allocates st_table if not available. */ +#define RHASH_TBL(h) rb_hash_tbl(h) +#define RHASH_ITER_LEV(h) (RHASH(h)->iter_lev) +#define RHASH_IFNONE(h) (RHASH(h)->ifnone) +#define RHASH_SIZE(h) (RHASH(h)->ntbl ? 
RHASH(h)->ntbl->num_entries : 0) +#define RHASH_EMPTY_P(h) (RHASH_SIZE(h) == 0) + +struct RFile { + struct RBasic basic; + struct rb_io_t *fptr; +}; + +struct RRational { + struct RBasic basic; + VALUE num; + VALUE den; +}; + +struct RComplex { + struct RBasic basic; + VALUE real; + VALUE imag; +}; + +struct RData { + struct RBasic basic; + void (*dmark)(void*); + void (*dfree)(void*); + void *data; +}; + +#define DATA_PTR(dta) (RDATA(dta)->data) + +/* +#define RUBY_DATA_FUNC(func) ((void (*)(void*))func) +*/ +typedef void (*RUBY_DATA_FUNC)(void*); + +VALUE rb_data_object_alloc(VALUE,void*,RUBY_DATA_FUNC,RUBY_DATA_FUNC); + +#define Data_Wrap_Struct(klass,mark,free,sval)\ + rb_data_object_alloc(klass,sval,(RUBY_DATA_FUNC)mark,(RUBY_DATA_FUNC)free) + +#define Data_Make_Struct(klass,type,mark,free,sval) (\ + sval = ALLOC(type),\ + memset(sval, 0, sizeof(type)),\ + Data_Wrap_Struct(klass,mark,free,sval)\ +) + +#define Data_Get_Struct(obj,type,sval) do {\ + Check_Type(obj, T_DATA); \ + sval = (type*)DATA_PTR(obj);\ +} while (0) + +#define RSTRUCT_EMBED_LEN_MAX 3 +struct RStruct { + struct RBasic basic; + union { + struct { + long len; + VALUE *ptr; + } heap; + VALUE ary[RSTRUCT_EMBED_LEN_MAX]; + } as; +}; +#define RSTRUCT_EMBED_LEN_MASK (FL_USER2|FL_USER1) +#define RSTRUCT_EMBED_LEN_SHIFT (FL_USHIFT+1) +#define RSTRUCT_LEN(st) \ + ((RBASIC(st)->flags & RSTRUCT_EMBED_LEN_MASK) ? \ + (long)((RBASIC(st)->flags >> RSTRUCT_EMBED_LEN_SHIFT) & \ + (RSTRUCT_EMBED_LEN_MASK >> RSTRUCT_EMBED_LEN_SHIFT)) : \ + RSTRUCT(st)->as.heap.len) +#define RSTRUCT_PTR(st) \ + ((RBASIC(st)->flags & RSTRUCT_EMBED_LEN_MASK) ? 
\ + RSTRUCT(st)->as.ary : \ + RSTRUCT(st)->as.heap.ptr) + +#define RBIGNUM_EMBED_LEN_MAX ((sizeof(VALUE)*3)/sizeof(BDIGIT)) +struct RBignum { + struct RBasic basic; + union { + struct { + long len; + BDIGIT *digits; + } heap; + BDIGIT ary[RBIGNUM_EMBED_LEN_MAX]; + } as; +}; +#define RBIGNUM_SIGN_BIT FL_USER1 +/* sign: positive:1, negative:0 */ +#define RBIGNUM_SIGN(b) ((RBASIC(b)->flags & RBIGNUM_SIGN_BIT) != 0) +#define RBIGNUM_SET_SIGN(b,sign) \ + ((sign) ? (RBASIC(b)->flags |= RBIGNUM_SIGN_BIT) \ + : (RBASIC(b)->flags &= ~RBIGNUM_SIGN_BIT)) +#define RBIGNUM_POSITIVE_P(b) RBIGNUM_SIGN(b) +#define RBIGNUM_NEGATIVE_P(b) (!RBIGNUM_SIGN(b)) + +#define RBIGNUM_EMBED_FLAG FL_USER2 +#define RBIGNUM_EMBED_LEN_MASK (FL_USER5|FL_USER4|FL_USER3) +#define RBIGNUM_EMBED_LEN_SHIFT (FL_USHIFT+3) +#define RBIGNUM_LEN(b) \ + ((RBASIC(b)->flags & RBIGNUM_EMBED_FLAG) ? \ + (long)((RBASIC(b)->flags >> RBIGNUM_EMBED_LEN_SHIFT) & \ + (RBIGNUM_EMBED_LEN_MASK >> RBIGNUM_EMBED_LEN_SHIFT)) : \ + RBIGNUM(b)->as.heap.len) +/* LSB:RBIGNUM_DIGITS(b)[0], MSB:RBIGNUM_DIGITS(b)[RBIGNUM_LEN(b)-1] */ +#define RBIGNUM_DIGITS(b) \ + ((RBASIC(b)->flags & RBIGNUM_EMBED_FLAG) ? 
\ + RBIGNUM(b)->as.ary : \ + RBIGNUM(b)->as.heap.digits) + +#define R_CAST(st) (struct st*) +#define RBASIC(obj) (R_CAST(RBasic)(obj)) +#define ROBJECT(obj) (R_CAST(RObject)(obj)) +#define RCLASS(obj) (R_CAST(RClass)(obj)) +#define RMODULE(obj) RCLASS(obj) +#define RFLOAT(obj) (R_CAST(RFloat)(obj)) +#define RSTRING(obj) (R_CAST(RString)(obj)) +#define RREGEXP(obj) (R_CAST(RRegexp)(obj)) +#define RARRAY(obj) (R_CAST(RArray)(obj)) +#define RHASH(obj) (R_CAST(RHash)(obj)) +#define RDATA(obj) (R_CAST(RData)(obj)) +#define RSTRUCT(obj) (R_CAST(RStruct)(obj)) +#define RBIGNUM(obj) (R_CAST(RBignum)(obj)) +#define RFILE(obj) (R_CAST(RFile)(obj)) +#define RRATIONAL(obj) (R_CAST(RRational)(obj)) +#define RCOMPLEX(obj) (R_CAST(RComplex)(obj)) + +#define FL_SINGLETON FL_USER0 +#define FL_MARK (((VALUE)1)<<5) +#define FL_RESERVED (((VALUE)1)<<6) /* will be used in the future GC */ +#define FL_FINALIZE (((VALUE)1)<<7) +#define FL_TAINT (((VALUE)1)<<8) +#define FL_UNTRUSTED (((VALUE)1)<<9) +#define FL_EXIVAR (((VALUE)1)<<10) +#define FL_FREEZE (((VALUE)1)<<11) + +#define FL_USHIFT 12 + +#define FL_USER0 (((VALUE)1)<<(FL_USHIFT+0)) +#define FL_USER1 (((VALUE)1)<<(FL_USHIFT+1)) +#define FL_USER2 (((VALUE)1)<<(FL_USHIFT+2)) +#define FL_USER3 (((VALUE)1)<<(FL_USHIFT+3)) +#define FL_USER4 (((VALUE)1)<<(FL_USHIFT+4)) +#define FL_USER5 (((VALUE)1)<<(FL_USHIFT+5)) +#define FL_USER6 (((VALUE)1)<<(FL_USHIFT+6)) +#define FL_USER7 (((VALUE)1)<<(FL_USHIFT+7)) +#define FL_USER8 (((VALUE)1)<<(FL_USHIFT+8)) +#define FL_USER9 (((VALUE)1)<<(FL_USHIFT+9)) +#define FL_USER10 (((VALUE)1)<<(FL_USHIFT+10)) +#define FL_USER11 (((VALUE)1)<<(FL_USHIFT+11)) +#define FL_USER12 (((VALUE)1)<<(FL_USHIFT+12)) +#define FL_USER13 (((VALUE)1)<<(FL_USHIFT+13)) +#define FL_USER14 (((VALUE)1)<<(FL_USHIFT+14)) +#define FL_USER15 (((VALUE)1)<<(FL_USHIFT+15)) +#define FL_USER16 (((VALUE)1)<<(FL_USHIFT+16)) +#define FL_USER17 (((VALUE)1)<<(FL_USHIFT+17)) +#define FL_USER18 (((VALUE)1)<<(FL_USHIFT+18)) +#define FL_USER19 
(((VALUE)1)<<(FL_USHIFT+19)) + +#define SPECIAL_CONST_P(x) (IMMEDIATE_P(x) || !RTEST(x)) + +#define FL_ABLE(x) (!SPECIAL_CONST_P(x) && BUILTIN_TYPE(x) != T_NODE) +#define FL_TEST(x,f) (FL_ABLE(x)?(RBASIC(x)->flags&(f)):0) +#define FL_ANY(x,f) FL_TEST(x,f) +#define FL_ALL(x,f) (FL_TEST(x,f) == (f)) +#define FL_SET(x,f) do {if (FL_ABLE(x)) RBASIC(x)->flags |= (f);} while (0) +#define FL_UNSET(x,f) do {if (FL_ABLE(x)) RBASIC(x)->flags &= ~(f);} while (0) +#define FL_REVERSE(x,f) do {if (FL_ABLE(x)) RBASIC(x)->flags ^= (f);} while (0) + +#define OBJ_TAINTED(x) FL_TEST((x), FL_TAINT) +#define OBJ_TAINT(x) FL_SET((x), FL_TAINT) +#define OBJ_UNTRUSTED(x) FL_TEST((x), FL_UNTRUSTED) +#define OBJ_UNTRUST(x) FL_SET((x), FL_UNTRUSTED) +#define OBJ_INFECT(x,s) do {if (FL_ABLE(x) && FL_ABLE(s)) RBASIC(x)->flags |= RBASIC(s)->flags & (FL_TAINT | FL_UNTRUSTED);} while (0) + +#define OBJ_FROZEN(x) FL_TEST((x), FL_FREEZE) +#define OBJ_FREEZE(x) FL_SET((x), FL_FREEZE) + +#define ALLOC_N(type,n) (type*)xmalloc2((n),sizeof(type)) +#define ALLOC(type) (type*)xmalloc(sizeof(type)) +#define REALLOC_N(var,type,n) (var)=(type*)xrealloc2((char*)(var),(n),sizeof(type)) + +#define ALLOCA_N(type,n) (type*)alloca(sizeof(type)*(n)) + +#define MEMZERO(p,type,n) memset((p), 0, sizeof(type)*(n)) +#define MEMCPY(p1,p2,type,n) memcpy((p1), (p2), sizeof(type)*(n)) +#define MEMMOVE(p1,p2,type,n) memmove((p1), (p2), sizeof(type)*(n)) +#define MEMCMP(p1,p2,type,n) memcmp((p1), (p2), sizeof(type)*(n)) + +void rb_obj_infect(VALUE,VALUE); + +typedef int ruby_glob_func(const char*,VALUE, void*); +void rb_glob(const char*,void(*)(const char*,VALUE,void*),VALUE); +int ruby_glob(const char*,int,ruby_glob_func*,VALUE); +int ruby_brace_glob(const char*,int,ruby_glob_func*,VALUE); + +VALUE rb_define_class(const char*,VALUE); +VALUE rb_define_module(const char*); +VALUE rb_define_class_under(VALUE, const char*, VALUE); +VALUE rb_define_module_under(VALUE, const char*); + +void rb_include_module(VALUE,VALUE); +void 
rb_extend_object(VALUE,VALUE); + +struct rb_global_variable; + +typedef VALUE rb_gvar_getter_t(ID id, void *data, struct rb_global_variable *gvar); +typedef void rb_gvar_setter_t(VALUE val, ID id, void *data, struct rb_global_variable *gvar); +typedef void rb_gvar_marker_t(VALUE *var); + +VALUE rb_gvar_undef_getter(ID id, void *data, struct rb_global_variable *gvar); +void rb_gvar_undef_setter(VALUE val, ID id, void *data, struct rb_global_variable *gvar); +void rb_gvar_undef_marker(VALUE *var); + +VALUE rb_gvar_val_getter(ID id, void *data, struct rb_global_variable *gvar); +void rb_gvar_val_setter(VALUE val, ID id, void *data, struct rb_global_variable *gvar); +void rb_gvar_val_marker(VALUE *var); + +VALUE rb_gvar_var_getter(ID id, void *data, struct rb_global_variable *gvar); +void rb_gvar_var_setter(VALUE val, ID id, void *data, struct rb_global_variable *gvar); +void rb_gvar_var_marker(VALUE *var); + +void rb_gvar_readonly_setter(VALUE val, ID id, void *data, struct rb_global_variable *gvar); + +void rb_define_variable(const char*,VALUE*); +void rb_define_virtual_variable(const char*,VALUE(*)(ANYARGS),void(*)(ANYARGS)); +void rb_define_hooked_variable(const char*,VALUE*,VALUE(*)(ANYARGS),void(*)(ANYARGS)); +void rb_define_readonly_variable(const char*,VALUE*); +void rb_define_const(VALUE,const char*,VALUE); +void rb_define_global_const(const char*,VALUE); + +#define RUBY_METHOD_FUNC(func) ((VALUE (*)(ANYARGS))func) +void rb_define_method(VALUE,const char*,VALUE(*)(ANYARGS),int); +void rb_define_module_function(VALUE,const char*,VALUE(*)(ANYARGS),int); +void rb_define_global_function(const char*,VALUE(*)(ANYARGS),int); + +void rb_undef_method(VALUE,const char*); +void rb_define_alias(VALUE,const char*,const char*); +void rb_define_attr(VALUE,const char*,int,int); + +void rb_global_variable(VALUE*); +void rb_gc_register_mark_object(VALUE); +void rb_gc_register_address(VALUE*); +void rb_gc_unregister_address(VALUE*); + +ID rb_intern(const char*); +ID 
rb_intern2(const char*, long); +ID rb_intern_str(VALUE str); +const char *rb_id2name(ID); +ID rb_to_id(VALUE); +VALUE rb_id2str(ID); + +#define CONST_ID_CACHE(result, str) \ + { \ + static ID rb_intern_id_cache; \ + if (!rb_intern_id_cache) \ + rb_intern_id_cache = rb_intern2(str, strlen(str)); \ + result rb_intern_id_cache; \ + } +#define CONST_ID(var, str) \ + do CONST_ID_CACHE(var =, str) while (0) +#ifdef __GNUC__ +/* __builtin_constant_p and statement expression is available + * since gcc-2.7.2.3 at least. */ +#define rb_intern(str) \ + (__builtin_constant_p(str) ? \ + __extension__ (CONST_ID_CACHE(/**/, str)) : \ + rb_intern(str)) +#define rb_intern_const(str) \ + (__builtin_constant_p(str) ? \ + __extension__ (rb_intern2(str, strlen(str))) : \ + (rb_intern)(str)) +#else +#define rb_intern_const(str) rb_intern2(str, strlen(str)) +#endif + +const char *rb_class2name(VALUE); +const char *rb_obj_classname(VALUE); + +void rb_p(VALUE); + +VALUE rb_eval_string(const char*); +VALUE rb_eval_string_protect(const char*, int*); +VALUE rb_eval_string_wrap(const char*, int*); +VALUE rb_funcall(VALUE, ID, int, ...); +VALUE rb_funcall2(VALUE, ID, int, const VALUE*); +VALUE rb_funcall3(VALUE, ID, int, const VALUE*); +int rb_scan_args(int, const VALUE*, const char*, ...); +VALUE rb_call_super(int, const VALUE*); + +VALUE rb_gv_set(const char*, VALUE); +VALUE rb_gv_get(const char*); +VALUE rb_iv_get(VALUE, const char*); +VALUE rb_iv_set(VALUE, const char*, VALUE); + +VALUE rb_equal(VALUE,VALUE); + +VALUE *rb_ruby_verbose_ptr(void); +VALUE *rb_ruby_debug_ptr(void); +#define ruby_verbose (*rb_ruby_verbose_ptr()) +#define ruby_debug (*rb_ruby_debug_ptr()) + +PRINTF_ARGS(NORETURN(void rb_raise(VALUE, const char*, ...)), 2, 3); +PRINTF_ARGS(NORETURN(void rb_fatal(const char*, ...)), 1, 2); +PRINTF_ARGS(NORETURN(void rb_bug(const char*, ...)), 1, 2); +NORETURN(void rb_sys_fail(const char*)); +NORETURN(void rb_iter_break(void)); +NORETURN(void rb_exit(int)); +NORETURN(void 
rb_notimplement(void)); + +/* reports if `-w' specified */ +PRINTF_ARGS(void rb_warning(const char*, ...), 1, 2); +PRINTF_ARGS(void rb_compile_warning(const char *, int, const char*, ...), 3, 4); +PRINTF_ARGS(void rb_sys_warning(const char*, ...), 1, 2); +/* reports always */ +PRINTF_ARGS(void rb_warn(const char*, ...), 1, 2); +PRINTF_ARGS(void rb_compile_warn(const char *, int, const char*, ...), 3, 4); + +typedef VALUE rb_block_call_func(VALUE, VALUE, int, VALUE*); + +VALUE rb_each(VALUE); +VALUE rb_yield(VALUE); +VALUE rb_yield_values(int n, ...); +VALUE rb_yield_values2(int n, const VALUE *argv); +VALUE rb_yield_splat(VALUE); +int rb_block_given_p(void); +void rb_need_block(void); +VALUE rb_iterate(VALUE(*)(VALUE),VALUE,VALUE(*)(ANYARGS),VALUE); +VALUE rb_block_call(VALUE,ID,int,VALUE*,VALUE(*)(ANYARGS),VALUE); +VALUE rb_rescue(VALUE(*)(ANYARGS),VALUE,VALUE(*)(ANYARGS),VALUE); +VALUE rb_rescue2(VALUE(*)(ANYARGS),VALUE,VALUE(*)(ANYARGS),VALUE,...); +VALUE rb_ensure(VALUE(*)(ANYARGS),VALUE,VALUE(*)(ANYARGS),VALUE); +VALUE rb_catch(const char*,VALUE(*)(ANYARGS),VALUE); +VALUE rb_catch_obj(VALUE,VALUE(*)(ANYARGS),VALUE); +NORETURN(void rb_throw(const char*,VALUE)); +NORETURN(void rb_throw_obj(VALUE,VALUE)); + +VALUE rb_require(const char*); + +#ifdef __ia64 +void ruby_init_stack(VALUE*, void*); +#define ruby_init_stack(addr) ruby_init_stack(addr, rb_ia64_bsp()) +#else +void ruby_init_stack(VALUE*); +#endif +#define RUBY_INIT_STACK \ + VALUE variable_in_this_stack_frame; \ + ruby_init_stack(&variable_in_this_stack_frame); +void ruby_init(void); +void *ruby_options(int, char**); +int ruby_run_node(void *); + +RUBY_EXTERN VALUE rb_mKernel; +RUBY_EXTERN VALUE rb_mComparable; +RUBY_EXTERN VALUE rb_mEnumerable; +RUBY_EXTERN VALUE rb_mErrno; +RUBY_EXTERN VALUE rb_mFileTest; +RUBY_EXTERN VALUE rb_mGC; +RUBY_EXTERN VALUE rb_mMath; +RUBY_EXTERN VALUE rb_mProcess; + +RUBY_EXTERN VALUE rb_cBasicObject; +RUBY_EXTERN VALUE rb_cObject; +RUBY_EXTERN VALUE rb_cArray; +RUBY_EXTERN 
VALUE rb_cBignum; +RUBY_EXTERN VALUE rb_cBinding; +RUBY_EXTERN VALUE rb_cClass; +RUBY_EXTERN VALUE rb_cCont; +RUBY_EXTERN VALUE rb_cDir; +RUBY_EXTERN VALUE rb_cData; +RUBY_EXTERN VALUE rb_cFalseClass; +RUBY_EXTERN VALUE rb_cEncoding; +RUBY_EXTERN VALUE rb_cEnumerator; +RUBY_EXTERN VALUE rb_cFile; +RUBY_EXTERN VALUE rb_cFixnum; +RUBY_EXTERN VALUE rb_cFloat; +RUBY_EXTERN VALUE rb_cHash; +RUBY_EXTERN VALUE rb_cInteger; +RUBY_EXTERN VALUE rb_cIO; +RUBY_EXTERN VALUE rb_cMatch; +RUBY_EXTERN VALUE rb_cMethod; +RUBY_EXTERN VALUE rb_cModule; +RUBY_EXTERN VALUE rb_cNameErrorMesg; +RUBY_EXTERN VALUE rb_cNilClass; +RUBY_EXTERN VALUE rb_cNumeric; +RUBY_EXTERN VALUE rb_cProc; +RUBY_EXTERN VALUE rb_cRange; +RUBY_EXTERN VALUE rb_cRational; +RUBY_EXTERN VALUE rb_cComplex; +RUBY_EXTERN VALUE rb_cRegexp; +RUBY_EXTERN VALUE rb_cStat; +RUBY_EXTERN VALUE rb_cString; +RUBY_EXTERN VALUE rb_cStruct; +RUBY_EXTERN VALUE rb_cSymbol; +RUBY_EXTERN VALUE rb_cThread; +RUBY_EXTERN VALUE rb_cTime; +RUBY_EXTERN VALUE rb_cTrueClass; +RUBY_EXTERN VALUE rb_cUnboundMethod; + +RUBY_EXTERN VALUE rb_eException; +RUBY_EXTERN VALUE rb_eStandardError; +RUBY_EXTERN VALUE rb_eSystemExit; +RUBY_EXTERN VALUE rb_eInterrupt; +RUBY_EXTERN VALUE rb_eSignal; +RUBY_EXTERN VALUE rb_eFatal; +RUBY_EXTERN VALUE rb_eArgError; +RUBY_EXTERN VALUE rb_eEOFError; +RUBY_EXTERN VALUE rb_eIndexError; +RUBY_EXTERN VALUE rb_eStopIteration; +RUBY_EXTERN VALUE rb_eKeyError; +RUBY_EXTERN VALUE rb_eRangeError; +RUBY_EXTERN VALUE rb_eIOError; +RUBY_EXTERN VALUE rb_eRuntimeError; +RUBY_EXTERN VALUE rb_eSecurityError; +RUBY_EXTERN VALUE rb_eSystemCallError; +RUBY_EXTERN VALUE rb_eThreadError; +RUBY_EXTERN VALUE rb_eTypeError; +RUBY_EXTERN VALUE rb_eZeroDivError; +RUBY_EXTERN VALUE rb_eNotImpError; +RUBY_EXTERN VALUE rb_eNoMemError; +RUBY_EXTERN VALUE rb_eNoMethodError; +RUBY_EXTERN VALUE rb_eFloatDomainError; +RUBY_EXTERN VALUE rb_eLocalJumpError; +RUBY_EXTERN VALUE rb_eSysStackError; +RUBY_EXTERN VALUE rb_eRegexpError; +RUBY_EXTERN VALUE 
rb_eEncodingError; +RUBY_EXTERN VALUE rb_eEncCompatError; + +RUBY_EXTERN VALUE rb_eScriptError; +RUBY_EXTERN VALUE rb_eNameError; +RUBY_EXTERN VALUE rb_eSyntaxError; +RUBY_EXTERN VALUE rb_eLoadError; + +RUBY_EXTERN VALUE rb_stdin, rb_stdout, rb_stderr; + +static inline VALUE +rb_class_of(VALUE obj) +{ + if (IMMEDIATE_P(obj)) { + if (FIXNUM_P(obj)) return rb_cFixnum; + if (obj == Qtrue) return rb_cTrueClass; + if (SYMBOL_P(obj)) return rb_cSymbol; + } + else if (!RTEST(obj)) { + if (obj == Qnil) return rb_cNilClass; + if (obj == Qfalse) return rb_cFalseClass; + } + return RBASIC(obj)->klass; +} + +static inline int +rb_type(VALUE obj) +{ + if (IMMEDIATE_P(obj)) { + if (FIXNUM_P(obj)) return T_FIXNUM; + if (obj == Qtrue) return T_TRUE; + if (SYMBOL_P(obj)) return T_SYMBOL; + if (obj == Qundef) return T_UNDEF; + } + else if (!RTEST(obj)) { + if (obj == Qnil) return T_NIL; + if (obj == Qfalse) return T_FALSE; + } + return BUILTIN_TYPE(obj); +} + +static inline int +rb_special_const_p(VALUE obj) +{ + if (SPECIAL_CONST_P(obj)) return Qtrue; + return Qfalse; +} + +#include "ruby/missing.h" +#include "ruby/intern.h" + +#if defined(EXTLIB) && defined(USE_DLN_A_OUT) +/* hook for external modules */ +static char *dln_libs_to_be_linked[] = { EXTLIB, 0 }; +#endif + +#if (defined(__APPLE__) || defined(__NeXT__)) && defined(__MACH__) +/* to link startup code with ObjC support */ +#define RUBY_GLOBAL_SETUP static void objcdummyfunction(void) {objc_msgSend();} +#else +#define RUBY_GLOBAL_SETUP +#endif + +void ruby_sysinit(int *, char ***); + +#define RUBY_VM 1 /* YARV */ +#define HAVE_NATIVETHREAD +int ruby_native_thread_p(void); + +#define RUBY_EVENT_NONE 0x0000 +#define RUBY_EVENT_LINE 0x0001 +#define RUBY_EVENT_CLASS 0x0002 +#define RUBY_EVENT_END 0x0004 +#define RUBY_EVENT_CALL 0x0008 +#define RUBY_EVENT_RETURN 0x0010 +#define RUBY_EVENT_C_CALL 0x0020 +#define RUBY_EVENT_C_RETURN 0x0040 +#define RUBY_EVENT_RAISE 0x0080 +#define RUBY_EVENT_ALL 0xffff +#define RUBY_EVENT_VM 
0x10000 +#define RUBY_EVENT_SWITCH 0x20000 +#define RUBY_EVENT_COVERAGE 0x40000 + +typedef unsigned int rb_event_flag_t; +typedef void (*rb_event_hook_func_t)(rb_event_flag_t, VALUE data, VALUE, ID, VALUE klass); + +typedef struct rb_event_hook_struct { + rb_event_flag_t flag; + rb_event_hook_func_t func; + VALUE data; + struct rb_event_hook_struct *next; +} rb_event_hook_t; + +void rb_add_event_hook(rb_event_hook_func_t func, rb_event_flag_t events, + VALUE data); +int rb_remove_event_hook(rb_event_hook_func_t func); + +/* locale insensitive functions */ + +#define rb_isascii(c) ((unsigned long)(c) < 128) +int rb_isalnum(int c); +int rb_isalpha(int c); +int rb_isblank(int c); +int rb_iscntrl(int c); +int rb_isdigit(int c); +int rb_isgraph(int c); +int rb_islower(int c); +int rb_isprint(int c); +int rb_ispunct(int c); +int rb_isspace(int c); +int rb_isupper(int c); +int rb_isxdigit(int c); +int rb_tolower(int c); +int rb_toupper(int c); + +#ifndef ISPRINT +#define ISASCII(c) rb_isascii((unsigned char)(c)) +#undef ISPRINT +#define ISPRINT(c) rb_isprint((unsigned char)(c)) +#define ISSPACE(c) rb_isspace((unsigned char)(c)) +#define ISUPPER(c) rb_isupper((unsigned char)(c)) +#define ISLOWER(c) rb_islower((unsigned char)(c)) +#define ISALNUM(c) rb_isalnum((unsigned char)(c)) +#define ISALPHA(c) rb_isalpha((unsigned char)(c)) +#define ISDIGIT(c) rb_isdigit((unsigned char)(c)) +#define ISXDIGIT(c) rb_isxdigit((unsigned char)(c)) +#endif +#define TOUPPER(c) rb_toupper((unsigned char)(c)) +#define TOLOWER(c) rb_tolower((unsigned char)(c)) + +int st_strcasecmp(const char *s1, const char *s2); +int st_strncasecmp(const char *s1, const char *s2, size_t n); +#define STRCASECMP(s1, s2) (st_strcasecmp(s1, s2)) +#define STRNCASECMP(s1, s2, n) (st_strncasecmp(s1, s2, n)) + +unsigned long ruby_strtoul(const char *str, char **endptr, int base); +#define STRTOUL(str, endptr, base) (ruby_strtoul(str, endptr, base)) + +#if defined(__cplusplus) +#if 0 +{ /* satisfy cc-mode */ +#endif +} 
/* extern "C" { */ +#endif +#endif /* RUBY_RUBY_H */ diff --git a/include/ruby/st.h b/include/ruby/st.h new file mode 100644 index 0000000..73216ba --- /dev/null +++ b/include/ruby/st.h @@ -0,0 +1,117 @@ +/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */ + +/* @(#) st.h 5.1 89/12/14 */ + +#ifndef RUBY_ST_H +#define RUBY_ST_H 1 + +#if defined(__cplusplus) +extern "C" { +#if 0 +} /* satisfy cc-mode */ +#endif +#endif + +#ifndef RUBY_LIB +#include "ruby/config.h" +#include "ruby/defines.h" +#ifdef RUBY_EXTCONF_H +#include RUBY_EXTCONF_H +#endif +#endif + +#if defined STDC_HEADERS +#include +#elif defined HAVE_STDLIB_H +#include +#endif + +#if SIZEOF_LONG == SIZEOF_VOIDP +typedef unsigned long st_data_t; +#elif SIZEOF_LONG_LONG == SIZEOF_VOIDP +typedef unsigned LONG_LONG st_data_t; +#else +# error ---->> st.c requires sizeof(void*) == sizeof(long) to be compiled. <<---- +#endif +#define ST_DATA_T_DEFINED + +#ifndef CHAR_BIT +# ifdef HAVE_LIMITS_H +# include +# else +# define CHAR_BIT 8 +# endif +#endif +#ifndef _ +# define _(args) args +#endif +#ifndef ANYARGS +# ifdef __cplusplus +# define ANYARGS ... 
+# else +# define ANYARGS +# endif +#endif + +typedef struct st_table st_table; + +typedef int st_compare_func(st_data_t, st_data_t); +typedef int st_hash_func(st_data_t); + +struct st_hash_type { + int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */ + int (*hash)(ANYARGS /*st_data_t*/); /* st_hash_func* */ +}; + +typedef st_data_t st_index_t; +#define ST_INDEX_BITS (sizeof(st_index_t) * CHAR_BIT) + +struct st_table { + const struct st_hash_type *type; + st_index_t num_bins; + unsigned int entries_packed : 1; +#ifdef __GNUC__ + __extension__ +#endif + st_index_t num_entries : ST_INDEX_BITS - 1; + struct st_table_entry **bins; + struct st_table_entry *head, *tail; +}; + +#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0) + +enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK}; + +st_table *st_init_table(const struct st_hash_type *); +st_table *st_init_table_with_size(const struct st_hash_type *, int); +st_table *st_init_numtable(void); +st_table *st_init_numtable_with_size(int); +st_table *st_init_strtable(void); +st_table *st_init_strtable_with_size(int); +st_table *st_init_strcasetable(void); +st_table *st_init_strcasetable_with_size(int); +int st_delete(st_table *, st_data_t *, st_data_t *); /* returns 0:notfound 1:deleted */ +int st_delete_safe(st_table *, st_data_t *, st_data_t *, st_data_t); +int st_insert(st_table *, st_data_t, st_data_t); +int st_lookup(st_table *, st_data_t, st_data_t *); +int st_get_key(st_table *, st_data_t, st_data_t *); +int st_foreach(st_table *, int (*)(ANYARGS), st_data_t); +int st_reverse_foreach(st_table *, int (*)(ANYARGS), st_data_t); +void st_add_direct(st_table *, st_data_t, st_data_t); +void st_free_table(st_table *); +void st_cleanup_safe(st_table *, st_data_t); +void st_clear(st_table *); +st_table *st_copy(st_table *); +int st_numcmp(st_data_t, st_data_t); +int st_numhash(st_data_t); +int st_strcasecmp(const char *s1, const char *s2); +int st_strncasecmp(const char *s1, const char 
*s2, size_t n); + +#if defined(__cplusplus) +#if 0 +{ /* satisfy cc-mode */ +#endif +} /* extern "C" { */ +#endif + +#endif /* RUBY_ST_H */ diff --git a/include/ruby/util.h b/include/ruby/util.h new file mode 100644 index 0000000..b41810b --- /dev/null +++ b/include/ruby/util.h @@ -0,0 +1,82 @@ +/********************************************************************** + + util.h - + + $Author: yugui $ + created at: Thu Mar 9 11:55:53 JST 1995 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#ifndef RUBY_UTIL_H +#define RUBY_UTIL_H 1 + +#if defined(__cplusplus) +extern "C" { +#if 0 +} /* satisfy cc-mode */ +#endif +#endif + +#ifndef _ +#ifdef __cplusplus +# ifndef HAVE_PROTOTYPES +# define HAVE_PROTOTYPES 1 +# endif +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif +#ifdef HAVE_PROTOTYPES +# define _(args) args +#else +# define _(args) () +#endif +#ifdef HAVE_STDARG_PROTOTYPES +# define __(args) args +#else +# define __(args) () +#endif +#endif + +#define scan_oct ruby_scan_oct +unsigned long ruby_scan_oct(const char *, int, int *); +#define scan_hex ruby_scan_hex +unsigned long ruby_scan_hex(const char *, int, int *); + +#if defined(__CYGWIN32__) || defined(_WIN32) +void ruby_add_suffix(VALUE str, const char *suffix); +#endif + +void ruby_qsort(void *, const int, const int, + int (*)(const void *, const void *, void *), void *); + +void ruby_setenv(const char *, const char *); +void ruby_unsetenv(const char *); +#undef setenv +#undef unsetenv +#define setenv(name,val) ruby_setenv(name,val) +#define unsetenv(name,val) ruby_unsetenv(name) /* no trailing ';' so the macro is usable in if/else and expressions; NOTE(review): 'val' is unused and POSIX unsetenv takes one argument - arity kept for compatibility */ + +char *ruby_strdup(const char *); +#undef strdup +#define strdup(s) ruby_strdup(s) + +char *ruby_getcwd(void); +#define my_getcwd() ruby_getcwd() + +double ruby_strtod(const char *, char **); +#undef strtod +#define strtod(s,e) ruby_strtod(s,e) + +void ruby_each_words(const char *, void (*)(const char*, int, void*), void
*); + +#if defined(__cplusplus) +#if 0 +{ /* satisfy cc-mode */ +#endif +} /* extern "C" { */ +#endif + +#endif /* RUBY_UTIL_H */ diff --git a/include/ruby/vm.h b/include/ruby/vm.h new file mode 100644 index 0000000..cdee8f6 --- /dev/null +++ b/include/ruby/vm.h @@ -0,0 +1,22 @@ +/********************************************************************** + + ruby/vm.h - + + $Author: ko1 $ + created at: Sat May 31 15:17:36 2008 + + Copyright (C) 2008 Yukihiro Matsumoto + +**********************************************************************/ + +#ifndef RUBY_VM_H +#define RUBY_VM_H 1 + +/* Place holder. + * + * We will prepare VM creation/control APIs on 1.9.2 or later. + * If you have an interest about it, please see mvm branch. + * http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/branches/mvm/ + */ + +#endif /* RUBY_VM_H */ diff --git a/include/ruby/win32.h b/include/ruby/win32.h new file mode 100644 index 0000000..fd654e4 --- /dev/null +++ b/include/ruby/win32.h @@ -0,0 +1,570 @@ +#ifndef RUBY_WIN32_H +#define RUBY_WIN32_H 1 + +#if defined(__cplusplus) +extern "C" { +#if 0 +} /* satisfy cc-mode */ +#endif +#endif + +/* + * Copyright (c) 1993, Intergraph Corporation + * + * You may distribute under the terms of either the GNU General Public + * License or the Artistic License, as specified in the perl README file. + * + */ + +// +// Definitions for NT port of Perl +// + + +// +// Ok now we can include the normal include files. +// + +// #include conflict with varargs.h? 
+#if !defined(WSAAPI) +#if defined(__cplusplus) && defined(_MSC_VER) +extern "C++" { /* template without extern "C++" */ +#endif +#include +#include +#if defined(__cplusplus) && defined(_MSC_VER) +} +#endif +#endif + +#define NT 1 /* deprecated */ + +// +// We're not using Microsoft's "extensions" to C for +// Structured Exception Handling (SEH) so we can nuke these +// +#undef try +#undef except +#undef finally +#undef leave + +#include +#include +#include +#include +#include +#include +#if defined(__cplusplus) && defined(_MSC_VER) && _MSC_VER == 1200 +extern "C++" { /* template without extern "C++" */ +#endif +#include +#if defined(__cplusplus) && defined(_MSC_VER) && _MSC_VER == 1200 +} +#endif +#include +#include +#include +#ifdef HAVE_SYS_UTIME_H +# include +#else +# include +#endif +#include +#include +#if defined __MINGW32__ || __BORLANDC__ >= 0x0580 +# include +#else +# if !defined(_INTPTR_T_DEFINED) +# ifdef _WIN64 +typedef __int64 intptr_t; +# else +typedef int intptr_t; +# endif +# define _INTPTR_T_DEFINED +# endif +# if !defined(_UINTPTR_T_DEFINED) +# ifdef _WIN64 +typedef unsigned __int64 uintptr_t; +# else +typedef unsigned int uintptr_t; +# endif +# define _UINTPTR_T_DEFINED +# endif +#endif +#ifndef __MINGW32__ +# define mode_t int +#endif + +#ifdef _M_IX86 +# define WIN95 1 +#else +# undef WIN95 +#endif + +#ifdef WIN95 +extern DWORD rb_w32_osid(void); +#define rb_w32_iswinnt() (rb_w32_osid() == VER_PLATFORM_WIN32_NT) +#define rb_w32_iswin95() (rb_w32_osid() == VER_PLATFORM_WIN32_WINDOWS) +#else +#define rb_w32_iswinnt() TRUE +#define rb_w32_iswin95() FALSE +#endif + +#define WNOHANG -1 + +#undef getc +#undef putc +#undef fgetc +#undef fputc +#undef getchar +#undef putchar +#undef fgetchar +#undef fputchar +#undef utime +#undef lseek +#undef fstat +#define getc(_stream) rb_w32_getc(_stream) +#define getchar() rb_w32_getc(stdin) +#define putc(_c, _stream) rb_w32_putc(_c, _stream) +#define putchar(_c) rb_w32_putc(_c, stdout) +#ifdef RUBY_EXPORT 
+#define fgetc(_stream) getc(_stream) +#define fputc(_c, _stream) putc(_c, _stream) +#define fgetchar() getchar() +#define fputchar(_c) putchar(_c) +#define utime(_p, _t) rb_w32_utime(_p, _t) +#define lseek(_f, _o, _w) _lseeki64(_f, _o, _w) + +#define pipe(p) rb_w32_pipe(p) +#define open rb_w32_open +#define close(h) rb_w32_close(h) +#define fclose(f) rb_w32_fclose(f) +#define read(f, b, s) rb_w32_read(f, b, s) +#define write(f, b, s) rb_w32_write(f, b, s) +#define getpid() rb_w32_getpid() +#define getppid() rb_w32_getppid() +#define sleep(x) rb_w32_Sleep((x)*1000) +#define Sleep(msec) (void)rb_w32_Sleep(msec) +#define fstat(fd,st) _fstati64(fd,st) +#ifdef __BORLANDC__ +#define creat(p, m) _creat(p, m) +#define eof() _eof() +#define filelength(h) _filelength(h) +#define mktemp(t) _mktemp(t) +#define tell(h) _tell(h) +#define _open _sopen +#define sopen _sopen +#define _fstati64(fd,st) rb_w32_fstati64(fd,st) +#undef fopen +#define fopen(p, m) rb_w32_fopen(p, m) +#undef fdopen +#define fdopen(h, m) rb_w32_fdopen(h, m) +#undef fsopen +#define fsopen(p, m, sh) rb_w32_fsopen(p, m, sh) +#endif + +#undef execv +#define execv(path,argv) rb_w32_aspawn(P_OVERLAY,path,argv) +#if !defined(__BORLANDC__) +#undef isatty +#define isatty(h) rb_w32_isatty(h) +#endif + +#undef mkdir +#define mkdir(p, m) rb_w32_mkdir(p, m) +#undef rmdir +#define rmdir(p) rb_w32_rmdir(p) +#undef unlink +#define unlink(p) rb_w32_unlink(p) +#endif + +#if SIZEOF_OFF_T == 8 +#define off_t __int64 +#define stat stati64 +#if defined(__BORLANDC__) +#define stati64(path, st) rb_w32_stati64(path, st) +#elif !defined(_MSC_VER) || _MSC_VER < 1400 +#define stati64 _stati64 +#define _stati64(path, st) rb_w32_stati64(path, st) +#else +#define stati64 _stat64 +#define _stat64(path, st) rb_w32_stati64(path, st) +#endif +#else +#define stat(path,st) rb_w32_stat(path,st) +#define fstat(fd,st) rb_w32_fstat(fd,st) +extern int rb_w32_stat(const char *, struct stat *); +extern int rb_w32_fstat(int, struct stat *); +#endif 
+#define access(path,mode) rb_w32_access(path,mode) + +#define strcasecmp stricmp +#define strncasecmp strnicmp +#define fsync _commit + +#ifdef __MINGW32__ +struct timezone { + int tz_minuteswest; + int tz_dsttime; +}; +#undef isascii +#define isascii __isascii +#endif +#define NtInitialize ruby_sysinit +extern int rb_w32_cmdvector(const char *, char ***); +extern rb_pid_t rb_w32_pipe_exec(const char *, const char *, int, int *, int *); +extern int flock(int fd, int oper); +extern int rb_w32_is_socket(int); +extern int WSAAPI rb_w32_accept(int, struct sockaddr *, int *); +extern int WSAAPI rb_w32_bind(int, const struct sockaddr *, int); +extern int WSAAPI rb_w32_connect(int, const struct sockaddr *, int); +extern void rb_w32_fdset(int, fd_set*); +extern void rb_w32_fdclr(int, fd_set*); +extern int rb_w32_fdisset(int, fd_set*); +extern int WSAAPI rb_w32_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); +extern int WSAAPI rb_w32_getpeername(int, struct sockaddr *, int *); +extern int WSAAPI rb_w32_getsockname(int, struct sockaddr *, int *); +extern int WSAAPI rb_w32_getsockopt(int, int, int, char *, int *); +extern int WSAAPI rb_w32_ioctlsocket(int, long, u_long *); +extern int WSAAPI rb_w32_listen(int, int); +extern int WSAAPI rb_w32_recv(int, char *, int, int); +extern int WSAAPI rb_w32_recvfrom(int, char *, int, int, struct sockaddr *, int *); +extern int WSAAPI rb_w32_send(int, const char *, int, int); +extern int WSAAPI rb_w32_sendto(int, const char *, int, int, const struct sockaddr *, int); +extern int WSAAPI rb_w32_setsockopt(int, int, int, const char *, int); +extern int WSAAPI rb_w32_shutdown(int, int); +extern int WSAAPI rb_w32_socket(int, int, int); +extern SOCKET rb_w32_get_osfhandle(int); +extern struct hostent *WSAAPI rb_w32_gethostbyaddr(const char *, int, int); +extern struct hostent *WSAAPI rb_w32_gethostbyname(const char *); +extern int WSAAPI rb_w32_gethostname(char *, int); +extern struct protoent *WSAAPI rb_w32_getprotobyname(const 
char *); +extern struct protoent *WSAAPI rb_w32_getprotobynumber(int); +extern struct servent *WSAAPI rb_w32_getservbyname(const char *, const char *); +extern struct servent *WSAAPI rb_w32_getservbyport(int, const char *); +extern int rb_w32_socketpair(int, int, int, int *); +extern char * rb_w32_getcwd(char *, int); +extern char * rb_w32_getenv(const char *); +extern int rb_w32_rename(const char *, const char *); +extern char **rb_w32_get_environ(void); +extern void rb_w32_free_environ(char **); +extern int rb_w32_map_errno(DWORD); + +extern int chown(const char *, int, int); +extern int link(const char *, const char *); +extern int gettimeofday(struct timeval *, struct timezone *); +extern rb_pid_t waitpid (rb_pid_t, int *, int); +extern rb_pid_t rb_w32_spawn(int, const char *, const char*); +extern rb_pid_t rb_w32_aspawn(int, const char *, char *const *); +extern int kill(int, int); +extern int fcntl(int, int, ...); +extern rb_pid_t rb_w32_getpid(void); +extern rb_pid_t rb_w32_getppid(void); +#if !defined(__BORLANDC__) +extern int rb_w32_isatty(int); +#endif +extern int rb_w32_mkdir(const char *, int); +extern int rb_w32_rmdir(const char *); +extern int rb_w32_unlink(const char *); +extern int rb_w32_stati64(const char *, struct stati64 *); +extern int rb_w32_access(const char *, int); + +#ifdef __BORLANDC__ +extern int rb_w32_fstati64(int, struct stati64 *); +extern off_t _lseeki64(int, off_t, int); +extern FILE *rb_w32_fopen(const char *, const char *); +extern FILE *rb_w32_fdopen(int, const char *); +extern FILE *rb_w32_fsopen(const char *, const char *, int); +#endif + +#include +#if !defined __MINGW32__ || defined __NO_ISOCEXT +#ifndef isnan +#define isnan(x) _isnan(x) +#endif +#ifndef finite +#define finite(x) _finite(x) +#endif +#ifndef copysign +#define copysign(a, b) _copysign(a, b) +#endif +#ifndef scalb +#define scalb(a, b) _scalb(a, b) +#endif +#endif + +#if !defined S_IFIFO && defined _S_IFIFO +#define S_IFIFO _S_IFIFO +#endif + +#if 0 && defined 
__BORLANDC__ +#undef S_ISDIR +#undef S_ISFIFO +#undef S_ISBLK +#undef S_ISCHR +#undef S_ISREG +#define S_ISDIR(m) (((unsigned short)(m) & S_IFMT) == S_IFDIR) +#define S_ISFIFO(m) (((unsigned short)(m) & S_IFMT) == S_IFIFO) +#define S_ISBLK(m) (((unsigned short)(m) & S_IFMT) == S_IFBLK) +#define S_ISCHR(m) (((unsigned short)(m) & S_IFMT) == S_IFCHR) +#define S_ISREG(m) (((unsigned short)(m) & S_IFMT) == S_IFREG) +#endif + +#if !defined S_IRUSR && !defined __MINGW32__ +#define S_IRUSR 0400 +#endif +#ifndef S_IRGRP +#define S_IRGRP 0040 +#endif +#ifndef S_IROTH +#define S_IROTH 0004 +#endif + +#if !defined S_IWUSR && !defined __MINGW32__ +#define S_IWUSR 0200 +#endif +#ifndef S_IWGRP +#define S_IWGRP 0020 +#endif +#ifndef S_IWOTH +#define S_IWOTH 0002 +#endif + +#if !defined S_IXUSR && !defined __MINGW32__ +#define S_IXUSR 0100 +#endif +#ifndef S_IXGRP +#define S_IXGRP 0010 +#endif +#ifndef S_IXOTH +#define S_IXOTH 0001 +#endif + +// +// define this so we can do inplace editing +// + +#define SUFFIX +extern int truncate(const char *path, off_t length); +extern int ftruncate(int fd, off_t length); +extern int fseeko(FILE *stream, off_t offset, int whence); +extern off_t ftello(FILE *stream); + +// +// stubs +// +extern int ioctl (int, int, ...); +extern rb_uid_t getuid (void); +extern rb_uid_t geteuid (void); +extern rb_gid_t getgid (void); +extern rb_gid_t getegid (void); +extern int setuid (rb_uid_t); +extern int setgid (rb_gid_t); + +extern char *rb_w32_strerror(int); + +#ifdef RUBY_EXPORT +#define strerror(e) rb_w32_strerror(e) +#endif + +#define PIPE_BUF 1024 + +#define LOCK_SH 1 +#define LOCK_EX 2 +#define LOCK_NB 4 +#define LOCK_UN 8 + + +#ifndef SIGINT +#define SIGINT 2 +#endif +#ifndef SIGKILL +#define SIGKILL 9 +#endif + + +/* #undef va_start */ +/* #undef va_end */ + +/* winsock error map */ +#define EWOULDBLOCK WSAEWOULDBLOCK +#define EINPROGRESS WSAEINPROGRESS +#define EALREADY WSAEALREADY +#define ENOTSOCK WSAENOTSOCK +#define EDESTADDRREQ WSAEDESTADDRREQ 
+#define EMSGSIZE WSAEMSGSIZE +#define EPROTOTYPE WSAEPROTOTYPE +#define ENOPROTOOPT WSAENOPROTOOPT +#define EPROTONOSUPPORT WSAEPROTONOSUPPORT +#define ESOCKTNOSUPPORT WSAESOCKTNOSUPPORT +#define EOPNOTSUPP WSAEOPNOTSUPP +#define EPFNOSUPPORT WSAEPFNOSUPPORT +#define EAFNOSUPPORT WSAEAFNOSUPPORT +#define EADDRINUSE WSAEADDRINUSE +#define EADDRNOTAVAIL WSAEADDRNOTAVAIL +#define ENETDOWN WSAENETDOWN +#define ENETUNREACH WSAENETUNREACH +#define ENETRESET WSAENETRESET +#define ECONNABORTED WSAECONNABORTED +#define ECONNRESET WSAECONNRESET +#define ENOBUFS WSAENOBUFS +#define EISCONN WSAEISCONN +#define ENOTCONN WSAENOTCONN +#define ESHUTDOWN WSAESHUTDOWN +#define ETOOMANYREFS WSAETOOMANYREFS +#define ETIMEDOUT WSAETIMEDOUT +#define ECONNREFUSED WSAECONNREFUSED +#define ELOOP WSAELOOP +/*#define ENAMETOOLONG WSAENAMETOOLONG*/ +#define EHOSTDOWN WSAEHOSTDOWN +#define EHOSTUNREACH WSAEHOSTUNREACH +/*#define ENOTEMPTY WSAENOTEMPTY*/ +#define EPROCLIM WSAEPROCLIM +#define EUSERS WSAEUSERS +#define EDQUOT WSAEDQUOT +#define ESTALE WSAESTALE +#define EREMOTE WSAEREMOTE + +#define F_SETFL 1 +#define O_NONBLOCK 1 + +#undef FD_SET +#define FD_SET(f, s) rb_w32_fdset(f, s) + +#undef FD_CLR +#define FD_CLR(f, s) rb_w32_fdclr(f, s) + +#undef FD_ISSET +#define FD_ISSET(f, s) rb_w32_fdisset(f, s) + +#ifdef RUBY_EXPORT +#undef accept +#define accept(s, a, l) rb_w32_accept(s, a, l) + +#undef bind +#define bind(s, a, l) rb_w32_bind(s, a, l) + +#undef connect +#define connect(s, a, l) rb_w32_connect(s, a, l) + +#undef select +#define select(n, r, w, e, t) rb_w32_select(n, r, w, e, t) + +#undef getpeername +#define getpeername(s, a, l) rb_w32_getpeername(s, a, l) + +#undef getsockname +#define getsockname(s, a, l) rb_w32_getsockname(s, a, l) + +#undef getsockopt +#define getsockopt(s, v, n, o, l) rb_w32_getsockopt(s, v, n, o, l) + +#undef ioctlsocket +#define ioctlsocket(s, c, a) rb_w32_ioctlsocket(s, c, a) + +#undef listen +#define listen(s, b) rb_w32_listen(s, b) + +#undef recv +#define 
recv(s, b, l, f) rb_w32_recv(s, b, l, f) + +#undef recvfrom +#define recvfrom(s, b, l, f, fr, frl) rb_w32_recvfrom(s, b, l, f, fr, frl) + +#undef send +#define send(s, b, l, f) rb_w32_send(s, b, l, f) + +#undef sendto +#define sendto(s, b, l, f, t, tl) rb_w32_sendto(s, b, l, f, t, tl) + +#undef setsockopt +#define setsockopt(s, v, n, o, l) rb_w32_setsockopt(s, v, n, o, l) + +#undef shutdown +#define shutdown(s, h) rb_w32_shutdown(s, h) + +#undef socket +#define socket(s, t, p) rb_w32_socket(s, t, p) + +#undef gethostbyaddr +#define gethostbyaddr(a, l, t) rb_w32_gethostbyaddr(a, l, t) + +#undef gethostbyname +#define gethostbyname(n) rb_w32_gethostbyname(n) + +#undef gethostname +#define gethostname(n, l) rb_w32_gethostname(n, l) + +#undef getprotobyname +#define getprotobyname(n) rb_w32_getprotobyname(n) + +#undef getprotobynumber +#define getprotobynumber(n) rb_w32_getprotobynumber(n) + +#undef getservbyname +#define getservbyname(n, p) rb_w32_getservbyname(n, p) + +#undef getservbyport +#define getservbyport(p, pr) rb_w32_getservbyport(p, pr) + +#undef socketpair +#define socketpair(a, t, p, s) rb_w32_socketpair(a, t, p, s) + +#undef get_osfhandle +#define get_osfhandle(h) rb_w32_get_osfhandle(h) + +#undef getcwd +#define getcwd(b, s) rb_w32_getcwd(b, s) + +#undef getenv +#define getenv(n) rb_w32_getenv(n) + +#undef rename +#define rename(o, n) rb_w32_rename(o, n) + +#undef times +#define times(t) rb_w32_times(t) +#endif + +struct tms { + long tms_utime; + long tms_stime; + long tms_cutime; + long tms_cstime; +}; + +int rb_w32_times(struct tms *); + +/* thread stuff */ +HANDLE GetCurrentThreadHandle(void); +int rb_w32_sleep(unsigned long msec); +int rb_w32_putc(int, FILE*); +int rb_w32_getc(FILE*); +int rb_w32_open(const char *, int, ...); +int rb_w32_close(int); +int rb_w32_fclose(FILE*); +int rb_w32_pipe(int[2]); +size_t rb_w32_read(int, void *, size_t); +size_t rb_w32_write(int, const void *, size_t); +int rb_w32_utime(const char *, const struct utimbuf *); 
+int WINAPI rb_w32_Sleep(unsigned long msec);
+int  rb_w32_wait_events_blocking(HANDLE *events, int num, DWORD timeout);
+
+/*
+== ***CAUTION***
+Since this function is very dangerous, ((*NEVER*))
+*  lock any HANDLEs(i.e. Mutex, Semaphore, CriticalSection and so on) or,
+*  use anything like TRAP_BEG...TRAP_END block structure,
+in asynchronous_func_t.
+*/
+typedef uintptr_t (*asynchronous_func_t)(uintptr_t self, int argc, uintptr_t* argv);
+uintptr_t rb_w32_asynchronize(asynchronous_func_t func, uintptr_t self, int argc, uintptr_t* argv, uintptr_t intrval);
+
+#if defined(__cplusplus)
+#if 0
+{ /* satisfy cc-mode */
+#endif
+}  /* extern "C" { */
+#endif
+
+#endif /* RUBY_WIN32_H */
diff --git a/inits.c b/inits.c
new file mode 100644
index 0000000..d0d59ea
--- /dev/null
+++ b/inits.c
@@ -0,0 +1,63 @@
+/**********************************************************************
+
+  inits.c -
+
+  $Author: yugui $
+  created at: Tue Dec 28 16:01:58 JST 1993
+
+  Copyright (C) 1993-2007 Yukihiro Matsumoto
+
+**********************************************************************/
+
+#include "ruby/ruby.h"
+/* CALL(n): declare Init_<n>(void) at block scope, then call it. */
+#define CALL(n) {void Init_##n(void); Init_##n();}
+/* rb_call_inits: invoke every Init_* routine named below, in the order written. */
+void
+rb_call_inits(void)
+{
+    CALL(RandomSeed);
+    CALL(sym);
+    CALL(var_tables);
+    CALL(Object);
+    CALL(top_self);
+    CALL(Encoding);
+    CALL(Comparable);
+    CALL(Enumerable);
+    CALL(String);
+    CALL(Exception);
+    CALL(eval);
+    CALL(safe);
+    CALL(jump);
+    CALL(Numeric);
+    CALL(Bignum);
+    CALL(syserr);
+    CALL(Array);
+    CALL(Hash);
+    CALL(Struct);
+    CALL(Regexp);
+    CALL(pack);
+    CALL(transcode);
+    CALL(marshal);
+    CALL(Range);
+    CALL(IO);
+    CALL(Dir);
+    CALL(Time);
+    CALL(Random);
+    CALL(signal);
+    CALL(process);
+    CALL(load);
+    CALL(Proc);
+    CALL(Binding);
+    CALL(Math);
+    CALL(GC);
+    CALL(Enumerator);
+    CALL(VM);
+    CALL(ISeq);
+    CALL(Thread);
+    CALL(Cont);
+    CALL(Rational);
+    CALL(Complex);
+    CALL(version);
+}
+#undef CALL /* the helper macro is removed right after its only user */
diff --git a/insns.def b/insns.def
new file mode 100644
index 0000000..7aa1899
--- /dev/null
+++ 
b/insns.def
@@ -0,0 +1,2047 @@
+/** ##skip -*- mode:c; style:ruby -*-
+  insns.def - YARV instruction definitions
+
+  $Author: $
+  created at: 04/01/01 01:17:55 JST
+
+  Copyright (C) 2004-2007 Koichi Sasada
+*/
+
+/** ##skip
+  instruction comment
+  @c: category
+  @e: english description
+  @j: japanese description
+
+  instruction form:
+    DEFINE_INSN
+    instruction_name
+    (instruction_operands, ..)
+    (pop_values, ..)
+    (return value)
+    {
+       .. // insn body
+    }
+
+ */
+
+
+/**
+  @c nop
+  @e nop
+  @j nop
+ */
+DEFINE_INSN
+nop
+()
+()
+()
+{
+    /* none */
+}
+
+/**********************************************************/
+/* deal with variables                                    */
+/**********************************************************/
+
+/**
+  @c variable
+  @e get local variable value (which is pointed by idx).
+  @j idx w[JX^bNuB
+ */
+DEFINE_INSN
+getlocal
+(lindex_t idx)
+()
+(VALUE val)
+{
+    val = *(GET_LFP() - idx); /* slot idx entries below GET_LFP() */
+}
+
+/**
+  @c variable
+  @e set local variable value (which is pointed by idx) as val.
+  @j idx w[J val B
+ */
+DEFINE_INSN
+setlocal
+(lindex_t idx)
+(VALUE val)
+()
+{
+    (*(GET_LFP() - idx)) = val; /* same slot addressing as getlocal */
+}
+
+/**
+  @c variable
+  @e get special local variable ($~, $_, ..) value.
+  @j [Ji$~, $_, ...jlB
+ */
+DEFINE_INSN
+getspecial
+(VALUE key, rb_num_t type)
+()
+(VALUE val)
+{
+    val = vm_getspecial(th, GET_LFP(), key, type);
+}
+
+/**
+  @c variable
+  @e set special local variable ($~, $_, ...) value as obj.
+  @j [Ji$~, $_, ...jlB
+ */
+DEFINE_INSN
+setspecial
+(VALUE key)
+(VALUE obj)
+()
+{
+    lfp_svar_set(th, GET_LFP(), key, obj);
+}
+
+/**
+  @c variable
+  @e get block local variable (selected by idx and level).
+     level is the block nesting depth: GET_PREV_DFP() is followed level times before indexing.
+  @j level, idx wubN[JlX^bNuB
+     level ubNlXgxAiB
+ */
+DEFINE_INSN
+getdynamic
+(dindex_t idx, rb_num_t level)
+()
+(VALUE val)
+{
+    int i;
+    VALUE *dfp2 = GET_DFP(); /* start from the current dfp */
+    for (i = 0; i < level; i++) { /* step to the previous dfp, level times */
+        dfp2 = GET_PREV_DFP(dfp2);
+    }
+    val = *(dfp2 - idx); /* slot idx entries below the dfp that was reached */
+}
+
+/**
+  @c variable
+  @e set block local variable (selected by idx and level) as val.
+     level is the block nesting depth: GET_PREV_DFP() is followed level times before indexing.
+  @j level, idx wubN[Jl val B
+     level ubNlXgxAiB
+ */
+DEFINE_INSN
+setdynamic
+(dindex_t idx, rb_num_t level)
+(VALUE val)
+()
+{
+    int i;
+    VALUE *dfp2 = GET_DFP(); /* start from the current dfp */
+    for (i = 0; i < level; i++) { /* step to the previous dfp, level times */
+        dfp2 = GET_PREV_DFP(dfp2);
+    }
+    *(dfp2 - idx) = val; /* same slot addressing as getdynamic */
+}
+
+/**
+  @c variable
+  @e get instance variable id of self.
+     the receiver is always GET_SELF(); the instruction has no other operand than id.
+  @j self CX^X id lB
+ */
+DEFINE_INSN
+getinstancevariable
+(ID id)
+()
+(VALUE val)
+{
+    val = rb_ivar_get(GET_SELF(), id);
+}
+
+/**
+  @c variable
+  @e set instance variable id of self as val.
+     the receiver is always GET_SELF(); the instruction has no other operand than id.
+  @j self CX^X id val B
+ */
+DEFINE_INSN
+setinstancevariable
+(ID id)
+(VALUE val)
+()
+{
+    rb_ivar_set(GET_SELF(), id, val);
+}
+
+/**
+  @c variable
+  @e get class variable id; the class is the one returned by vm_get_cvar_base(cref).
+  @j XR[vNX id lB
+ */
+DEFINE_INSN
+getclassvariable
+(ID id)
+()
+(VALUE val)
+{
+    NODE * const cref = vm_get_cref(GET_ISEQ(), GET_LFP(), GET_DFP());
+    val = rb_cvar_get(vm_get_cvar_base(cref), id);
+}
+
+/**
+  @c variable
+  @e set class variable id as val; the class is the one returned by vm_get_cvar_base(cref).
+  @j klass NX id val B
+ */
+DEFINE_INSN
+setclassvariable
+(ID id)
+(VALUE val)
+()
+{
+    NODE * const cref = vm_get_cref(GET_ISEQ(), GET_LFP(), GET_DFP());
+    rb_cvar_set(vm_get_cvar_base(cref), id, val);
+}
+
+/**
+  @c variable
+  @e
+   get constant variable id.  if klass is Qnil, constant
+   are searched in current scope.  if klass is Qfalse, constant as
+   top level constant.  otherwise, get constant under klass
+   class or module.
+ @j id lB + klass Qnil AXR[vlB + Qfalse AgbvxXR[vB + OAklass NXB + */ +DEFINE_INSN +getconstant +(ID id) +(VALUE klass) +(VALUE val) +{ + val = vm_get_ev_const(th, GET_ISEQ(), klass, id, 0); +} + +/** + @c variable + @e + set constant variable id. if klass is Qfalse, constant + is able to access in this scope. if klass is Qnil, set + top level constant. otherwise, set constant under klass + class or module. + + @j id l val B + klass Qfalse AXR[v id lB + Qnil AgbvxXR[vlB + OAklass NXB + */ +DEFINE_INSN +setconstant +(ID id) +(VALUE val, VALUE cbase) +() +{ + vm_check_if_namespace(cbase); + rb_const_set(cbase, id, val); + INC_VM_STATE_VERSION(); +} + +/** + @c variable + @e get global variable id. + @j O[o id lB + */ +DEFINE_INSN +getglobal +(GENTRY entry) +() +(VALUE val) +{ + val = GET_GLOBAL(entry); +} + +/** + @c variable + @e set global variable id as val. + @j O[o id lB + */ +DEFINE_INSN +setglobal +(GENTRY entry) +(VALUE val) +() +{ + SET_GLOBAL(entry, val); +} + + +/**********************************************************/ +/* deal with values */ +/**********************************************************/ + +/** + @c put + @e put nil to stack. + @j X^bN nil vbVB + */ +DEFINE_INSN +putnil +() +() +(VALUE val) +{ + val = Qnil; +} + +/** + @c put + @e put self. + @j X^bN self vbVB + */ +DEFINE_INSN +putself +() +() +(VALUE val) +{ + val = GET_SELF(); +} + +/** + @c put + @e put some object. + i.e. Fixnum, true, false, nil, and so on. + @j IuWFNg val X^bNvbVB + i.e. Fixnum, true, false, nil, and so on. + */ +DEFINE_INSN +putobject +(VALUE val) +() +(VALUE val) +{ + /* */ +} + +/** + @c put + @e put special object. "value_type" is for expansion. 
+ @j IuWFNg val X^bNvbVB + IuWFNg value_type D + */ +DEFINE_INSN +putspecialobject +(rb_num_t value_type) +() +(VALUE val) +{ + switch (value_type) { + case VM_SPECIAL_OBJECT_VMCORE: + val = rb_mRubyVMFrozenCore; + break; + case VM_SPECIAL_OBJECT_CBASE: + val = vm_get_cbase(GET_ISEQ(), GET_LFP(), GET_DFP()); + break; + default: + rb_bug("putspecialobject insn: unknown value_type"); + } +} + +/** + @c put + @e put iseq value. + @j put iseq value. + */ +DEFINE_INSN +putiseq +(ISEQ iseq) +() +(VALUE ret) +{ + ret = iseq->self; +} + +/** + @c put + @e put string val. string will be copied. + @j Rs[X^bNvbVB + */ +DEFINE_INSN +putstring +(VALUE str) +() +(VALUE val) +{ + val = rb_str_replace(rb_str_new(0, 0), str); +} + +/** + @c put + @e put concatenate strings + @j X^bNgbv n ACX^bNvbVB + */ +DEFINE_INSN +concatstrings +(rb_num_t num) +(...) +(VALUE val) // inc += 1 - num; +{ + int i; + + val = rb_str_new(0, 0); + for (i = num - 1; i >= 0; i--) { + const VALUE v = TOPN(i); + rb_str_append(val, v); + } + POPN(num); +} + +/** + @c put + @e to_str + @j to_str X^bNvbVB + */ +DEFINE_INSN +tostring +() +(VALUE val) +(VALUE val) +{ + val = rb_obj_as_string(val); +} + +/** + @c put + @e to Regexp + @j str K\RpCX^bNvbVB + RpCCopt K\IvVB + */ +DEFINE_INSN +toregexp +(rb_num_t opt, rb_num_t cnt) +(...) +(VALUE val) // inc += 1 - cnt; +{ + VALUE rb_reg_new_ary(VALUE ary, int options); + int i; + const VALUE ary = rb_ary_new2(cnt); + RBASIC(ary)->klass = 0; + for (i = 0; i < cnt; i++) { + rb_ary_store(ary, cnt-i-1, TOPN(i)); + } + POPN(cnt); + val = rb_reg_new_ary(ary, opt); +} + +/** + @c put + @e put new array. + @j VzX^bN num lvbVB + */ +DEFINE_INSN +newarray +(rb_num_t num) +(...) 
+(VALUE val) // inc += 1 - num; +{ + val = rb_ary_new4((long)num, STACK_ADDR_FROM_TOP(num)); + POPN(num); +} + +/** + @c put + @e dup array + @j z ary dup X^bNvbVB + */ +DEFINE_INSN +duparray +(VALUE ary) +() +(VALUE val) +{ + val = rb_ary_replace(rb_ary_new2(0), ary); +} + +/** + @c put + @e expand array to num objects. + @j X^bNgbvIuWFNgzAWJB + zIuWFNgvf numA nil BnumA + numvfB + zIuWFNgAnum - 1 nil B + flag ^Acvfz + flag: 0x01 - z + flag: 0x02 - postarg p + flag: 0x04 - reverse? + */ +DEFINE_INSN +expandarray +(rb_num_t num, rb_num_t flag) +(..., VALUE ary) +(...) // inc += num - 1 + (flag & 1 ? 1 : 0); +{ + vm_expandarray(GET_CFP(), ary, num, flag); +} + +/** + @c put + @e concat two arrays + @j z ary1, ary2 AX^bNvbVB + */ +DEFINE_INSN +concatarray +() +(VALUE ary1, VALUE ary2st) +(VALUE ary) +{ + const VALUE ary2 = ary2st; + VALUE tmp1 = rb_check_convert_type(ary1, T_ARRAY, "Array", "to_a"); + VALUE tmp2 = rb_check_convert_type(ary2, T_ARRAY, "Array", "to_a"); + + if (NIL_P(tmp1)) { + tmp1 = rb_ary_new3(1, ary1); + } + + if (NIL_P(tmp2)) { + tmp2 = rb_ary_new3(1, ary2); + } + + if (tmp1 == ary1) { + tmp1 = rb_ary_dup(ary1); + } + ary = rb_ary_concat(tmp1, tmp2); +} + +/** + @c put + @e splat array + @j z ary to_a oB + */ +DEFINE_INSN +splatarray +(VALUE flag) +(VALUE ary) +(VALUE obj) +{ + VALUE tmp = rb_check_convert_type(ary, T_ARRAY, "Array", "to_a"); + if (NIL_P(tmp)) { + tmp = rb_ary_new3(1, ary); + } + obj = tmp; +} + +/** + @c put + @e check value is included in ary + @j z ary vf obj `FbNBcase/when pB + */ +DEFINE_INSN +checkincludearray +(VALUE flag) +(VALUE obj, VALUE ary) +(VALUE obj, VALUE result) +{ + int i; + result = Qfalse; + + if (TYPE(ary) != T_ARRAY) { + ary = rb_Array(ary); + } + + if (flag == Qtrue) { + /* NODE_CASE */ + for (i = 0; i < RARRAY_LEN(ary); i++) { + /* TODO: fix me (use another method dispatch) */ + if (RTEST(rb_funcall2(RARRAY_PTR(ary)[i], idEqq, 1, &obj))) { + result = Qtrue; + break; + } + } + } + else { + obj = Qfalse; + /* 
NODE_WHEN */ + for (i = 0; i < RARRAY_LEN(ary); i++) { + if (RTEST(RARRAY_PTR(ary)[i])) { + obj = result = Qtrue; + break; + } + } + } +} + +/** + @c put + @e put new Hash. + @j VnbVX^bNgbv n lB + n L[lyA 2 {B + */ +DEFINE_INSN +newhash +(rb_num_t num) +(...) +(VALUE val) // inc += 1 - num; +{ + int i; + val = rb_hash_new(); + + for (i = num; i > 0; i -= 2) { + const VALUE v = TOPN(i - 2); + const VALUE k = TOPN(i - 1); + rb_hash_aset(val, k, v); + } + POPN(num); +} + +/** + @c put + @e put new Range object.(Range.new(low, high, flag)) + @j Range.new(low, high, flag) IuWFNgX^bNvbVB + */ +DEFINE_INSN +newrange +(rb_num_t flag) +(VALUE low, VALUE high) +(VALUE val) +{ + val = rb_range_new(low, high, flag); +} + +/**********************************************************/ +/* deal with stack operation */ +/**********************************************************/ + +/** + @c stack + @e pop from stack. + @j X^bN|bvB + */ +DEFINE_INSN +pop +() +(VALUE val) +() +{ + val = val; + /* none */ +} + +/** + @c stack + @e duplicate stack top. + @j X^bNgbvRs[X^bNvbVB + */ +DEFINE_INSN +dup +() +(VALUE val) +(VALUE val1, VALUE val2) +{ + val1 = val2 = val; +} + +/** + @c stack + @e duplicate stack top n elements + @j X^bNgbv n Rs[X^bNvbVB + */ +DEFINE_INSN +dupn +(rb_num_t n) +(...) +(...) // inc += n; +{ + int i; + VALUE *sp = STACK_ADDR_FROM_TOP(n); + for (i = 0; i < n; i++) { + GET_SP()[i] = sp[i]; + } + INC_SP(n); +} + + +/** + @c stack + @e swap top 2 vals + @j X^bNgbv 2 lB + */ +DEFINE_INSN +swap +() +(VALUE val, VALUE obj) +(VALUE obj, VALUE val) +{ + /* none */ +} + +/** + @c stack + @e for stack caching. + @j X^bNLbVOKvB + */ +DEFINE_INSN +reput +() +(..., VALUE val) +(VALUE val) // inc += 0; +{ + /* none */ +} + +/** + @c stack + @e get nth stack value from stack top + @j X^bNgbv n X^bNvbVB + */ +DEFINE_INSN +topn +(rb_num_t n) +(...) 
+(VALUE val) // inc += 1; +{ + val = TOPN(n); +} + +/** + @c stack + @e set Nth stack entry to stack top + @j X^bNgbvl n X^bNRs[ + */ +DEFINE_INSN +setn +(rb_num_t n) +(..., VALUE val) +(VALUE val) // inc += 0 +{ + TOPN(n-1) = val; +} + +/** + @c stack + @e empt current stack + @j current stack B + */ +DEFINE_INSN +adjuststack +(rb_num_t n) +(...) +(...) // inc -= n +{ + DEC_SP(n); +} + + +/**********************************************************/ +/* deal with setting */ +/**********************************************************/ + +/** + @c setting + @e defined? + @j defined? sB + */ +DEFINE_INSN +defined +(rb_num_t type, VALUE obj, VALUE needstr) +(VALUE v) +(VALUE val) +{ + VALUE klass; + const char *expr_type = 0; + val = Qnil; + + switch (type) { + case DEFINED_IVAR: + if (rb_ivar_defined(GET_SELF(), SYM2ID(obj))) { + expr_type = "instance-variable"; + } + break; + case DEFINED_IVAR2: + klass = vm_get_cbase(GET_ISEQ(), GET_LFP(), GET_DFP()); + break; + case DEFINED_GVAR: + if (rb_gvar_defined((struct global_entry *)(obj & ~1))) { + expr_type = "global-variable"; + } + break; + case DEFINED_CVAR: + klass = vm_get_cbase(GET_ISEQ(), GET_LFP(), GET_DFP()); + if (rb_cvar_defined(klass, SYM2ID(obj))) { + expr_type = "class variable"; + } + break; + case DEFINED_CONST: + klass = v; + if (vm_get_ev_const(th, GET_ISEQ(), klass, SYM2ID(obj), 1)) { + expr_type = "constant"; + } + break; + case DEFINED_FUNC: + klass = CLASS_OF(v); + if (rb_method_boundp(klass, SYM2ID(obj), 0)) { + expr_type = "method"; + } + break; + case DEFINED_METHOD:{ + VALUE klass = CLASS_OF(v); + NODE *method = (NODE *) rb_method_node(klass, SYM2ID(obj)); + + if (method) { + if (!(method->nd_noex & NOEX_PRIVATE)) { + if (!((method->nd_noex & NOEX_PROTECTED) && + !rb_obj_is_kind_of(GET_SELF(), + rb_class_real(klass)))) { + expr_type = "method"; + } + } + } + break; + } + case DEFINED_YIELD: + if (GET_BLOCK_PTR()) { + expr_type = "yield"; + } + break; + case DEFINED_ZSUPER:{ + rb_iseq_t *ip = 
GET_ISEQ(); + while (ip) { + if (ip->defined_method_id) { + break; + } + ip = ip->parent_iseq; + } + if (ip) { + VALUE klass = vm_search_normal_superclass(ip->klass, GET_SELF()); + if (rb_method_boundp(klass, ip->defined_method_id, 0)) { + expr_type = "super"; + } + } + break; + } + case DEFINED_REF:{ + val = vm_getspecial(th, GET_LFP(), Qfalse, FIX2INT(obj)); + if (val != Qnil) { + expr_type = "global-variable"; + } + break; + } + default: + rb_bug("unimplemented defined? type (VM)"); + break; + } + if (expr_type != 0) { + if (needstr != Qfalse) { + val = rb_str_new2(expr_type); + } + else { + val = Qtrue; + } + } +} + +/** + @c setting + @e trace + @j trace pB + */ +DEFINE_INSN +trace +(rb_num_t nf) +() +() +{ + rb_event_flag_t flag = nf; + + EXEC_EVENT_HOOK(th, flag, GET_SELF(), 0, 0 /* TODO: id, klass */); +} + +/**********************************************************/ +/* deal with control flow 1: class/module */ +/**********************************************************/ + +/** + @c class/module + @e + enter class definition scope. if super is Qfalse, and clsas + "klass" is defined, it's redefine. otherwise, define "klass" class. + @j NX`XR[vsB + super Qfalse klassNX``B + Aklass NX`B + */ +DEFINE_INSN +defineclass +(ID id, ISEQ class_iseq, rb_num_t define_type) +(VALUE cbase, VALUE super) +(VALUE val) +{ + VALUE klass; + + switch ((int)define_type) { + case 0: + /* val is dummy. 
classdef returns class scope value */ + + if (super == Qnil) { + super = rb_cObject; + } + + vm_check_if_namespace(cbase); + + /* find klass */ + if (rb_const_defined_at(cbase, id)) { + /* already exist */ + klass = rb_const_get_at(cbase, id); + if (TYPE(klass) != T_CLASS) { + rb_raise(rb_eTypeError, "%s is not a class", rb_id2name(id)); + } + + if (super != rb_cObject) { + VALUE tmp; + tmp = rb_class_real(RCLASS_SUPER(klass)); + + if (tmp != super) { + rb_raise(rb_eTypeError, "superclass mismatch for class %s", + rb_id2name(id)); + } + } + } + else { + /* new class declaration */ + klass = rb_define_class_id(id, super); + rb_set_class_path_string(klass, cbase, rb_id2str(id)); + rb_const_set(cbase, id, klass); + rb_class_inherited(super, klass); + } + break; + case 1: + /* val is dummy. classdef returns class scope value */ + /* super is dummy */ + klass = rb_singleton_class(cbase); + break; + case 2: + /* val is dummy. classdef returns class scope value */ + /* super is dummy */ + + vm_check_if_namespace(cbase); + + /* find klass */ + if (rb_const_defined_at(cbase, id)) { + klass = rb_const_get_at(cbase, id); + /* already exist */ + if (TYPE(klass) != T_MODULE) { + rb_raise(rb_eTypeError, "%s is not a module", rb_id2name(id)); + } + } + else { + /* new module declaration */ + klass = rb_define_module_id(id); + rb_set_class_path_string(klass, cbase, rb_id2str(id)); + rb_const_set(cbase, id, klass); + } + break; + default: + rb_bug("unknown defineclass type: %d", (int)define_type); + } + + COPY_CREF(class_iseq->cref_stack, vm_cref_push(th, klass, NOEX_PUBLIC)); + + /* enter scope */ + vm_push_frame(th, class_iseq, + VM_FRAME_MAGIC_CLASS, klass, (VALUE) GET_DFP() | 0x02, + class_iseq->iseq_encoded, GET_SP(), 0, + class_iseq->local_size); + RESTORE_REGS(); + + INC_VM_STATE_VERSION(); + NEXT_INSN(); +} + + +/**********************************************************/ +/* deal with control flow 2: method/iterator */ 
+/**********************************************************/
+
+/**
+  @c method/iterator
+  @e obj.send(id, args..) # args.size => num
+  @j \bhosB
+      obj.send(id, args..) # args.size => num
+      flag & VM_CALL_ARGS_SPLAT_BIT    != 0 -> splat last arg
+      flag & VM_CALL_ARGS_BLOCKARG_BIT != 0 -> Proc as Block
+      flag & VM_CALL_FCALL_BIT         != 0 -> FCALL ( func() )
+      flag & VM_CALL_VCALL_BIT         != 0 -> VCALL ( func   )
+      ...
+ */
+DEFINE_INSN
+send
+(ID op_id, rb_num_t op_argc, ISEQ blockiseq, rb_num_t op_flag, IC ic)
+(...)
+(VALUE val) // inc += - (op_argc + ((op_flag & VM_CALL_ARGS_BLOCKARG_BIT) ? 1 : 0));
+{
+    NODE *mn;
+    VALUE recv, klass;
+    rb_block_t *blockptr = 0; /* passed by address to caller_setup_args() below */
+    rb_num_t num = caller_setup_args(th, GET_CFP(), op_flag, op_argc,
+                                     (rb_iseq_t *)blockiseq, &blockptr);
+    rb_num_t flag = op_flag; /* local copies: vm_send_optimize() may rewrite flag, num and id */
+    ID id = op_id;
+
+    /* get receiver: self for an FCALL, otherwise the stack entry below the num arguments */
+    recv = (flag & VM_CALL_FCALL_BIT) ? GET_SELF() : TOPN(num);
+    klass = CLASS_OF(recv);
+    mn = vm_method_search(id, klass, ic);
+
+    /* send/funcall optimization */
+    if (flag & VM_CALL_SEND_BIT) {
+        vm_send_optimize(GET_CFP(), &mn, &flag, &num, &id, klass);
+    }
+
+    CALL_METHOD(num, blockptr, flag, id, mn, recv);
+}
+
+/**
+  @c method/iterator
+  @e super(args) # args.size => num
+  @j super sB
+      super(args) # args.size => num
+      flag Iyh send B
+ */
+DEFINE_INSN
+invokesuper
+(rb_num_t op_argc, ISEQ blockiseq, rb_num_t op_flag)
+(...)
+(VALUE val) // inc += - (op_argc + ((op_flag & VM_CALL_ARGS_BLOCKARG_BIT) ? 1 : 0));
+{
+    rb_block_t *blockptr = !(op_flag & VM_CALL_ARGS_BLOCKARG_BIT) ? GET_BLOCK_PTR() : 0; /* without a block argument, start from GET_BLOCK_PTR() */
+    int num = caller_setup_args(th, GET_CFP(), op_flag, op_argc, blockiseq, &blockptr);
+    VALUE recv, klass;
+    NODE *mn;
+    ID id;
+    const VALUE flag = VM_CALL_SUPER_BIT | VM_CALL_FCALL_BIT; /* fixed flags; op_flag is only used for the argument setup above */
+
+    recv = GET_SELF();
+    vm_search_superclass(GET_CFP(), GET_ISEQ(), recv, TOPN(num), &id, &klass); /* id and klass are filled in through the pointers */
+    mn = rb_method_node(klass, id);
+
+    CALL_METHOD(num, blockptr, flag, id, mn, recv);
+}
+
+/**
+  @c method/iterator
+  @e yield(args) # args.size => num, flag shows expand argument or not
+  @j yield sB
+      yield(args) # args.size => num
+ */
+DEFINE_INSN
+invokeblock
+(rb_num_t num, rb_num_t flag)
+(...)
+(VALUE val)  // inc += 1 - num;
+{
+    val = vm_invoke_block(th, GET_CFP(), num, flag);
+    if (val == Qundef) { /* Qundef result: reload registers and go on to the next instruction */
+        RESTORE_REGS();
+        NEXT_INSN();
+    }
+}
+
+/**
+  @c method/iterator
+  @e return from this scope.
+  @j XR[vB
+ */
+DEFINE_INSN
+leave
+()
+(VALUE val)
+(VALUE val)
+{
+    if (OPT_CHECKED_RUN) { /* optional consistency check: sp must equal bp when leaving */
+        if (reg_cfp->sp != reg_cfp->bp) {
+            rb_bug("Stack consistency error (sp: %"PRIdPTRDIFF", bp: %"PRIdPTRDIFF")",
+                   VM_SP_CNT(th, reg_cfp->sp), VM_SP_CNT(th, reg_cfp->bp));
+        }
+    }
+
+    RUBY_VM_CHECK_INTS();
+    vm_pop_frame(th);
+    RESTORE_REGS();
+}
+
+/**
+  @c method/iterator
+  @e return from this vm loop
+  @j VM loop B
+ */
+DEFINE_INSN
+finish
+()
+(VALUE val)
+(VALUE val)
+{
+#if OPT_CALL_THREADED_CODE
+    rb_bug("unused instruction on OPT_CALL_THREADED_CODE");
+#else
+    th->cfp++; /* advance th->cfp by one control frame, then hand val back to the caller */
+    return val;
+#endif
+}
+
+/**********************************************************/
+/* deal with control flow 3: exception                    */
+/**********************************************************/
+
+/**
+  @c exception
+  @e longjump
+  @j WvsB
+ */
+DEFINE_INSN
+throw
+(rb_num_t throw_state)
+(VALUE throwobj)
+(VALUE val)
+{
+    RUBY_VM_CHECK_INTS();
+    val = vm_throw(th, GET_CFP(), throw_state, throwobj);
+    THROW_EXCEPTION(val);
+    /* unreachable */
+}
+
+/**********************************************************/
+/* deal with control flow 4: local jump                   */
+/**********************************************************/
+
+/**
+  @c jump
+  @e set PC to (PC + dst).
+  @j PC (PC + dst) B
+ */
+DEFINE_INSN
+jump
+(OFFSET dst)
+()
+()
+{
+    RUBY_VM_CHECK_INTS();
+    JUMP(dst);
+}
+
+/**
+  @c jump
+  @e if val is not false or nil, set PC to (PC + dst).
+  @j val false nil APC (PC + dst) B
+ */
+DEFINE_INSN
+branchif
+(OFFSET dst)
+(VALUE val)
+()
+{
+    if (RTEST(val)) {
+        RUBY_VM_CHECK_INTS();
+        JUMP(dst);
+    }
+}
+
+/**
+  @c jump
+  @e if val is false or nil, set PC to (PC + dst).
+  @j val false nil APC (PC + dst) B
+ */
+DEFINE_INSN
+branchunless
+(OFFSET dst)
+(VALUE val)
+()
+{
+    if (!RTEST(val)) {
+        RUBY_VM_CHECK_INTS();
+        JUMP(dst);
+    }
+}
+
+
+/**********************************************************/
+/* for optimize                                           */
+/**********************************************************/
+
+/**
+  @c optimize
+  @e inline cache
+  @j CCLbVLAlX^bNvbV dst WvB
+ */
+DEFINE_INSN
+getinlinecache
+(IC ic, OFFSET dst)
+()
+(VALUE val)
+{
+    if (ic->ic_vmstat == GET_VM_STATE_VERSION()) { /* stored stamp equals the current VM state version: use the cached value */
+        val = ic->ic_value;
+        JUMP(dst);
+    }
+    else {
+        /* none */
+        val = Qnil;
+    }
+}
+
+/**
+  @c optimize
+  @e inline cache (once)
+  @j once B
+ */
+DEFINE_INSN
+onceinlinecache
+(IC ic, OFFSET dst)
+()
+(VALUE val)
+{
+    if (ic->ic_vmstat) { /* any nonzero stamp counts as filled; the version is not compared here */
+        val = ic->ic_value;
+        JUMP(dst);
+    }
+    else {
+        /* none */
+        val = Qnil;
+    }
+}
+
+/**
+  @c optimize
+  @e set inline cache
+  @j CCLbVlB
+ */
+DEFINE_INSN
+setinlinecache
+(OFFSET dst)
+(VALUE val)
+(VALUE val)
+{
+    IC ic = GET_CONST_INLINE_CACHE(dst);
+
+    ic->ic_value = val;
+    ic->ic_vmstat = GET_VM_STATE_VERSION() - ruby_vm_const_missing_count; /* stamp is offset by the missing-constant counter */
+    ruby_vm_const_missing_count = 0; /* the counter is reset once it has been folded into the stamp */
+}
+
+/**
+  @c optimize
+  @e case dispatcher
+  @j case A\\WvB
+ */
+DEFINE_INSN
+opt_case_dispatch
+(CDHASH hash, OFFSET else_offset)
+(..., VALUE key)
+() // inc += -1;
+{
+    if (BASIC_OP_UNREDEFINED_P(BOP_EQQ)) { /* direct table lookup of key */
+        VALUE val;
+        if (st_lookup(RHASH_TBL(hash), key, &val)) {
+            JUMP(FIX2INT(val)); /* the table value is the jump offset, stored as a Fixnum */
+        }
+        else {
+            JUMP(else_offset);
+        }
+    }
+    else { /* otherwise visit the table entries with opt_case_dispatch_i */
+        struct opt_case_dispatch_i_arg arg = {
+            key, -1
+        };
+
+        st_foreach(RHASH_TBL(hash), opt_case_dispatch_i, (st_data_t)&arg);
+
+        if (arg.label != -1) { /* a label other than -1 is taken as the jump offset */
+            JUMP(arg.label);
+        }
+        else {
+            JUMP(else_offset);
+        }
+    }
+}
+
+/**
+  @c optimize
+  @e check environment
+  @j gpB
+ */
+DEFINE_INSN
+opt_checkenv
+()
+()
+()
+{
+    if (GET_CFP()->bp != GET_DFP() + 1) { /* dfp is not the slot just below bp */
+        VALUE *new_dfp = GET_CFP()->bp - 1;
+        /* TODO: copy env and clean stack at creating env? */
+        *new_dfp = *GET_DFP(); /* carry over the value stored at the old dfp */
+        SET_DFP(new_dfp);
+    }
+}
+
+
+/** simple functions */
+
+/**
+  @c optimize
+  @e optimized X+Y.
+  @j K X+YB
+ */
+DEFINE_INSN
+opt_plus
+()
+(VALUE recv, VALUE obj)
+(VALUE val)
+{
+    if (0) { /* placeholder so that every real case below can start with "else if" */
+
+    }
+#if 1
+    else if (FIXNUM_2_P(recv, obj) &&
+             BASIC_OP_UNREDEFINED_P(BOP_PLUS)) {
+        /* fixnum + fixnum */
+#ifndef LONG_LONG_VALUE
+        val = (recv + (obj & (~1))); /* bit 0 of obj is cleared before the raw VALUEs are added */
+        if ((~(recv ^ obj) & (recv ^ val)) &
+            ((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) { /* top bits of recv and obj agree but the sum's differs: redo as Bignum */
+            val = rb_big_plus(rb_int2big(FIX2LONG(recv)),
+                              rb_int2big(FIX2LONG(obj)));
+        }
+#else
+        long a, b, c;
+        a = FIX2LONG(recv);
+        b = FIX2LONG(obj);
+        c = a + b;
+        if (FIXABLE(c)) {
+            val = LONG2FIX(c);
+        }
+        else {
+            val = rb_big_plus(rb_int2big(a), rb_int2big(b));
+        }
+#endif
+    }
+#endif
+
+    else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
+        if (0) {
+        }
+#if 1
+        else if (HEAP_CLASS_OF(recv) == rb_cFloat &&
+                 HEAP_CLASS_OF(obj) == rb_cFloat &&
+                 BASIC_OP_UNREDEFINED_P(BOP_PLUS)) {
+            val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
+        }
+#endif
+
+#if 1
+        else if (HEAP_CLASS_OF(recv) == rb_cString &&
+                 HEAP_CLASS_OF(obj) == rb_cString &&
+                 BASIC_OP_UNREDEFINED_P(BOP_PLUS)) {
+            val = rb_str_plus(recv, obj);
+        }
+#endif
+#if 1
+        else if (HEAP_CLASS_OF(recv) == rb_cArray &&
+                 BASIC_OP_UNREDEFINED_P(BOP_PLUS)) { /* only recv's class is tested; obj is passed to rb_ary_plus() as is */
+            val = rb_ary_plus(recv, obj);
+        }
+#endif
+        else {
+            goto INSN_LABEL(normal_dispatch);
+        }
+    }
+    else {
+        INSN_LABEL(normal_dispatch): /* generic path: push both operands back and call the method */
+        PUSH(recv);
+        PUSH(obj);
+        CALL_SIMPLE_METHOD(1, idPLUS, recv);
+    }
+}
+
+/**
+  @c optimize
+  
@e optimized X-Y. + @j K X-YB + */ +DEFINE_INSN +opt_minus +() +(VALUE recv, VALUE obj) +(VALUE val) +{ + if (FIXNUM_2_P(recv, obj) && + BASIC_OP_UNREDEFINED_P(BOP_MINUS)) { + long a, b, c; + + a = FIX2LONG(recv); + b = FIX2LONG(obj); + c = a - b; + + if (FIXABLE(c)) { + val = LONG2FIX(c); + } + else { + val = rb_big_minus(rb_int2big(a), rb_int2big(b)); + } + } + else { + /* other */ + PUSH(recv); + PUSH(obj); + CALL_SIMPLE_METHOD(1, idMINUS, recv); + } +} + +/** + @c optimize + @e optimized X*Y. + @j K X*YB + */ +DEFINE_INSN +opt_mult +() +(VALUE recv, VALUE obj) +(VALUE val) +{ + if (FIXNUM_2_P(recv, obj) && + BASIC_OP_UNREDEFINED_P(BOP_MULT)) { + long a, b, c; + + a = FIX2LONG(recv); + if (a == 0) { + val = recv; + } + else { + b = FIX2LONG(obj); + c = a * b; + + if (FIXABLE(c) && c / a == b) { + val = LONG2FIX(c); + } + else { + val = rb_big_mul(rb_int2big(a), rb_int2big(b)); + } + } + } + else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) { + if (0) { + } +#if 1 + else if (HEAP_CLASS_OF(recv) == rb_cFloat && + HEAP_CLASS_OF(obj) == rb_cFloat && + BASIC_OP_UNREDEFINED_P(BOP_MULT)) { + val = DBL2NUM(RFLOAT_VALUE(recv) * RFLOAT_VALUE(obj)); + } +#endif + else { + goto INSN_LABEL(normal_dispatch); + } + } + else { + INSN_LABEL(normal_dispatch): + PUSH(recv); + PUSH(obj); + CALL_SIMPLE_METHOD(1, idMULT, recv); + } +} + +/** + @c optimize + @e optimized X/Y. 
+ @j K X/YB + */ +DEFINE_INSN +opt_div +() +(VALUE recv, VALUE obj) +(VALUE val) +{ + if (FIXNUM_2_P(recv, obj) && + BASIC_OP_UNREDEFINED_P(BOP_DIV)) { + long x, y, div; + + x = FIX2LONG(recv); + y = FIX2LONG(obj); + { + /* copied from numeric.c#fixdivmod */ + long mod; + if (y == 0) + goto INSN_LABEL(normal_dispatch); + if (y < 0) { + if (x < 0) + div = -x / -y; + else + div = -(x / -y); + } + else { + if (x < 0) + div = -(-x / y); + else + div = x / y; + } + mod = x - div * y; + if ((mod < 0 && y > 0) || (mod > 0 && y < 0)) { + mod += y; + div -= 1; + } + } + val = LONG2NUM(div); + } + else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) { + if (0) { + } +#if 1 + else if (HEAP_CLASS_OF(recv) == rb_cFloat && + HEAP_CLASS_OF(obj) == rb_cFloat && + BASIC_OP_UNREDEFINED_P(BOP_DIV)) { + val = DBL2NUM(RFLOAT_VALUE(recv) / RFLOAT_VALUE(obj)); + } +#endif + else { + goto INSN_LABEL(normal_dispatch); + } + } + else { + INSN_LABEL(normal_dispatch): + PUSH(recv); + PUSH(obj); + CALL_SIMPLE_METHOD(1, idDIV, recv); + } +} + +/** + @c optimize + @e optimized X%Y. 
+ @j K X%YB + */ +DEFINE_INSN +opt_mod +() +(VALUE recv, VALUE obj) +(VALUE val) +{ + if (FIXNUM_2_P(recv, obj) && + BASIC_OP_UNREDEFINED_P(BOP_MOD)) { + long x, y, mod; + + x = FIX2LONG(recv); + y = FIX2LONG(obj); + { + /* copied from numeric.c#fixdivmod */ + long div; + + if (y == 0) + rb_num_zerodiv(); + if (y < 0) { + if (x < 0) + div = -x / -y; + else + div = -(x / -y); + } + else { + if (x < 0) + div = -(-x / y); + else + div = x / y; + } + mod = x - div * y; + if ((mod < 0 && y > 0) || (mod > 0 && y < 0)) { + mod += y; + div -= 1; + } + } + val = LONG2FIX(mod); + } + else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) { + if (0) { + } + else if (HEAP_CLASS_OF(recv) == rb_cFloat && + HEAP_CLASS_OF(obj) == rb_cFloat && + BASIC_OP_UNREDEFINED_P(BOP_MOD)) { + double x = RFLOAT_VALUE(recv); + double y = RFLOAT_VALUE(obj); + double div, mod; + + { + double z; + + modf(x / y, &z); + mod = x - z * y; + } + + div = (x - mod) / y; + if (y * mod < 0) { + mod += y; + div -= 1.0; + } + val = DBL2NUM(mod); + } + else { + goto INSN_LABEL(normal_dispatch); + } + } + else { + INSN_LABEL(normal_dispatch): + PUSH(recv); + PUSH(obj); + CALL_SIMPLE_METHOD(1, idMOD, recv); + } +} + +/** + @c optimize + @e optimized X==Y. + @j K X==YB + */ +DEFINE_INSN +opt_eq +(IC ic) +(VALUE recv, VALUE obj) +(VALUE val) +{ + val = opt_eq_func(recv, obj, ic); + + if (val == Qundef) { + /* other */ + PUSH(recv); + PUSH(obj); + CALL_SIMPLE_METHOD(1, idEq, recv); + } +} + +/** + @c optimize + @e optimized X!=Y. + @j K X!=YB + */ +DEFINE_INSN +opt_neq +(IC ic1, IC ic2) +(VALUE recv, VALUE obj) +(VALUE val) +{ + extern VALUE rb_obj_not_equal(VALUE obj1, VALUE obj2); + NODE *mn = vm_method_search(idNeq, CLASS_OF(recv), ic1); + val = Qundef; + + if (check_cfunc(mn, rb_obj_not_equal)) { + val = opt_eq_func(recv, obj, ic2); + + if (val != Qundef) { + val = RTEST(val) ? 
Qfalse : Qtrue; + } + } + + if (val == Qundef) { + /* other */ + PUSH(recv); + PUSH(obj); + CALL_SIMPLE_METHOD(1, idNeq, recv); + } +} + +/** + @c optimize + @e optimized XY. + @j K X>YB + */ +DEFINE_INSN +opt_gt +() +(VALUE recv, VALUE obj) +(VALUE val) +{ + if (FIXNUM_2_P(recv, obj) && + BASIC_OP_UNREDEFINED_P(BOP_GT)) { + SIGNED_VALUE a = recv, b = obj; + + if (a > b) { + val = Qtrue; + } + else { + val = Qfalse; + } + } + else { + PUSH(recv); + PUSH(obj); + CALL_SIMPLE_METHOD(1, idGT, recv); + } +} + +/** + @c optimize + @e optimized X>=Y. + @j K X>=YB + */ +DEFINE_INSN +opt_ge +() +(VALUE recv, VALUE obj) +(VALUE val) +{ + if (FIXNUM_2_P(recv, obj) && + BASIC_OP_UNREDEFINED_P(BOP_GE)) { + SIGNED_VALUE a = recv, b = obj; + + if (a >= b) { + val = Qtrue; + } + else { + val = Qfalse; + } + } + else { + PUSH(recv); + PUSH(obj); + CALL_SIMPLE_METHOD(1, idGE, recv); + } +} + +/** + @c optimize + @e << + @j K X<errinfo; + th->errinfo = Qnil; + THROW_EXCEPTION(err); + } + + RESTORE_REGS(); + NEXT_INSN(); +} + +/** + @c joke + @e BLT + @j BLT + */ +DEFINE_INSN +bitblt +() +() +(VALUE ret) +{ + ret = rb_str_new2("a bit of bacon, lettuce and tomato"); +} + +/** + @c joke + @e The Answer to Life, the Universe, and Everything + @j lAFAB + */ +DEFINE_INSN +answer +() +() +(VALUE ret) +{ + ret = INT2FIX(42); +} + diff --git a/install-sh b/install-sh new file mode 100644 index 0000000..e69de29 diff --git a/instruby.rb b/instruby.rb new file mode 100755 index 0000000..17dc6a4 --- /dev/null +++ b/instruby.rb @@ -0,0 +1,416 @@ +#!./miniruby + +load "./rbconfig.rb" +include RbConfig +$".unshift File.expand_path("./rbconfig.rb") + +srcdir = File.dirname(__FILE__) +unless defined?(CROSS_COMPILING) and CROSS_COMPILING + $:.replace([File.expand_path("lib", srcdir), Dir.pwd]) +end +require 'fileutils' +require 'shellwords' +require 'optparse' +require 'optparse/shellwords' +require 'tempfile' + +STDOUT.sync = true +File.umask(0) + +def parse_args(argv = ARGV) + $mantype = 'doc' + 
$destdir = nil + $extout = nil + $make = 'make' + $mflags = [] + $install = [] + $installed_list = nil + $dryrun = false + $rdocdir = nil + $data_mode = 0644 + $prog_mode = 0755 + $dir_mode = nil + $script_mode = nil + $cmdtype = ('bat' if File::ALT_SEPARATOR == '\\') + mflags = [] + opt = OptionParser.new + opt.on('-n') {$dryrun = true} + opt.on('--dest-dir=DIR') {|dir| $destdir = dir} + opt.on('--extout=DIR') {|dir| $extout = (dir unless dir.empty?)} + opt.on('--make=COMMAND') {|make| $make = make} + opt.on('--mantype=MAN') {|man| $mantype = man} + opt.on('--make-flags=FLAGS', '--mflags', Shellwords) do |v| + if arg = v.first + arg.insert(0, '-') if /\A[^-][^=]*\Z/ =~ arg + end + $mflags.concat(v) + end + opt.on('-i', '--install=TYPE', + [:local, :bin, :"bin-arch", :"bin-comm", :lib, :man, :ext, :"ext-arch", :"ext-comm", :rdoc]) do |ins| + $install << ins + end + opt.on('--data-mode=OCTAL-MODE', OptionParser::OctalInteger) do |mode| + $data_mode = mode + end + opt.on('--prog-mode=OCTAL-MODE', OptionParser::OctalInteger) do |mode| + $prog_mode = mode + end + opt.on('--dir-mode=OCTAL-MODE', OptionParser::OctalInteger) do |mode| + $dir_mode = mode + end + opt.on('--script-mode=OCTAL-MODE', OptionParser::OctalInteger) do |mode| + $script_mode = mode + end + opt.on('--installed-list [FILENAME]') {|name| $installed_list = name} + opt.on('--rdoc-output [DIR]') {|dir| $rdocdir = dir} + opt.on('--cmd-type=TYPE', %w[bat cmd plain]) {|cmd| $cmdtype = (cmd unless cmd == 'plain')} + + opt.order!(argv) do |v| + case v + when /\AINSTALL[-_]([-\w]+)=(.*)/ + argv.unshift("--#{$1.tr('_', '-')}=#{$2}") + when /\A\w[-\w+]*=\z/ + mflags << v + when /\A\w[-\w+]*\z/ + $install << v.intern + else + raise OptionParser::InvalidArgument, v + end + end rescue abort [$!.message, opt].join("\n") + + $make, *rest = Shellwords.shellwords($make) + $mflags.unshift(*rest) unless rest.empty? 
+ $mflags.unshift(*mflags) + + def $mflags.set?(flag) + grep(/\A-(?!-).*#{flag.chr}/i) { return true } + false + end + def $mflags.defined?(var) + grep(/\A#{var}=(.*)/) {return block_given? ? yield($1) : $1} + false + end + + if $mflags.set?(?n) + $dryrun = true + else + $mflags << '-n' if $dryrun + end + + $destdir ||= $mflags.defined?("DESTDIR") + if $extout ||= $mflags.defined?("EXTOUT") + Config.expand($extout) + end + + $continue = $mflags.set?(?k) + + if $installed_list ||= $mflags.defined?('INSTALLED_LIST') + Config.expand($installed_list, Config::CONFIG) + $installed_list = open($installed_list, "ab") + $installed_list.sync = true + end + + $rdocdir ||= $mflags.defined?('RDOCOUT') + + $dir_mode ||= $prog_mode | 0700 + $script_mode ||= $prog_mode +end + +parse_args() + +include FileUtils +include FileUtils::NoWrite if $dryrun +@fileutils_output = STDOUT +@fileutils_label = '' + +$install_procs = Hash.new {[]} +def install?(*types, &block) + $install_procs[:all] <<= block + types.each do |type| + $install_procs[type] <<= block + end +end + +def install(src, dest, options = {}) + options[:preserve] = true + super(src, with_destdir(dest), options) + if $installed_list + dest = File.join(dest, File.basename(src)) if $made_dirs[dest] + $installed_list.puts dest + end +end + +def ln_sf(src, dest) + super(src, with_destdir(dest)) + $installed_list.puts dest if $installed_list +end + +$made_dirs = {} +def makedirs(dirs) + dirs = fu_list(dirs) + dirs.collect! do |dir| + realdir = with_destdir(dir) + realdir unless $made_dirs.fetch(dir) do + $made_dirs[dir] = true + $installed_list.puts(File.join(dir, "")) if $installed_list + File.directory?(realdir) + end + end.compact! + super(dirs, :mode => $dir_mode) unless dirs.empty? 
+end + +def install_recursive(srcdir, dest, options = {}) + opts = options.clone + noinst = opts.delete(:no_install) + glob = opts.delete(:glob) || "*" + subpath = srcdir.size..-1 + Dir.glob("#{srcdir}/**/#{glob}") do |src| + case base = File.basename(src) + when /\A\#.*\#\z/, /~\z/ + next + end + if noinst + if Array === noinst + next if noinst.any? {|n| File.fnmatch?(n, base)} + else + next if File.fnmatch?(noinst, base) + end + end + d = dest + src[subpath] + if File.directory?(src) + makedirs(d) + else + makedirs(File.dirname(d)) + install src, d, opts + end + end +end + +def open_for_install(path, mode) + data = open(realpath = with_destdir(path), "rb") {|f| f.read} rescue nil + newdata = yield + unless $dryrun + unless newdata == data + open(realpath, "wb", mode) {|f| f.write newdata} + end + File.chmod(mode, realpath) + end + $installed_list.puts path if $installed_list +end + +def with_destdir(dir) + return dir if !$destdir or $destdir.empty? + dir = dir.sub(/\A\w:/, '') if File::PATH_SEPARATOR == ';' + $destdir + dir +end + +exeext = CONFIG["EXEEXT"] + +ruby_install_name = CONFIG["ruby_install_name"] +rubyw_install_name = CONFIG["rubyw_install_name"] +goruby_install_name = "go" + ruby_install_name + +version = CONFIG["ruby_version"] +bindir = CONFIG["bindir"] +libdir = CONFIG["libdir"] +archhdrdir = rubyhdrdir = CONFIG["rubyhdrdir"] +archhdrdir += "/" + CONFIG["arch"] +rubylibdir = CONFIG["rubylibdir"] +archlibdir = CONFIG["archdir"] +sitelibdir = CONFIG["sitelibdir"] +sitearchlibdir = CONFIG["sitearchdir"] +vendorlibdir = CONFIG["vendorlibdir"] +vendorarchlibdir = CONFIG["vendorarchdir"] +mandir = File.join(CONFIG["mandir"], "man") +configure_args = Shellwords.shellwords(CONFIG["configure_args"]) +enable_shared = CONFIG["ENABLE_SHARED"] == 'yes' +dll = CONFIG["LIBRUBY_SO"] +lib = CONFIG["LIBRUBY"] +arc = CONFIG["LIBRUBY_A"] + +install?(:local, :arch, :bin, :'bin-arch') do + puts "installing binary commands" + + makedirs [bindir, libdir, archlibdir] + + 
install ruby_install_name+exeext, bindir, :mode => $prog_mode + if rubyw_install_name and !rubyw_install_name.empty? + install rubyw_install_name+exeext, bindir, :mode => $prog_mode + end + if File.exist? goruby_install_name+exeext + install goruby_install_name+exeext, bindir, :mode => $prog_mode + end + if enable_shared and dll != lib + install dll, bindir, :mode => $prog_mode + end + install lib, libdir, :mode => $prog_mode unless lib == arc + install arc, libdir, :mode => $data_mode + install "rbconfig.rb", archlibdir, :mode => $data_mode + if CONFIG["ARCHFILE"] + for file in CONFIG["ARCHFILE"].split + install file, archlibdir, :mode => $data_mode + end + end + + if dll == lib and dll != arc + for link in CONFIG["LIBRUBY_ALIASES"].split + ln_sf(dll, File.join(libdir, link)) + end + end +end + +if $extout + extout = "#$extout" + install?(:ext, :arch, :'ext-arch') do + puts "installing extension objects" + makedirs [archlibdir, sitearchlibdir, vendorarchlibdir, archhdrdir] + if noinst = CONFIG["no_install_files"] and noinst.empty? 
+ noinst = nil + end + install_recursive("#{extout}/#{CONFIG['arch']}", archlibdir, :no_install => noinst, :mode => $prog_mode) + install_recursive("#{extout}/include/#{CONFIG['arch']}", archhdrdir, :glob => "*.h", :mode => $data_mode) + end + install?(:ext, :comm, :'ext-comm') do + puts "installing extension scripts" + hdrdir = rubyhdrdir + "/ruby" + makedirs [rubylibdir, sitelibdir, vendorlibdir, hdrdir] + install_recursive("#{extout}/common", rubylibdir, :mode => $data_mode) + install_recursive("#{extout}/include/ruby", hdrdir, :glob => "*.h", :mode => $data_mode) + end +end + +install?(:rdoc) do + if $rdocdir + puts "installing rdoc" + + ridatadir = File.join(CONFIG['datadir'], CONFIG['ruby_install_name'].sub('ruby', 'ri'), CONFIG['ruby_version'], "system") + makedirs [ridatadir] + install_recursive($rdocdir, ridatadir, :mode => $data_mode) + end +end + +install?(:local, :comm, :bin, :'bin-comm') do + puts "installing command scripts" + + Dir.chdir srcdir + makedirs [bindir, rubylibdir] + + ruby_shebang = File.join(bindir, ruby_install_name) + if File::ALT_SEPARATOR + ruby_bin = ruby_shebang.tr(File::SEPARATOR, File::ALT_SEPARATOR) + end + for src in Dir["bin/*"] + next unless File.file?(src) + next if /\/[.#]|(\.(old|bak|orig|rej|diff|patch|core)|~|\/core)$/i =~ src + + name = ruby_install_name.sub(/ruby/, File.basename(src)) + + shebang = '' + body = '' + open(src, "rb") do |f| + shebang = f.gets + body = f.read + end + shebang.sub!(/^\#!.*?ruby\b/) {"#!" + ruby_shebang} + shebang.sub!(/\r$/, '') + body.gsub!(/\r$/, '') + + cmd = File.join(bindir, name) + cmd << ".#{$cmdtype}" if $cmdtype + open_for_install(cmd, $script_mode) do + case $cmdtype + when "bat" + "#{< noinst, :mode => $data_mode) +end + +install?(:local, :arch, :lib) do + puts "installing headers" + + Dir.chdir(srcdir) + makedirs [rubyhdrdir] + noinst = [] + unless RUBY_PLATFORM =~ /mswin|mingw|bccwin/ + noinst << "win32.h" + end + noinst = nil if noinst.empty? 
+ install_recursive("include", rubyhdrdir, :no_install => noinst, :glob => "*.h", :mode => $data_mode) +end + +install?(:local, :comm, :man) do + puts "installing manpages" + + has_goruby = File.exist?(goruby_install_name+exeext) + require File.join(srcdir, "tool/mdoc2man.rb") if $mantype != "doc" + Dir.chdir("#{srcdir}/man") + for mdoc in Dir["*.[1-9]"] + next unless File.file?(mdoc) and open(mdoc){|fh| fh.read(1) == '.'} + if mdoc == "goruby.1" + next unless has_goruby + end + + destdir = mandir + mdoc[/(\d+)$/] + section = $1 + destname = ruby_install_name.sub(/ruby/, File.basename(mdoc, ".#{section}")) + destfile = File.join(destdir, "#{destname}.#{section}") + + makedirs destdir + + if $mantype == "doc" + install mdoc, destfile, :mode => $data_mode + else + w = nil + Tempfile.open(mdoc) do |f| + w = f + open(mdoc) {|r| Mdoc2Man.mdoc2man(r, w)} + end + install w.path, destfile, :mode => $data_mode + w.close! + end + end +end + +$install << :local << :ext if $install.empty? +$install.each do |inst| + if !(procs = $install_procs[inst]) || procs.empty? + next warn("unknown install target - #{inst}") + end + procs.each do |block| + dir = Dir.pwd + begin + block.call + ensure + Dir.chdir(dir) + end + end +end + +# vi:set sw=2: diff --git a/io.c b/io.c new file mode 100644 index 0000000..84c340e --- /dev/null +++ b/io.c @@ -0,0 +1,8802 @@ +/********************************************************************** + + io.c - + + $Author: yugui $ + created at: Fri Oct 15 18:08:59 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/io.h" +#include "dln.h" +#include +#include + +#define free(x) xfree(x) + +#if defined(DOSISH) || defined(__CYGWIN__) +#include +#endif + +#include +#if defined HAVE_NET_SOCKET_H +# include +#elif defined HAVE_SYS_SOCKET_H +# include +#endif + +#if defined(__BOW__) || defined(__CYGWIN__) || defined(_WIN32) || defined(__EMX__) || defined(__BEOS__) +# define NO_SAFE_RENAME +#endif + +#if defined(__CYGWIN__) || defined(_WIN32) +# define NO_LONG_FNAME +#endif + +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(sun) || defined(_nec_ews) +# define USE_SETVBUF +#endif + +#ifdef __QNXNTO__ +#include "unix.h" +#endif + +#include +#if defined(HAVE_SYS_IOCTL_H) && !defined(_WIN32) +#include +#endif +#if defined(HAVE_FCNTL_H) || defined(_WIN32) +#include +#elif defined(HAVE_SYS_FCNTL_H) +#include +#endif + +#if !HAVE_OFF_T && !defined(off_t) +# define off_t long +#endif + +#include + +/* EMX has sys/param.h, but.. */ +#if defined(HAVE_SYS_PARAM_H) && !(defined(__EMX__) || defined(__HIUX_MPP__)) +# include +#endif + +#if !defined NOFILE +# define NOFILE 64 +#endif + +#ifdef HAVE_UNISTD_H +#include +#endif + +#ifdef HAVE_SYSCALL_H +#include +#elif defined HAVE_SYS_SYSCALL_H +#include +#endif + +extern void Init_File(void); + +#ifdef __BEOS__ +# ifndef NOFILE +# define NOFILE (OPEN_MAX) +# endif +#endif + +#include "ruby/util.h" + +#ifndef O_ACCMODE +#define O_ACCMODE (O_RDONLY | O_WRONLY | O_RDWR) +#endif + +#if SIZEOF_OFF_T > SIZEOF_LONG && !defined(HAVE_LONG_LONG) +# error off_t is bigger than long, but you have no long long... +#endif + +#ifndef PIPE_BUF +# ifdef _POSIX_PIPE_BUF +# define PIPE_BUF _POSIX_PIPE_BUF +# else +# define PIPE_BUF 512 /* is this ok? 
*/ +# endif +#endif + +VALUE rb_cIO; +VALUE rb_eEOFError; +VALUE rb_eIOError; + +VALUE rb_stdin, rb_stdout, rb_stderr; +VALUE rb_deferr; /* rescue VIM plugin */ +static VALUE orig_stdout, orig_stderr; + +VALUE rb_output_fs; +VALUE rb_rs; +VALUE rb_output_rs; +VALUE rb_default_rs; + +static VALUE argf; + +static ID id_write, id_read, id_getc, id_flush, id_readpartial; +static VALUE sym_mode, sym_perm, sym_extenc, sym_intenc, sym_encoding, sym_open_args; +static VALUE sym_textmode, sym_binmode; + +struct timeval rb_time_interval(VALUE); + +struct argf { + VALUE filename, current_file; + int last_lineno; /* $. */ + int lineno; + int init_p, next_p; + VALUE argv; + char *inplace; + int binmode; + struct rb_io_enc_t encs; +}; + +static int max_file_descriptor = NOFILE; +#define UPDATE_MAXFD(fd) \ + do { \ + if (max_file_descriptor < (fd)) max_file_descriptor = (fd); \ + } while (0) + +#define argf_of(obj) (*(struct argf *)DATA_PTR(obj)) +#define ARGF argf_of(argf) + +#ifdef _STDIO_USES_IOSTREAM /* GNU libc */ +# ifdef _IO_fpos_t +# define STDIO_READ_DATA_PENDING(fp) ((fp)->_IO_read_ptr != (fp)->_IO_read_end) +# else +# define STDIO_READ_DATA_PENDING(fp) ((fp)->_gptr < (fp)->_egptr) +# endif +#elif defined(FILE_COUNT) +# define STDIO_READ_DATA_PENDING(fp) ((fp)->FILE_COUNT > 0) +#elif defined(FILE_READEND) +# define STDIO_READ_DATA_PENDING(fp) ((fp)->FILE_READPTR < (fp)->FILE_READEND) +#elif defined(__BEOS__) +# define STDIO_READ_DATA_PENDING(fp) (fp->_state._eof == 0) +#else +# define STDIO_READ_DATA_PENDING(fp) (!feof(fp)) +#endif + +#define GetWriteIO(io) rb_io_get_write_io(io) + +#define READ_DATA_PENDING(fptr) ((fptr)->rbuf_len) +#define READ_DATA_PENDING_COUNT(fptr) ((fptr)->rbuf_len) +#define READ_DATA_PENDING_PTR(fptr) ((fptr)->rbuf+(fptr)->rbuf_off) +#define READ_DATA_BUFFERED(fptr) READ_DATA_PENDING(fptr) + +#define READ_CHECK(fptr) do {\ + if (!READ_DATA_PENDING(fptr)) {\ + rb_thread_wait_fd((fptr)->fd);\ + rb_io_check_closed(fptr);\ + }\ +} while(0) + 
+#ifndef S_ISSOCK +# ifdef _S_ISSOCK +# define S_ISSOCK(m) _S_ISSOCK(m) +# else +# ifdef _S_IFSOCK +# define S_ISSOCK(m) ((m & S_IFMT) == _S_IFSOCK) +# else +# ifdef S_IFSOCK +# define S_ISSOCK(m) ((m & S_IFMT) == S_IFSOCK) +# endif +# endif +# endif +#endif + +#if !defined HAVE_SHUTDOWN && !defined shutdown +#define shutdown(a,b) 0 +#endif + +#define rb_sys_fail_path(path) rb_sys_fail(NIL_P(path) ? 0 : RSTRING_PTR(path)) + +#if defined(_WIN32) +#define is_socket(fd, path) rb_w32_is_socket(fd) +#elif !defined(S_ISSOCK) +#define is_socket(fd, path) 0 +#else +static int +is_socket(int fd, VALUE path) +{ + struct stat sbuf; + if (fstat(fd, &sbuf) < 0) + rb_sys_fail_path(path); + return S_ISSOCK(sbuf.st_mode); +} +#endif + +void +rb_eof_error(void) +{ + rb_raise(rb_eEOFError, "end of file reached"); +} + +VALUE +rb_io_taint_check(VALUE io) +{ + if (!OBJ_UNTRUSTED(io) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: operation on trusted IO"); + rb_check_frozen(io); + return io; +} + +void +rb_io_check_initialized(rb_io_t *fptr) +{ + if (!fptr) { + rb_raise(rb_eIOError, "uninitialized stream"); + } +} + +void +rb_io_check_closed(rb_io_t *fptr) +{ + rb_io_check_initialized(fptr); + if (fptr->fd < 0) { + rb_raise(rb_eIOError, "closed stream"); + } +} + +static int io_fflush(rb_io_t *); + +VALUE +rb_io_get_io(VALUE io) +{ + return rb_convert_type(io, T_FILE, "IO", "to_io"); +} + +static VALUE +rb_io_check_io(VALUE io) +{ + return rb_check_convert_type(io, T_FILE, "IO", "to_io"); +} + +VALUE +rb_io_get_write_io(VALUE io) +{ + VALUE write_io; + rb_io_check_initialized(RFILE(io)->fptr); + write_io = RFILE(io)->fptr->tied_io_for_writing; + if (write_io) { + return write_io; + } + return io; +} + +/* + * call-seq: + * IO.try_convert(obj) -> io or nil + * + * Try to convert obj into an IO, using to_io method. + * Returns converted IO or nil if obj cannot be converted + * for any reason. 
+ * + * IO.try_convert(STDOUT) # => STDOUT + * IO.try_convert("STDOUT") # => nil + * + * require 'zlib' + * f = open("/tmp/zz.gz") # => # + * z = Zlib::GzipReader.open(f) # => # + * IO.try_convert(z) # => # + * + */ +static VALUE +rb_io_s_try_convert(VALUE dummy, VALUE io) +{ + return rb_io_check_io(io); +} + +static void +io_unread(rb_io_t *fptr) +{ + off_t r; + rb_io_check_closed(fptr); + if (fptr->rbuf_len == 0 || fptr->mode & FMODE_DUPLEX) + return; + /* xxx: target position may be negative if buffer is filled by ungetc */ + r = lseek(fptr->fd, -fptr->rbuf_len, SEEK_CUR); + if (r < 0) { + if (errno == ESPIPE) + fptr->mode |= FMODE_DUPLEX; + return; + } + fptr->rbuf_off = 0; + fptr->rbuf_len = 0; + return; +} + +static rb_encoding *io_input_encoding(rb_io_t *fptr); + +static void +io_ungetbyte(VALUE str, rb_io_t *fptr) +{ + int len = RSTRING_LEN(str); + + if (fptr->rbuf == NULL) { + fptr->rbuf_off = 0; + fptr->rbuf_len = 0; + if (len > 8192) + fptr->rbuf_capa = len; + else + fptr->rbuf_capa = 8192; + fptr->rbuf = ALLOC_N(char, fptr->rbuf_capa); + } + if (fptr->rbuf_capa < len + fptr->rbuf_len) { + rb_raise(rb_eIOError, "ungetbyte failed"); + } + if (fptr->rbuf_off < len) { + MEMMOVE(fptr->rbuf+fptr->rbuf_capa-fptr->rbuf_len, + fptr->rbuf+fptr->rbuf_off, + char, fptr->rbuf_len); + fptr->rbuf_off = fptr->rbuf_capa-fptr->rbuf_len; + } + fptr->rbuf_off-=len; + fptr->rbuf_len+=len; + MEMMOVE(fptr->rbuf+fptr->rbuf_off, RSTRING_PTR(str), char, len); +} + +static rb_io_t * +flush_before_seek(rb_io_t *fptr) +{ + if (io_fflush(fptr) < 0) + rb_sys_fail(0); + io_unread(fptr); + errno = 0; + return fptr; +} + +#define io_set_eof(fptr) (void)(((fptr)->mode & FMODE_TTY) && ((fptr)->mode |= FMODE_EOF)) +#define io_unset_eof(fptr) (fptr->mode &= ~FMODE_EOF) +#define io_seek(fptr, ofs, whence) (io_unset_eof(fptr), lseek(flush_before_seek(fptr)->fd, ofs, whence)) +#define io_tell(fptr) lseek(flush_before_seek(fptr)->fd, 0, SEEK_CUR) + +#ifndef SEEK_CUR +# define SEEK_SET 0 +# 
define SEEK_CUR 1 +# define SEEK_END 2 +#endif + +#define FMODE_SYNCWRITE (FMODE_SYNC|FMODE_WRITABLE) + +void +rb_io_check_readable(rb_io_t *fptr) +{ + rb_io_check_closed(fptr); + if (!(fptr->mode & FMODE_READABLE)) { + rb_raise(rb_eIOError, "not opened for reading"); + } + if (fptr->wbuf_len) { + if (io_fflush(fptr) < 0) + rb_sys_fail(0); + } + if (fptr->tied_io_for_writing) { + rb_io_t *wfptr; + GetOpenFile(fptr->tied_io_for_writing, wfptr); + if (io_fflush(wfptr) < 0) + rb_sys_fail(0); + } +} + +static rb_encoding* +io_read_encoding(rb_io_t *fptr) +{ + if (fptr->encs.enc) { + return fptr->encs.enc; + } + return rb_default_external_encoding(); +} + +static rb_encoding* +io_input_encoding(rb_io_t *fptr) +{ + if (fptr->encs.enc2) { + return fptr->encs.enc2; + } + return io_read_encoding(fptr); +} + +void +rb_io_check_writable(rb_io_t *fptr) +{ + rb_io_check_closed(fptr); + if (!(fptr->mode & FMODE_WRITABLE)) { + rb_raise(rb_eIOError, "not opened for writing"); + } + if (fptr->rbuf_len) { + io_unread(fptr); + } +} + +int +rb_io_read_pending(rb_io_t *fptr) +{ + return READ_DATA_PENDING(fptr); +} + +void +rb_read_check(FILE *fp) +{ + if (!STDIO_READ_DATA_PENDING(fp)) { + rb_thread_wait_fd(fileno(fp)); + } +} + +void +rb_io_read_check(rb_io_t *fptr) +{ + if (!READ_DATA_PENDING(fptr)) { + rb_thread_wait_fd(fptr->fd); + } + return; +} + +static int +ruby_dup(int orig) +{ + int fd; + + fd = dup(orig); + if (fd < 0) { + if (errno == EMFILE || errno == ENFILE || errno == ENOMEM) { + rb_gc(); + fd = dup(orig); + } + if (fd < 0) { + rb_sys_fail(0); + } + } + return fd; +} + +static VALUE +io_alloc(VALUE klass) +{ + NEWOBJ(io, struct RFile); + OBJSETUP(io, klass, T_FILE); + + io->fptr = 0; + + return (VALUE)io; +} + +#ifndef S_ISREG +# define S_ISREG(m) ((m & S_IFMT) == S_IFREG) +#endif + +static int +wsplit_p(rb_io_t *fptr) +{ +#if defined(HAVE_FCNTL) && defined(F_GETFL) && defined(O_NONBLOCK) + int r; +#endif + + if (!(fptr->mode & FMODE_WSPLIT_INITIALIZED)) { + struct stat 
buf; + if (fstat(fptr->fd, &buf) == 0 && + !S_ISREG(buf.st_mode) +#if defined(HAVE_FCNTL) && defined(F_GETFL) && defined(O_NONBLOCK) + && (r = fcntl(fptr->fd, F_GETFL)) != -1 && + !(r & O_NONBLOCK) +#endif + ) { + fptr->mode |= FMODE_WSPLIT; + } + fptr->mode |= FMODE_WSPLIT_INITIALIZED; + } + return fptr->mode & FMODE_WSPLIT; +} + +struct io_internal_struct { + int fd; + void *buf; + size_t capa; +}; + +static VALUE +internal_read_func(void *ptr) +{ + struct io_internal_struct *iis = (struct io_internal_struct*)ptr; + return read(iis->fd, iis->buf, iis->capa); +} + +static VALUE +internal_write_func(void *ptr) +{ + struct io_internal_struct *iis = (struct io_internal_struct*)ptr; + return write(iis->fd, iis->buf, iis->capa); +} + +static int +rb_read_internal(int fd, void *buf, size_t count) +{ + struct io_internal_struct iis; + iis.fd = fd; + iis.buf = buf; + iis.capa = count; + + return rb_thread_blocking_region(internal_read_func, &iis, RUBY_UBF_IO, 0); +} + +static int +rb_write_internal(int fd, void *buf, size_t count) +{ + struct io_internal_struct iis; + iis.fd = fd; + iis.buf = buf; + iis.capa = count; + + return rb_thread_blocking_region(internal_write_func, &iis, RUBY_UBF_IO, 0); +} + +static long +io_writable_length(rb_io_t *fptr, long l) +{ + if (PIPE_BUF < l && + !rb_thread_alone() && + wsplit_p(fptr)) { + l = PIPE_BUF; + } + return l; +} + +static VALUE +io_flush_buffer(VALUE arg) +{ + rb_io_t *fptr = (rb_io_t *)arg; + long l = io_writable_length(fptr, fptr->wbuf_len); + return rb_write_internal(fptr->fd, fptr->wbuf+fptr->wbuf_off, l); +} + +static int +io_fflush(rb_io_t *fptr) +{ + long r; + + rb_io_check_closed(fptr); + if (fptr->wbuf_len == 0) + return 0; + if (!rb_thread_fd_writable(fptr->fd)) { + rb_io_check_closed(fptr); + } + retry: + if (fptr->wbuf_len == 0) + return 0; + if (fptr->write_lock) { + r = rb_mutex_synchronize(fptr->write_lock, io_flush_buffer, (VALUE)fptr); + } + else { + long l = io_writable_length(fptr, fptr->wbuf_len); + r = 
rb_write_internal(fptr->fd, fptr->wbuf+fptr->wbuf_off, l); + } + /* xxx: Other threads may modify wbuf. + * A lock is required, definitely. */ + rb_io_check_closed(fptr); + if (fptr->wbuf_len <= r) { + fptr->wbuf_off = 0; + fptr->wbuf_len = 0; + return 0; + } + if (0 <= r) { + fptr->wbuf_off += r; + fptr->wbuf_len -= r; + errno = EAGAIN; + } + if (rb_io_wait_writable(fptr->fd)) { + rb_io_check_closed(fptr); + goto retry; + } + return -1; +} + +#ifdef HAVE_RB_FD_INIT +static VALUE +wait_readable(VALUE p) +{ + rb_fdset_t *rfds = (rb_fdset_t *)p; + + return rb_thread_select(rb_fd_max(rfds), rb_fd_ptr(rfds), NULL, NULL, NULL); +} +#endif + +int +rb_io_wait_readable(int f) +{ + rb_fdset_t rfds; + + if (f < 0) { + rb_raise(rb_eIOError, "closed stream"); + } + switch (errno) { + case EINTR: +#if defined(ERESTART) + case ERESTART: +#endif + rb_thread_wait_fd(f); + return Qtrue; + + case EAGAIN: +#if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN + case EWOULDBLOCK: +#endif + rb_fd_init(&rfds); + rb_fd_set(f, &rfds); +#ifdef HAVE_RB_FD_INIT + rb_ensure(wait_readable, (VALUE)&rfds, + (VALUE (*)(VALUE))rb_fd_term, (VALUE)&rfds); +#else + rb_thread_select(f + 1, rb_fd_ptr(&rfds), NULL, NULL, NULL); +#endif + return Qtrue; + + default: + return Qfalse; + } +} + +#ifdef HAVE_RB_FD_INIT +static VALUE +wait_writable(VALUE p) +{ + rb_fdset_t *wfds = (rb_fdset_t *)p; + + return rb_thread_select(rb_fd_max(wfds), NULL, rb_fd_ptr(wfds), NULL, NULL); +} +#endif + +int +rb_io_wait_writable(int f) +{ + rb_fdset_t wfds; + + if (f < 0) { + rb_raise(rb_eIOError, "closed stream"); + } + switch (errno) { + case EINTR: +#if defined(ERESTART) + case ERESTART: +#endif + rb_thread_fd_writable(f); + return Qtrue; + + case EAGAIN: +#if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN + case EWOULDBLOCK: +#endif + rb_fd_init(&wfds); + rb_fd_set(f, &wfds); +#ifdef HAVE_RB_FD_INIT + rb_ensure(wait_writable, (VALUE)&wfds, + (VALUE (*)(VALUE))rb_fd_term, (VALUE)&wfds); +#else + rb_thread_select(f + 1, NULL, 
rb_fd_ptr(&wfds), NULL, NULL); +#endif + return Qtrue; + + default: + return Qfalse; + } +} + +#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) +/* Windows */ +# define NEED_NEWLINE_DECORATOR_ON_READ(fptr) (!(fptr->mode & FMODE_BINMODE)) +# define NEED_NEWLINE_DECORATOR_ON_WRITE(fptr) (!(fptr->mode & FMODE_BINMODE)) +# define TEXTMODE_NEWLINE_DECORATOR_ON_WRITE ECONV_CRLF_NEWLINE_DECORATOR +#else +/* Unix */ +# define NEED_NEWLINE_DECORATOR_ON_READ(fptr) (fptr->mode & FMODE_TEXTMODE) +# define NEED_NEWLINE_DECORATOR_ON_WRITE(fptr) 0 +#endif +#define NEED_READCONV(fptr) (fptr->encs.enc2 != NULL || NEED_NEWLINE_DECORATOR_ON_READ(fptr)) +#define NEED_WRITECONV(fptr) ((fptr->encs.enc != NULL && fptr->encs.enc != rb_ascii8bit_encoding()) || NEED_NEWLINE_DECORATOR_ON_WRITE(fptr) || (fptr->encs.ecflags & (ECONV_DECORATOR_MASK|ECONV_STATEFUL_DECORATOR_MASK))) + +static void +make_writeconv(rb_io_t *fptr) +{ + if (!fptr->writeconv_initialized) { + const char *senc, *denc; + rb_encoding *enc; + int ecflags; + VALUE ecopts; + + fptr->writeconv_initialized = 1; + + ecflags = fptr->encs.ecflags; + ecopts = fptr->encs.ecopts; +#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE + if (NEED_NEWLINE_DECORATOR_ON_WRITE(fptr)) + ecflags |= TEXTMODE_NEWLINE_DECORATOR_ON_WRITE; +#endif + + if (!fptr->encs.enc || (fptr->encs.enc == rb_ascii8bit_encoding() && !fptr->encs.enc2)) { + /* no encoding conversion */ + fptr->writeconv_pre_ecflags = 0; + fptr->writeconv_pre_ecopts = Qnil; + fptr->writeconv = rb_econv_open_opts("", "", ecflags, ecopts); + if (!fptr->writeconv) + rb_exc_raise(rb_econv_open_exc("", "", ecflags)); + fptr->writeconv_asciicompat = Qnil; + } + else { + enc = fptr->encs.enc2 ? 
fptr->encs.enc2 : fptr->encs.enc; + senc = rb_econv_asciicompat_encoding(rb_enc_name(enc)); + if (!senc && !(fptr->encs.ecflags & ECONV_STATEFUL_DECORATOR_MASK)) { + /* single conversion */ + fptr->writeconv_pre_ecflags = ecflags; + fptr->writeconv_pre_ecopts = ecopts; + fptr->writeconv = NULL; + fptr->writeconv_asciicompat = Qnil; + } + else { + /* double conversion */ + fptr->writeconv_pre_ecflags = ecflags & ~ECONV_STATEFUL_DECORATOR_MASK; + fptr->writeconv_pre_ecopts = ecopts; + if (senc) { + denc = rb_enc_name(enc); + fptr->writeconv_asciicompat = rb_str_new2(senc); + } + else { + senc = denc = ""; + fptr->writeconv_asciicompat = rb_str_new2(rb_enc_name(enc)); + } + ecflags = fptr->encs.ecflags & (ECONV_ERROR_HANDLER_MASK|ECONV_STATEFUL_DECORATOR_MASK); + ecopts = fptr->encs.ecopts; + fptr->writeconv = rb_econv_open_opts(senc, denc, ecflags, ecopts); + if (!fptr->writeconv) + rb_exc_raise(rb_econv_open_exc(senc, denc, ecflags)); + } + } + } +} + +/* writing functions */ +struct binwrite_arg { + rb_io_t *fptr; + VALUE str; + long offset; + long length; +}; + +static VALUE +io_binwrite_string(VALUE arg) +{ + struct binwrite_arg *p = (struct binwrite_arg *)arg; + long l = io_writable_length(p->fptr, p->length); + return rb_write_internal(p->fptr->fd, RSTRING_PTR(p->str)+p->offset, l); +} + +static long +io_binwrite(VALUE str, rb_io_t *fptr, int nosync) +{ + long len, n, r, offset = 0; + + len = RSTRING_LEN(str); + if ((n = len) <= 0) return n; + if (fptr->wbuf == NULL && !(!nosync && (fptr->mode & FMODE_SYNC))) { + fptr->wbuf_off = 0; + fptr->wbuf_len = 0; + fptr->wbuf_capa = 8192; + fptr->wbuf = ALLOC_N(char, fptr->wbuf_capa); + fptr->write_lock = rb_mutex_new(); + } + if ((!nosync && (fptr->mode & (FMODE_SYNC|FMODE_TTY))) || + (fptr->wbuf && fptr->wbuf_capa <= fptr->wbuf_len + len)) { + struct binwrite_arg arg; + + /* xxx: use writev to avoid double write if available */ + if (fptr->wbuf_len && fptr->wbuf_len+len <= fptr->wbuf_capa) { + if (fptr->wbuf_capa < 
fptr->wbuf_off+fptr->wbuf_len+len) { + MEMMOVE(fptr->wbuf, fptr->wbuf+fptr->wbuf_off, char, fptr->wbuf_len); + fptr->wbuf_off = 0; + } + MEMMOVE(fptr->wbuf+fptr->wbuf_off+fptr->wbuf_len, RSTRING_PTR(str)+offset, char, len); + fptr->wbuf_len += len; + n = 0; + } + if (io_fflush(fptr) < 0) + return -1L; + if (n == 0) + return len; + /* avoid context switch between "a" and "\n" in STDERR.puts "a". + [ruby-dev:25080] */ + if (fptr->stdio_file != stderr && !rb_thread_fd_writable(fptr->fd)) { + rb_io_check_closed(fptr); + } + arg.fptr = fptr; + arg.str = str; + retry: + arg.offset = offset; + arg.length = n; + if (fptr->write_lock) { + r = rb_mutex_synchronize(fptr->write_lock, io_binwrite_string, (VALUE)&arg); + } + else { + long l = io_writable_length(fptr, n); + r = rb_write_internal(fptr->fd, RSTRING_PTR(str)+offset, l); + } + /* xxx: other threads may modify given string. */ + if (r == n) return len; + if (0 <= r) { + offset += r; + n -= r; + errno = EAGAIN; + } + if (rb_io_wait_writable(fptr->fd)) { + rb_io_check_closed(fptr); + if (offset < RSTRING_LEN(str)) + goto retry; + } + return -1L; + } + + if (fptr->wbuf_off) { + if (fptr->wbuf_len) + MEMMOVE(fptr->wbuf, fptr->wbuf+fptr->wbuf_off, char, fptr->wbuf_len); + fptr->wbuf_off = 0; + } + MEMMOVE(fptr->wbuf+fptr->wbuf_off+fptr->wbuf_len, RSTRING_PTR(str)+offset, char, len); + fptr->wbuf_len += len; + return len; +} + +static VALUE +do_writeconv(VALUE str, rb_io_t *fptr) +{ + if (NEED_WRITECONV(fptr)) { + VALUE common_encoding = Qnil; + + make_writeconv(fptr); + + if (fptr->writeconv) { + if (!NIL_P(fptr->writeconv_asciicompat)) + common_encoding = fptr->writeconv_asciicompat; + else if (!rb_enc_asciicompat(rb_enc_get(str))) { + rb_raise(rb_eArgError, "ASCII incompatible string written for text mode IO without encoding conversion: %s", + rb_enc_name(rb_enc_get(str))); + } + } + else { + if (fptr->encs.enc2) + common_encoding = rb_enc_from_encoding(fptr->encs.enc2); + else if (fptr->encs.enc != 
rb_ascii8bit_encoding()) + common_encoding = rb_enc_from_encoding(fptr->encs.enc); + } + + if (!NIL_P(common_encoding)) { + str = rb_str_encode(str, common_encoding, + fptr->writeconv_pre_ecflags, fptr->writeconv_pre_ecopts); + } + + if (fptr->writeconv) { + str = rb_econv_str_convert(fptr->writeconv, str, ECONV_PARTIAL_INPUT); + } + } + return str; +} + +static long +io_fwrite(VALUE str, rb_io_t *fptr, int nosync) +{ + str = do_writeconv(str, fptr); + return io_binwrite(str, fptr, nosync); +} + +static VALUE +io_write(VALUE io, VALUE str, int nosync) +{ + rb_io_t *fptr; + long n; + VALUE tmp; + + rb_secure(4); + io = GetWriteIO(io); + str = rb_obj_as_string(str); + tmp = rb_io_check_io(io); + if (NIL_P(tmp)) { + /* port is not IO, call write method for it. */ + return rb_funcall(io, id_write, 1, str); + } + io = tmp; + if (RSTRING_LEN(str) == 0) return INT2FIX(0); + + GetOpenFile(io, fptr); + rb_io_check_writable(fptr); + + n = io_fwrite(str, fptr, nosync); + if (n == -1L) rb_sys_fail_path(fptr->pathv); + + return LONG2FIX(n); +} + +/* + * call-seq: + * ios.write(string) => integer + * + * Writes the given string to ios. The stream must be opened + * for writing. If the argument is not a string, it will be converted + * to a string using to_s. Returns the number of bytes + * written. + * + * count = $stdout.write( "This is a test\n" ) + * puts "That was #{count} bytes of data" + * + * produces: + * + * This is a test + * That was 15 bytes of data + */ + +static VALUE +io_write_m(VALUE io, VALUE str) +{ + return io_write(io, str, 0); +} + +VALUE +rb_io_write(VALUE io, VALUE str) +{ + return rb_funcall(io, id_write, 1, str); +} + +/* + * call-seq: + * ios << obj => ios + * + * String Output---Writes obj to ios. + * obj will be converted to a string using + * to_s. + * + * $stdout << "Hello " << "world!\n" + * + * produces: + * + * Hello world! 
+ */ + + +VALUE +rb_io_addstr(VALUE io, VALUE str) +{ + rb_io_write(io, str); + return io; +} + +/* + * call-seq: + * ios.flush => ios + * + * Flushes any buffered data within ios to the underlying + * operating system (note that this is Ruby internal buffering only; + * the OS may buffer the data as well). + * + * $stdout.print "no newline" + * $stdout.flush + * + * produces: + * + * no newline + */ + +VALUE +rb_io_flush(VALUE io) +{ + rb_io_t *fptr; + + if (TYPE(io) != T_FILE) { + return rb_funcall(io, id_flush, 0); + } + + io = GetWriteIO(io); + GetOpenFile(io, fptr); + + if (fptr->mode & FMODE_WRITABLE) { + if (io_fflush(fptr) < 0) + rb_sys_fail(0); +#ifdef _WIN32 + fsync(fptr->fd); +#endif + } + if (fptr->mode & FMODE_READABLE) { + io_unread(fptr); + } + + return io; +} + +/* + * call-seq: + * ios.pos => integer + * ios.tell => integer + * + * Returns the current offset (in bytes) of ios. + * + * f = File.new("testfile") + * f.pos #=> 0 + * f.gets #=> "This is line one\n" + * f.pos #=> 17 + */ + +static VALUE +rb_io_tell(VALUE io) +{ + rb_io_t *fptr; + off_t pos; + + GetOpenFile(io, fptr); + pos = io_tell(fptr); + if (pos < 0 && errno) rb_sys_fail_path(fptr->pathv); + return OFFT2NUM(pos); +} + +static VALUE +rb_io_seek(VALUE io, VALUE offset, int whence) +{ + rb_io_t *fptr; + off_t pos; + + pos = NUM2OFFT(offset); + GetOpenFile(io, fptr); + pos = io_seek(fptr, pos, whence); + if (pos < 0 && errno) rb_sys_fail_path(fptr->pathv); + + return INT2FIX(0); +} + +/* + * call-seq: + * ios.seek(amount, whence=SEEK_SET) -> 0 + * + * Seeks to a given offset anInteger in the stream according to + * the value of whence: + * + * IO::SEEK_CUR | Seeks to _amount_ plus current position + * --------------+---------------------------------------------------- + * IO::SEEK_END | Seeks to _amount_ plus end of stream (you probably + * | want a negative value for _amount_) + * --------------+---------------------------------------------------- + * IO::SEEK_SET | Seeks to the 
absolute location given by _amount_ + * + * Example: + * + * f = File.new("testfile") + * f.seek(-13, IO::SEEK_END) #=> 0 + * f.readline #=> "And so on...\n" + */ + +static VALUE +rb_io_seek_m(int argc, VALUE *argv, VALUE io) +{ + VALUE offset, ptrname; + int whence = SEEK_SET; + + if (rb_scan_args(argc, argv, "11", &offset, &ptrname) == 2) { + whence = NUM2INT(ptrname); + } + + return rb_io_seek(io, offset, whence); +} + +/* + * call-seq: + * ios.pos = integer => integer + * + * Seeks to the given position (in bytes) in ios. + * + * f = File.new("testfile") + * f.pos = 17 + * f.gets #=> "This is line two\n" + */ + +static VALUE +rb_io_set_pos(VALUE io, VALUE offset) +{ + rb_io_t *fptr; + off_t pos; + + pos = NUM2OFFT(offset); + GetOpenFile(io, fptr); + pos = io_seek(fptr, pos, SEEK_SET); + if (pos < 0) rb_sys_fail_path(fptr->pathv); + + return OFFT2NUM(pos); +} + +static void clear_readconv(rb_io_t *fptr); + +/* + * call-seq: + * ios.rewind => 0 + * + * Positions ios to the beginning of input, resetting + * lineno to zero. 
+ * + * f = File.new("testfile") + * f.readline #=> "This is line one\n" + * f.rewind #=> 0 + * f.lineno #=> 0 + * f.readline #=> "This is line one\n" + */ + +static VALUE +rb_io_rewind(VALUE io) +{ + rb_io_t *fptr; + + GetOpenFile(io, fptr); + if (io_seek(fptr, 0L, 0) < 0) rb_sys_fail_path(fptr->pathv); + if (io == ARGF.current_file) { + ARGF.lineno -= fptr->lineno; + } + fptr->lineno = 0; + if (fptr->readconv) { + clear_readconv(fptr); + } + + return INT2FIX(0); +} + +static int +io_fillbuf(rb_io_t *fptr) +{ + int r; + + if (fptr->mode & FMODE_EOF) { + return -1; + } + if (fptr->rbuf == NULL) { + fptr->rbuf_off = 0; + fptr->rbuf_len = 0; + fptr->rbuf_capa = 8192; + fptr->rbuf = ALLOC_N(char, fptr->rbuf_capa); + } + if (fptr->rbuf_len == 0) { + retry: + { + r = rb_read_internal(fptr->fd, fptr->rbuf, fptr->rbuf_capa); + } + if (r < 0) { + if (rb_io_wait_readable(fptr->fd)) + goto retry; + rb_sys_fail_path(fptr->pathv); + } + fptr->rbuf_off = 0; + fptr->rbuf_len = r; + if (r == 0) { + io_set_eof(fptr); + return -1; /* EOF */ + } + } + return 0; +} + +/* + * call-seq: + * ios.eof => true or false + * ios.eof? => true or false + * + * Returns true if ios is at end of file that means + * there are no more data to read. + * The stream must be opened for reading or an IOError will be + * raised. + * + * f = File.new("testfile") + * dummy = f.readlines + * f.eof #=> true + * + * If ios is a stream such as pipe or socket, IO#eof? + * blocks until the other end sends some data or closes it. + * + * r, w = IO.pipe + * Thread.new { sleep 1; w.close } + * r.eof? #=> true after 1 second blocking + * + * r, w = IO.pipe + * Thread.new { sleep 1; w.puts "a" } + * r.eof? #=> false after 1 second blocking + * + * r, w = IO.pipe + * r.eof? # blocks forever + * + * Note that IO#eof? reads data to a input buffer. + * So IO#sysread doesn't work with IO#eof?. 
+ */ + +VALUE +rb_io_eof(VALUE io) +{ + rb_io_t *fptr; + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + + if (READ_DATA_PENDING(fptr)) return Qfalse; + READ_CHECK(fptr); + if (io_fillbuf(fptr) < 0) { + return Qtrue; + } + return Qfalse; +} + +/* + * call-seq: + * ios.sync => true or false + * + * Returns the current ``sync mode'' of ios. When sync mode is + * true, all output is immediately flushed to the underlying operating + * system and is not buffered by Ruby internally. See also + * IO#fsync. + * + * f = File.new("testfile") + * f.sync #=> false + */ + +static VALUE +rb_io_sync(VALUE io) +{ + rb_io_t *fptr; + + io = GetWriteIO(io); + GetOpenFile(io, fptr); + return (fptr->mode & FMODE_SYNC) ? Qtrue : Qfalse; +} + +/* + * call-seq: + * ios.sync = boolean => boolean + * + * Sets the ``sync mode'' to true or false. + * When sync mode is true, all output is immediately flushed to the + * underlying operating system and is not buffered internally. Returns + * the new state. See also IO#fsync. + * + * f = File.new("testfile") + * f.sync = true + * + * (produces no output) + */ + +static VALUE +rb_io_set_sync(VALUE io, VALUE sync) +{ + rb_io_t *fptr; + + io = GetWriteIO(io); + GetOpenFile(io, fptr); + if (RTEST(sync)) { + fptr->mode |= FMODE_SYNC; + } + else { + fptr->mode &= ~FMODE_SYNC; + } + return sync; +} + +/* + * call-seq: + * ios.fsync => 0 or nil + * + * Immediately writes all buffered data in ios to disk. + * Returns nil if the underlying operating system does not + * support fsync(2). Note that fsync differs from + * using IO#sync=. The latter ensures that data is flushed + * from Ruby's buffers, but doesn't not guarantee that the underlying + * operating system actually writes it to disk. 
+ */ + +static VALUE +rb_io_fsync(VALUE io) +{ +#ifdef HAVE_FSYNC + rb_io_t *fptr; + + io = GetWriteIO(io); + GetOpenFile(io, fptr); + + if (io_fflush(fptr) < 0) + rb_sys_fail(0); + if (fsync(fptr->fd) < 0) + rb_sys_fail_path(fptr->pathv); + return INT2FIX(0); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + +/* + * call-seq: + * ios.fileno => fixnum + * ios.to_i => fixnum + * + * Returns an integer representing the numeric file descriptor for + * ios. + * + * $stdin.fileno #=> 0 + * $stdout.fileno #=> 1 + */ + +static VALUE +rb_io_fileno(VALUE io) +{ + rb_io_t *fptr; + int fd; + + GetOpenFile(io, fptr); + fd = fptr->fd; + return INT2FIX(fd); +} + + +/* + * call-seq: + * ios.pid => fixnum + * + * Returns the process ID of a child process associated with + * ios. This will be set by IO.popen. + * + * pipe = IO.popen("-") + * if pipe + * $stderr.puts "In parent, child pid is #{pipe.pid}" + * else + * $stderr.puts "In child, pid is #{$$}" + * end + * + * produces: + * + * In child, pid is 26209 + * In parent, child pid is 26209 + */ + +static VALUE +rb_io_pid(VALUE io) +{ + rb_io_t *fptr; + + GetOpenFile(io, fptr); + if (!fptr->pid) + return Qnil; + return PIDT2NUM(fptr->pid); +} + + +/* + * call-seq: + * ios.inspect => string + * + * Return a string describing this IO object. + */ + +static VALUE +rb_io_inspect(VALUE obj) +{ + rb_io_t *fptr; + const char *cname; + const char *st = ""; + + fptr = RFILE(rb_io_taint_check(obj))->fptr; + if (!fptr || NIL_P(fptr->pathv)) return rb_any_to_s(obj); + cname = rb_obj_classname(obj); + if (fptr->fd < 0) { + st = " (closed)"; + } + return rb_sprintf("#<%s:%s%s>", cname, RSTRING_PTR(fptr->pathv), st); +} + +/* + * call-seq: + * ios.to_io -> ios + * + * Returns ios. 
+ */ + +static VALUE +rb_io_to_io(VALUE io) +{ + return io; +} + +/* reading functions */ +static long +read_buffered_data(char *ptr, long len, rb_io_t *fptr) +{ + long n; + + n = READ_DATA_PENDING_COUNT(fptr); + if (n <= 0) return 0; + if (n > len) n = len; + MEMMOVE(ptr, fptr->rbuf+fptr->rbuf_off, char, n); + fptr->rbuf_off += n; + fptr->rbuf_len -= n; + return n; +} + +static long +io_fread(VALUE str, long offset, rb_io_t *fptr) +{ + long len = RSTRING_LEN(str) - offset; + long n = len; + int c; + + if (READ_DATA_PENDING(fptr) == 0) { + while (n > 0) { + again: + c = rb_read_internal(fptr->fd, RSTRING_PTR(str)+offset, n); + if (c == 0) { + io_set_eof(fptr); + break; + } + if (c < 0) { + if (rb_io_wait_readable(fptr->fd)) + goto again; + rb_sys_fail_path(fptr->pathv); + } + offset += c; + if ((n -= c) <= 0) break; + rb_thread_wait_fd(fptr->fd); + } + return len - n; + } + + while (n > 0) { + c = read_buffered_data(RSTRING_PTR(str)+offset, n, fptr); + if (c > 0) { + offset += c; + if ((n -= c) <= 0) break; + } + rb_thread_wait_fd(fptr->fd); + rb_io_check_closed(fptr); + if (io_fillbuf(fptr) < 0) { + break; + } + } + return len - n; +} + +#define SMALLBUF 100 + +static long +remain_size(rb_io_t *fptr) +{ + struct stat st; + off_t siz = READ_DATA_PENDING_COUNT(fptr); + off_t pos; + + if (fstat(fptr->fd, &st) == 0 && S_ISREG(st.st_mode) +#ifdef __BEOS__ + && (st.st_dev > 3) +#endif + ) + { + if (io_fflush(fptr) < 0) + rb_sys_fail(0); + pos = lseek(fptr->fd, 0, SEEK_CUR); + if (st.st_size >= pos && pos >= 0) { + siz += st.st_size - pos; + if (siz > LONG_MAX) { + rb_raise(rb_eIOError, "file too big for single read"); + } + } + } + else { + siz += BUFSIZ; + } + return (long)siz; +} + +static VALUE +io_enc_str(VALUE str, rb_io_t *fptr) +{ + OBJ_TAINT(str); + rb_enc_associate(str, io_read_encoding(fptr)); + return str; +} + +static void +make_readconv(rb_io_t *fptr, int size) +{ + if (!fptr->readconv) { + int ecflags; + VALUE ecopts; + const char *sname, *dname; + ecflags 
= fptr->encs.ecflags; + ecopts = fptr->encs.ecopts; + if (NEED_NEWLINE_DECORATOR_ON_READ(fptr)) + ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR; + if (fptr->encs.enc2) { + sname = rb_enc_name(fptr->encs.enc2); + dname = rb_enc_name(fptr->encs.enc); + } + else { + sname = dname = ""; + } + fptr->readconv = rb_econv_open_opts(sname, dname, ecflags, ecopts); + if (!fptr->readconv) + rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags)); + fptr->cbuf_off = 0; + fptr->cbuf_len = 0; + fptr->cbuf_capa = size < 1024 ? 1024 : size; + fptr->cbuf = ALLOC_N(char, fptr->cbuf_capa); + } +} + +static int +more_char(rb_io_t *fptr) +{ + const unsigned char *ss, *sp, *se; + unsigned char *ds, *dp, *de; + rb_econv_result_t res; + int putbackable; + int cbuf_len0; + + if (fptr->cbuf_len == fptr->cbuf_capa) + return 0; /* cbuf full */ + if (fptr->cbuf_len == 0) + fptr->cbuf_off = 0; + else if (fptr->cbuf_off + fptr->cbuf_len == fptr->cbuf_capa) { + memmove(fptr->cbuf, fptr->cbuf+fptr->cbuf_off, fptr->cbuf_len); + fptr->cbuf_off = 0; + } + + cbuf_len0 = fptr->cbuf_len; + + while (1) { + ss = sp = (const unsigned char *)fptr->rbuf + fptr->rbuf_off; + se = sp + fptr->rbuf_len; + ds = dp = (unsigned char *)fptr->cbuf + fptr->cbuf_off + fptr->cbuf_len; + de = (unsigned char *)fptr->cbuf + fptr->cbuf_capa; + res = rb_econv_convert(fptr->readconv, &sp, se, &dp, de, ECONV_PARTIAL_INPUT|ECONV_AFTER_OUTPUT); + fptr->rbuf_off += sp - ss; + fptr->rbuf_len -= sp - ss; + fptr->cbuf_len += dp - ds; + + putbackable = rb_econv_putbackable(fptr->readconv); + if (putbackable) { + rb_econv_putback(fptr->readconv, (unsigned char *)fptr->rbuf + fptr->rbuf_off - putbackable, putbackable); + fptr->rbuf_off -= putbackable; + fptr->rbuf_len += putbackable; + } + + rb_econv_check_error(fptr->readconv); + + if (cbuf_len0 != fptr->cbuf_len) + return 0; + + if (res == econv_finished) { + clear_readconv(fptr); + return -1; + } + + if (res == econv_source_buffer_empty) { + if (fptr->rbuf_len == 0) { + 
rb_thread_wait_fd(fptr->fd); + rb_io_check_closed(fptr); + if (io_fillbuf(fptr) == -1) { + ds = dp = (unsigned char *)fptr->cbuf + fptr->cbuf_off + fptr->cbuf_len; + de = (unsigned char *)fptr->cbuf + fptr->cbuf_capa; + res = rb_econv_convert(fptr->readconv, NULL, NULL, &dp, de, 0); + fptr->cbuf_len += dp - ds; + rb_econv_check_error(fptr->readconv); + } + } + } + } +} + +static VALUE +io_shift_cbuf(rb_io_t *fptr, int len, VALUE *strp) +{ + VALUE str; + if (NIL_P(*strp)) { + *strp = str = rb_str_new(fptr->cbuf+fptr->cbuf_off, len); + } + else { + size_t slen; + str = *strp; + slen = RSTRING_LEN(str); + rb_str_resize(str, RSTRING_LEN(str) + len); + memcpy(RSTRING_PTR(str)+slen, fptr->cbuf+fptr->cbuf_off, len); + } + fptr->cbuf_off += len; + fptr->cbuf_len -= len; + OBJ_TAINT(str); + rb_enc_associate(str, fptr->encs.enc); + /* xxx: set coderange */ + if (fptr->cbuf_len == 0) + fptr->cbuf_off = 0; + if (fptr->cbuf_off < fptr->cbuf_capa/2) { + memmove(fptr->cbuf, fptr->cbuf+fptr->cbuf_off, fptr->cbuf_len); + fptr->cbuf_off = 0; + } + return str; +} + +static VALUE +read_all(rb_io_t *fptr, long siz, VALUE str) +{ + long bytes; + long n; + long pos; + rb_encoding *enc; + int cr; + + if (NEED_READCONV(fptr)) { + if (NIL_P(str)) str = rb_str_new(NULL, 0); + else rb_str_set_len(str, 0); + make_readconv(fptr, 0); + while (1) { + if (fptr->cbuf_len) { + io_shift_cbuf(fptr, fptr->cbuf_len, &str); + } + if (more_char(fptr) == -1) { + return io_enc_str(str, fptr); + } + } + } + + bytes = 0; + pos = 0; + + enc = io_read_encoding(fptr); + cr = 0; + + if (siz == 0) siz = BUFSIZ; + if (NIL_P(str)) { + str = rb_str_new(0, siz); + } + else { + rb_str_resize(str, siz); + } + for (;;) { + READ_CHECK(fptr); + n = io_fread(str, bytes, fptr); + if (n == 0 && bytes == 0) { + break; + } + bytes += n; + if (cr != ENC_CODERANGE_BROKEN) + pos = rb_str_coderange_scan_restartable(RSTRING_PTR(str) + pos, RSTRING_PTR(str) + bytes, enc, &cr); + if (bytes < siz) break; + siz += BUFSIZ; + 
rb_str_resize(str, siz); + } + if (bytes != siz) rb_str_resize(str, bytes); + str = io_enc_str(str, fptr); + ENC_CODERANGE_SET(str, cr); + return str; +} + +void +rb_io_set_nonblock(rb_io_t *fptr) +{ + int oflags; +#ifdef F_GETFL + oflags = fcntl(fptr->fd, F_GETFL); + if (oflags == -1) { + rb_sys_fail_path(fptr->pathv); + } +#else + oflags = 0; +#endif + if ((oflags & O_NONBLOCK) == 0) { + oflags |= O_NONBLOCK; + if (fcntl(fptr->fd, F_SETFL, oflags) == -1) { + rb_sys_fail_path(fptr->pathv); + } + } +} + +static VALUE +io_getpartial(int argc, VALUE *argv, VALUE io, int nonblock) +{ + rb_io_t *fptr; + VALUE length, str; + long n, len; + + rb_scan_args(argc, argv, "11", &length, &str); + + if ((len = NUM2LONG(length)) < 0) { + rb_raise(rb_eArgError, "negative length %ld given", len); + } + + if (NIL_P(str)) { + str = rb_str_new(0, len); + } + else { + StringValue(str); + rb_str_modify(str); + rb_str_resize(str, len); + } + OBJ_TAINT(str); + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + + if (len == 0) + return str; + + if (!nonblock) + READ_CHECK(fptr); + if (RSTRING_LEN(str) != len) { + modified: + rb_raise(rb_eRuntimeError, "buffer string modified"); + } + n = read_buffered_data(RSTRING_PTR(str), len, fptr); + if (n <= 0) { + again: + if (RSTRING_LEN(str) != len) goto modified; + if (nonblock) { + rb_io_set_nonblock(fptr); + } + n = rb_read_internal(fptr->fd, RSTRING_PTR(str), len); + if (n < 0) { + if (!nonblock && rb_io_wait_readable(fptr->fd)) + goto again; + rb_sys_fail_path(fptr->pathv); + } + else if (n == 0) { + io_set_eof(fptr); + } + } + rb_str_resize(str, n); + + if (n == 0) + return Qnil; + else + return str; +} + +/* + * call-seq: + * ios.readpartial(maxlen) => string + * ios.readpartial(maxlen, outbuf) => outbuf + * + * Reads at most maxlen bytes from the I/O stream. + * It blocks only if ios has no data immediately available. + * It doesn't block if some data available. 
+ *  If the optional outbuf argument is present,
+ *  it must reference a String, which will receive the data.
+ *  It raises EOFError on end of file.
+ *
+ *  readpartial is designed for streams such as pipe, socket, tty, etc.
+ *  It blocks only when no data is immediately available.
+ *  This means that it blocks only when all of the following conditions hold.
+ *  * the buffer in the IO object is empty.
+ *  * the content of the stream is empty.
+ *  * the stream has not reached EOF.
+ *
+ *  When readpartial blocks, it waits for data or EOF on the stream.
+ *  If some data arrives, readpartial returns with the data.
+ *  If EOF is reached, readpartial raises EOFError.
+ *
+ *  When readpartial doesn't block, it returns or raises immediately.
+ *  If the buffer is not empty, it returns the data in the buffer.
+ *  Otherwise if the stream has some content,
+ *  it returns the data in the stream.
+ *  Otherwise if the stream has reached EOF, it raises EOFError.
+ *
+ *     r, w = IO.pipe           #               buffer          pipe content
+ *     w << "abc"               #               ""              "abc".
+ *     r.readpartial(4096)      #=> "abc"       ""              ""
+ *     r.readpartial(4096)      # blocks because buffer and pipe is empty.
+ *
+ *     r, w = IO.pipe           #               buffer          pipe content
+ *     w << "abc"               #               ""              "abc"
+ *     w.close                  #               ""              "abc" EOF
+ *     r.readpartial(4096)      #=> "abc"       ""              EOF
+ *     r.readpartial(4096)      # raises EOFError
+ *
+ *     r, w = IO.pipe           #               buffer          pipe content
+ *     w << "abc\ndef\n"        #               ""              "abc\ndef\n"
+ *     r.gets                   #=> "abc\n"     "def\n"         ""
+ *     w << "ghi\n"             #               "def\n"         "ghi\n"
+ *     r.readpartial(4096)      #=> "def\n"     ""              "ghi\n"
+ *     r.readpartial(4096)      #=> "ghi\n"     ""              ""
+ *
+ *  Note that readpartial behaves similar to sysread.
+ *  The differences are:
+ *  * If the buffer is not empty, read from the buffer instead of "sysread for buffered IO (IOError)".
+ *  * It doesn't cause Errno::EWOULDBLOCK and Errno::EINTR.  When readpartial meets EWOULDBLOCK and EINTR by read system call, readpartial retries the system call.
+ *
+ *  The latter means that readpartial is nonblocking-flag insensitive.
+ * It blocks on the situation IO#sysread causes Errno::EWOULDBLOCK as if the fd is blocking mode. + * + */ + +static VALUE +io_readpartial(int argc, VALUE *argv, VALUE io) +{ + VALUE ret; + + ret = io_getpartial(argc, argv, io, 0); + if (NIL_P(ret)) + rb_eof_error(); + else + return ret; +} + +/* + * call-seq: + * ios.read_nonblock(maxlen) => string + * ios.read_nonblock(maxlen, outbuf) => outbuf + * + * Reads at most maxlen bytes from ios using + * the read(2) system call after O_NONBLOCK is set for + * the underlying file descriptor. + * + * If the optional outbuf argument is present, + * it must reference a String, which will receive the data. + * + * read_nonblock just calls the read(2) system call. + * It causes all errors the read(2) system call causes: Errno::EWOULDBLOCK, Errno::EINTR, etc. + * The caller should care such errors. + * + * read_nonblock causes EOFError on EOF. + * + * If the read buffer is not empty, + * read_nonblock reads from the buffer like readpartial. + * In this case, the read(2) system call is not called. + * + * When read_nonblock raises EWOULDBLOCK, + * read_nonblock should not be called + * until io is readable for avoiding busy loop. + * This can be done as follows. + * + * begin + * result = io.read_nonblock(maxlen) + * rescue Errno::EWOULDBLOCK, Errno::EAGAIN, Errno::EINTR + * IO.select([io]) + * retry + * end + * + * Note that this is identical to readpartial + * except the non-blocking flag is set. + */ + +static VALUE +io_read_nonblock(int argc, VALUE *argv, VALUE io) +{ + VALUE ret; + + ret = io_getpartial(argc, argv, io, 1); + if (NIL_P(ret)) + rb_eof_error(); + else + return ret; +} + +/* + * call-seq: + * ios.write_nonblock(string) => integer + * + * Writes the given string to ios using + * the write(2) system call after O_NONBLOCK is set for + * the underlying file descriptor. + * + * It returns the number of bytes written. + * + * write_nonblock just calls the write(2) system call. 
+ * It causes all errors the write(2) system call causes: Errno::EWOULDBLOCK, Errno::EINTR, etc. + * The result may also be smaller than string.length (partial write). + * The caller should care such errors and partial write. + * + * If the write buffer is not empty, it is flushed at first. + * + * When write_nonblock raises EWOULDBLOCK, + * write_nonblock should not be called + * until io is writable for avoiding busy loop. + * This can be done as follows. + * + * begin + * result = io.write_nonblock(string) + * rescue Errno::EWOULDBLOCK, Errno::EAGAIN, Errno::EINTR + * IO.select(nil, [io]) + * retry + * end + * + * Note that this doesn't guarantee to write all data in string. + * The length written is reported as result and it should be checked later. + * + */ + +static VALUE +rb_io_write_nonblock(VALUE io, VALUE str) +{ + rb_io_t *fptr; + long n; + + rb_secure(4); + if (TYPE(str) != T_STRING) + str = rb_obj_as_string(str); + + io = GetWriteIO(io); + GetOpenFile(io, fptr); + rb_io_check_writable(fptr); + + if (io_fflush(fptr) < 0) + rb_sys_fail(0); + + rb_io_set_nonblock(fptr); + n = write(fptr->fd, RSTRING_PTR(str), RSTRING_LEN(str)); + + if (n == -1) rb_sys_fail_path(fptr->pathv); + + return LONG2FIX(n); +} + +/* + * call-seq: + * ios.read([length [, buffer]]) => string, buffer, or nil + * + * Reads at most length bytes from the I/O stream, or to the + * end of file if length is omitted or is nil. + * length must be a non-negative integer or nil. + * If the optional buffer argument is present, it must reference + * a String, which will receive the data. + * + * At end of file, it returns nil or "" + * depend on length. + * ios.read() and + * ios.read(nil) returns "". + * ios.read(positive-integer) returns nil. + * + * ios.read(0) returns "". 
+ * + * f = File.new("testfile") + * f.read(16) #=> "This is line one" + */ + +static VALUE +io_read(int argc, VALUE *argv, VALUE io) +{ + rb_io_t *fptr; + long n, len; + VALUE length, str; + + rb_scan_args(argc, argv, "02", &length, &str); + + if (NIL_P(length)) { + if (!NIL_P(str)) StringValue(str); + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + return read_all(fptr, remain_size(fptr), str); + } + len = NUM2LONG(length); + if (len < 0) { + rb_raise(rb_eArgError, "negative length %ld given", len); + } + + if (NIL_P(str)) { + str = rb_str_new(0, len); + } + else { + StringValue(str); + rb_str_modify(str); + rb_str_resize(str,len); + } + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + if (len == 0) return str; + + READ_CHECK(fptr); + if (RSTRING_LEN(str) != len) { + rb_raise(rb_eRuntimeError, "buffer string modified"); + } + n = io_fread(str, 0, fptr); + if (n == 0) { + if (fptr->fd < 0) return Qnil; + rb_str_resize(str, 0); + return Qnil; + } + rb_str_resize(str, n); + OBJ_TAINT(str); + + return str; +} + +static void +rscheck(const char *rsptr, long rslen, VALUE rs) +{ + if (!rs) return; + if (RSTRING_PTR(rs) != rsptr && RSTRING_LEN(rs) != rslen) + rb_raise(rb_eRuntimeError, "rs modified"); +} + +static int +appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp) +{ + VALUE str = *strp; + long limit = *lp; + + if (NEED_READCONV(fptr)) { + make_readconv(fptr, 0); + do { + const char *p, *e; + int searchlen; + if (fptr->cbuf_len) { + p = fptr->cbuf+fptr->cbuf_off; + searchlen = fptr->cbuf_len; + if (0 < limit && limit < searchlen) + searchlen = limit; + e = memchr(p, delim, searchlen); + if (e) { + if (NIL_P(str)) + *strp = str = rb_str_new(p, e-p+1); + else + rb_str_buf_cat(str, p, e-p+1); + fptr->cbuf_off += e-p+1; + fptr->cbuf_len -= e-p+1; + limit -= e-p+1; + *lp = limit; + return delim; + } + + if (NIL_P(str)) + *strp = str = rb_str_new(p, searchlen); + else + rb_str_buf_cat(str, p, searchlen); + fptr->cbuf_off += searchlen; + 
fptr->cbuf_len -= searchlen; + limit -= searchlen; + + if (limit == 0) { + *lp = limit; + return (unsigned char)RSTRING_PTR(str)[RSTRING_LEN(str)-1]; + } + } + } while (more_char(fptr) != -1); + *lp = limit; + return EOF; + } + + do { + long pending = READ_DATA_PENDING_COUNT(fptr); + if (pending > 0) { + const char *p = READ_DATA_PENDING_PTR(fptr); + const char *e; + long last; + + if (limit > 0 && pending > limit) pending = limit; + e = memchr(p, delim, pending); + if (e) pending = e - p + 1; + if (!NIL_P(str)) { + last = RSTRING_LEN(str); + rb_str_resize(str, last + pending); + } + else { + last = 0; + *strp = str = rb_str_buf_new(pending); + rb_str_set_len(str, pending); + } + read_buffered_data(RSTRING_PTR(str) + last, pending, fptr); /* must not fail */ + limit -= pending; + *lp = limit; + if (e) return delim; + if (limit == 0) + return (unsigned char)RSTRING_PTR(str)[RSTRING_LEN(str)-1]; + } + rb_thread_wait_fd(fptr->fd); + rb_io_check_closed(fptr); + } while (io_fillbuf(fptr) >= 0); + *lp = limit; + return EOF; +} + +static inline int +swallow(rb_io_t *fptr, int term) +{ + do { + long cnt; + while ((cnt = READ_DATA_PENDING_COUNT(fptr)) > 0) { + char buf[1024]; + const char *p = READ_DATA_PENDING_PTR(fptr); + int i; + if (cnt > sizeof buf) cnt = sizeof buf; + if (*p != term) return Qtrue; + i = cnt; + while (--i && *++p == term); + if (!read_buffered_data(buf, cnt - i, fptr)) /* must not fail */ + rb_sys_fail_path(fptr->pathv); + } + rb_thread_wait_fd(fptr->fd); + rb_io_check_closed(fptr); + } while (io_fillbuf(fptr) == 0); + return Qfalse; +} + +static VALUE +rb_io_getline_fast(rb_io_t *fptr, rb_encoding *enc) +{ + VALUE str = Qnil; + int len = 0; + long pos = 0; + int cr = 0; + + for (;;) { + long pending = READ_DATA_PENDING_COUNT(fptr); + + if (pending > 0) { + const char *p = READ_DATA_PENDING_PTR(fptr); + const char *e; + + e = memchr(p, '\n', pending); + if (e) { + pending = e - p + 1; + } + if (NIL_P(str)) { + str = rb_str_new(p, pending); + 
fptr->rbuf_off += pending; + fptr->rbuf_len -= pending; + } + else { + rb_str_resize(str, len + pending); + read_buffered_data(RSTRING_PTR(str)+len, pending, fptr); + } + len += pending; + if (cr != ENC_CODERANGE_BROKEN) + pos = rb_str_coderange_scan_restartable(RSTRING_PTR(str) + pos, RSTRING_PTR(str) + len, enc, &cr); + if (e) break; + } + rb_thread_wait_fd(fptr->fd); + rb_io_check_closed(fptr); + if (io_fillbuf(fptr) < 0) { + if (NIL_P(str)) return Qnil; + break; + } + } + + str = io_enc_str(str, fptr); + ENC_CODERANGE_SET(str, cr); + fptr->lineno++; + ARGF.last_lineno = fptr->lineno; + + return str; +} + +static void +prepare_getline_args(int argc, VALUE *argv, VALUE *rsp, long *limit, VALUE io) +{ + VALUE rs = rb_rs, lim = Qnil; + rb_io_t *fptr; + + if (argc == 1) { + VALUE tmp = Qnil; + + if (NIL_P(argv[0]) || !NIL_P(tmp = rb_check_string_type(argv[0]))) { + rs = tmp; + } + else { + lim = argv[0]; + } + } + else if (2 <= argc) { + rb_scan_args(argc, argv, "2", &rs, &lim); + if (!NIL_P(rs)) + StringValue(rs); + } + if (!NIL_P(rs)) { + rb_encoding *enc_rs, *enc_io; + + GetOpenFile(io, fptr); + enc_rs = rb_enc_get(rs); + enc_io = io_read_encoding(fptr); + if (enc_io != enc_rs && + (rb_enc_str_coderange(rs) != ENC_CODERANGE_7BIT || + !rb_enc_asciicompat(enc_io))) { + if (rs == rb_default_rs) { + rs = rb_enc_str_new(0, 0, enc_io); + rb_str_buf_cat_ascii(rs, "\n"); + } + else { + rb_raise(rb_eArgError, "encoding mismatch: %s IO with %s RS", + rb_enc_name(enc_io), + rb_enc_name(enc_rs)); + } + } + } + *rsp = rs; + *limit = NIL_P(lim) ? 
-1L : NUM2LONG(lim); +} + +static VALUE +rb_io_getline_1(VALUE rs, long limit, VALUE io) +{ + VALUE str = Qnil; + rb_io_t *fptr; + int nolimit = 0; + rb_encoding *enc; + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + if (NIL_P(rs) && limit < 0) { + str = read_all(fptr, 0, Qnil); + if (RSTRING_LEN(str) == 0) return Qnil; + } + else if (limit == 0) { + return rb_enc_str_new(0, 0, io_read_encoding(fptr)); + } + else if (rs == rb_default_rs && limit < 0 && !NEED_READCONV(fptr) && + rb_enc_asciicompat(enc = io_read_encoding(fptr))) { + return rb_io_getline_fast(fptr, enc); + } + else { + int c, newline = -1; + const char *rsptr = 0; + long rslen = 0; + int rspara = 0; + int extra_limit = 16; + + if (!NIL_P(rs)) { + rslen = RSTRING_LEN(rs); + if (rslen == 0) { + rsptr = "\n\n"; + rslen = 2; + rspara = 1; + swallow(fptr, '\n'); + rs = 0; + } + else { + rsptr = RSTRING_PTR(rs); + } + newline = (unsigned char)rsptr[rslen - 1]; + } + + /* MS - Optimisation */ + enc = io_read_encoding(fptr); + while ((c = appendline(fptr, newline, &str, &limit)) != EOF) { + const char *s, *p, *pp, *e; + + if (c == newline) { + if (RSTRING_LEN(str) < rslen) continue; + s = RSTRING_PTR(str); + e = s + RSTRING_LEN(str); + p = e - rslen; + pp = rb_enc_left_char_head(s, p, e, enc); + if (pp != p) continue; + if (!rspara) rscheck(rsptr, rslen, rs); + if (memcmp(p, rsptr, rslen) == 0) break; + } + if (limit == 0) { + s = RSTRING_PTR(str); + p = s + RSTRING_LEN(str); + pp = rb_enc_left_char_head(s, p-1, p, enc); + if (extra_limit && + MBCLEN_NEEDMORE_P(rb_enc_precise_mbclen(pp, p, enc))) { + /* relax the limit while incomplete character. 
+ * extra_limit limits the relax length */ + limit = 1; + extra_limit--; + } + else { + nolimit = 1; + break; + } + } + } + + if (rspara) { + if (c != EOF) { + swallow(fptr, '\n'); + } + } + if (!NIL_P(str)) + str = io_enc_str(str, fptr); + } + + if (!NIL_P(str)) { + if (!nolimit) { + fptr->lineno++; + ARGF.last_lineno = fptr->lineno; + } + } + + return str; +} + +static VALUE +rb_io_getline(int argc, VALUE *argv, VALUE io) +{ + VALUE rs; + long limit; + + prepare_getline_args(argc, argv, &rs, &limit, io); + return rb_io_getline_1(rs, limit, io); +} + +VALUE +rb_io_gets(VALUE io) +{ + return rb_io_getline_1(rb_default_rs, -1, io); +} + +/* + * call-seq: + * ios.gets(sep=$/) => string or nil + * ios.gets(limit) => string or nil + * ios.gets(sep, limit) => string or nil + * + * Reads the next ``line'' from the I/O stream; lines are separated by + * sep. A separator of nil reads the entire + * contents, and a zero-length separator reads the input a paragraph at + * a time (two successive newlines in the input separate paragraphs). + * The stream must be opened for reading or an IOError + * will be raised. The line read in will be returned and also assigned + * to $_. Returns nil if called at end of + * file. If the first argument is an integer, or optional second + * argument is given, the returning string would not be longer than the + * given value in bytes. + * + * File.new("testfile").gets #=> "This is line one\n" + * $_ #=> "This is line one\n" + */ + +static VALUE +rb_io_gets_m(int argc, VALUE *argv, VALUE io) +{ + VALUE str; + + str = rb_io_getline(argc, argv, io); + rb_lastline_set(str); + + return str; +} + +/* + * call-seq: + * ios.lineno => integer + * + * Returns the current line number in ios. The stream must be + * opened for reading. lineno counts the number of times + * gets is called, rather than the number of newlines + * encountered. The two values will differ if gets is + * called with a separator other than newline. See also the + * $. variable. 
+ * + * f = File.new("testfile") + * f.lineno #=> 0 + * f.gets #=> "This is line one\n" + * f.lineno #=> 1 + * f.gets #=> "This is line two\n" + * f.lineno #=> 2 + */ + +static VALUE +rb_io_lineno(VALUE io) +{ + rb_io_t *fptr; + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + return INT2NUM(fptr->lineno); +} + +/* + * call-seq: + * ios.lineno = integer => integer + * + * Manually sets the current line number to the given value. + * $. is updated only on the next read. + * + * f = File.new("testfile") + * f.gets #=> "This is line one\n" + * $. #=> 1 + * f.lineno = 1000 + * f.lineno #=> 1000 + * $. #=> 1 # lineno of last read + * f.gets #=> "This is line two\n" + * $. #=> 1001 # lineno of last read + */ + +static VALUE +rb_io_set_lineno(VALUE io, VALUE lineno) +{ + rb_io_t *fptr; + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + fptr->lineno = NUM2INT(lineno); + return lineno; +} + +/* + * call-seq: + * ios.readline(sep=$/) => string + * ios.readline(limit) => string + * ios.readline(sep, limit) => string + * + * Reads a line as with IO#gets, but raises an + * EOFError on end of file. + */ + +static VALUE +rb_io_readline(int argc, VALUE *argv, VALUE io) +{ + VALUE line = rb_io_gets_m(argc, argv, io); + + if (NIL_P(line)) { + rb_eof_error(); + } + return line; +} + +/* + * call-seq: + * ios.readlines(sep=$/) => array + * ios.readlines(limit) => array + * ios.readlines(sep, limit) => array + * + * Reads all of the lines in ios, and returns them in + * anArray. Lines are separated by the optional sep. If + * sep is nil, the rest of the stream is returned + * as a single record. If the first argument is an integer, or + * optional second argument is given, the returning string would not be + * longer than the given value in bytes. The stream must be opened for + * reading or an IOError will be raised. 
+ * + * f = File.new("testfile") + * f.readlines[0] #=> "This is line one\n" + */ + +static VALUE +rb_io_readlines(int argc, VALUE *argv, VALUE io) +{ + VALUE line, ary, rs; + long limit; + + prepare_getline_args(argc, argv, &rs, &limit, io); + ary = rb_ary_new(); + while (!NIL_P(line = rb_io_getline_1(rs, limit, io))) { + rb_ary_push(ary, line); + } + return ary; +} + +/* + * call-seq: + * ios.each(sep=$/) {|line| block } => ios + * ios.each(limit) {|line| block } => ios + * ios.each(sep,limit) {|line| block } => ios + * ios.each_line(sep=$/) {|line| block } => ios + * ios.each_line(limit) {|line| block } => ios + * ios.each_line(sep,limit) {|line| block } => ios + * + * Executes the block for every line in ios, where lines are + * separated by sep. ios must be opened for + * reading or an IOError will be raised. + * + * f = File.new("testfile") + * f.each {|line| puts "#{f.lineno}: #{line}" } + * + * produces: + * + * 1: This is line one + * 2: This is line two + * 3: This is line three + * 4: And so on... + */ + +static VALUE +rb_io_each_line(int argc, VALUE *argv, VALUE io) +{ + VALUE str, rs; + long limit; + + RETURN_ENUMERATOR(io, argc, argv); + prepare_getline_args(argc, argv, &rs, &limit, io); + while (!NIL_P(str = rb_io_getline_1(rs, limit, io))) { + rb_yield(str); + } + return io; +} + +/* + * call-seq: + * ios.each_byte {|byte| block } => ios + * + * Calls the given block once for each byte (0..255) in ios, + * passing the byte as an argument. The stream must be opened for + * reading or an IOError will be raised. 
+ * + * f = File.new("testfile") + * checksum = 0 + * f.each_byte {|x| checksum ^= x } #=> # + * checksum #=> 12 + */ + +static VALUE +rb_io_each_byte(VALUE io) +{ + rb_io_t *fptr; + char *p, *e; + + RETURN_ENUMERATOR(io, 0, 0); + GetOpenFile(io, fptr); + + for (;;) { + p = fptr->rbuf+fptr->rbuf_off; + e = p + fptr->rbuf_len; + while (p < e) { + fptr->rbuf_off++; + fptr->rbuf_len--; + rb_yield(INT2FIX(*p & 0xff)); + p++; + errno = 0; + } + rb_io_check_readable(fptr); + READ_CHECK(fptr); + if (io_fillbuf(fptr) < 0) { + break; + } + } + return io; +} + +static VALUE +io_getc(rb_io_t *fptr, rb_encoding *enc) +{ + int r, n, cr = 0; + VALUE str; + + if (NEED_READCONV(fptr)) { + VALUE str = Qnil; + + make_readconv(fptr, 0); + + while (1) { + if (fptr->cbuf_len) { + if (fptr->encs.enc) + r = rb_enc_precise_mbclen(fptr->cbuf+fptr->cbuf_off, + fptr->cbuf+fptr->cbuf_off+fptr->cbuf_len, + fptr->encs.enc); + else + r = ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1); + if (!MBCLEN_NEEDMORE_P(r)) + break; + if (fptr->cbuf_len == fptr->cbuf_capa) { + rb_raise(rb_eIOError, "too long character"); + } + } + + if (more_char(fptr) == -1) { + if (fptr->cbuf_len == 0) + return Qnil; + /* return an incomplete character just before EOF */ + return io_shift_cbuf(fptr, fptr->cbuf_len, &str); + } + } + if (MBCLEN_INVALID_P(r)) { + r = rb_enc_mbclen(fptr->cbuf+fptr->cbuf_off, + fptr->cbuf+fptr->cbuf_off+fptr->cbuf_len, + fptr->encs.enc); + return io_shift_cbuf(fptr, r, &str); + } + return io_shift_cbuf(fptr, MBCLEN_CHARFOUND_LEN(r), &str); + } + + if (io_fillbuf(fptr) < 0) { + return Qnil; + } + if (rb_enc_asciicompat(enc) && ISASCII(fptr->rbuf[fptr->rbuf_off])) { + str = rb_str_new(fptr->rbuf+fptr->rbuf_off, 1); + fptr->rbuf_off += 1; + fptr->rbuf_len -= 1; + cr = ENC_CODERANGE_7BIT; + } + else { + r = rb_enc_precise_mbclen(fptr->rbuf+fptr->rbuf_off, fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc); + if (MBCLEN_CHARFOUND_P(r) && + (n = MBCLEN_CHARFOUND_LEN(r)) <= fptr->rbuf_len) { + str = 
rb_str_new(fptr->rbuf+fptr->rbuf_off, n); + fptr->rbuf_off += n; + fptr->rbuf_len -= n; + cr = ENC_CODERANGE_VALID; + } + else if (MBCLEN_NEEDMORE_P(r)) { + str = rb_str_new(fptr->rbuf+fptr->rbuf_off, fptr->rbuf_len); + fptr->rbuf_len = 0; + getc_needmore: + if (io_fillbuf(fptr) != -1) { + rb_str_cat(str, fptr->rbuf+fptr->rbuf_off, 1); + fptr->rbuf_off++; + fptr->rbuf_len--; + r = rb_enc_precise_mbclen(RSTRING_PTR(str), RSTRING_PTR(str)+RSTRING_LEN(str), enc); + if (MBCLEN_NEEDMORE_P(r)) { + goto getc_needmore; + } + else if (MBCLEN_CHARFOUND_P(r)) { + cr = ENC_CODERANGE_VALID; + } + } + } + else { + str = rb_str_new(fptr->rbuf+fptr->rbuf_off, 1); + fptr->rbuf_off++; + fptr->rbuf_len--; + } + } + if (!cr) cr = ENC_CODERANGE_BROKEN; + str = io_enc_str(str, fptr); + ENC_CODERANGE_SET(str, cr); + return str; +} + +/* + * call-seq: + * ios.each_char {|c| block } => ios + * + * Calls the given block once for each character in ios, + * passing the character as an argument. The stream must be opened for + * reading or an IOError will be raised. + * + * f = File.new("testfile") + * f.each_char {|c| print c, ' ' } #=> # + */ + +static VALUE +rb_io_each_char(VALUE io) +{ + rb_io_t *fptr; + rb_encoding *enc; + VALUE c; + + RETURN_ENUMERATOR(io, 0, 0); + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + + enc = io_input_encoding(fptr); + READ_CHECK(fptr); + while (!NIL_P(c = io_getc(fptr, enc))) { + rb_yield(c); + } + return io; +} + + + +/* + * call-seq: + * ios.lines(sep=$/) => anEnumerator + * ios.lines(limit) => anEnumerator + * ios.lines(sep, limit) => anEnumerator + * + * Returns an enumerator that gives each line in ios. + * The stream must be opened for reading or an IOError + * will be raised. 
+ * + * f = File.new("testfile") + * f.lines.to_a #=> ["foo\n", "bar\n"] + * f.rewind + * f.lines.sort #=> ["bar\n", "foo\n"] + */ + +static VALUE +rb_io_lines(int argc, VALUE *argv, VALUE io) +{ + return rb_enumeratorize(io, ID2SYM(rb_intern("each_line")), argc, argv); +} + +/* + * call-seq: + * ios.bytes => anEnumerator + * + * Returns an enumerator that gives each byte (0..255) in ios. + * The stream must be opened for reading or an IOError + * will be raised. + * + * f = File.new("testfile") + * f.bytes.to_a #=> [104, 101, 108, 108, 111] + * f.rewind + * f.bytes.sort #=> [101, 104, 108, 108, 111] + */ + +static VALUE +rb_io_bytes(VALUE io) +{ + return rb_enumeratorize(io, ID2SYM(rb_intern("each_byte")), 0, 0); +} + +/* + * call-seq: + * ios.chars => anEnumerator + * + * Returns an enumerator that gives each character in ios. + * The stream must be opened for reading or an IOError + * will be raised. + * + * f = File.new("testfile") + * f.chars.to_a #=> ["h", "e", "l", "l", "o"] + * f.rewind + * f.chars.sort #=> ["e", "h", "l", "l", "o"] + */ + +static VALUE +rb_io_chars(VALUE io) +{ + return rb_enumeratorize(io, ID2SYM(rb_intern("each_char")), 0, 0); +} + +/* + * call-seq: + * ios.getc => string or nil + * + * Reads a one-character string from ios. Returns + * nil if called at end of file. + * + * f = File.new("testfile") + * f.getc #=> "h" + * f.getc #=> "e" + */ + +static VALUE +rb_io_getc(VALUE io) +{ + rb_io_t *fptr; + rb_encoding *enc; + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + + enc = io_input_encoding(fptr); + READ_CHECK(fptr); + return io_getc(fptr, enc); +} + +/* + * call-seq: + * ios.readchar => string + * + * Reads a one-character string from ios. Raises an + * EOFError on end of file. 
+ * + * f = File.new("testfile") + * f.readchar #=> "h" + * f.readchar #=> "e" + */ + +static VALUE +rb_io_readchar(VALUE io) +{ + VALUE c = rb_io_getc(io); + + if (NIL_P(c)) { + rb_eof_error(); + } + return c; +} + +/* + * call-seq: + * ios.getbyte => fixnum or nil + * + * Gets the next 8-bit byte (0..255) from ios. Returns + * nil if called at end of file. + * + * f = File.new("testfile") + * f.getbyte #=> 84 + * f.getbyte #=> 104 + */ + +VALUE +rb_io_getbyte(VALUE io) +{ + rb_io_t *fptr; + int c; + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + READ_CHECK(fptr); + if (fptr->fd == 0 && (fptr->mode & FMODE_TTY) && TYPE(rb_stdout) == T_FILE) { + rb_io_t *ofp; + GetOpenFile(rb_stdout, ofp); + if (ofp->mode & FMODE_TTY) { + rb_io_flush(rb_stdout); + } + } + if (io_fillbuf(fptr) < 0) { + return Qnil; + } + fptr->rbuf_off++; + fptr->rbuf_len--; + c = (unsigned char)fptr->rbuf[fptr->rbuf_off-1]; + return INT2FIX(c & 0xff); +} + +/* + * call-seq: + * ios.readbyte => fixnum + * + * Reads a byte as with IO#getbyte, but raises an + * EOFError on end of file. + */ + +static VALUE +rb_io_readbyte(VALUE io) +{ + VALUE c = rb_io_getbyte(io); + + if (NIL_P(c)) { + rb_eof_error(); + } + return c; +} + +/* + * call-seq: + * ios.ungetbyte(string) => nil + * ios.ungetbyte(integer) => nil + * + * Pushes back bytes (passed as a parameter) onto ios, + * such that a subsequent buffered read will return it. Only one byte + * may be pushed back before a subsequent read operation (that is, + * you will be able to read only the last of several bytes that have been pushed + * back). Has no effect with unbuffered reads (such as IO#sysread). 
+ * + * f = File.new("testfile") #=> # + * b = f.getbyte #=> 0x38 + * f.ungetbyte(b) #=> nil + * f.getbyte #=> 0x38 + */ + +VALUE +rb_io_ungetbyte(VALUE io, VALUE b) +{ + rb_io_t *fptr; + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + io_unset_eof(fptr); + if (NIL_P(b)) return Qnil; + if (FIXNUM_P(b)) { + char cc = FIX2INT(b); + b = rb_str_new(&cc, 1); + } + else { + SafeStringValue(b); + } + io_ungetbyte(b, fptr); + return Qnil; +} + +/* + * call-seq: + * ios.ungetc(string) => nil + * + * Pushes back one character (passed as a parameter) onto ios, + * such that a subsequent buffered read will return it. Only one character + * may be pushed back before a subsequent read operation (that is, + * you will be able to read only the last of several characters that have been pushed + * back). Has no effect with unbuffered reads (such as IO#sysread). + * + * f = File.new("testfile") #=> # + * c = f.getc #=> "8" + * f.ungetc(c) #=> nil + * f.getc #=> "8" + */ + +VALUE +rb_io_ungetc(VALUE io, VALUE c) +{ + rb_io_t *fptr; + long len; + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + io_unset_eof(fptr); + if (NIL_P(c)) return Qnil; + if (FIXNUM_P(c)) { + int cc = FIX2INT(c); + rb_encoding *enc = io_read_encoding(fptr); + char buf[16]; + + c = rb_str_new(buf, rb_enc_mbcput(cc, buf, enc)); + } + else { + SafeStringValue(c); + } + if (NEED_READCONV(fptr)) { + len = RSTRING_LEN(c); + make_readconv(fptr, len); + if (fptr->cbuf_capa - fptr->cbuf_len < len) + rb_raise(rb_eIOError, "ungetc failed"); + if (fptr->cbuf_off < len) { + MEMMOVE(fptr->cbuf+fptr->cbuf_capa-fptr->cbuf_len, + fptr->cbuf+fptr->cbuf_off, + char, fptr->cbuf_len); + fptr->cbuf_off = fptr->cbuf_capa-fptr->cbuf_len; + } + fptr->cbuf_off -= len; + fptr->cbuf_len += len; + MEMMOVE(fptr->cbuf+fptr->cbuf_off, RSTRING_PTR(c), char, len); + } + else { + io_ungetbyte(c, fptr); + } + return Qnil; +} + +/* + * call-seq: + * ios.isatty => true or false + * ios.tty? 
=> true or false + * + * Returns true if ios is associated with a + * terminal device (tty), false otherwise. + * + * File.new("testfile").isatty #=> false + * File.new("/dev/tty").isatty #=> true + */ + +static VALUE +rb_io_isatty(VALUE io) +{ + rb_io_t *fptr; + + GetOpenFile(io, fptr); + if (isatty(fptr->fd) == 0) + return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * ios.close_on_exec? => true or false + * + * Returns true if ios will be closed on exec. + * + * f = open("/dev/null") + * f.close_on_exec? #=> false + * f.close_on_exec = true + * f.close_on_exec? #=> true + * f.close_on_exec = false + * f.close_on_exec? #=> false + */ + +static VALUE +rb_io_close_on_exec_p(VALUE io) +{ +#if defined(HAVE_FCNTL) && defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC) + rb_io_t *fptr; + VALUE write_io; + int fd, ret; + + write_io = GetWriteIO(io); + if (io != write_io) { + GetOpenFile(write_io, fptr); + if (fptr && 0 <= (fd = fptr->fd)) { + if ((ret = fcntl(fd, F_GETFD)) == -1) rb_sys_fail_path(fptr->pathv); + if (!(ret & FD_CLOEXEC)) return Qfalse; + } + } + + GetOpenFile(io, fptr); + if (fptr && 0 <= (fd = fptr->fd)) { + if ((ret = fcntl(fd, F_GETFD)) == -1) rb_sys_fail_path(fptr->pathv); + if (!(ret & FD_CLOEXEC)) return Qfalse; + } + return Qtrue; +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + +/* + * call-seq: + * ios.close_on_exec = bool => true or false + * + * Sets a close-on-exec flag. + * + * f = open("/dev/null") + * f.close_on_exec = true + * system("cat", "/proc/self/fd/#{f.fileno}") # cat: /proc/self/fd/3: No such file or directory + * f.closed? #=> false + */ + +static VALUE +rb_io_set_close_on_exec(VALUE io, VALUE arg) +{ +#if defined(HAVE_FCNTL) && defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC) + int flag = RTEST(arg) ? 
FD_CLOEXEC : 0; + rb_io_t *fptr; + VALUE write_io; + int fd, ret; + + write_io = GetWriteIO(io); + if (io != write_io) { + GetOpenFile(write_io, fptr); + if (fptr && 0 <= (fd = fptr->fd)) { + if ((ret = fcntl(fptr->fd, F_GETFD)) == -1) rb_sys_fail_path(fptr->pathv); + if ((ret & FD_CLOEXEC) != flag) { + ret = (ret & ~FD_CLOEXEC) | flag; + ret = fcntl(fd, F_SETFD, ret); + if (ret == -1) rb_sys_fail_path(fptr->pathv); + } + } + + } + + GetOpenFile(io, fptr); + if (fptr && 0 <= (fd = fptr->fd)) { + if ((ret = fcntl(fd, F_GETFD)) == -1) rb_sys_fail_path(fptr->pathv); + if ((ret & FD_CLOEXEC) != flag) { + ret = (ret & ~FD_CLOEXEC) | flag; + ret = fcntl(fd, F_SETFD, ret); + if (ret == -1) rb_sys_fail_path(fptr->pathv); + } + } +#else + rb_notimplement(); +#endif + return Qnil; +} + +#define FMODE_PREP (1<<16) +#define IS_PREP_STDIO(f) ((f)->mode & FMODE_PREP) +#define PREP_STDIO_NAME(f) (RSTRING_PTR((f)->pathv)) + +static VALUE +finish_writeconv(rb_io_t *fptr, int noalloc) +{ + unsigned char *ds, *dp, *de; + rb_econv_result_t res; + + if (!fptr->wbuf) { + unsigned char buf[1024]; + int r; + + res = econv_destination_buffer_full; + while (res == econv_destination_buffer_full) { + ds = dp = buf; + de = buf + sizeof(buf); + res = rb_econv_convert(fptr->writeconv, NULL, NULL, &dp, de, 0); + while (dp-ds) { + retry: + r = rb_write_internal(fptr->fd, ds, dp-ds); + if (r == dp-ds) + break; + if (0 <= r) { + ds += r; + } + if (rb_io_wait_writable(fptr->fd)) { + if (fptr->fd < 0) + return noalloc ? Qtrue : rb_exc_new3(rb_eIOError, rb_str_new_cstr("closed stream")); + goto retry; + } + return noalloc ? Qtrue : INT2NUM(errno); + } + if (res == econv_invalid_byte_sequence || + res == econv_incomplete_input || + res == econv_undefined_conversion) { + return noalloc ? 
Qtrue : rb_econv_make_exception(fptr->writeconv); + } + } + + return Qnil; + } + + res = econv_destination_buffer_full; + while (res == econv_destination_buffer_full) { + if (fptr->wbuf_len == fptr->wbuf_capa) { + if (io_fflush(fptr) < 0) + return noalloc ? Qtrue : INT2NUM(errno); + } + + ds = dp = (unsigned char *)fptr->wbuf + fptr->wbuf_off + fptr->wbuf_len; + de = (unsigned char *)fptr->wbuf + fptr->wbuf_capa; + res = rb_econv_convert(fptr->writeconv, NULL, NULL, &dp, de, 0); + fptr->wbuf_len += dp - ds; + if (res == econv_invalid_byte_sequence || + res == econv_incomplete_input || + res == econv_undefined_conversion) { + return noalloc ? Qtrue : rb_econv_make_exception(fptr->writeconv); + } + } + return Qnil; +} + +struct finish_writeconv_arg { + rb_io_t *fptr; + int noalloc; +}; + +static VALUE +finish_writeconv_sync(VALUE arg) +{ + struct finish_writeconv_arg *p = (struct finish_writeconv_arg *)arg; + return finish_writeconv(p->fptr, p->noalloc); +} + +static void +fptr_finalize(rb_io_t *fptr, int noraise) +{ + VALUE err = Qnil; + if (fptr->writeconv) { + if (fptr->write_lock) { + struct finish_writeconv_arg arg; + arg.fptr = fptr; + arg.noalloc = noraise; + err = rb_mutex_synchronize(fptr->write_lock, finish_writeconv_sync, (VALUE)&arg); + } + else { + err = finish_writeconv(fptr, noraise); + } + } + if (fptr->wbuf_len) { + if (io_fflush(fptr) < 0 && NIL_P(err)) + err = noraise ? Qtrue : INT2NUM(errno); + } + if (IS_PREP_STDIO(fptr) || fptr->fd <= 2) { + goto skip_fd_close; + } + if (fptr->stdio_file) { + /* fptr->stdio_file is deallocated anyway + * even if fclose failed. */ + if (fclose(fptr->stdio_file) < 0 && NIL_P(err)) + err = noraise ? Qtrue : INT2NUM(errno); + } + else if (0 <= fptr->fd) { + /* fptr->fd may be closed even if close fails. + * POSIX doesn't specify it. + * We assumes it is closed. */ + if (close(fptr->fd) < 0 && NIL_P(err)) + err = noraise ? 
Qtrue : INT2NUM(errno); + } + skip_fd_close: + fptr->fd = -1; + fptr->stdio_file = 0; + fptr->mode &= ~(FMODE_READABLE|FMODE_WRITABLE); + + if (!NIL_P(err) && !noraise) { + switch(TYPE(err)) { + case T_FIXNUM: + case T_BIGNUM: + errno = NUM2INT(err); + rb_sys_fail_path(fptr->pathv); + + default: + rb_exc_raise(err); + } + } +} + +static void +rb_io_fptr_cleanup(rb_io_t *fptr, int noraise) +{ + if (fptr->finalize) { + (*fptr->finalize)(fptr, noraise); + } + else { + fptr_finalize(fptr, noraise); + } +} + +static void +clear_readconv(rb_io_t *fptr) +{ + if (fptr->readconv) { + rb_econv_close(fptr->readconv); + fptr->readconv = NULL; + } + if (fptr->cbuf) { + free(fptr->cbuf); + fptr->cbuf = NULL; + } +} + +static void +clear_writeconv(rb_io_t *fptr) +{ + if (fptr->writeconv) { + rb_econv_close(fptr->writeconv); + fptr->writeconv = NULL; + } + fptr->writeconv_initialized = 0; +} + +static void +clear_codeconv(rb_io_t *fptr) +{ + clear_readconv(fptr); + clear_writeconv(fptr); +} + +int +rb_io_fptr_finalize(rb_io_t *fptr) +{ + if (!fptr) return 0; + fptr->pathv = Qnil; + fptr->write_lock = 0; + if (0 <= fptr->fd) + rb_io_fptr_cleanup(fptr, Qtrue); + if (fptr->rbuf) { + free(fptr->rbuf); + fptr->rbuf = 0; + } + if (fptr->wbuf) { + free(fptr->wbuf); + fptr->wbuf = 0; + } + clear_codeconv(fptr); + free(fptr); + return 1; +} + +VALUE +rb_io_close(VALUE io) +{ + rb_io_t *fptr; + int fd; + VALUE write_io; + rb_io_t *write_fptr; + + write_io = GetWriteIO(io); + if (io != write_io) { + write_fptr = RFILE(write_io)->fptr; + if (write_fptr && 0 <= write_fptr->fd) { + rb_io_fptr_cleanup(write_fptr, Qtrue); + } + } + + fptr = RFILE(io)->fptr; + if (!fptr) return Qnil; + if (fptr->fd < 0) return Qnil; + + fd = fptr->fd; + rb_io_fptr_cleanup(fptr, Qfalse); + rb_thread_fd_close(fd); + + if (fptr->pid) { + rb_syswait(fptr->pid); + fptr->pid = 0; + } + + return Qnil; +} + +/* + * call-seq: + * ios.close => nil + * + * Closes ios and flushes any pending writes to the operating + * 
system. The stream is unavailable for any further data operations; + * an IOError is raised if such an attempt is made. I/O + * streams are automatically closed when they are claimed by the + * garbage collector. + * + * If ios is opened by IO.popen, + * close sets $?. + */ + +static VALUE +rb_io_close_m(VALUE io) +{ + if (rb_safe_level() >= 4 && !OBJ_UNTRUSTED(io)) { + rb_raise(rb_eSecurityError, "Insecure: can't close"); + } + rb_io_check_closed(RFILE(io)->fptr); + rb_io_close(io); + return Qnil; +} + +static VALUE +io_call_close(VALUE io) +{ + return rb_funcall(io, rb_intern("close"), 0, 0); +} + +static VALUE +io_close(VALUE io) +{ + return rb_rescue(io_call_close, io, 0, 0); +} + +/* + * call-seq: + * ios.closed? => true or false + * + * Returns true if ios is completely closed (for + * duplex streams, both reader and writer), false + * otherwise. + * + * f = File.new("testfile") + * f.close #=> nil + * f.closed? #=> true + * f = IO.popen("/bin/sh","r+") + * f.close_write #=> nil + * f.closed? #=> false + * f.close_read #=> nil + * f.closed? #=> true + */ + + +static VALUE +rb_io_closed(VALUE io) +{ + rb_io_t *fptr; + VALUE write_io; + rb_io_t *write_fptr; + + write_io = GetWriteIO(io); + if (io != write_io) { + write_fptr = RFILE(write_io)->fptr; + if (write_fptr && 0 <= write_fptr->fd) { + return Qfalse; + } + } + + fptr = RFILE(io)->fptr; + rb_io_check_initialized(fptr); + return 0 <= fptr->fd ? Qfalse : Qtrue; +} + +/* + * call-seq: + * ios.close_read => nil + * + * Closes the read end of a duplex I/O stream (i.e., one that contains + * both a read and a write stream, such as a pipe). Will raise an + * IOError if the stream is not duplexed. 
+ * + * f = IO.popen("/bin/sh","r+") + * f.close_read + * f.readlines + * + * produces: + * + * prog.rb:3:in `readlines': not opened for reading (IOError) + * from prog.rb:3 + */ + +static VALUE +rb_io_close_read(VALUE io) +{ + rb_io_t *fptr; + VALUE write_io; + + if (rb_safe_level() >= 4 && !OBJ_UNTRUSTED(io)) { + rb_raise(rb_eSecurityError, "Insecure: can't close"); + } + GetOpenFile(io, fptr); + if (is_socket(fptr->fd, fptr->pathv)) { +#ifndef SHUT_RD +# define SHUT_RD 0 +#endif + if (shutdown(fptr->fd, SHUT_RD) < 0) + rb_sys_fail_path(fptr->pathv); + fptr->mode &= ~FMODE_READABLE; + if (!(fptr->mode & FMODE_WRITABLE)) + return rb_io_close(io); + return Qnil; + } + + write_io = GetWriteIO(io); + if (io != write_io) { + rb_io_t *wfptr; + rb_io_fptr_cleanup(fptr, Qfalse); + GetOpenFile(write_io, wfptr); + RFILE(io)->fptr = wfptr; + RFILE(write_io)->fptr = NULL; + rb_io_fptr_finalize(fptr); + return Qnil; + } + + if (fptr->mode & FMODE_WRITABLE) { + rb_raise(rb_eIOError, "closing non-duplex IO for reading"); + } + return rb_io_close(io); +} + +/* + * call-seq: + * ios.close_write => nil + * + * Closes the write end of a duplex I/O stream (i.e., one that contains + * both a read and a write stream, such as a pipe). Will raise an + * IOError if the stream is not duplexed. 
+ * + * f = IO.popen("/bin/sh","r+") + * f.close_write + * f.print "nowhere" + * + * produces: + * + * prog.rb:3:in `write': not opened for writing (IOError) + * from prog.rb:3:in `print' + * from prog.rb:3 + */ + +static VALUE +rb_io_close_write(VALUE io) +{ + rb_io_t *fptr; + VALUE write_io; + + if (rb_safe_level() >= 4 && !OBJ_UNTRUSTED(io)) { + rb_raise(rb_eSecurityError, "Insecure: can't close"); + } + write_io = GetWriteIO(io); + GetOpenFile(write_io, fptr); + if (is_socket(fptr->fd, fptr->pathv)) { +#ifndef SHUT_WR +# define SHUT_WR 1 +#endif + if (shutdown(fptr->fd, SHUT_WR) < 0) + rb_sys_fail_path(fptr->pathv); + fptr->mode &= ~FMODE_WRITABLE; + if (!(fptr->mode & FMODE_READABLE)) + return rb_io_close(write_io); + return Qnil; + } + + if (fptr->mode & FMODE_READABLE) { + rb_raise(rb_eIOError, "closing non-duplex IO for writing"); + } + + rb_io_close(write_io); + if (io != write_io) { + GetOpenFile(io, fptr); + fptr->tied_io_for_writing = 0; + fptr->mode &= ~FMODE_DUPLEX; + } + return Qnil; +} + +/* + * call-seq: + * ios.sysseek(offset, whence=SEEK_SET) => integer + * + * Seeks to a given offset in the stream according to the value + * of whence (see IO#seek for values of + * whence). Returns the new offset into the file. + * + * f = File.new("testfile") + * f.sysseek(-13, IO::SEEK_END) #=> 53 + * f.sysread(10) #=> "And so on." 
+ */ + +static VALUE +rb_io_sysseek(int argc, VALUE *argv, VALUE io) +{ + VALUE offset, ptrname; + int whence = SEEK_SET; + rb_io_t *fptr; + off_t pos; + + if (rb_scan_args(argc, argv, "11", &offset, &ptrname) == 2) { + whence = NUM2INT(ptrname); + } + pos = NUM2OFFT(offset); + GetOpenFile(io, fptr); + if ((fptr->mode & FMODE_READABLE) && READ_DATA_BUFFERED(fptr)) { + rb_raise(rb_eIOError, "sysseek for buffered IO"); + } + if ((fptr->mode & FMODE_WRITABLE) && fptr->wbuf_len) { + rb_warn("sysseek for buffered IO"); + } + pos = lseek(fptr->fd, pos, whence); + if (pos == -1) rb_sys_fail_path(fptr->pathv); + + return OFFT2NUM(pos); +} + +/* + * call-seq: + * ios.syswrite(string) => integer + * + * Writes the given string to ios using a low-level write. + * Returns the number of bytes written. Do not mix with other methods + * that write to ios or you may get unpredictable results. + * Raises SystemCallError on error. + * + * f = File.new("out", "w") + * f.syswrite("ABCDEF") #=> 6 + */ + +static VALUE +rb_io_syswrite(VALUE io, VALUE str) +{ + rb_io_t *fptr; + long n; + + rb_secure(4); + if (TYPE(str) != T_STRING) + str = rb_obj_as_string(str); + + io = GetWriteIO(io); + GetOpenFile(io, fptr); + rb_io_check_writable(fptr); + + if (fptr->wbuf_len) { + rb_warn("syswrite for buffered IO"); + } + if (!rb_thread_fd_writable(fptr->fd)) { + rb_io_check_closed(fptr); + } + + n = write(fptr->fd, RSTRING_PTR(str), RSTRING_LEN(str)); + + if (n == -1) rb_sys_fail_path(fptr->pathv); + + return LONG2FIX(n); +} + +/* + * call-seq: + * ios.sysread(integer[, outbuf]) => string + * + * Reads integer bytes from ios using a low-level + * read and returns them as a string. Do not mix with other methods + * that read from ios or you may get unpredictable results. + * If the optional outbuf argument is present, it must reference + * a String, which will receive the data. + * Raises SystemCallError on error and + * EOFError at end of file. 
+ * + * f = File.new("testfile") + * f.sysread(16) #=> "This is line one" + */ + +static VALUE +rb_io_sysread(int argc, VALUE *argv, VALUE io) +{ + VALUE len, str; + rb_io_t *fptr; + long n, ilen; + + rb_scan_args(argc, argv, "11", &len, &str); + ilen = NUM2LONG(len); + + if (NIL_P(str)) { + str = rb_str_new(0, ilen); + } + else { + StringValue(str); + rb_str_modify(str); + rb_str_resize(str, ilen); + } + if (ilen == 0) return str; + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + + if (READ_DATA_BUFFERED(fptr)) { + rb_raise(rb_eIOError, "sysread for buffered IO"); + } + + n = fptr->fd; + rb_thread_wait_fd(fptr->fd); + rb_io_check_closed(fptr); + if (RSTRING_LEN(str) != ilen) { + rb_raise(rb_eRuntimeError, "buffer string modified"); + } + + n = rb_read_internal(fptr->fd, RSTRING_PTR(str), ilen); + + if (n == -1) { + rb_sys_fail_path(fptr->pathv); + } + rb_str_set_len(str, n); + if (n == 0 && ilen > 0) { + rb_eof_error(); + } + rb_str_resize(str, n); + OBJ_TAINT(str); + + return str; +} + +VALUE +rb_io_binmode(VALUE io) +{ + rb_io_t *fptr; + + GetOpenFile(io, fptr); + if (fptr->readconv) + rb_econv_binmode(fptr->readconv); + if (fptr->writeconv) + rb_econv_binmode(fptr->writeconv); + fptr->mode |= FMODE_BINMODE; + fptr->mode &= ~FMODE_TEXTMODE; + fptr->writeconv_pre_ecflags &= ~(ECONV_UNIVERSAL_NEWLINE_DECORATOR|ECONV_CRLF_NEWLINE_DECORATOR|ECONV_CR_NEWLINE_DECORATOR); + return io; +} + +VALUE +rb_io_ascii8bit_binmode(VALUE io) +{ + rb_io_t *fptr; + + GetOpenFile(io, fptr); + if (fptr->readconv) { + rb_econv_close(fptr->readconv); + fptr->readconv = NULL; + } + if (fptr->writeconv) { + rb_econv_close(fptr->writeconv); + fptr->writeconv = NULL; + } + fptr->mode |= FMODE_BINMODE; + fptr->mode &= ~FMODE_TEXTMODE; + + fptr->encs.enc = rb_ascii8bit_encoding(); + fptr->encs.enc2 = NULL; + fptr->encs.ecflags = 0; + fptr->encs.ecopts = Qnil; + clear_codeconv(fptr); + + return io; +} + +/* + * call-seq: + * ios.binmode => ios + * + * Puts ios into binary mode. 
+ * Once a stream is in binary mode, it cannot be reset to nonbinary mode. + * + * - newline conversion disabled + * - encoding conversion disabled + * - content is treated as ASCII-8BIT + * + */ + +static VALUE +rb_io_binmode_m(VALUE io) +{ + VALUE write_io; + + rb_io_ascii8bit_binmode(io); + + write_io = GetWriteIO(io); + if (write_io != io) + rb_io_ascii8bit_binmode(write_io); + return io; +} + +/* + * call-seq: + * ios.binmode? => true or false + * + * Returns true if ios is binmode. + */ +static VALUE +rb_io_binmode_p(VALUE io) +{ + rb_io_t *fptr; + GetOpenFile(io, fptr); + return fptr->mode & FMODE_BINMODE ? Qtrue : Qfalse; +} + +static const char* +rb_io_fmode_modestr(int fmode) +{ +# define MODE_BTMODE(a,b,c) ((fmode & FMODE_BINMODE) ? (b) : \ + (fmode & FMODE_TEXTMODE) ? (c) : (a)) + if (fmode & FMODE_APPEND) { + if ((fmode & FMODE_READWRITE) == FMODE_READWRITE) { + return MODE_BTMODE("a+", "ab+", "at+"); + } + return MODE_BTMODE("a", "ab", "at"); + } + switch (fmode & FMODE_READWRITE) { + case FMODE_READABLE: + return MODE_BTMODE("r", "rb", "rt"); + case FMODE_WRITABLE: + return MODE_BTMODE("w", "wb", "wt"); + case FMODE_READWRITE: + if (fmode & FMODE_CREATE) { + return MODE_BTMODE("w+", "wb+", "wt+"); + } + return MODE_BTMODE("r+", "rb+", "rt+"); + } + rb_raise(rb_eArgError, "invalid access fmode 0x%x", fmode); + return NULL; /* not reached */ +} + +int +rb_io_modestr_fmode(const char *modestr) +{ + int fmode = 0; + const char *m = modestr; + + switch (*m++) { + case 'r': + fmode |= FMODE_READABLE; + break; + case 'w': + fmode |= FMODE_WRITABLE | FMODE_TRUNC | FMODE_CREATE; + break; + case 'a': + fmode |= FMODE_WRITABLE | FMODE_APPEND | FMODE_CREATE; + break; + default: + error: + rb_raise(rb_eArgError, "invalid access mode %s", modestr); + } + + while (*m) { + switch (*m++) { + case 'b': + fmode |= FMODE_BINMODE; + break; + case 't': + fmode |= FMODE_TEXTMODE; + break; + case '+': + fmode |= FMODE_READWRITE; + break; + default: + goto error; + case ':': 
+ goto finished; + } + } + + finished: + if ((fmode & FMODE_BINMODE) && (fmode & FMODE_TEXTMODE)) + goto error; + + return fmode; +} + +int +rb_io_oflags_fmode(int oflags) +{ + int fmode = 0; + + switch (oflags & (O_RDONLY|O_WRONLY|O_RDWR)) { + case O_RDONLY: + fmode = FMODE_READABLE; + break; + case O_WRONLY: + fmode = FMODE_WRITABLE; + break; + case O_RDWR: + fmode = FMODE_READWRITE; + break; + } + + if (oflags & O_APPEND) { + fmode |= FMODE_APPEND; + } + if (oflags & O_TRUNC) { + fmode |= FMODE_TRUNC; + } + if (oflags & O_CREAT) { + fmode |= FMODE_CREATE; + } +#ifdef O_BINARY + if (oflags & O_BINARY) { + fmode |= FMODE_BINMODE; + } +#endif + + return fmode; +} + +static int +rb_io_fmode_oflags(int fmode) +{ + int oflags = 0; + + switch (fmode & FMODE_READWRITE) { + case FMODE_READABLE: + oflags |= O_RDONLY; + break; + case FMODE_WRITABLE: + oflags |= O_WRONLY; + break; + case FMODE_READWRITE: + oflags |= O_RDWR; + break; + } + + if (fmode & FMODE_APPEND) { + oflags |= O_APPEND; + } + if (fmode & FMODE_TRUNC) { + oflags |= O_TRUNC; + } + if (fmode & FMODE_CREATE) { + oflags |= O_CREAT; + } +#ifdef O_BINARY + if (fmode & FMODE_BINMODE) { + oflags |= O_BINARY; + } +#endif + + return oflags; +} + +int +rb_io_modestr_oflags(const char *modestr) +{ + return rb_io_fmode_oflags(rb_io_modestr_fmode(modestr)); +} + +static const char* +rb_io_oflags_modestr(int oflags) +{ +#ifdef O_BINARY +# define MODE_BINARY(a,b) ((oflags & O_BINARY) ? 
(b) : (a)) +#else +# define MODE_BINARY(a,b) (a) +#endif + if (oflags & O_APPEND) { + if ((oflags & O_RDWR) == O_RDWR) { + return MODE_BINARY("a+", "ab+"); + } + return MODE_BINARY("a", "ab"); + } + switch (oflags & (O_RDONLY|O_WRONLY|O_RDWR)) { + case O_RDONLY: + return MODE_BINARY("r", "rb"); + case O_WRONLY: + return MODE_BINARY("w", "wb"); + case O_RDWR: + return MODE_BINARY("r+", "rb+"); + } + rb_raise(rb_eArgError, "invalid access oflags 0x%x", oflags); + return NULL; /* not reached */ +} + +/* + * Convert external/internal encodings to enc/enc2 + * NULL => use default encoding + * Qnil => no encoding specified (internal only) + */ +static void +rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2) +{ + int default_ext = 0; + + if (ext == NULL) { + ext = rb_default_external_encoding(); + default_ext = 1; + } + if (intern == NULL && ext != rb_ascii8bit_encoding()) + /* If external is ASCII-8BIT, no default transcoding */ + intern = rb_default_internal_encoding(); + if (intern == NULL || intern == (rb_encoding *)Qnil || intern == ext) { + /* No internal encoding => use external + no transcoding */ + *enc = (default_ext && intern != ext) ? 
NULL : ext; + *enc2 = NULL; + } + else { + *enc = intern; + *enc2 = ext; + } +} + +static void +parse_mode_enc(const char *estr, rb_encoding **enc_p, rb_encoding **enc2_p) +{ + const char *p; + char encname[ENCODING_MAXNAMELEN+1]; + int idx, idx2; + rb_encoding *ext_enc, *int_enc; + + /* parse estr as "enc" or "enc2:enc" or "enc:-" */ + + p = strrchr(estr, ':'); + if (p) { + int len = (p++) - estr; + if (len == 0 || len > ENCODING_MAXNAMELEN) + idx = -1; + else { + memcpy(encname, estr, len); + encname[len] = '\0'; + estr = encname; + idx = rb_enc_find_index(encname); + } + } + else + idx = rb_enc_find_index(estr); + + if (idx >= 0) + ext_enc = rb_enc_from_index(idx); + else { + if (idx != -2) + rb_warn("Unsupported encoding %s ignored", estr); + ext_enc = NULL; + } + + int_enc = NULL; + if (p) { + if (*p == '-' && *(p+1) == '\0') { + /* Special case - "-" => no transcoding */ + int_enc = (rb_encoding *)Qnil; + } + else { + idx2 = rb_enc_find_index(p); + if (idx2 < 0) + rb_warn("Unsupported encoding %s ignored", p); + else if (idx2 == idx) { + rb_warn("Ignoring internal encoding %s: it is identical to external encoding %s", p, estr); + int_enc = (rb_encoding *)Qnil; + } + else + int_enc = rb_enc_from_index(idx2); + } + } + + rb_io_ext_int_to_encs(ext_enc, int_enc, enc_p, enc2_p); +} + +static void +mode_enc(rb_io_t *fptr, const char *estr) +{ + clear_codeconv(fptr); + + parse_mode_enc(estr, &fptr->encs.enc, &fptr->encs.enc2); +} + +static void +rb_io_mode_enc(rb_io_t *fptr, const char *modestr) +{ + const char *p = strchr(modestr, ':'); + if (p) { + mode_enc(fptr, p+1); + } +} + +int +rb_io_extract_encoding_option(VALUE opt, rb_encoding **enc_p, rb_encoding **enc2_p) +{ + VALUE encoding=Qnil, extenc=Qundef, intenc=Qundef, tmp; + int extracted = 0; + rb_encoding *extencoding = NULL; + rb_encoding *intencoding = NULL; + + if (!NIL_P(opt)) { + VALUE v; + v = rb_hash_lookup2(opt, sym_encoding, Qnil); + if (v != Qnil) encoding = v; + v = rb_hash_lookup2(opt, sym_extenc, 
Qundef); + if (v != Qnil) extenc = v; + v = rb_hash_lookup2(opt, sym_intenc, Qundef); + if (v != Qundef) intenc = v; + } + if ((extenc != Qundef || intenc != Qundef) && !NIL_P(encoding)) { + rb_warn("Ignoring encoding parameter '%s': %s_encoding is used", + StringValueCStr(encoding), + extenc == Qundef ? "internal" : "external"); + encoding = Qnil; + } + if (extenc != Qundef && !NIL_P(extenc)) { + extencoding = rb_to_encoding(extenc); + } + if (intenc != Qundef) { + if (NIL_P(intenc)) { + /* internal_encoding: nil => no transcoding */ + intencoding = (rb_encoding *)Qnil; + } + else if (!NIL_P(tmp = rb_check_string_type(intenc))) { + char *p = StringValueCStr(tmp); + + if (*p == '-' && *(p+1) == '\0') { + /* Special case - "-" => no transcoding */ + intencoding = (rb_encoding *)Qnil; + } + else { + intencoding = rb_to_encoding(intenc); + } + } + else { + intencoding = rb_to_encoding(intenc); + } + if (extencoding == intencoding) { + intencoding = (rb_encoding *)Qnil; + } + } + if (!NIL_P(encoding)) { + extracted = 1; + parse_mode_enc(StringValueCStr(encoding), enc_p, enc2_p); + } + else if (extenc != Qundef || intenc != Qundef) { + extracted = 1; + rb_io_ext_int_to_encs(extencoding, intencoding, enc_p, enc2_p); + } + return extracted; +} + +typedef struct rb_io_enc_t convconfig_t; + +static void +validate_enc_binmode(int fmode, rb_encoding *enc, rb_encoding *enc2) +{ + if ((fmode & FMODE_READABLE) && + !enc2 && + !(fmode & FMODE_BINMODE) && + !rb_enc_asciicompat(enc ? 
enc : rb_default_external_encoding())) + rb_raise(rb_eArgError, "ASCII incompatible encoding needs binmode"); +} + +static void +extract_binmode(VALUE opthash, int *fmode) +{ + if (!NIL_P(opthash)) { + VALUE v; + v = rb_hash_aref(opthash, sym_textmode); + if (!NIL_P(v) && RTEST(v)) + *fmode |= FMODE_TEXTMODE; + v = rb_hash_aref(opthash, sym_binmode); + if (!NIL_P(v) && RTEST(v)) + *fmode |= FMODE_BINMODE; + + if ((*fmode & FMODE_BINMODE) && (*fmode & FMODE_TEXTMODE)) + rb_raise(rb_eArgError, "both textmode and binmode specified"); + } +} + +static void +rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash, + int *oflags_p, int *fmode_p, convconfig_t *convconfig_p) +{ + VALUE vmode; + int oflags, fmode; + rb_encoding *enc, *enc2; + int ecflags; + VALUE ecopts; + int has_enc = 0, has_vmode = 0; + VALUE intmode; + + vmode = *vmode_p; + + /* Set to defaults */ + rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2); + + if (NIL_P(vmode)) { + fmode = FMODE_READABLE; + oflags = O_RDONLY; + } + else if (!NIL_P(intmode = rb_check_to_integer(vmode, "to_int"))) { + vmode = intmode; + oflags = NUM2INT(intmode); + fmode = rb_io_oflags_fmode(oflags); + } + else { + const char *p; + + vmode_handle: + SafeStringValue(vmode); + p = StringValueCStr(vmode); + fmode = rb_io_modestr_fmode(p); + oflags = rb_io_fmode_oflags(fmode); + p = strchr(p, ':'); + if (p) { + has_enc = 1; + parse_mode_enc(p+1, &enc, &enc2); + } + else { + rb_encoding *e; + + e = (fmode & FMODE_BINMODE) ? 
rb_ascii8bit_encoding() : NULL; + rb_io_ext_int_to_encs(e, NULL, &enc, &enc2); + } + } + + if (NIL_P(opthash)) { + ecflags = 0; + ecopts = Qnil; + } + else { + VALUE v; + extract_binmode(opthash, &fmode); +#ifdef O_BINARY + if (fmode & FMODE_BINMODE) + oflags |= O_BINARY; +#endif + if (!has_vmode) { + v = rb_hash_aref(opthash, sym_mode); + if (!NIL_P(v)) { + if (!NIL_P(vmode)) { + rb_raise(rb_eArgError, "mode specified twice"); + } + has_vmode = 1; + vmode = v; + goto vmode_handle; + } + } + v = rb_hash_aref(opthash, sym_perm); + if (!NIL_P(v)) { + if (vperm_p) { + if (!NIL_P(*vperm_p)) { + rb_raise(rb_eArgError, "perm specified twice"); + } + *vperm_p = v; + } + else { + /* perm no use, just ignore */ + } + } + ecflags = rb_econv_prepare_opts(opthash, &ecopts); + + if (rb_io_extract_encoding_option(opthash, &enc, &enc2)) { + if (has_enc) { + rb_raise(rb_eArgError, "encoding specified twice"); + } + } + } + + validate_enc_binmode(fmode, enc, enc2); + + *vmode_p = vmode; + + *oflags_p = oflags; + *fmode_p = fmode; + convconfig_p->enc = enc; + convconfig_p->enc2 = enc2; + convconfig_p->ecflags = ecflags; + convconfig_p->ecopts = ecopts; +} + +struct sysopen_struct { + const char *fname; + int oflags; + mode_t perm; +}; + +static VALUE +sysopen_func(void *ptr) +{ + struct sysopen_struct *data = ptr; + return (VALUE)open(data->fname, data->oflags, data->perm); +} + +static int +rb_sysopen_internal(const char *fname, int oflags, mode_t perm) +{ + struct sysopen_struct data; + data.fname = fname; + data.oflags = oflags; + data.perm = perm; + return (int)rb_thread_blocking_region(sysopen_func, &data, RUBY_UBF_IO, 0); +} + +static int +rb_sysopen(const char *fname, int oflags, mode_t perm) +{ + int fd; + +#ifdef O_BINARY + oflags |= O_BINARY; +#endif + + fd = rb_sysopen_internal(fname, oflags, perm); + if (fd < 0) { + if (errno == EMFILE || errno == ENFILE) { + rb_gc(); + fd = rb_sysopen_internal(fname, oflags, perm); + } + if (fd < 0) { + rb_sys_fail(fname); + } + } + 
UPDATE_MAXFD(fd); + return fd; +} + +FILE * +rb_fdopen(int fd, const char *modestr) +{ + FILE *file; + +#if defined(sun) + errno = 0; +#endif + file = fdopen(fd, modestr); + if (!file) { + if ( +#if defined(sun) + errno == 0 || +#endif + errno == EMFILE || errno == ENFILE) { + rb_gc(); +#if defined(sun) + errno = 0; +#endif + file = fdopen(fd, modestr); + } + if (!file) { +#ifdef _WIN32 + if (errno == 0) errno = EINVAL; +#elif defined(sun) + if (errno == 0) errno = EMFILE; +#endif + rb_sys_fail(0); + } + } + + /* xxx: should be _IONBF? A buffer in FILE may have trouble. */ +#ifdef USE_SETVBUF + if (setvbuf(file, NULL, _IOFBF, 0) != 0) + rb_warn("setvbuf() can't be honoured (fd=%d)", fd); +#endif + return file; +} + +static void +io_check_tty(rb_io_t *fptr) +{ + if (isatty(fptr->fd)) + fptr->mode |= FMODE_TTY|FMODE_DUPLEX; +} + +static VALUE +rb_file_open_generic(VALUE io, VALUE filename, int oflags, int fmode, convconfig_t *convconfig, mode_t perm) +{ + rb_io_t *fptr; + convconfig_t cc; + if (!convconfig) { + /* Set to default encodings */ + rb_io_ext_int_to_encs(NULL, NULL, &cc.enc, &cc.enc2); + cc.ecflags = 0; + cc.ecopts = Qnil; + convconfig = &cc; + } + validate_enc_binmode(fmode, convconfig->enc, convconfig->enc2); + + MakeOpenFile(io, fptr); + fptr->mode = fmode; + fptr->encs = *convconfig; + fptr->pathv = rb_str_new_frozen(filename); + fptr->fd = rb_sysopen(RSTRING_PTR(fptr->pathv), oflags, perm); + io_check_tty(fptr); + + return io; +} + +static VALUE +rb_file_open_internal(VALUE io, VALUE filename, const char *modestr) +{ + int fmode = rb_io_modestr_fmode(modestr); + const char *p = strchr(modestr, ':'); + convconfig_t convconfig; + + if (p) { + parse_mode_enc(p+1, &convconfig.enc, &convconfig.enc2); + } + else { + rb_encoding *e; + /* Set to default encodings */ + + e = (fmode & FMODE_BINMODE) ? 
rb_ascii8bit_encoding() : NULL; + rb_io_ext_int_to_encs(e, NULL, &convconfig.enc, &convconfig.enc2); + convconfig.ecflags = 0; + convconfig.ecopts = Qnil; + } + + return rb_file_open_generic(io, filename, + rb_io_fmode_oflags(fmode), + fmode, + &convconfig, + 0666); +} + +VALUE +rb_file_open_str(VALUE fname, const char *modestr) +{ + FilePathValue(fname); + return rb_file_open_internal(io_alloc(rb_cFile), fname, modestr); +} + +VALUE +rb_file_open(const char *fname, const char *modestr) +{ + return rb_file_open_internal(io_alloc(rb_cFile), rb_str_new_cstr(fname), modestr); +} + +#if defined(__CYGWIN__) || !defined(HAVE_FORK) +static struct pipe_list { + rb_io_t *fptr; + struct pipe_list *next; +} *pipe_list; + +static void +pipe_add_fptr(rb_io_t *fptr) +{ + struct pipe_list *list; + + list = ALLOC(struct pipe_list); + list->fptr = fptr; + list->next = pipe_list; + pipe_list = list; +} + +static void +pipe_del_fptr(rb_io_t *fptr) +{ + struct pipe_list *list = pipe_list; + struct pipe_list *tmp; + + if (list->fptr == fptr) { + pipe_list = list->next; + free(list); + return; + } + + while (list->next) { + if (list->next->fptr == fptr) { + tmp = list->next; + list->next = list->next->next; + free(tmp); + return; + } + list = list->next; + } +} + +static void +pipe_atexit(void) +{ + struct pipe_list *list = pipe_list; + struct pipe_list *tmp; + + while (list) { + tmp = list->next; + rb_io_fptr_finalize(list->fptr); + list = tmp; + } +} + +static void +pipe_finalize(rb_io_t *fptr, int noraise) +{ +#if !defined(HAVE_FORK) && !defined(_WIN32) + int status; + if (fptr->stdio_file) { + status = pclose(fptr->stdio_file); + } + fptr->fd = -1; + fptr->stdio_file = 0; + rb_last_status_set(status, fptr->pid); +#else + fptr_finalize(fptr, noraise); +#endif + pipe_del_fptr(fptr); +} +#endif + +void +rb_io_synchronized(rb_io_t *fptr) +{ + rb_io_check_initialized(fptr); + fptr->mode |= FMODE_SYNC; +} + +void +rb_io_unbuffered(rb_io_t *fptr) +{ + rb_io_synchronized(fptr); +} + +int 
+rb_pipe(int *pipes) +{ + int ret; + ret = pipe(pipes); + if (ret == -1) { + if (errno == EMFILE || errno == ENFILE) { + rb_gc(); + ret = pipe(pipes); + } + } + if (ret == 0) { + UPDATE_MAXFD(pipes[0]); + UPDATE_MAXFD(pipes[1]); + } + return ret; +} + +#ifdef HAVE_FORK +struct popen_arg { + struct rb_exec_arg *execp; + int modef; + int pair[2]; + int write_pair[2]; +}; + +static void +popen_redirect(struct popen_arg *p) +{ + if ((p->modef & FMODE_READABLE) && (p->modef & FMODE_WRITABLE)) { + close(p->write_pair[1]); + if (p->write_pair[0] != 0) { + dup2(p->write_pair[0], 0); + close(p->write_pair[0]); + } + close(p->pair[0]); + if (p->pair[1] != 1) { + dup2(p->pair[1], 1); + close(p->pair[1]); + } + } + else if (p->modef & FMODE_READABLE) { + close(p->pair[0]); + if (p->pair[1] != 1) { + dup2(p->pair[1], 1); + close(p->pair[1]); + } + } + else { + close(p->pair[1]); + if (p->pair[0] != 0) { + dup2(p->pair[0], 0); + close(p->pair[0]); + } + } +} + +void +rb_close_before_exec(int lowfd, int maxhint, VALUE noclose_fds) +{ + int fd, ret; + int max = max_file_descriptor; + if (max < maxhint) + max = maxhint; + for (fd = lowfd; fd <= max; fd++) { + if (!NIL_P(noclose_fds) && + RTEST(rb_hash_lookup(noclose_fds, INT2FIX(fd)))) + continue; +#ifdef FD_CLOEXEC + ret = fcntl(fd, F_GETFD); + if (ret != -1 && !(ret & FD_CLOEXEC)) { + fcntl(fd, F_SETFD, ret|FD_CLOEXEC); + } +#else + close(fd); +#endif + } +} + +static int +popen_exec(void *pp) +{ + struct popen_arg *p = (struct popen_arg*)pp; + + rb_thread_atfork_before_exec(); + return rb_exec(p->execp); +} +#endif + +static VALUE +pipe_open(struct rb_exec_arg *eargp, VALUE prog, const char *modestr, int fmode, convconfig_t *convconfig) +{ + int pid = 0; + rb_io_t *fptr; + VALUE port; + rb_io_t *write_fptr; + VALUE write_port; +#if defined(HAVE_FORK) + int status; + struct popen_arg arg; +#elif defined(_WIN32) + volatile VALUE argbuf; + char **args = NULL; + struct rb_exec_arg sarg; + int pair[2], write_pair[2]; +#endif + FILE 
*fp = 0; + int fd = -1; + int write_fd = -1; + const char *cmd = 0; + int argc; + VALUE *argv; + + if (prog) + cmd = StringValueCStr(prog); + + if (!eargp) { + /* fork : IO.popen("-") */ + argc = 0; + argv = 0; + } + else if (eargp->argc) { + /* no shell : IO.popen([prog, arg0], arg1, ...) */ + argc = eargp->argc; + argv = eargp->argv; + } + else { + /* with shell : IO.popen(prog) */ + argc = 0; + argv = 0; + } + +#if defined(HAVE_FORK) + arg.execp = eargp; + arg.modef = fmode; + arg.pair[0] = arg.pair[1] = -1; + arg.write_pair[0] = arg.write_pair[1] = -1; + switch (fmode & (FMODE_READABLE|FMODE_WRITABLE)) { + case FMODE_READABLE|FMODE_WRITABLE: + if (rb_pipe(arg.write_pair) < 0) + rb_sys_fail(cmd); + if (rb_pipe(arg.pair) < 0) { + int e = errno; + close(arg.write_pair[0]); + close(arg.write_pair[1]); + errno = e; + rb_sys_fail(cmd); + } + if (eargp) { + rb_exec_arg_addopt(eargp, INT2FIX(0), INT2FIX(arg.write_pair[0])); + rb_exec_arg_addopt(eargp, INT2FIX(1), INT2FIX(arg.pair[1])); + } + break; + case FMODE_READABLE: + if (rb_pipe(arg.pair) < 0) + rb_sys_fail(cmd); + if (eargp) + rb_exec_arg_addopt(eargp, INT2FIX(1), INT2FIX(arg.pair[1])); + break; + case FMODE_WRITABLE: + if (rb_pipe(arg.pair) < 0) + rb_sys_fail(cmd); + if (eargp) + rb_exec_arg_addopt(eargp, INT2FIX(0), INT2FIX(arg.pair[0])); + break; + default: + rb_sys_fail(cmd); + } + if (eargp) { + rb_exec_arg_fixup(arg.execp); + pid = rb_fork(&status, popen_exec, &arg, arg.execp->redirect_fds); + } + else { + fflush(stdin); /* is it really needed? 
*/ + pid = rb_fork(&status, 0, 0, Qnil); + if (pid == 0) { /* child */ + popen_redirect(&arg); + rb_io_synchronized(RFILE(orig_stdout)->fptr); + rb_io_synchronized(RFILE(orig_stderr)->fptr); + return Qnil; + } + } + + /* parent */ + if (pid == -1) { + int e = errno; + close(arg.pair[0]); + close(arg.pair[1]); + if ((fmode & (FMODE_READABLE|FMODE_WRITABLE)) == (FMODE_READABLE|FMODE_WRITABLE)) { + close(arg.write_pair[0]); + close(arg.write_pair[1]); + } + errno = e; + rb_sys_fail(cmd); + } + if ((fmode & FMODE_READABLE) && (fmode & FMODE_WRITABLE)) { + close(arg.pair[1]); + fd = arg.pair[0]; + close(arg.write_pair[0]); + write_fd = arg.write_pair[1]; + } + else if (fmode & FMODE_READABLE) { + close(arg.pair[1]); + fd = arg.pair[0]; + } + else { + close(arg.pair[0]); + fd = arg.pair[1]; + } +#elif defined(_WIN32) + if (argc) { + int i; + + if (argc >= FIXNUM_MAX / sizeof(char *)) { + rb_raise(rb_eArgError, "too many arguments"); + } + argbuf = rb_str_tmp_new((argc+1) * sizeof(char *)); + args = (void *)RSTRING_PTR(argbuf); + for (i = 0; i < argc; ++i) { + args[i] = StringValueCStr(argv[i]); + } + args[i] = NULL; + } + switch (fmode & (FMODE_READABLE|FMODE_WRITABLE)) { + case FMODE_READABLE|FMODE_WRITABLE: + if (rb_pipe(write_pair) < 0) + rb_sys_fail(cmd); + if (rb_pipe(pair) < 0) { + int e = errno; + close(write_pair[0]); + close(write_pair[1]); + errno = e; + rb_sys_fail(cmd); + } + if (eargp) { + rb_exec_arg_addopt(eargp, INT2FIX(0), INT2FIX(write_pair[0])); + rb_exec_arg_addopt(eargp, INT2FIX(1), INT2FIX(pair[1])); + } + break; + case FMODE_READABLE: + if (rb_pipe(pair) < 0) + rb_sys_fail(cmd); + if (eargp) + rb_exec_arg_addopt(eargp, INT2FIX(1), INT2FIX(pair[1])); + break; + case FMODE_WRITABLE: + if (rb_pipe(pair) < 0) + rb_sys_fail(cmd); + if (eargp) + rb_exec_arg_addopt(eargp, INT2FIX(0), INT2FIX(pair[0])); + break; + default: + rb_sys_fail(cmd); + } + if (eargp) { + rb_exec_arg_fixup(eargp); + rb_run_exec_options(eargp, &sarg); + } + while ((pid = (args ? 
+ rb_w32_aspawn(P_NOWAIT, 0, args) : + rb_w32_spawn(P_NOWAIT, cmd, 0))) == -1) { + /* exec failed */ + switch (errno) { + case EAGAIN: +#if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN + case EWOULDBLOCK: +#endif + rb_thread_sleep(1); + break; + default: + { + int e = errno; + if (eargp) + rb_run_exec_options(&sarg, NULL); + close(pair[0]); + close(pair[1]); + if ((fmode & (FMODE_READABLE|FMODE_WRITABLE)) == (FMODE_READABLE|FMODE_WRITABLE)) { + close(write_pair[0]); + close(write_pair[1]); + } + errno = e; + rb_sys_fail(cmd); + } + break; + } + } + if (eargp) + rb_run_exec_options(&sarg, NULL); + if ((fmode & FMODE_READABLE) && (fmode & FMODE_WRITABLE)) { + close(pair[1]); + fd = pair[0]; + close(write_pair[0]); + write_fd = write_pair[1]; + } + else if (fmode & FMODE_READABLE) { + close(pair[1]); + fd = pair[0]; + } + else { + close(pair[0]); + fd = pair[1]; + } +#else + if (argc) { + prog = rb_ary_join(rb_ary_new4(argc, argv), rb_str_new2(" ")); + cmd = StringValueCStr(prog); + } + if (eargp) { + rb_exec_arg_fixup(eargp); + rb_run_exec_options(eargp, &sarg); + } + fp = popen(cmd, modestr); + if (eargp) + rb_run_exec_options(&sarg, NULL); + if (!fp) rb_sys_fail(RSTRING_PTR(prog)); + fd = fileno(fp); +#endif + + port = io_alloc(rb_cIO); + MakeOpenFile(port, fptr); + fptr->fd = fd; + fptr->stdio_file = fp; + fptr->mode = fmode | FMODE_SYNC|FMODE_DUPLEX; + if (convconfig) { + fptr->encs = *convconfig; + } + fptr->pid = pid; + + if (0 <= write_fd) { + write_port = io_alloc(rb_cIO); + MakeOpenFile(write_port, write_fptr); + write_fptr->fd = write_fd; + write_fptr->mode = (fmode & ~FMODE_READABLE)| FMODE_SYNC|FMODE_DUPLEX; + fptr->mode &= ~FMODE_WRITABLE; + fptr->tied_io_for_writing = write_port; + rb_ivar_set(port, rb_intern("@tied_io_for_writing"), write_port); + } + +#if defined (__CYGWIN__) || !defined(HAVE_FORK) + fptr->finalize = pipe_finalize; + pipe_add_fptr(fptr); +#endif + return port; +} + +static VALUE +pipe_open_v(int argc, VALUE *argv, const char 
*modestr, int fmode, convconfig_t *convconfig) +{ + VALUE prog; + struct rb_exec_arg earg; + prog = rb_exec_arg_init(argc, argv, Qfalse, &earg); + return pipe_open(&earg, prog, modestr, fmode, convconfig); +} + +static VALUE +pipe_open_s(VALUE prog, const char *modestr, int fmode, convconfig_t *convconfig) +{ + const char *cmd = RSTRING_PTR(prog); + int argc = 1; + VALUE *argv = &prog; + struct rb_exec_arg earg; + + if (RSTRING_LEN(prog) == 1 && cmd[0] == '-') { +#if !defined(HAVE_FORK) + rb_raise(rb_eNotImpError, + "fork() function is unimplemented on this machine"); +#endif + return pipe_open(0, 0, modestr, fmode, convconfig); + } + + rb_exec_arg_init(argc, argv, Qtrue, &earg); + return pipe_open(&earg, prog, modestr, fmode, convconfig); +} + +static VALUE +pop_last_hash(int *argc_p, VALUE *argv) +{ + VALUE last, tmp; + if (*argc_p == 0) + return Qnil; + last = argv[*argc_p-1]; + tmp = rb_check_convert_type(last, T_HASH, "Hash", "to_hash"); + if (NIL_P(tmp)) + return Qnil; + (*argc_p)--; + return tmp; +} + +/* + * call-seq: + * IO.popen(cmd, mode="r" [, opt]) => io + * IO.popen(cmd, mode="r" [, opt]) {|io| block } => obj + * + * Runs the specified command as a subprocess; the subprocess's + * standard input and output will be connected to the returned + * IO object. If _cmd_ is a +String+ + * ``-'', then a new instance of Ruby is started as the + * subprocess. If cmd is an +Array+ of +String+, then it will + * be used as the subprocess's +argv+ bypassing a shell. + * The array can contains a hash at first for environments and + * a hash at last for options similar to spawn. The default + * mode for the new file object is ``r'', but mode may be set + * to any of the modes listed in the description for class IO. + * The last argument opt qualifies mode. + * + * # set IO encoding + * nkf_io = IO.popen("nkf -e filename", :external_encoding=>"EUC-JP") + * euc_jp_string = nkf_io.read + * + * # discard standard error using spawn option. 
+ * # See the documentation of Kernel.spawn. + * ls_io = IO.popen(["ls", "/", :err=>"/dev/null"]) + * ls_result_with_error = ls_io.read + * + * Raises exceptions which IO.pipe and + * Kernel.spawn raise. + * + * If a block is given, Ruby will run the command as a child connected + * to Ruby with a pipe. Ruby's end of the pipe will be passed as a + * parameter to the block. + * At the end of the block, Ruby closes the pipe and sets $?. + * In this case IO.popen returns + * the value of the block. + * + * If a block is given with a _cmd_ of ``-'', + * the block will be run in two separate processes: once in the parent, + * and once in a child. The parent process will be passed the pipe + * object as a parameter to the block, the child version of the block + * will be passed nil, and the child's standard in and + * standard out will be connected to the parent through the pipe. Not + * available on all platforms. + * + * f = IO.popen("uname") + * p f.readlines + * puts "Parent is #{Process.pid}" + * IO.popen("date") { |f| puts f.gets } + * IO.popen("-") {|f| $stderr.puts "#{Process.pid} is here, f is #{f}"} + * p $? 
+ * IO.popen(%w"sed -e s|^|| -e s&$&;zot;&", "r+") {|f| + * f.puts "bar"; f.close_write; puts f.gets + * } + * + * produces: + * + * ["Linux\n"] + * Parent is 26166 + * Wed Apr 9 08:53:52 CDT 2003 + * 26169 is here, f is + * 26166 is here, f is # + * # + * bar;zot; + */ + +static VALUE +rb_io_s_popen(int argc, VALUE *argv, VALUE klass) +{ + const char *modestr; + VALUE pname, pmode, port, tmp, opt; + int oflags, fmode; + convconfig_t convconfig; + + opt = pop_last_hash(&argc, argv); + rb_scan_args(argc, argv, "11", &pname, &pmode); + + rb_io_extract_modeenc(&pmode, 0, opt, &oflags, &fmode, &convconfig); + modestr = rb_io_oflags_modestr(oflags); + + tmp = rb_check_array_type(pname); + if (!NIL_P(tmp)) { + tmp = rb_ary_dup(tmp); + RBASIC(tmp)->klass = 0; + port = pipe_open_v(RARRAY_LEN(tmp), RARRAY_PTR(tmp), modestr, fmode, &convconfig); + rb_ary_clear(tmp); + } + else { + SafeStringValue(pname); + port = pipe_open_s(pname, modestr, fmode, &convconfig); + } + if (NIL_P(port)) { + /* child */ + if (rb_block_given_p()) { + rb_yield(Qnil); + rb_io_flush(rb_stdout); + rb_io_flush(rb_stderr); + _exit(0); + } + return Qnil; + } + RBASIC(port)->klass = klass; + if (rb_block_given_p()) { + return rb_ensure(rb_yield, port, io_close, port); + } + return port; +} + +static void +rb_scan_open_args(int argc, VALUE *argv, + VALUE *fname_p, int *oflags_p, int *fmode_p, + convconfig_t *convconfig_p, mode_t *perm_p) +{ + VALUE opt=Qnil, fname, vmode, vperm; + int oflags, fmode; + mode_t perm; + + opt = pop_last_hash(&argc, argv); + rb_scan_args(argc, argv, "12", &fname, &vmode, &vperm); + FilePathValue(fname); +#if defined _WIN32 || defined __APPLE__ + { + static rb_encoding *fs_encoding; + rb_encoding *fname_encoding = rb_enc_get(fname); + if (!fs_encoding) + fs_encoding = rb_filesystem_encoding(); + if (rb_usascii_encoding() != fname_encoding + && rb_ascii8bit_encoding() != fname_encoding +#if defined __APPLE__ + && rb_utf8_encoding() != fname_encoding +#endif + && fs_encoding != 
fname_encoding) { + static VALUE fs_enc; + if (!fs_enc) + fs_enc = rb_enc_from_encoding(fs_encoding); + fname = rb_str_encode(fname, fs_enc, 0, Qnil); + } + } +#endif + + rb_io_extract_modeenc(&vmode, &vperm, opt, &oflags, &fmode, convconfig_p); + + perm = NIL_P(vperm) ? 0666 : NUM2UINT(vperm); + + *fname_p = fname; + *oflags_p = oflags; + *fmode_p = fmode; + *perm_p = perm; +} + +static VALUE +rb_open_file(int argc, VALUE *argv, VALUE io) +{ + VALUE fname; + int oflags, fmode; + convconfig_t convconfig; + mode_t perm; + + rb_scan_open_args(argc, argv, &fname, &oflags, &fmode, &convconfig, &perm); + rb_file_open_generic(io, fname, oflags, fmode, &convconfig, perm); + + return io; +} + + +/* + * Document-method: File::open + * + * call-seq: + * File.open(filename, mode="r" [, opt]) => file + * File.open(filename [, mode [, perm]] [, opt]) => file + * File.open(filename, mode="r" [, opt]) {|file| block } => obj + * File.open(filename [, mode [, perm]] [, opt]) {|file| block } => obj + * + * With no associated block, open is a synonym for + * File.new. If the optional code block is given, it will + * be passed file as an argument, and the File object will + * automatically be closed when the block terminates. In this instance, + * File.open returns the value of the block. + */ + +/* + * Document-method: IO::open + * + * call-seq: + * IO.open(fd, mode_string="r" [, opt] ) => io + * IO.open(fd, mode_string="r" [, opt] ) {|io| block } => obj + * + * With no associated block, open is a synonym for + * IO.new. If the optional code block is given, it will + * be passed io as an argument, and the IO object will + * automatically be closed when the block terminates. In this instance, + * IO.open returns the value of the block. 
+ * + */ + +static VALUE +rb_io_s_open(int argc, VALUE *argv, VALUE klass) +{ + VALUE io = rb_class_new_instance(argc, argv, klass); + + if (rb_block_given_p()) { + return rb_ensure(rb_yield, io, io_close, io); + } + + return io; +} + +/* + * call-seq: + * IO.sysopen(path, [mode, [perm]]) => fixnum + * + * Opens the given path, returning the underlying file descriptor as a + * Fixnum. + * + * IO.sysopen("testfile") #=> 3 + * + */ + +static VALUE +rb_io_s_sysopen(int argc, VALUE *argv) +{ + VALUE fname, vmode, vperm; + VALUE intmode; + int oflags, fd; + mode_t perm; + char *path; + + rb_scan_args(argc, argv, "12", &fname, &vmode, &vperm); + FilePathValue(fname); + + if (NIL_P(vmode)) + oflags = O_RDONLY; + else if (!NIL_P(intmode = rb_check_to_integer(vmode, "to_int"))) + oflags = NUM2INT(intmode); + else { + SafeStringValue(vmode); + oflags = rb_io_modestr_oflags(StringValueCStr(vmode)); + } + if (NIL_P(vperm)) perm = 0666; + else perm = NUM2UINT(vperm); + + RB_GC_GUARD(fname) = rb_str_new4(fname); + path = RSTRING_PTR(fname); + fd = rb_sysopen(path, oflags, perm); + return INT2NUM(fd); +} + +static VALUE +check_pipe_command(VALUE filename_or_command) +{ + char *s = RSTRING_PTR(filename_or_command); + long l = RSTRING_LEN(filename_or_command); + char *e = s + l; + int chlen; + + if (rb_enc_ascget(s, e, &chlen, rb_enc_get(filename_or_command)) == '|') { + VALUE cmd = rb_str_new(s+chlen, l-chlen); + OBJ_INFECT(cmd, filename_or_command); + return cmd; + } + return Qnil; +} + +/* + * call-seq: + * open(path [, mode_enc [, perm]] [, opt] ) => io or nil + * open(path [, mode_enc [, perm]] [, opt] ) {|io| block } => obj + * + * Creates an IO object connected to the given stream, + * file, or subprocess. + * + * If path does not start with a pipe character + * (``|''), treat it as the name of a file to open using + * the specified mode (defaulting to ``r''). + * + * The mode_enc is + * either a string or an integer. 
If it is an integer, it must be + * a bitwise-or of open(2) flags, such as File::RDWR or File::EXCL. + * If it is a string, it is either "mode", "mode:ext_enc", or + * "mode:ext_enc:int_enc". + * The mode is one of the following: + * + * r: read (default) + * w: write + * a: append + * + * The mode can be followed by "b" (means binary-mode), or "+" + * (means both reading and writing allowed) or both. + * If ext_enc (external encoding) is specified, + * strings read will be tagged with that encoding, + * and output strings will be converted + * to the specified encoding when writing. + * If two encoding names, + * ext_enc and int_enc (external encoding and internal encoding), + * are specified, the read string is converted from ext_enc + * to int_enc then tagged with the int_enc in read mode, + * and in write mode, the output string will be + * converted from int_enc to ext_enc before writing. + * + * If a file is being created, its initial permissions may be + * set using the integer third parameter. + * + * If a block is specified, it will be invoked with the + * File object as a parameter, and the file will be + * automatically closed when the block terminates. The call + * returns the value of the block. + * + * If path starts with a pipe character, a subprocess is + * created, connected to the caller by a pair of pipes. The returned + * IO object may be used to write to the standard input + * and read from the standard output of this subprocess. If the command + * following the ``|'' is a single minus sign, Ruby forks, + * and this subprocess is connected to the parent. In the subprocess, + * the open call returns nil. If the command + * is not ``-'', the subprocess runs the command. If a + * block is associated with an open("|-") call, that block + * will be run twice---once in the parent and once in the child. The + * block parameter will be an IO object in the parent and + * nil in the child. 
The parent's IO object + * will be connected to the child's $stdin and + * $stdout. The subprocess will be terminated at the end + * of the block. + * + * open("testfile") do |f| + * print f.gets + * end + * + * produces: + * + * This is line one + * + * Open a subprocess and read its output: + * + * cmd = open("|date") + * print cmd.gets + * cmd.close + * + * produces: + * + * Wed Apr 9 08:56:31 CDT 2003 + * + * Open a subprocess running the same Ruby program: + * + * f = open("|-", "w+") + * if f == nil + * puts "in Child" + * exit + * else + * puts "Got: #{f.gets}" + * end + * + * produces: + * + * Got: in Child + * + * Open a subprocess using a block to receive the I/O object: + * + * open("|-") do |f| + * if f == nil + * puts "in Child" + * else + * puts "Got: #{f.gets}" + * end + * end + * + * produces: + * + * Got: in Child + */ + +static VALUE +rb_f_open(int argc, VALUE *argv) +{ + ID to_open = 0; + int redirect = Qfalse; + + if (argc >= 1) { + CONST_ID(to_open, "to_open"); + if (rb_respond_to(argv[0], to_open)) { + redirect = Qtrue; + } + else { + VALUE tmp = argv[0]; + FilePathValue(tmp); + if (NIL_P(tmp)) { + redirect = Qtrue; + } + else { + VALUE cmd = check_pipe_command(tmp); + if (!NIL_P(cmd)) { + argv[0] = cmd; + return rb_io_s_popen(argc, argv, rb_cIO); + } + } + } + } + if (redirect) { + VALUE io = rb_funcall2(argv[0], to_open, argc-1, argv+1); + + if (rb_block_given_p()) { + return rb_ensure(rb_yield, io, io_close, io); + } + return io; + } + return rb_io_s_open(argc, argv, rb_cFile); +} + +static VALUE +rb_io_open(VALUE filename, VALUE vmode, VALUE vperm, VALUE opt) +{ + VALUE cmd; + int oflags, fmode; + convconfig_t convconfig; + mode_t perm; + + rb_io_extract_modeenc(&vmode, &vperm, opt, &oflags, &fmode, &convconfig); + perm = NIL_P(vperm) ? 
0666 : NUM2UINT(vperm); + + if (!NIL_P(cmd = check_pipe_command(filename))) { + return pipe_open_s(cmd, rb_io_oflags_modestr(oflags), fmode, &convconfig); + } + else { + return rb_file_open_generic(io_alloc(rb_cFile), filename, + oflags, fmode, &convconfig, perm); + } +} + +static VALUE +rb_io_open_with_args(int argc, VALUE *argv) +{ + VALUE io; + + io = io_alloc(rb_cFile); + rb_open_file(argc, argv, io); + return io; +} + +static VALUE +io_reopen(VALUE io, VALUE nfile) +{ + rb_io_t *fptr, *orig; + int fd, fd2; + off_t pos = 0; + + nfile = rb_io_get_io(nfile); + if (rb_safe_level() >= 4 && + (!OBJ_UNTRUSTED(io) || !OBJ_UNTRUSTED(nfile))) { + rb_raise(rb_eSecurityError, "Insecure: can't reopen"); + } + GetOpenFile(io, fptr); + GetOpenFile(nfile, orig); + + if (fptr == orig) return io; + if (IS_PREP_STDIO(fptr)) { + if ((fptr->stdio_file == stdin && !(orig->mode & FMODE_READABLE)) || + (fptr->stdio_file == stdout && !(orig->mode & FMODE_WRITABLE)) || + (fptr->stdio_file == stderr && !(orig->mode & FMODE_WRITABLE))) { + rb_raise(rb_eArgError, + "%s can't change access mode from \"%s\" to \"%s\"", + PREP_STDIO_NAME(fptr), rb_io_fmode_modestr(fptr->mode), + rb_io_fmode_modestr(orig->mode)); + } + } + if (orig->mode & FMODE_READABLE) { + pos = io_tell(orig); + } + if (orig->mode & FMODE_WRITABLE) { + if (io_fflush(orig) < 0) + rb_sys_fail(0); + } + if (fptr->mode & FMODE_WRITABLE) { + if (io_fflush(fptr) < 0) + rb_sys_fail(0); + } + + /* copy rb_io_t structure */ + fptr->mode = orig->mode | (fptr->mode & FMODE_PREP); + fptr->pid = orig->pid; + fptr->lineno = orig->lineno; + if (RTEST(orig->pathv)) fptr->pathv = orig->pathv; + else if (!IS_PREP_STDIO(fptr)) fptr->pathv = Qnil; + fptr->finalize = orig->finalize; +#if defined (__CYGWIN__) || !defined(HAVE_FORK) + if (fptr->finalize == pipe_finalize) + pipe_add_fptr(fptr); +#endif + + fd = fptr->fd; + fd2 = orig->fd; + if (fd != fd2) { + if (IS_PREP_STDIO(fptr)) { + /* need to keep stdio objects */ + if (dup2(fd2, fd) < 0) + 
rb_sys_fail_path(orig->pathv); + } + else { + if (fptr->stdio_file) + fclose(fptr->stdio_file); + else + close(fptr->fd); + fptr->stdio_file = 0; + fptr->fd = -1; + if (dup2(fd2, fd) < 0) + rb_sys_fail_path(orig->pathv); + fptr->fd = fd; + } + rb_thread_fd_close(fd); + if ((orig->mode & FMODE_READABLE) && pos >= 0) { + if (io_seek(fptr, pos, SEEK_SET) < 0) { + rb_sys_fail_path(fptr->pathv); + } + if (io_seek(orig, pos, SEEK_SET) < 0) { + rb_sys_fail_path(orig->pathv); + } + } + } + + if (fptr->mode & FMODE_BINMODE) { + rb_io_binmode(io); + } + + RBASIC(io)->klass = rb_obj_class(nfile); + return io; +} + +/* + * call-seq: + * ios.reopen(other_IO) => ios + * ios.reopen(path, mode_str) => ios + * + * Reassociates ios with the I/O stream given in + * other_IO or to a new stream opened on path. This may + * dynamically change the actual class of this stream. + * + * f1 = File.new("testfile") + * f2 = File.new("testfile") + * f2.readlines[0] #=> "This is line one\n" + * f2.reopen(f1) #=> # + * f2.readlines[0] #=> "This is line one\n" + */ + +static VALUE +rb_io_reopen(int argc, VALUE *argv, VALUE file) +{ + VALUE fname, nmode; + int oflags; + rb_io_t *fptr; + + rb_secure(4); + if (rb_scan_args(argc, argv, "11", &fname, &nmode) == 1) { + VALUE tmp = rb_io_check_io(fname); + if (!NIL_P(tmp)) { + return io_reopen(file, tmp); + } + } + + FilePathValue(fname); + rb_io_taint_check(file); + fptr = RFILE(file)->fptr; + if (!fptr) { + fptr = RFILE(file)->fptr = ALLOC(rb_io_t); + MEMZERO(fptr, rb_io_t, 1); + } + + if (!NIL_P(nmode)) { + int fmode = rb_io_modestr_fmode(StringValueCStr(nmode)); + if (IS_PREP_STDIO(fptr) && + ((fptr->mode & FMODE_READWRITE) & (fmode & FMODE_READWRITE)) != + (fptr->mode & FMODE_READWRITE)) { + rb_raise(rb_eArgError, + "%s can't change access mode from \"%s\" to \"%s\"", + PREP_STDIO_NAME(fptr), rb_io_fmode_modestr(fptr->mode), + rb_io_fmode_modestr(fmode)); + } + fptr->mode = fmode; + rb_io_mode_enc(fptr, StringValueCStr(nmode)); + fptr->encs.ecflags 
= 0; + fptr->encs.ecopts = Qnil; + } + + fptr->pathv = rb_str_new_frozen(fname); + oflags = rb_io_fmode_oflags(fptr->mode); + if (fptr->fd < 0) { + fptr->fd = rb_sysopen(RSTRING_PTR(fptr->pathv), oflags, 0666); + fptr->stdio_file = 0; + return file; + } + + if (fptr->mode & FMODE_WRITABLE) { + if (io_fflush(fptr) < 0) + rb_sys_fail(0); + } + fptr->rbuf_off = fptr->rbuf_len = 0; + + if (fptr->stdio_file) { + if (freopen(RSTRING_PTR(fptr->pathv), rb_io_oflags_modestr(oflags), fptr->stdio_file) == 0) { + rb_sys_fail_path(fptr->pathv); + } + fptr->fd = fileno(fptr->stdio_file); +#ifdef USE_SETVBUF + if (setvbuf(fptr->stdio_file, NULL, _IOFBF, 0) != 0) + rb_warn("setvbuf() can't be honoured for %s", RSTRING_PTR(fptr->pathv)); +#endif + } + else { + if (close(fptr->fd) < 0) + rb_sys_fail_path(fptr->pathv); + fptr->fd = -1; + fptr->fd = rb_sysopen(RSTRING_PTR(fptr->pathv), oflags, 0666); + } + + return file; +} + +/* :nodoc: */ +static VALUE +rb_io_init_copy(VALUE dest, VALUE io) +{ + rb_io_t *fptr, *orig; + int fd; + VALUE write_io; + off_t pos; + + io = rb_io_get_io(io); + if (dest == io) return dest; + GetOpenFile(io, orig); + MakeOpenFile(dest, fptr); + + rb_io_flush(io); + + /* copy rb_io_t structure */ + fptr->mode = orig->mode & ~FMODE_PREP; + fptr->encs = orig->encs; + fptr->pid = orig->pid; + fptr->lineno = orig->lineno; + if (!NIL_P(orig->pathv)) fptr->pathv = orig->pathv; + fptr->finalize = orig->finalize; +#if defined (__CYGWIN__) || !defined(HAVE_FORK) + if (fptr->finalize == pipe_finalize) + pipe_add_fptr(fptr); +#endif + + fd = ruby_dup(orig->fd); + fptr->fd = fd; + pos = io_tell(orig); + if (0 <= pos) + io_seek(fptr, pos, SEEK_SET); + if (fptr->mode & FMODE_BINMODE) { + rb_io_binmode(dest); + } + + write_io = GetWriteIO(io); + if (io != write_io) { + write_io = rb_obj_dup(write_io); + fptr->tied_io_for_writing = write_io; + rb_ivar_set(dest, rb_intern("@tied_io_for_writing"), write_io); + } + + return dest; +} + +/* + * call-seq: + * 
ios.printf(format_string [, obj, ...] ) => nil + * + * Formats and writes to ios, converting parameters under + * control of the format string. See Kernel#sprintf + * for details. + */ + +VALUE +rb_io_printf(int argc, VALUE *argv, VALUE out) +{ + rb_io_write(out, rb_f_sprintf(argc, argv)); + return Qnil; +} + +/* + * call-seq: + * printf(io, string [, obj ... ] ) => nil + * printf(string [, obj ... ] ) => nil + * + * Equivalent to: + * io.write(sprintf(string, obj, ...) + * or + * $stdout.write(sprintf(string, obj, ...) + */ + +static VALUE +rb_f_printf(int argc, VALUE *argv) +{ + VALUE out; + + if (argc == 0) return Qnil; + if (TYPE(argv[0]) == T_STRING) { + out = rb_stdout; + } + else { + out = argv[0]; + argv++; + argc--; + } + rb_io_write(out, rb_f_sprintf(argc, argv)); + + return Qnil; +} + +/* + * call-seq: + * ios.print() => nil + * ios.print(obj, ...) => nil + * + * Writes the given object(s) to ios. The stream must be + * opened for writing. If the output record separator ($\\) + * is not nil, it will be appended to the output. If no + * arguments are given, prints $_. Objects that aren't + * strings will be converted by calling their to_s method. + * With no argument, prints the contents of the variable $_. + * Returns nil. + * + * $stdout.print("This is ", 100, " percent.\n") + * + * produces: + * + * This is 100 percent. + */ + +VALUE +rb_io_print(int argc, VALUE *argv, VALUE out) +{ + int i; + VALUE line; + + /* if no argument given, print `$_' */ + if (argc == 0) { + argc = 1; + line = rb_lastline_get(); + argv = &line; + } + for (i=0; i 0 && !NIL_P(rb_output_rs)) { + rb_io_write(out, rb_output_rs); + } + + return Qnil; +} + +/* + * call-seq: + * print(obj, ...) => nil + * + * Prints each object in turn to $stdout. If the output + * field separator ($,) is not +nil+, its + * contents will appear between each field. If the output record + * separator ($\\) is not +nil+, it will be + * appended to the output. If no arguments are given, prints + * $_. 
Objects that aren't strings will be converted by + * calling their to_s method. + * + * print "cat", [1,2,3], 99, "\n" + * $, = ", " + * $\ = "\n" + * print "cat", [1,2,3], 99 + * + * produces: + * + * cat12399 + * cat, 1, 2, 3, 99 + */ + +static VALUE +rb_f_print(int argc, VALUE *argv) +{ + rb_io_print(argc, argv, rb_stdout); + return Qnil; +} + +/* + * call-seq: + * ios.putc(obj) => obj + * + * If obj is Numeric, write the character whose + * code is obj, otherwise write the first character of the + * string representation of obj to ios. + * + * $stdout.putc "A" + * $stdout.putc 65 + * + * produces: + * + * AA + */ + +static VALUE +rb_io_putc(VALUE io, VALUE ch) +{ + char c = NUM2CHR(ch); + + rb_io_write(io, rb_str_new(&c, 1)); + return ch; +} + +/* + * call-seq: + * putc(int) => int + * + * Equivalent to: + * + * $stdout.putc(int) + */ + +static VALUE +rb_f_putc(VALUE recv, VALUE ch) +{ + if (recv == rb_stdout) { + return rb_io_putc(recv, ch); + } + return rb_funcall2(rb_stdout, rb_intern("putc"), 1, &ch); +} + +static VALUE +io_puts_ary(VALUE ary, VALUE out, int recur) +{ + VALUE tmp; + long i; + + if (recur) { + tmp = rb_str_new2("[...]"); + rb_io_puts(1, &tmp, out); + return Qnil; + } + for (i=0; i nil + * + * Writes the given objects to ios as with + * IO#print. Writes a record separator (typically a + * newline) after any that do not already end with a newline sequence. + * If called with an array argument, writes each element on a new line. + * If called without arguments, outputs a single record separator. + * + * $stdout.puts("this", "is", "a", "test") + * + * produces: + * + * this + * is + * a + * test + */ + +VALUE +rb_io_puts(int argc, VALUE *argv, VALUE out) +{ + int i; + VALUE line; + + /* if no argument given, print newline. */ + if (argc == 0) { + rb_io_write(out, rb_default_rs); + return Qnil; + } + for (i=0; i nil + * + * Equivalent to + * + * $stdout.puts(obj, ...) 
+ */ + +static VALUE +rb_f_puts(int argc, VALUE *argv, VALUE recv) +{ + if (recv == rb_stdout) { + return rb_io_puts(argc, argv, recv); + } + return rb_funcall2(rb_stdout, rb_intern("puts"), argc, argv); +} + +void +rb_p(VALUE obj) /* for debug print within C code */ +{ + VALUE str = rb_obj_as_string(rb_inspect(obj)); + if (TYPE(rb_stdout) == T_FILE && + rb_method_basic_definition_p(CLASS_OF(rb_stdout), id_write)) { + io_write(rb_stdout, str, 1); + io_write(rb_stdout, rb_default_rs, 0); + } + else { + rb_io_write(rb_stdout, str); + rb_io_write(rb_stdout, rb_default_rs); + } +} + +/* + * call-seq: + * p(obj) => obj + * p(obj1, obj2, ...) => [obj, ...] + * p() => nil + * + * For each object, directly writes + * _obj_.+inspect+ followed by the current output + * record separator to the program's standard output. + * + * S = Struct.new(:name, :state) + * s = S['dave', 'TX'] + * p s + * + * produces: + * + * # + */ + +static VALUE +rb_f_p(int argc, VALUE *argv, VALUE self) +{ + int i; + VALUE ret = Qnil; + + for (i=0; i 1) { + ret = rb_ary_new4(argc, argv); + } + if (TYPE(rb_stdout) == T_FILE) { + rb_io_flush(rb_stdout); + } + return ret; +} + +/* + * call-seq: + * obj.display(port=$>) => nil + * + * Prints obj on the given port (default $>). 
+ * Equivalent to: + * + * def display(port=$>) + * port.write self + * end + * + * For example: + * + * 1.display + * "cat".display + * [ 4, 5, 6 ].display + * puts + * + * produces: + * + * 1cat456 + */ + +static VALUE +rb_obj_display(int argc, VALUE *argv, VALUE self) +{ + VALUE out; + + if (argc == 0) { + out = rb_stdout; + } + else { + rb_scan_args(argc, argv, "01", &out); + } + rb_io_write(out, self); + + return Qnil; +} + +void +rb_write_error2(const char *mesg, long len) +{ + if (rb_stderr == orig_stderr || RFILE(orig_stderr)->fptr->fd < 0) { + fwrite(mesg, sizeof(char), len, stderr); + } + else { + rb_io_write(rb_stderr, rb_str_new(mesg, len)); + } +} + +void +rb_write_error(const char *mesg) +{ + rb_write_error2(mesg, strlen(mesg)); +} + +static void +must_respond_to(ID mid, VALUE val, ID id) +{ + if (!rb_respond_to(val, mid)) { + rb_raise(rb_eTypeError, "%s must have %s method, %s given", + rb_id2name(id), rb_id2name(mid), + rb_obj_classname(val)); + } +} + +static void +stdout_setter(VALUE val, ID id, VALUE *variable) +{ + must_respond_to(id_write, val, id); + *variable = val; +} + +static VALUE +prep_io(int fd, int fmode, VALUE klass, const char *path) +{ + rb_io_t *fp; + VALUE io = io_alloc(klass); + + MakeOpenFile(io, fp); + fp->fd = fd; +#ifdef __CYGWIN__ + if (!isatty(fd)) { + fmode |= FMODE_BINMODE; + setmode(fd, O_BINARY); + } +#endif + fp->mode = fmode; + io_check_tty(fp); + if (path) fp->pathv = rb_obj_freeze(rb_str_new_cstr(path)); + + return io; +} + +VALUE +rb_io_fdopen(int fd, int oflags, const char *path) +{ + VALUE klass = rb_cIO; + + if (path && strcmp(path, "-")) klass = rb_cFile; + return prep_io(fd, rb_io_oflags_fmode(oflags), klass, path); +} + +static VALUE +prep_stdio(FILE *f, int fmode, VALUE klass, const char *path) +{ + rb_io_t *fptr; + VALUE io = prep_io(fileno(f), fmode|FMODE_PREP, klass, path); + + GetOpenFile(io, fptr); + fptr->stdio_file = f; + + return io; +} + +FILE * +rb_io_stdio_file(rb_io_t *fptr) +{ + if 
(!fptr->stdio_file) { + int oflags = rb_io_fmode_oflags(fptr->mode); + fptr->stdio_file = rb_fdopen(fptr->fd, rb_io_oflags_modestr(oflags)); + } + return fptr->stdio_file; +} + +/* + * call-seq: + * IO.new(fd [, mode] [, opt]) => io + * + * Returns a new IO object (a stream) for the given + * IO object or integer file descriptor and mode + * string. See also IO#fileno and + * IO.for_fd. + * + * === Parameters + * fd:: numeric file descriptor + * mode:: file mode. a string or an integer + * opt:: hash for specifiying mode by name. + * + * ==== Mode + * When mode is an integer it must be combination of + * the modes defined in File::Constants. + * + * When mode is a string it must be in one of the + * following forms: + * - "fmode", + * - "fmode:extern", + * - "fmode:extern:intern". + * extern is the external encoding name for the IO. + * intern is the internal encoding. + * fmode must be combination of the directives. See + * the description of class +IO+ for a description of the directives. + * + * ==== Options + * opt can have the following keys + * :mode :: + * same as mode parameter + * :external_encoding :: + * external encoding for the IO. "-" is a + * synonym for the default external encoding. + * :internal_encoding :: + * internal encoding for the IO. + * "-" is a synonym for the default internal encoding. + * If the value is nil no conversion occurs. + * :encoding :: + * specifies external and internal encodings as "extern:intern". + * :textmode :: + * If the value is truth value, same as "b" in argument mode. + * :binmode :: + * If the value is truth value, same as "t" in argument mode. + * + * Also opt can have same keys in String#encode for + * controlling conversion between the external encoding and the internal encoding. 
+ * + * === Example1 + * + * a = IO.new(2,"w") # '2' is standard error + * $stderr.puts "Hello" + * a.puts "World" + * + * produces: + * + * Hello + * World + * + * === Example2 + * io = IO.new(2, mode: 'w:UTF-16LE', cr_newline: true) + * io.puts "Hello, World!" + * + * io = IO.new(2, mode: 'w', cr_newline: true, external_encoding: Encoding::UTF_16LE) + * io.puts "Hello, World!" + * + * both of aboves print "Hello, World!" in UTF-16LE to standard error output with + * converting EOL generated by puts to CR. + */ + +static VALUE +rb_io_initialize(int argc, VALUE *argv, VALUE io) +{ + VALUE fnum, vmode; + rb_io_t *fp; + int fd, fmode, oflags = O_RDONLY; + convconfig_t convconfig; + VALUE opt; + struct stat st; + + rb_secure(4); + + opt = pop_last_hash(&argc, argv); + rb_scan_args(argc, argv, "11", &fnum, &vmode); + rb_io_extract_modeenc(&vmode, 0, opt, &oflags, &fmode, &convconfig); + + fd = NUM2INT(fnum); + if (fstat(fd, &st) == -1) rb_sys_fail(0); + UPDATE_MAXFD(fd); + if (NIL_P(vmode)) { +#if defined(HAVE_FCNTL) && defined(F_GETFL) + oflags = fcntl(fd, F_GETFL); + if (oflags == -1) rb_sys_fail(0); + fmode = rb_io_oflags_fmode(oflags); +#endif + } + MakeOpenFile(io, fp); + fp->fd = fd; + fp->mode = fmode; + fp->encs = convconfig; + clear_codeconv(fp); + io_check_tty(fp); + if (fileno(stdin) == fd) + fp->stdio_file = stdin; + else if (fileno(stdout) == fd) + fp->stdio_file = stdout; + else if (fileno(stderr) == fd) + fp->stdio_file = stderr; + + return io; +} + +/* + * call-seq: + * File.new(filename, mode="r" [, opt]) => file + * File.new(filename [, mode [, perm]] [, opt]) => file + * + * Opens the file named by _filename_ according to + * _mode_ (default is ``r'') and returns a new + * File object. + * + * === Parameters + * See the description of class +IO+ for a description of _mode_. + * The file mode may optionally be specified as a +Fixnum+ + * by _or_-ing together the flags (O_RDONLY etc, + * again described under +IO+). 
+ * + * Optional permission bits may be given in _perm_. + * These mode and permission bits are platform dependent; + * on Unix systems, see open(2) for details. + * + * Optional _opt_ parameter is same as in . + * + * === Examples + * + * f = File.new("testfile", "r") + * f = File.new("newfile", "w+") + * f = File.new("newfile", File::CREAT|File::TRUNC|File::RDWR, 0644) + */ + +static VALUE +rb_file_initialize(int argc, VALUE *argv, VALUE io) +{ + if (RFILE(io)->fptr) { + rb_raise(rb_eRuntimeError, "reinitializing File"); + } + if (0 < argc && argc < 3) { + VALUE fd = rb_check_convert_type(argv[0], T_FIXNUM, "Fixnum", "to_int"); + + if (!NIL_P(fd)) { + argv[0] = fd; + return rb_io_initialize(argc, argv, io); + } + } + rb_open_file(argc, argv, io); + + return io; +} + +static VALUE +rb_io_s_new(int argc, VALUE *argv, VALUE klass) +{ + if (rb_block_given_p()) { + const char *cname = rb_class2name(klass); + + rb_warn("%s::new() does not take block; use %s::open() instead", + cname, cname); + } + return rb_class_new_instance(argc, argv, klass); +} + + +/* + * call-seq: + * IO.for_fd(fd, mode [, opt]) => io + * + * Synonym for IO.new. 
+ * + */ + +static VALUE +rb_io_s_for_fd(int argc, VALUE *argv, VALUE klass) +{ + VALUE io = rb_obj_alloc(klass); + rb_io_initialize(argc, argv, io); + return io; +} + +static void +argf_mark(void *ptr) +{ + struct argf *p = ptr; + rb_gc_mark(p->filename); + rb_gc_mark(p->current_file); + rb_gc_mark(p->argv); + rb_gc_mark(p->encs.ecopts); +} + +static void +argf_free(void *ptr) +{ + struct argf *p = ptr; + free(p->inplace); +} + +static inline void +argf_init(struct argf *p, VALUE v) +{ + p->filename = Qnil; + p->current_file = Qnil; + p->lineno = 0; + p->argv = v; +} + +static VALUE +argf_alloc(VALUE klass) +{ + struct argf *p; + VALUE argf = Data_Make_Struct(klass, struct argf, argf_mark, argf_free, p); + + argf_init(p, Qnil); + return argf; +} + +#undef rb_argv + +static VALUE +argf_initialize(VALUE argf, VALUE argv) +{ + memset(&ARGF, 0, sizeof(ARGF)); + argf_init(&ARGF, argv); + + return argf; +} + +static VALUE +argf_initialize_copy(VALUE argf, VALUE orig) +{ + ARGF = argf_of(orig); + ARGF.argv = rb_obj_dup(ARGF.argv); + if (ARGF.inplace) { + const char *inplace = ARGF.inplace; + ARGF.inplace = 0; + ARGF.inplace = ruby_strdup(inplace); + } + return argf; +} + +static VALUE +argf_set_lineno(VALUE argf, VALUE val) +{ + ARGF.lineno = NUM2INT(val); + ARGF.last_lineno = ARGF.lineno; + return Qnil; +} + +static VALUE +argf_lineno(VALUE argf) +{ + return INT2FIX(ARGF.lineno); +} + +static VALUE +argf_forward(int argc, VALUE *argv, VALUE argf) +{ + return rb_funcall3(ARGF.current_file, rb_frame_this_func(), argc, argv); +} + +#define next_argv() argf_next_argv(argf) +#define ARGF_GENERIC_INPUT_P() \ + (ARGF.current_file == rb_stdin && TYPE(ARGF.current_file) != T_FILE) +#define ARGF_FORWARD(argc, argv) do {\ + if (ARGF_GENERIC_INPUT_P())\ + return argf_forward(argc, argv, argf);\ +} while (0) +#define NEXT_ARGF_FORWARD(argc, argv) do {\ + if (!next_argv()) return Qnil;\ + ARGF_FORWARD(argc, argv);\ +} while (0) + +static void +argf_close(VALUE file) +{ + 
rb_funcall3(file, rb_intern("close"), 0, 0); +} + +static int +argf_next_argv(VALUE argf) +{ + char *fn; + rb_io_t *fptr; + int stdout_binmode = 0; + + if (TYPE(rb_stdout) == T_FILE) { + GetOpenFile(rb_stdout, fptr); + if (fptr->mode & FMODE_BINMODE) + stdout_binmode = 1; + } + + if (ARGF.init_p == 0) { + if (!NIL_P(ARGF.argv) && RARRAY_LEN(ARGF.argv) > 0) { + ARGF.next_p = 1; + } + else { + ARGF.next_p = -1; + } + ARGF.init_p = 1; + } + + if (ARGF.next_p == 1) { + ARGF.next_p = 0; + retry: + if (RARRAY_LEN(ARGF.argv) > 0) { + ARGF.filename = rb_ary_shift(ARGF.argv); + fn = StringValueCStr(ARGF.filename); + if (strlen(fn) == 1 && fn[0] == '-') { + ARGF.current_file = rb_stdin; + if (ARGF.inplace) { + rb_warn("Can't do inplace edit for stdio; skipping"); + goto retry; + } + } + else { + int fr = rb_sysopen(fn, O_RDONLY, 0); + + if (ARGF.inplace) { + struct stat st; +#ifndef NO_SAFE_RENAME + struct stat st2; +#endif + VALUE str; + int fw; + + if (TYPE(rb_stdout) == T_FILE && rb_stdout != orig_stdout) { + rb_io_close(rb_stdout); + } + fstat(fr, &st); + if (*ARGF.inplace) { + str = rb_str_new2(fn); +#ifdef NO_LONG_FNAME + ruby_add_suffix(str, ARGF.inplace); +#else + rb_str_cat2(str, ARGF.inplace); +#endif +#ifdef NO_SAFE_RENAME + (void)close(fr); + (void)unlink(RSTRING_PTR(str)); + (void)rename(fn, RSTRING_PTR(str)); + fr = rb_sysopen(RSTRING_PTR(str), O_RDONLY, 0); +#else + if (rename(fn, RSTRING_PTR(str)) < 0) { + rb_warn("Can't rename %s to %s: %s, skipping file", + fn, RSTRING_PTR(str), strerror(errno)); + close(fr); + goto retry; + } +#endif + } + else { +#ifdef NO_SAFE_RENAME + rb_fatal("Can't do inplace edit without backup"); +#else + if (unlink(fn) < 0) { + rb_warn("Can't remove %s: %s, skipping file", + fn, strerror(errno)); + close(fr); + goto retry; + } +#endif + } + fw = rb_sysopen(fn, O_WRONLY|O_CREAT|O_TRUNC, 0666); +#ifndef NO_SAFE_RENAME + fstat(fw, &st2); +#ifdef HAVE_FCHMOD + fchmod(fw, st.st_mode); +#else + chmod(fn, st.st_mode); +#endif + if 
(st.st_uid!=st2.st_uid || st.st_gid!=st2.st_gid) { + fchown(fw, st.st_uid, st.st_gid); + } +#endif + rb_stdout = prep_io(fw, FMODE_WRITABLE, rb_cFile, fn); + if (stdout_binmode) rb_io_binmode(rb_stdout); + } + ARGF.current_file = prep_io(fr, FMODE_READABLE, rb_cFile, fn); + } + if (ARGF.binmode) rb_io_ascii8bit_binmode(ARGF.current_file); + if (ARGF.encs.enc) { + rb_io_t *fptr; + + GetOpenFile(ARGF.current_file, fptr); + fptr->encs = ARGF.encs; + clear_codeconv(fptr); + } + } + else { + ARGF.next_p = 1; + return Qfalse; + } + } + else if (ARGF.next_p == -1) { + ARGF.current_file = rb_stdin; + ARGF.filename = rb_str_new2("-"); + if (ARGF.inplace) { + rb_warn("Can't do inplace edit for stdio"); + rb_stdout = orig_stdout; + } + } + return Qtrue; +} + +static VALUE +argf_getline(int argc, VALUE *argv, VALUE argf) +{ + VALUE line; + + retry: + if (!next_argv()) return Qnil; + if (ARGF_GENERIC_INPUT_P()) { + line = rb_funcall3(ARGF.current_file, rb_intern("gets"), argc, argv); + } + else { + if (argc == 0 && rb_rs == rb_default_rs) { + line = rb_io_gets(ARGF.current_file); + } + else { + line = rb_io_getline(argc, argv, ARGF.current_file); + } + if (NIL_P(line) && ARGF.next_p != -1) { + argf_close(ARGF.current_file); + ARGF.next_p = 1; + goto retry; + } + } + if (!NIL_P(line)) { + ARGF.lineno++; + ARGF.last_lineno = ARGF.lineno; + } + return line; +} + +static VALUE +argf_lineno_getter(ID id, VALUE *var) +{ + VALUE argf = *var; + return INT2FIX(ARGF.last_lineno); +} + +static void +argf_lineno_setter(VALUE val, ID id, VALUE *var) +{ + VALUE argf = *var; + int n = NUM2INT(val); + ARGF.last_lineno = ARGF.lineno = n; +} + +static VALUE argf_gets(int, VALUE *, VALUE); + +/* + * call-seq: + * gets(sep=$/) => string or nil + * gets(limit) => string or nil + * gets(sep,limit) => string or nil + * + * Returns (and assigns to $_) the next line from the list + * of files in +ARGV+ (or $*), or from standard input if + * no files are present on the command line. 
Returns +nil+ at end of + * file. The optional argument specifies the record separator. The + * separator is included with the contents of each record. A separator + * of +nil+ reads the entire contents, and a zero-length separator + * reads the input one paragraph at a time, where paragraphs are + * divided by two consecutive newlines. If the first argument is an + * integer, or optional second argument is given, the returning string + * would not be longer than the given value in bytes. If multiple + * filenames are present in +ARGV+, +gets(nil)+ will read the contents + * one file at a time. + * + * ARGV << "testfile" + * print while gets + * + * produces: + * + * This is line one + * This is line two + * This is line three + * And so on... + * + * The style of programming using $_ as an implicit + * parameter is gradually losing favor in the Ruby community. + */ + +static VALUE +rb_f_gets(int argc, VALUE *argv, VALUE recv) +{ + if (recv == argf) { + return argf_gets(argc, argv, argf); + } + return rb_funcall2(argf, rb_intern("gets"), argc, argv); +} + +static VALUE +argf_gets(int argc, VALUE *argv, VALUE argf) +{ + VALUE line; + + line = argf_getline(argc, argv, argf); + rb_lastline_set(line); + + return line; +} + +VALUE +rb_gets(void) +{ + VALUE line; + + if (rb_rs != rb_default_rs) { + return rb_f_gets(0, 0, argf); + } + + retry: + if (!next_argv()) return Qnil; + line = rb_io_gets(ARGF.current_file); + if (NIL_P(line) && ARGF.next_p != -1) { + rb_io_close(ARGF.current_file); + ARGF.next_p = 1; + goto retry; + } + rb_lastline_set(line); + if (!NIL_P(line)) { + ARGF.lineno++; + ARGF.last_lineno = ARGF.lineno; + } + + return line; +} + +static VALUE argf_readline(int, VALUE *, VALUE); + +/* + * call-seq: + * readline(sep=$/) => string + * readline(limit) => string + * readline(sep, limit) => string + * + * Equivalent to Kernel::gets, except + * +readline+ raises +EOFError+ at end of file. 
+ */ + +static VALUE +rb_f_readline(int argc, VALUE *argv, VALUE recv) +{ + if (recv == argf) { + return argf_readline(argc, argv, argf); + } + return rb_funcall2(argf, rb_intern("readline"), argc, argv); +} + +static VALUE +argf_readline(int argc, VALUE *argv, VALUE argf) +{ + VALUE line; + + if (!next_argv()) rb_eof_error(); + ARGF_FORWARD(argc, argv); + line = argf_gets(argc, argv, argf); + if (NIL_P(line)) { + rb_eof_error(); + } + + return line; +} + +static VALUE argf_readlines(int, VALUE *, VALUE); + +/* + * call-seq: + * readlines(sep=$/) => array + * readlines(limit) => array + * readlines(sep,limit) => array + * + * Returns an array containing the lines returned by calling + * Kernel.gets(sep) until the end of file. + */ + +static VALUE +rb_f_readlines(int argc, VALUE *argv, VALUE recv) +{ + if (recv == argf) { + return argf_readlines(argc, argv, argf); + } + return rb_funcall2(argf, rb_intern("readlines"), argc, argv); +} + +static VALUE +argf_readlines(int argc, VALUE *argv, VALUE argf) +{ + VALUE line, ary; + + ary = rb_ary_new(); + while (!NIL_P(line = argf_getline(argc, argv, argf))) { + rb_ary_push(ary, line); + } + + return ary; +} + +/* + * call-seq: + * `cmd` => string + * + * Returns the standard output of running _cmd_ in a subshell. + * The built-in syntax %x{...} uses + * this method. Sets $? to the process status. 
+ * + * `date` #=> "Wed Apr 9 08:56:30 CDT 2003\n" + * `ls testdir`.split[1] #=> "main.rb" + * `echo oops && exit 99` #=> "oops\n" + * $?.exitstatus #=> 99 + */ + +static VALUE +rb_f_backquote(VALUE obj, VALUE str) +{ + volatile VALUE port; + VALUE result; + rb_io_t *fptr; + + SafeStringValue(str); + port = pipe_open_s(str, "r", FMODE_READABLE, NULL); + if (NIL_P(port)) return rb_str_new(0,0); + + GetOpenFile(port, fptr); + result = read_all(fptr, remain_size(fptr), Qnil); + rb_io_close(port); + + return result; +} + +#ifdef HAVE_SYS_SELECT_H +#include +#endif + +static VALUE +select_internal(VALUE read, VALUE write, VALUE except, struct timeval *tp, rb_fdset_t *fds) +{ + VALUE res, list; + fd_set *rp, *wp, *ep; + rb_io_t *fptr; + long i; + int max = 0, n; + int interrupt_flag = 0; + int pending = 0; + struct timeval timerec; + + if (!NIL_P(read)) { + Check_Type(read, T_ARRAY); + for (i=0; ifd, &fds[0]); + if (READ_DATA_PENDING(fptr)) { /* check for buffered data */ + pending++; + rb_fd_set(fptr->fd, &fds[3]); + } + if (max < fptr->fd) max = fptr->fd; + } + if (pending) { /* no blocking if there's buffered data */ + timerec.tv_sec = timerec.tv_usec = 0; + tp = &timerec; + } + rp = rb_fd_ptr(&fds[0]); + } + else + rp = 0; + + if (!NIL_P(write)) { + Check_Type(write, T_ARRAY); + for (i=0; ifd, &fds[1]); + if (max < fptr->fd) max = fptr->fd; + } + wp = rb_fd_ptr(&fds[1]); + } + else + wp = 0; + + if (!NIL_P(except)) { + Check_Type(except, T_ARRAY); + for (i=0; ifd, &fds[2]); + if (max < fptr->fd) max = fptr->fd; + if (io != write_io) { + GetOpenFile(write_io, fptr); + rb_fd_set(fptr->fd, &fds[2]); + if (max < fptr->fd) max = fptr->fd; + } + } + ep = rb_fd_ptr(&fds[2]); + } + else { + ep = 0; + } + + max++; + + n = rb_thread_select(max, rp, wp, ep, tp); + if (n < 0) { + rb_sys_fail(0); + } + if (!pending && n == 0) return Qnil; /* returns nil on timeout */ + + res = rb_ary_new2(3); + rb_ary_push(res, rp?rb_ary_new():rb_ary_new2(0)); + rb_ary_push(res, 
wp?rb_ary_new():rb_ary_new2(0)); + rb_ary_push(res, ep?rb_ary_new():rb_ary_new2(0)); + + if (interrupt_flag == 0) { + if (rp) { + list = RARRAY_PTR(res)[0]; + for (i=0; i< RARRAY_LEN(read); i++) { + VALUE obj = rb_ary_entry(read, i); + VALUE io = rb_io_get_io(obj); + GetOpenFile(io, fptr); + if (rb_fd_isset(fptr->fd, &fds[0]) || + rb_fd_isset(fptr->fd, &fds[3])) { + rb_ary_push(list, obj); + } + } + } + + if (wp) { + list = RARRAY_PTR(res)[1]; + for (i=0; i< RARRAY_LEN(write); i++) { + VALUE obj = rb_ary_entry(write, i); + VALUE io = rb_io_get_io(obj); + VALUE write_io = GetWriteIO(io); + GetOpenFile(write_io, fptr); + if (rb_fd_isset(fptr->fd, &fds[1])) { + rb_ary_push(list, obj); + } + } + } + + if (ep) { + list = RARRAY_PTR(res)[2]; + for (i=0; i< RARRAY_LEN(except); i++) { + VALUE obj = rb_ary_entry(except, i); + VALUE io = rb_io_get_io(obj); + VALUE write_io = GetWriteIO(io); + GetOpenFile(io, fptr); + if (rb_fd_isset(fptr->fd, &fds[2])) { + rb_ary_push(list, obj); + } + else if (io != write_io) { + GetOpenFile(write_io, fptr); + if (rb_fd_isset(fptr->fd, &fds[2])) { + rb_ary_push(list, obj); + } + } + } + } + } + + return res; /* returns an empty array on interrupt */ +} + +struct select_args { + VALUE read, write, except; + struct timeval *timeout; + rb_fdset_t fdsets[4]; +}; + +#ifdef HAVE_RB_FD_INIT +static VALUE +select_call(VALUE arg) +{ + struct select_args *p = (struct select_args *)arg; + + return select_internal(p->read, p->write, p->except, p->timeout, p->fdsets); +} + +static VALUE +select_end(VALUE arg) +{ + struct select_args *p = (struct select_args *)arg; + int i; + + for (i = 0; i < sizeof(p->fdsets) / sizeof(p->fdsets[0]); ++i) + rb_fd_term(&p->fdsets[i]); + return Qnil; +} +#endif + +/* + * call-seq: + * IO.select(read_array + * [, write_array + * [, error_array + * [, timeout]]] ) => array or nil + * + * See Kernel#select. 
+ */ + +static VALUE +rb_f_select(int argc, VALUE *argv, VALUE obj) +{ + VALUE timeout; + struct select_args args; + struct timeval timerec; + int i; + + rb_scan_args(argc, argv, "13", &args.read, &args.write, &args.except, &timeout); + if (NIL_P(timeout)) { + args.timeout = 0; + } + else { + timerec = rb_time_interval(timeout); + args.timeout = &timerec; + } + + for (i = 0; i < sizeof(args.fdsets) / sizeof(args.fdsets[0]); ++i) + rb_fd_init(&args.fdsets[i]); + +#ifdef HAVE_RB_FD_INIT + return rb_ensure(select_call, (VALUE)&args, select_end, (VALUE)&args); +#else + return select_internal(args.read, args.write, args.except, + args.timeout, args.fdsets); +#endif + +} + +static int +io_cntl(int fd, int cmd, long narg, int io_p) +{ + int retval; + +#ifdef HAVE_FCNTL +# if defined(__CYGWIN__) + retval = io_p?ioctl(fd, cmd, (void*)narg):fcntl(fd, cmd, narg); +# else + retval = io_p?ioctl(fd, cmd, narg):fcntl(fd, cmd, narg); +# endif +#else + if (!io_p) { + rb_notimplement(); + } + retval = ioctl(fd, cmd, narg); +#endif + return retval; +} + +static VALUE +rb_io_ctl(VALUE io, VALUE req, VALUE arg, int io_p) +{ + int cmd = NUM2ULONG(req); + rb_io_t *fptr; + long len = 0; + long narg = 0; + int retval; + + rb_secure(2); + + if (NIL_P(arg) || arg == Qfalse) { + narg = 0; + } + else if (FIXNUM_P(arg)) { + narg = FIX2LONG(arg); + } + else if (arg == Qtrue) { + narg = 1; + } + else { + VALUE tmp = rb_check_string_type(arg); + + if (NIL_P(tmp)) { + narg = NUM2LONG(arg); + } + else { + arg = tmp; +#ifdef IOCPARM_MASK +#ifndef IOCPARM_LEN +#define IOCPARM_LEN(x) (((x) >> 16) & IOCPARM_MASK) +#endif +#endif +#ifdef IOCPARM_LEN + len = IOCPARM_LEN(cmd); /* on BSDish systems we're safe */ +#else + len = 256; /* otherwise guess at what's safe */ +#endif + rb_str_modify(arg); + + if (len <= RSTRING_LEN(arg)) { + len = RSTRING_LEN(arg); + } + if (RSTRING_LEN(arg) < len) { + rb_str_resize(arg, len+1); + } + RSTRING_PTR(arg)[len] = 17; /* a little sanity check here */ + narg = 
(long)RSTRING_PTR(arg); + } + } + GetOpenFile(io, fptr); + retval = io_cntl(fptr->fd, cmd, narg, io_p); + if (retval < 0) rb_sys_fail_path(fptr->pathv); + if (TYPE(arg) == T_STRING && RSTRING_PTR(arg)[len] != 17) { + rb_raise(rb_eArgError, "return value overflowed string"); + } + + if (!io_p && cmd == F_SETFL) { + if (narg & O_NONBLOCK) { + fptr->mode |= FMODE_WSPLIT_INITIALIZED; + fptr->mode &= ~FMODE_WSPLIT; + } + else { + fptr->mode &= ~(FMODE_WSPLIT_INITIALIZED|FMODE_WSPLIT); + } + } + + return INT2NUM(retval); +} + + +/* + * call-seq: + * ios.ioctl(integer_cmd, arg) => integer + * + * Provides a mechanism for issuing low-level commands to control or + * query I/O devices. Arguments and results are platform dependent. If + * arg is a number, its value is passed directly. If it is a + * string, it is interpreted as a binary sequence of bytes. On Unix + * platforms, see ioctl(2) for details. Not implemented on + * all platforms. + */ + +static VALUE +rb_io_ioctl(int argc, VALUE *argv, VALUE io) +{ + VALUE req, arg; + + rb_scan_args(argc, argv, "11", &req, &arg); + return rb_io_ctl(io, req, arg, 1); +} + +/* + * call-seq: + * ios.fcntl(integer_cmd, arg) => integer + * + * Provides a mechanism for issuing low-level commands to control or + * query file-oriented I/O streams. Arguments and results are platform + * dependent. If arg is a number, its value is passed + * directly. If it is a string, it is interpreted as a binary sequence + * of bytes (Array#pack might be a useful way to build this + * string). On Unix platforms, see fcntl(2) for details. + * Not implemented on all platforms. 
+ */ + +static VALUE +rb_io_fcntl(int argc, VALUE *argv, VALUE io) +{ +#ifdef HAVE_FCNTL + VALUE req, arg; + + rb_scan_args(argc, argv, "11", &req, &arg); + return rb_io_ctl(io, req, arg, 0); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + +/* + * call-seq: + * syscall(fixnum [, args...]) => integer + * + * Calls the operating system function identified by _fixnum_, + * passing in the arguments, which must be either +String+ + * objects, or +Integer+ objects that ultimately fit within + * a native +long+. Up to nine parameters may be passed (14 + * on the Atari-ST). The function identified by _fixnum_ is system + * dependent. On some Unix systems, the numbers may be obtained from a + * header file called syscall.h. + * + * syscall 4, 1, "hello\n", 6 # '4' is write(2) on our box + * + * produces: + * + * hello + */ + +static VALUE +rb_f_syscall(int argc, VALUE *argv) +{ +#if defined(HAVE_SYSCALL) && !defined(__CHECKER__) +#ifdef atarist + unsigned long arg[14]; /* yes, we really need that many ! */ +#else + unsigned long arg[8]; +#endif + int retval = -1; + int i = 1; + int items = argc - 1; + + /* This probably won't work on machines where sizeof(long) != sizeof(int) + * or where sizeof(long) != sizeof(char*). But such machines will + * not likely have syscall implemented either, so who cares? 
+ */ + + rb_secure(2); + if (argc == 0) + rb_raise(rb_eArgError, "too few arguments for syscall"); + if (argc > sizeof(arg) / sizeof(arg[0])) + rb_raise(rb_eArgError, "too many arguments for syscall"); + arg[0] = NUM2LONG(argv[0]); argv++; + while (items--) { + VALUE v = rb_check_string_type(*argv); + + if (!NIL_P(v)) { + StringValue(v); + rb_str_modify(v); + arg[i] = (unsigned long)StringValueCStr(v); + } + else { + arg[i] = (unsigned long)NUM2LONG(*argv); + } + argv++; + i++; + } + + switch (argc) { + case 1: + retval = syscall(arg[0]); + break; + case 2: + retval = syscall(arg[0],arg[1]); + break; + case 3: + retval = syscall(arg[0],arg[1],arg[2]); + break; + case 4: + retval = syscall(arg[0],arg[1],arg[2],arg[3]); + break; + case 5: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4]); + break; + case 6: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4],arg[5]); + break; + case 7: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4],arg[5],arg[6]); + break; + case 8: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4],arg[5],arg[6], + arg[7]); + break; +#ifdef atarist + case 9: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4],arg[5],arg[6], + arg[7], arg[8]); + break; + case 10: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4],arg[5],arg[6], + arg[7], arg[8], arg[9]); + break; + case 11: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4],arg[5],arg[6], + arg[7], arg[8], arg[9], arg[10]); + break; + case 12: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4],arg[5],arg[6], + arg[7], arg[8], arg[9], arg[10], arg[11]); + break; + case 13: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4],arg[5],arg[6], + arg[7], arg[8], arg[9], arg[10], arg[11], arg[12]); + break; + case 14: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4],arg[5],arg[6], + arg[7], arg[8], arg[9], arg[10], arg[11], arg[12], arg[13]); + break; +#endif /* atarist */ + } + + if (retval < 0) rb_sys_fail(0); + return INT2NUM(retval); +#else + rb_notimplement(); + return 
Qnil; /* not reached */ +#endif +} + +static VALUE +io_new_instance(VALUE args) +{ + return rb_class_new_instance(2, (VALUE*)args+1, *(VALUE*)args); +} + +static void +io_encoding_set(rb_io_t *fptr, VALUE v1, VALUE v2, VALUE opt) +{ + rb_encoding *enc, *enc2; + int ecflags; + VALUE ecopts, tmp; + + if (!NIL_P(v2)) { + enc2 = rb_to_encoding(v1); + tmp = rb_check_string_type(v2); + if (!NIL_P(tmp)) { + char *p = StringValueCStr(tmp); + if (*p == '-' && *(p+1) == '\0') { + /* Special case - "-" => no transcoding */ + enc = enc2; + enc2 = NULL; + } + else + enc = rb_to_encoding(v2); + if (enc == enc2) { + /* Special case - "-" => no transcoding */ + enc2 = NULL; + } + } + else + enc = rb_to_encoding(v2); + ecflags = rb_econv_prepare_opts(opt, &ecopts); + } + else { + if (NIL_P(v1)) { + /* Set to default encodings */ + rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2); + ecflags = 0; + ecopts = Qnil; + } + else { + tmp = rb_check_string_type(v1); + if (!NIL_P(tmp)) { + parse_mode_enc(StringValueCStr(tmp), &enc, &enc2); + ecflags = rb_econv_prepare_opts(opt, &ecopts); + } + else { + rb_io_ext_int_to_encs(rb_to_encoding(v1), NULL, &enc, &enc2); + ecflags = 0; + ecopts = Qnil; + } + } + } + validate_enc_binmode(fptr->mode, enc, enc2); + fptr->encs.enc = enc; + fptr->encs.enc2 = enc2; + fptr->encs.ecflags = ecflags; + fptr->encs.ecopts = ecopts; + clear_codeconv(fptr); + +} + +/* + * call-seq: + * IO.pipe -> [read_io, write_io] + * IO.pipe(ext_enc) -> [read_io, write_io] + * IO.pipe("ext_enc:int_enc" [, opt]) -> [read_io, write_io] + * IO.pipe(ext_enc, int_enc [, opt]) -> [read_io, write_io] + * + * Creates a pair of pipe endpoints (connected to each other) and + * returns them as a two-element array of IO objects: + * [ read_io, write_io ]. Not + * available on all platforms. + * + * If an encoding (encoding name or encoding object) is specified as an optional argument, + * read string from pipe is tagged with the encoding specified. 
+ * If the argument is two colon-separated encoding names "A:B", + * the read string is converted from encoding A (external encoding) + * to encoding B (internal encoding), then tagged with B. + * If two optional arguments are specified, those must be + * encoding objects or encoding names, + * and the first one is the external encoding, + * and the second one is the internal encoding. + * If both the external encoding and the internal encoding are specified, + * the optional hash argument specifies the conversion options. + * + * In the example below, the two processes close the ends of the pipe + * that they are not using. This is not just a cosmetic nicety. The + * read end of a pipe will not generate an end of file condition if + * there are any writers with the pipe still open. In the case of the + * parent process, the rd.read will never return if it + * does not first issue a wr.close. + * + * rd, wr = IO.pipe + * + * if fork + * wr.close + * puts "Parent got: <#{rd.read}>" + * rd.close + * Process.wait + * else + * rd.close + * puts "Sending message to parent" + * wr.write "Hi Dad" + * wr.close + * end + * + * produces: + * + * Sending message to parent + * Parent got: <Hi Dad> + */ + +static VALUE +rb_io_s_pipe(int argc, VALUE *argv, VALUE klass) +{ + int pipes[2], state; + VALUE r, w, args[3], v1, v2; + VALUE opt; + rb_io_t *fptr, *fptr2; + int fmode = 0; + + opt = pop_last_hash(&argc, argv); + rb_scan_args(argc, argv, "02", &v1, &v2); + if (rb_pipe(pipes) == -1) + rb_sys_fail(0); + + args[0] = klass; + args[1] = INT2NUM(pipes[0]); + args[2] = INT2FIX(O_RDONLY); + r = rb_protect(io_new_instance, (VALUE)args, &state); + if (state) { + close(pipes[0]); + close(pipes[1]); + rb_jump_tag(state); + } + GetOpenFile(r, fptr); + io_encoding_set(fptr, v1, v2, opt); + args[1] = INT2NUM(pipes[1]); + args[2] = INT2FIX(O_WRONLY); + w = rb_protect(io_new_instance, (VALUE)args, &state); + if (state) { + close(pipes[1]); + if (!NIL_P(r)) rb_io_close(r); + rb_jump_tag(state); + } + 
GetOpenFile(w, fptr2); + rb_io_synchronized(fptr2); + + extract_binmode(opt, &fmode); + fptr->mode |= fmode; + fptr2->mode |= fmode; + + return rb_assoc_new(r, w); +} + +struct foreach_arg { + int argc; + VALUE *argv; + VALUE io; +}; + +static void +open_key_args(int argc, VALUE *argv, struct foreach_arg *arg) +{ + VALUE opt, v; + + FilePathValue(argv[0]); + arg->io = 0; + arg->argc = argc - 1; + arg->argv = argv + 1; + if (argc == 1) { + no_key: + arg->io = rb_io_open(argv[0], INT2NUM(O_RDONLY), INT2FIX(0666), Qnil); + return; + } + opt = pop_last_hash(&arg->argc, arg->argv); + if (NIL_P(opt)) goto no_key; + + v = rb_hash_aref(opt, sym_open_args); + if (!NIL_P(v)) { + VALUE args; + + v = rb_convert_type(v, T_ARRAY, "Array", "to_ary"); + args = rb_ary_new2(RARRAY_LEN(v)+1); + rb_ary_push(args, argv[0]); + rb_ary_concat(args, v); + + arg->io = rb_io_open_with_args(RARRAY_LEN(args), RARRAY_PTR(args)); + return; + } + arg->io = rb_io_open(argv[0], Qnil, Qnil, opt); +} + +static VALUE +io_s_foreach(struct foreach_arg *arg) +{ + VALUE str; + + while (!NIL_P(str = rb_io_gets_m(arg->argc, arg->argv, arg->io))) { + rb_yield(str); + } + return Qnil; +} + +/* + * call-seq: + * IO.foreach(name, sep=$/ [, open_args]) {|line| block } => nil + * IO.foreach(name, limit [, open_args]) {|line| block } => nil + * IO.foreach(name, sep, limit [, open_args]) {|line| block } => nil + * + * Executes the block for every line in the named I/O port, where lines + * are separated by sep. + * + * IO.foreach("testfile") {|x| print "GOT ", x } + * + * produces: + * + * GOT This is line one + * GOT This is line two + * GOT This is line three + * GOT And so on... + * + * If the last argument is a hash, it's the keyword argument to open. + * See IO.read for detail. 
+ * + */ + +static VALUE +rb_io_s_foreach(int argc, VALUE *argv, VALUE self) +{ + struct foreach_arg arg; + + rb_scan_args(argc, argv, "13", NULL, NULL, NULL, NULL); + RETURN_ENUMERATOR(self, argc, argv); + open_key_args(argc, argv, &arg); + if (NIL_P(arg.io)) return Qnil; + return rb_ensure(io_s_foreach, (VALUE)&arg, rb_io_close, arg.io); +} + +static VALUE +io_s_readlines(struct foreach_arg *arg) +{ + return rb_io_readlines(arg->argc, arg->argv, arg->io); +} + +/* + * call-seq: + * IO.readlines(name, sep=$/ [, open_args]) => array + * IO.readlines(name, limit [, open_args]) => array + * IO.readlines(name, sep, limit [, open_args]) => array + * + * Reads the entire file specified by name as individual + * lines, and returns those lines in an array. Lines are separated by + * sep. + * + * a = IO.readlines("testfile") + * a[0] #=> "This is line one\n" + * + * If the last argument is a hash, it's the keyword argument to open. + * See IO.read for detail. + * + */ + +static VALUE +rb_io_s_readlines(int argc, VALUE *argv, VALUE io) +{ + struct foreach_arg arg; + + rb_scan_args(argc, argv, "13", NULL, NULL, NULL, NULL); + open_key_args(argc, argv, &arg); + if (NIL_P(arg.io)) return Qnil; + return rb_ensure(io_s_readlines, (VALUE)&arg, rb_io_close, arg.io); +} + +static VALUE +io_s_read(struct foreach_arg *arg) +{ + return io_read(arg->argc, arg->argv, arg->io); +} + +/* + * call-seq: + * IO.read(name, [length [, offset]] ) => string + * IO.read(name, [length [, offset]], open_args) => string + * + * Opens the file, optionally seeks to the given offset, then returns + * length bytes (defaulting to the rest of the file). + * read ensures the file is closed before returning. + * + * If the last argument is a hash, it specifies option for internal + * open(). The key would be the following. open_args: is exclusive + * to others. + * + * encoding: string or encoding + * + * specifies encoding of the read string. encoding will be ignored + * if length is specified. 
+ * + * mode: string + * + * specifies mode argument for open(). it should start with "r" + * otherwise it would cause error. + * + * open_args: array of strings + * + * specifies arguments for open() as an array. + * + * IO.read("testfile") #=> "This is line one\nThis is line two\nThis is line three\nAnd so on...\n" + * IO.read("testfile", 20) #=> "This is line one\nThi" + * IO.read("testfile", 20, 10) #=> "ne one\nThis is line " + */ + +static VALUE +rb_io_s_read(int argc, VALUE *argv, VALUE io) +{ + VALUE offset; + struct foreach_arg arg; + + rb_scan_args(argc, argv, "13", NULL, NULL, &offset, NULL); + open_key_args(argc, argv, &arg); + if (NIL_P(arg.io)) return Qnil; + if (!NIL_P(offset)) { + rb_io_binmode(arg.io); + rb_io_seek(arg.io, offset, SEEK_SET); + if (arg.argc == 2) arg.argc = 1; + } + return rb_ensure(io_s_read, (VALUE)&arg, rb_io_close, arg.io); +} + +/* + * call-seq: + * IO.binread(name, [length [, offset]] ) => string + * + * Opens the file, optionally seeks to the given offset, then returns + * length bytes (defaulting to the rest of the file). + * read ensures the file is closed before returning. + * The open mode would be "rb:ASCII-8BIT". + * + * IO.binread("testfile") #=> "This is line one\nThis is line two\nThis is line three\nAnd so on...\n" + * IO.binread("testfile", 20) #=> "This is line one\nThi" + * IO.binread("testfile", 20, 10) #=> "ne one\nThis is line " + */ + +static VALUE +rb_io_s_binread(int argc, VALUE *argv, VALUE io) +{ + VALUE offset; + struct foreach_arg arg; + + rb_scan_args(argc, argv, "12", NULL, NULL, &offset); + FilePathValue(argv[0]); + arg.io = rb_io_open(argv[0], rb_str_new_cstr("rb:ASCII-8BIT"), Qnil, Qnil); + if (NIL_P(arg.io)) return Qnil; + arg.argv = argv+1; + arg.argc = (argc > 1) ? 
1 : 0; + if (!NIL_P(offset)) { + rb_io_seek(arg.io, offset, SEEK_SET); + } + return rb_ensure(io_s_read, (VALUE)&arg, rb_io_close, arg.io); +} + +/* State shared by the IO.copy_stream helper functions. */ +struct copy_stream_struct { + VALUE src; + VALUE dst; + off_t copy_length; /* (off_t)-1 if not specified */ + off_t src_offset; /* (off_t)-1 if not specified */ + + int src_fd; + int dst_fd; + int close_src; + int close_dst; + off_t total; + const char *syserr; /* name of the system call that failed, if any */ + int error_no; /* errno saved together with syserr */ + const char *notimp; /* name of a function found to be unimplemented, if any */ + rb_fdset_t fds; + VALUE th; +}; + +/* + * Waits in select() until stp->src_fd is readable. + * Returns 0 on success; on failure records "select" and errno in stp + * and returns -1. + */ +static int +maygvl_copy_stream_wait_read(struct copy_stream_struct *stp) +{ + int ret; + rb_fd_zero(&stp->fds); + rb_fd_set(stp->src_fd, &stp->fds); + ret = rb_fd_select(rb_fd_max(&stp->fds), &stp->fds, NULL, NULL, NULL); + if (ret == -1) { + stp->syserr = "select"; + stp->error_no = errno; + return -1; + } + return 0; +} + +/* + * Waits in select() until stp->dst_fd is writable. + * Returns 0 on success; on failure records "select" and errno in stp + * and returns -1. + */ +static int +nogvl_copy_stream_wait_write(struct copy_stream_struct *stp) +{ + int ret; + rb_fd_zero(&stp->fds); + rb_fd_set(stp->dst_fd, &stp->fds); + ret = rb_fd_select(rb_fd_max(&stp->fds), NULL, &stp->fds, NULL, NULL); + if (ret == -1) { + stp->syserr = "select"; + stp->error_no = errno; + return -1; + } + return 0; +} + +#ifdef HAVE_SENDFILE + +#ifdef __linux__ +#define USE_SENDFILE + +#ifdef HAVE_SYS_SENDFILE_H +#include <sys/sendfile.h> +#endif + +/* Thin wrapper so the copy loop calls sendfile() through one signature. */ +static ssize_t +simple_sendfile(int out_fd, int in_fd, off_t *offset, size_t count) +{ + return sendfile(out_fd, in_fd, offset, count); +} + +#endif + +#endif + +#ifdef USE_SENDFILE +static int +nogvl_copy_stream_sendfile(struct copy_stream_struct *stp) +{ + struct stat src_stat, dst_stat; + ssize_t ss; + int ret; + + off_t copy_length; + off_t src_offset; + int use_pread; + + ret = fstat(stp->src_fd, &src_stat); + if (ret == -1) { + stp->syserr = "fstat"; + stp->error_no = errno; + return -1; + } + if (!S_ISREG(src_stat.st_mode)) + return 0; + + ret = fstat(stp->dst_fd, &dst_stat); + if (ret == -1) { + stp->syserr = "fstat"; + stp->error_no = errno; + return -1; + } + if ((dst_stat.st_mode & S_IFMT) != S_IFSOCK) + return 0; + + 
src_offset = stp->src_offset; + use_pread = src_offset != (off_t)-1; + + copy_length = stp->copy_length; + if (copy_length == (off_t)-1) { + if (use_pread) + copy_length = src_stat.st_size - src_offset; + else { + off_t cur = lseek(stp->src_fd, 0, SEEK_CUR); + if (cur == (off_t)-1) { + stp->syserr = "lseek"; + stp->error_no = errno; + return -1; + } + copy_length = src_stat.st_size - cur; + } + } + + retry_sendfile: + if (use_pread) { + ss = simple_sendfile(stp->dst_fd, stp->src_fd, &src_offset, copy_length); + } + else { + ss = simple_sendfile(stp->dst_fd, stp->src_fd, NULL, copy_length); + } + if (0 < ss) { + stp->total += ss; + copy_length -= ss; + if (0 < copy_length) { + ss = -1; + errno = EAGAIN; + } + } + if (ss == -1) { + switch (errno) { + case EINVAL: +#ifdef ENOSYS + case ENOSYS: +#endif + return 0; + case EAGAIN: +#if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN + case EWOULDBLOCK: +#endif + if (nogvl_copy_stream_wait_write(stp) == -1) + return -1; + if (rb_thread_interrupted(stp->th)) + return -1; + goto retry_sendfile; + } + stp->syserr = "sendfile"; + stp->error_no = errno; + return -1; + } + return 1; +} +#endif + +static ssize_t +maygvl_copy_stream_read(struct copy_stream_struct *stp, char *buf, int len, off_t offset) +{ + ssize_t ss; + retry_read: + if (offset == (off_t)-1) + ss = read(stp->src_fd, buf, len); + else { +#ifdef HAVE_PREAD + ss = pread(stp->src_fd, buf, len, offset); +#else + stp->notimp = "pread"; + return -1; +#endif + } + if (ss == 0) { + return 0; + } + if (ss == -1) { + switch (errno) { + case EAGAIN: +#if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN + case EWOULDBLOCK: +#endif + if (maygvl_copy_stream_wait_read(stp) == -1) + return -1; + goto retry_read; +#ifdef ENOSYS + case ENOSYS: +#endif + stp->notimp = "pread"; + return -1; + } + stp->syserr = offset == (off_t)-1 ? 
"read" : "pread"; + stp->error_no = errno; + return -1; + } + return ss; +} + +static int +nogvl_copy_stream_write(struct copy_stream_struct *stp, char *buf, int len) +{ + ssize_t ss; + int off = 0; + while (len) { + ss = write(stp->dst_fd, buf+off, len); + if (ss == -1) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + if (nogvl_copy_stream_wait_write(stp) == -1) + return -1; + continue; + } + stp->syserr = "write"; + stp->error_no = errno; + return -1; + } + off += ss; + len -= ss; + stp->total += ss; + } + return 0; +} + +static void +nogvl_copy_stream_read_write(struct copy_stream_struct *stp) +{ + char buf[1024*16]; + int len; + ssize_t ss; + int ret; + off_t copy_length; + int use_eof; + off_t src_offset; + int use_pread; + + copy_length = stp->copy_length; + use_eof = copy_length == (off_t)-1; + src_offset = stp->src_offset; + use_pread = src_offset != (off_t)-1; + + if (use_pread && stp->close_src) { + off_t r; + r = lseek(stp->src_fd, src_offset, SEEK_SET); + if (r == (off_t)-1) { + stp->syserr = "lseek"; + stp->error_no = errno; + return; + } + src_offset = (off_t)-1; + use_pread = 0; + } + + while (use_eof || 0 < copy_length) { + if (!use_eof && copy_length < sizeof(buf)) { + len = copy_length; + } + else { + len = sizeof(buf); + } + if (use_pread) { + ss = maygvl_copy_stream_read(stp, buf, len, src_offset); + if (0 < ss) + src_offset += ss; + } + else { + ss = maygvl_copy_stream_read(stp, buf, len, (off_t)-1); + } + if (ss <= 0) /* EOF or error */ + return; + + ret = nogvl_copy_stream_write(stp, buf, ss); + if (ret < 0) + return; + + if (!use_eof) + copy_length -= ss; + + if (rb_thread_interrupted(stp->th)) + return; + } +} + +static VALUE +nogvl_copy_stream_func(void *arg) +{ + struct copy_stream_struct *stp = (struct copy_stream_struct *)arg; +#ifdef USE_SENDFILE + int ret; +#endif + +#ifdef USE_SENDFILE + ret = nogvl_copy_stream_sendfile(stp); + if (ret != 0) + goto finish; /* error or success */ +#endif + + nogvl_copy_stream_read_write(stp); + 
+#ifdef USE_SENDFILE + finish: +#endif + return Qnil; +} + +static VALUE +copy_stream_fallback_body(VALUE arg) +{ + struct copy_stream_struct *stp = (struct copy_stream_struct *)arg; + const int buflen = 16*1024; + VALUE n; + VALUE buf = rb_str_buf_new(buflen); + off_t rest = stp->copy_length; + off_t off = stp->src_offset; + ID read_method = id_readpartial; + + if (stp->src_fd == -1) { + if (!rb_respond_to(stp->src, read_method)) { + read_method = id_read; + } + } + + while (1) { + long numwrote; + long l; + if (stp->copy_length == (off_t)-1) { + l = buflen; + } + else { + if (rest == 0) + break; + l = buflen < rest ? buflen : (long)rest; + } + if (stp->src_fd == -1) { + rb_funcall(stp->src, read_method, 2, INT2FIX(l), buf); + } + else { + ssize_t ss; + rb_thread_wait_fd(stp->src_fd); + rb_str_resize(buf, buflen); + ss = maygvl_copy_stream_read(stp, RSTRING_PTR(buf), l, off); + if (ss == -1) + return Qnil; + if (ss == 0) + rb_eof_error(); + rb_str_resize(buf, ss); + if (off != (off_t)-1) + off += ss; + } + n = rb_io_write(stp->dst, buf); + numwrote = NUM2LONG(n); + stp->total += numwrote; + rest -= numwrote; + if (read_method == id_read && RSTRING_LEN(buf) == 0) { + break; + } + } + + return Qnil; +} + +static VALUE +copy_stream_fallback(struct copy_stream_struct *stp) +{ + if (stp->src_fd == -1 && stp->src_offset != (off_t)-1) { + rb_raise(rb_eArgError, "cannot specify src_offset for non-IO"); + } + rb_rescue2(copy_stream_fallback_body, (VALUE)stp, + (VALUE (*) (ANYARGS))0, (VALUE)0, + rb_eEOFError, (VALUE)0); + return Qnil; +} + +static VALUE +copy_stream_body(VALUE arg) +{ + struct copy_stream_struct *stp = (struct copy_stream_struct *)arg; + VALUE src_io, dst_io; + rb_io_t *src_fptr = 0, *dst_fptr = 0; + int src_fd, dst_fd; + + stp->th = rb_thread_current(); + + stp->total = 0; + + if (stp->src == argf || + !(TYPE(stp->src) == T_FILE || + TYPE(stp->src) == T_STRING || + rb_respond_to(stp->src, rb_intern("to_path")))) { + src_fd = -1; + } + else { + src_io = 
TYPE(stp->src) == T_FILE ? stp->src : Qnil; + if (NIL_P(src_io)) { + VALUE args[2]; + int oflags = O_RDONLY; +#ifdef O_NOCTTY + oflags |= O_NOCTTY; +#endif + FilePathValue(stp->src); + args[0] = stp->src; + args[1] = INT2NUM(oflags); + src_io = rb_class_new_instance(2, args, rb_cFile); + stp->src = src_io; + stp->close_src = 1; + } + GetOpenFile(src_io, src_fptr); + rb_io_check_readable(src_fptr); + src_fd = src_fptr->fd; + } + stp->src_fd = src_fd; + + if (stp->dst == argf || + !(TYPE(stp->dst) == T_FILE || + TYPE(stp->dst) == T_STRING || + rb_respond_to(stp->dst, rb_intern("to_path")))) { + dst_fd = -1; + } + else { + dst_io = TYPE(stp->dst) == T_FILE ? stp->dst : Qnil; + if (NIL_P(dst_io)) { + VALUE args[3]; + int oflags = O_WRONLY|O_CREAT|O_TRUNC; +#ifdef O_NOCTTY + oflags |= O_NOCTTY; +#endif + FilePathValue(stp->dst); + args[0] = stp->dst; + args[1] = INT2NUM(oflags); + args[2] = INT2FIX(0600); + dst_io = rb_class_new_instance(3, args, rb_cFile); + stp->dst = dst_io; + stp->close_dst = 1; + } + else { + dst_io = GetWriteIO(dst_io); + stp->dst = dst_io; + } + GetOpenFile(dst_io, dst_fptr); + rb_io_check_writable(dst_fptr); + dst_fd = dst_fptr->fd; + } + stp->dst_fd = dst_fd; + + if (stp->src_offset == (off_t)-1 && src_fptr && src_fptr->rbuf_len) { + long len = src_fptr->rbuf_len; + VALUE str; + if (stp->copy_length != (off_t)-1 && stp->copy_length < len) { + len = stp->copy_length; + } + str = rb_str_buf_new(len); + rb_str_resize(str,len); + read_buffered_data(RSTRING_PTR(str), len, src_fptr); + if (dst_fptr) { /* IO or filename */ + if (io_binwrite(str, dst_fptr, 0) < 0) + rb_sys_fail(0); + } + else /* others such as StringIO */ + rb_io_write(stp->dst, str); + stp->total += len; + if (stp->copy_length != (off_t)-1) + stp->copy_length -= len; + } + + if (dst_fptr && io_fflush(dst_fptr) < 0) { + rb_raise(rb_eIOError, "flush failed"); + } + + if (stp->copy_length == 0) + return Qnil; + + if (src_fd == -1 || dst_fd == -1) { + return copy_stream_fallback(stp); + } 
+ + rb_fd_init(&stp->fds); + rb_fd_set(src_fd, &stp->fds); + rb_fd_set(dst_fd, &stp->fds); + + return rb_thread_blocking_region(nogvl_copy_stream_func, (void*)stp, RUBY_UBF_IO, 0); +} + +/* + * Cleanup step for IO.copy_stream: closes src and/or dst when close_src / + * close_dst are set, releases the fd set, and then raises any failure + * recorded in stp (a system call error via syserr/error_no, or a + * NotImplementedError via notimp). + */ +static VALUE +copy_stream_finalize(VALUE arg) +{ + struct copy_stream_struct *stp = (struct copy_stream_struct *)arg; + if (stp->close_src) { + rb_io_close_m(stp->src); + } + if (stp->close_dst) { + rb_io_close_m(stp->dst); + } + rb_fd_term(&stp->fds); + if (stp->syserr) { + errno = stp->error_no; + rb_sys_fail(stp->syserr); + } + if (stp->notimp) { + rb_raise(rb_eNotImpError, "%s() not implemented", stp->notimp); + } + return Qnil; +} + +/* + * call-seq: + * IO.copy_stream(src, dst) + * IO.copy_stream(src, dst, copy_length) + * IO.copy_stream(src, dst, copy_length, src_offset) + * + * IO.copy_stream copies src to dst. + * src and dst are each either a filename or an IO. + * + * This method returns the number of bytes copied. + * + * If optional arguments are not given, + * the start position of the copy is + * the beginning of the file (when src is a filename) or + * the current file offset of the IO. + * The end position of the copy is the end of file. + * + * If copy_length is given, + * no more than copy_length bytes are copied. + * + * If src_offset is given, + * it specifies the start position of the copy. + * + * When src_offset is specified and + * src is an IO, + * IO.copy_stream doesn't move the current file offset. 
+ * + */ +static VALUE +rb_io_s_copy_stream(int argc, VALUE *argv, VALUE io) +{ + VALUE src, dst, length, src_offset; + struct copy_stream_struct st; + + MEMZERO(&st, struct copy_stream_struct, 1); + + rb_scan_args(argc, argv, "22", &src, &dst, &length, &src_offset); + + st.src = src; + st.dst = dst; + + if (NIL_P(length)) + st.copy_length = (off_t)-1; + else + st.copy_length = NUM2OFFT(length); + + if (NIL_P(src_offset)) + st.src_offset = (off_t)-1; + else + st.src_offset = NUM2OFFT(src_offset); + + rb_ensure(copy_stream_body, (VALUE)&st, copy_stream_finalize, (VALUE)&st); + + return OFFT2NUM(st.total); +} + +/* + * call-seq: + * io.external_encoding => encoding + * + * Returns the Encoding object that represents the encoding of the file. + * If io is write mode and no encoding is specified, returns nil. + */ + +static VALUE +rb_io_external_encoding(VALUE io) +{ + rb_io_t *fptr; + + GetOpenFile(io, fptr); + if (fptr->encs.enc2) { + return rb_enc_from_encoding(fptr->encs.enc2); + } + if (fptr->mode & FMODE_WRITABLE) { + if (fptr->encs.enc) + return rb_enc_from_encoding(fptr->encs.enc); + return Qnil; + } + return rb_enc_from_encoding(io_read_encoding(fptr)); +} + +/* + * call-seq: + * io.internal_encoding => encoding + * + * Returns the Encoding of the internal string if conversion is + * specified. Otherwise returns nil. + */ + +static VALUE +rb_io_internal_encoding(VALUE io) +{ + rb_io_t *fptr; + + GetOpenFile(io, fptr); + if (!fptr->encs.enc2) return Qnil; + return rb_enc_from_encoding(io_read_encoding(fptr)); +} + +/* + * call-seq: + * io.set_encoding(ext_enc) => io + * io.set_encoding("ext_enc:int_enc") => io + * io.set_encoding(ext_enc, int_enc) => io + * io.set_encoding("ext_enc:int_enc", opt) => io + * io.set_encoding(ext_enc, int_enc, opt) => io + * + * If single argument is specified, read string from io is tagged + * with the encoding specified. 
If encoding is a colon separated two + * encoding names "A:B", the read string is converted from encoding A + * (external encoding) to encoding B (internal encoding), then tagged + * with B. If two arguments are specified, those must be encoding + * objects or encoding names, and the first one is the external encoding, and the + * second one is the internal encoding. + * If the external encoding and the internal encoding is specified, + * optional hash argument specify the conversion option. + */ + +static VALUE +rb_io_set_encoding(int argc, VALUE *argv, VALUE io) +{ + rb_io_t *fptr; + VALUE v1, v2, opt; + + opt = pop_last_hash(&argc, argv); + rb_scan_args(argc, argv, "11", &v1, &v2); + GetOpenFile(io, fptr); + io_encoding_set(fptr, v1, v2, opt); + return io; +} + +void +rb_stdio_set_default_encoding(void) +{ + extern VALUE rb_stdin, rb_stdout, rb_stderr; + VALUE val = Qnil; + + rb_io_set_encoding(1, &val, rb_stdin); + rb_io_set_encoding(1, &val, rb_stdout); + rb_io_set_encoding(1, &val, rb_stderr); +} + +static VALUE +argf_external_encoding(VALUE argf) +{ + if (!RTEST(ARGF.current_file)) { + return rb_enc_from_encoding(rb_default_external_encoding()); + } + return rb_io_external_encoding(rb_io_check_io(ARGF.current_file)); +} + +static VALUE +argf_internal_encoding(VALUE argf) +{ + if (!RTEST(ARGF.current_file)) { + return rb_enc_from_encoding(rb_default_external_encoding()); + } + return rb_io_internal_encoding(rb_io_check_io(ARGF.current_file)); +} + +static VALUE +argf_set_encoding(int argc, VALUE *argv, VALUE argf) +{ + rb_io_t *fptr; + + if (!next_argv()) { + rb_raise(rb_eArgError, "no stream to set encoding"); + } + rb_io_set_encoding(argc, argv, ARGF.current_file); + GetOpenFile(ARGF.current_file, fptr); + ARGF.encs = fptr->encs; + return argf; +} + +static VALUE +argf_tell(VALUE argf) +{ + if (!next_argv()) { + rb_raise(rb_eArgError, "no stream to tell"); + } + ARGF_FORWARD(0, 0); + return rb_io_tell(ARGF.current_file); +} + +static VALUE +argf_seek_m(int 
argc, VALUE *argv, VALUE argf) +{ + if (!next_argv()) { + rb_raise(rb_eArgError, "no stream to seek"); + } + ARGF_FORWARD(argc, argv); + return rb_io_seek_m(argc, argv, ARGF.current_file); +} + +static VALUE +argf_set_pos(VALUE argf, VALUE offset) +{ + if (!next_argv()) { + rb_raise(rb_eArgError, "no stream to set position"); + } + ARGF_FORWARD(1, &offset); + return rb_io_set_pos(ARGF.current_file, offset); +} + +static VALUE +argf_rewind(VALUE argf) +{ + if (!next_argv()) { + rb_raise(rb_eArgError, "no stream to rewind"); + } + ARGF_FORWARD(0, 0); + return rb_io_rewind(ARGF.current_file); +} + +static VALUE +argf_fileno(VALUE argf) +{ + if (!next_argv()) { + rb_raise(rb_eArgError, "no stream"); + } + ARGF_FORWARD(0, 0); + return rb_io_fileno(ARGF.current_file); +} + +static VALUE +argf_to_io(VALUE argf) +{ + next_argv(); + ARGF_FORWARD(0, 0); + return ARGF.current_file; +} + +static VALUE +argf_eof(VALUE argf) +{ + if (RTEST(ARGF.current_file)) { + if (ARGF.init_p == 0) return Qtrue; + ARGF_FORWARD(0, 0); + if (rb_io_eof(ARGF.current_file)) { + return Qtrue; + } + } + return Qfalse; +} + +static VALUE +argf_read(int argc, VALUE *argv, VALUE argf) +{ + VALUE tmp, str, length; + long len = 0; + + rb_scan_args(argc, argv, "02", &length, &str); + if (!NIL_P(length)) { + len = NUM2LONG(argv[0]); + } + if (!NIL_P(str)) { + StringValue(str); + rb_str_resize(str,0); + argv[1] = Qnil; + } + + retry: + if (!next_argv()) { + return str; + } + if (ARGF_GENERIC_INPUT_P()) { + tmp = argf_forward(argc, argv, argf); + } + else { + tmp = io_read(argc, argv, ARGF.current_file); + } + if (NIL_P(str)) str = tmp; + else if (!NIL_P(tmp)) rb_str_append(str, tmp); + if (NIL_P(tmp) || NIL_P(length)) { + if (ARGF.next_p != -1) { + argf_close(ARGF.current_file); + ARGF.next_p = 1; + goto retry; + } + } + else if (argc >= 1) { + if (RSTRING_LEN(str) < len) { + len -= RSTRING_LEN(str); + argv[0] = INT2NUM(len); + goto retry; + } + } + return str; +} + +struct argf_call_arg { + int argc; + 
VALUE *argv; + VALUE argf; +}; + +static VALUE +argf_forward_call(VALUE arg) +{ + struct argf_call_arg *p = (struct argf_call_arg *)arg; + argf_forward(p->argc, p->argv, p->argf); + return Qnil; +} + +/* + * ARGF.readpartial(length [, buffer]): reads from the current ARGF stream + * (through io_getpartial, or by forwarding to a generic input object). + * Raises EOFError when no stream is available. When the current stream + * yields nothing it is closed and, if more arguments remain, buffer (or a + * new empty string when none was given) is returned. + */ + +static VALUE +argf_readpartial(int argc, VALUE *argv, VALUE argf) +{ + VALUE tmp, str, length; + + rb_scan_args(argc, argv, "11", &length, &str); + if (!NIL_P(str)) { + StringValue(str); + argv[1] = str; + } + + if (!next_argv()) { + /* str is nil when no buffer was passed; only a real string can be truncated */ + if (!NIL_P(str)) { + rb_str_resize(str, 0); + } + rb_eof_error(); + } + if (ARGF_GENERIC_INPUT_P()) { + struct argf_call_arg arg; + arg.argc = argc; + arg.argv = argv; + arg.argf = argf; + tmp = rb_rescue2(argf_forward_call, (VALUE)&arg, + RUBY_METHOD_FUNC(0), Qnil, rb_eEOFError, (VALUE)0); + } + else { + tmp = io_getpartial(argc, argv, ARGF.current_file, 0); + } + if (NIL_P(tmp)) { + if (ARGF.next_p == -1) { + rb_eof_error(); + } + argf_close(ARGF.current_file); + ARGF.next_p = 1; + if (RARRAY_LEN(ARGF.argv) == 0) + rb_eof_error(); + if (NIL_P(str)) + str = rb_str_new(NULL, 0); + return str; + } + return tmp; +} + +static VALUE +argf_getc(VALUE argf) +{ + VALUE ch; + + retry: + if (!next_argv()) return Qnil; + if (ARGF_GENERIC_INPUT_P()) { + ch = rb_funcall3(ARGF.current_file, rb_intern("getc"), 0, 0); + } + else { + ch = rb_io_getc(ARGF.current_file); + } + if (NIL_P(ch) && ARGF.next_p != -1) { + argf_close(ARGF.current_file); + ARGF.next_p = 1; + goto retry; + } + + return ch; +} + +static VALUE +argf_getbyte(VALUE argf) +{ + VALUE ch; + + retry: + if (!next_argv()) return Qnil; + if (TYPE(ARGF.current_file) != T_FILE) { + ch = rb_funcall3(ARGF.current_file, rb_intern("getbyte"), 0, 0); + } + else { + ch = rb_io_getbyte(ARGF.current_file); + } + if (NIL_P(ch) && ARGF.next_p != -1) { + argf_close(ARGF.current_file); + ARGF.next_p = 1; + goto retry; + } + + return ch; +} + +static VALUE +argf_readchar(VALUE argf) +{ + VALUE ch; + + retry: + if (!next_argv()) rb_eof_error(); + if (TYPE(ARGF.current_file) != T_FILE) { + ch = rb_funcall3(ARGF.current_file, 
rb_intern("getc"), 0, 0);
+    }
+    else {
+        ch = rb_io_getc(ARGF.current_file);
+    }
+    if (NIL_P(ch) && ARGF.next_p != -1) { /* nil from this stream: close it and retry with the next */
+        argf_close(ARGF.current_file);
+        ARGF.next_p = 1;
+        goto retry;
+    }
+
+    return ch;
+}
+/*
+ * ARGF#readbyte: delegates to argf_getbyte() and raises EOFError (via
+ * rb_eof_error) when that returns nil.  NEXT_ARGF_FORWARD is a macro
+ * defined elsewhere in this file.
+ */
+static VALUE
+argf_readbyte(VALUE argf)
+{
+    VALUE c;
+
+    NEXT_ARGF_FORWARD(0, 0);
+    c = argf_getbyte(argf);
+    if (NIL_P(c)) {
+        rb_eof_error();
+    }
+    return c;
+}
+/*
+ * ARGF#each / #each_line / #lines: for every stream that next_argv()
+ * supplies, call the stream's own "each_line" with the caller's arguments
+ * and yield each result.  Returns argf once no stream is left; returns an
+ * enumerator when RETURN_ENUMERATOR decides so (presumably when no block
+ * is given -- macro defined elsewhere).
+ */
+static VALUE
+argf_each_line(int argc, VALUE *argv, VALUE argf)
+{
+    RETURN_ENUMERATOR(argf, argc, argv);
+    for (;;) {
+        if (!next_argv()) return argf;
+        rb_block_call(ARGF.current_file, rb_intern("each_line"), argc, argv, rb_yield, 0);
+        ARGF.next_p = 1; /* request the next stream on the following pass */
+    }
+}
+/*
+ * ARGF#each_byte / #bytes: same loop as argf_each_line(), calling the
+ * stream's "each_byte" with no arguments.
+ */
+static VALUE
+argf_each_byte(VALUE argf)
+{
+    RETURN_ENUMERATOR(argf, 0, 0);
+    for (;;) {
+        if (!next_argv()) return argf;
+        rb_block_call(ARGF.current_file, rb_intern("each_byte"), 0, 0, rb_yield, 0);
+        ARGF.next_p = 1;
+    }
+}
+/*
+ * ARGF#each_char / #chars: same loop as argf_each_line(), calling the
+ * stream's "each_char" with no arguments.
+ */
+static VALUE
+argf_each_char(VALUE argf)
+{
+    RETURN_ENUMERATOR(argf, 0, 0);
+    for (;;) {
+        if (!next_argv()) return argf;
+        rb_block_call(ARGF.current_file, rb_intern("each_char"), 0, 0, rb_yield, 0);
+        ARGF.next_p = 1;
+    }
+}
+/*
+ * ARGF#filename / #path: ARGF.filename after giving next_argv() a chance
+ * to advance to the next stream.
+ */
+static VALUE
+argf_filename(VALUE argf)
+{
+    next_argv();
+    return ARGF.filename;
+}
+/* Getter hook for the $FILENAME global; +id+ is unused. */
+static VALUE
+argf_filename_getter(ID id, VALUE *var)
+{
+    return argf_filename(*var);
+}
+/* ARGF#file: the current stream object, after calling next_argv(). */
+static VALUE
+argf_file(VALUE argf)
+{
+    next_argv();
+    return ARGF.current_file;
+}
+/*
+ * ARGF#binmode: set the ARGF.binmode flag, then put the current stream
+ * into ASCII-8BIT binary mode.  Returns argf.
+ */
+static VALUE
+argf_binmode_m(VALUE argf)
+{
+    ARGF.binmode = 1; /* set before next_argv(), which may open a new stream */
+    next_argv();
+    ARGF_FORWARD(0, 0);
+    rb_io_ascii8bit_binmode(ARGF.current_file);
+
+    return argf;
+}
+/* ARGF#binmode?: reports the ARGF.binmode flag as true/false. */
+static VALUE
+argf_binmode_p(VALUE argf)
+{
+    return ARGF.binmode ? 
Qtrue : Qfalse;
+}
+/*
+ * ARGF#skip: when ARGF is initialized and positioned inside a stream
+ * (next_p == 0), close that stream and request the next one.  Returns argf.
+ */
+static VALUE
+argf_skip(VALUE argf)
+{
+    if (ARGF.init_p && ARGF.next_p == 0) {
+        argf_close(ARGF.current_file);
+        ARGF.next_p = 1;
+    }
+    return argf;
+}
+/*
+ * ARGF#close: close the current stream, request the next one unless stdin
+ * is being read (next_p == -1), and reset ARGF.lineno to 0.  Returns argf.
+ */
+static VALUE
+argf_close_m(VALUE argf)
+{
+    next_argv();
+    argf_close(ARGF.current_file);
+    if (ARGF.next_p != -1) {
+        ARGF.next_p = 1;
+    }
+    ARGF.lineno = 0;
+    return argf;
+}
+/* ARGF#closed?: result of rb_io_closed() on the current stream. */
+static VALUE
+argf_closed(VALUE argf)
+{
+    next_argv();
+    ARGF_FORWARD(0, 0);
+    return rb_io_closed(ARGF.current_file);
+}
+/* ARGF#to_s: always the string "ARGF". */
+static VALUE
+argf_to_s(VALUE argf)
+{
+    return rb_str_new2("ARGF");
+}
+/*
+ * ARGF#inplace_mode: the in-place edit suffix as a new String, or nil
+ * when ARGF.inplace is unset.
+ */
+static VALUE
+argf_inplace_mode_get(VALUE argf)
+{
+    if (!ARGF.inplace) return Qnil;
+    return rb_str_new2(ARGF.inplace);
+}
+/* Getter hook for the $-i global; +id+ is unused. */
+static VALUE
+opt_i_get(ID id, VALUE *var)
+{
+    return argf_inplace_mode_get(*var);
+}
+/*
+ * ARGF#inplace_mode=: a false/nil value clears the suffix; any other
+ * value is converted with StringValue and a private copy of its bytes
+ * replaces the previous suffix.  Returns argf.
+ */
+static VALUE
+argf_inplace_mode_set(VALUE argf, VALUE val)
+{
+    if (!RTEST(val)) {
+        if (ARGF.inplace) free(ARGF.inplace);
+        ARGF.inplace = 0;
+    }
+    else {
+        StringValue(val);
+        if (ARGF.inplace) free(ARGF.inplace);
+        ARGF.inplace = 0;
+        ARGF.inplace = strdup(RSTRING_PTR(val)); /* copy stops at the first NUL byte */
+    }
+    return argf;
+}
+/* Setter hook for the $-i global; +id+ is unused. */
+static void
+opt_i_set(VALUE val, ID id, VALUE *var)
+{
+    argf_inplace_mode_set(*var, val);
+}
+/* C-level accessor: the current in-place suffix (may be NULL); the pointer stays owned by ARGF. */
+const char *
+ruby_get_inplace_mode(void)
+{
+    return ARGF.inplace;
+}
+/*
+ * C-level setter: free the previous suffix and, when +suffix+ is
+ * non-NULL, store a private copy of it; NULL clears the suffix.
+ */
+void
+ruby_set_inplace_mode(const char *suffix)
+{
+    if (ARGF.inplace) free(ARGF.inplace);
+    ARGF.inplace = 0;
+    if (suffix) ARGF.inplace = strdup(suffix);
+}
+/* ARGF#argv: the array held in ARGF.argv. */
+static VALUE
+argf_argv(VALUE argf)
+{
+    return ARGF.argv;
+}
+/* Getter hook for the $* global; +id+ is unused. */
+static VALUE
+argf_argv_getter(ID id, VALUE *var)
+{
+    return argf_argv(*var);
+}
+/* C-level accessor for ARGF.argv. */
+VALUE
+rb_get_argv(void)
+{
+    return ARGF.argv;
+}
+
+/*
+ *  Class IO is the basis for all input and output in Ruby.
+ *  An I/O stream may be duplexed (that is, bidirectional), and
+ *  so may use more than one native operating system stream.
+ *
+ *  Many of the examples in this section use class File,
+ *  the only standard subclass of IO. The two classes are
+ *  closely associated. 
+ * + * As used in this section, portname may take any of the + * following forms. + * + * * A plain string represents a filename suitable for the underlying + * operating system. + * + * * A string starting with ``|'' indicates a subprocess. + * The remainder of the string following the ``|'' is + * invoked as a process with appropriate input/output channels + * connected to it. + * + * * A string equal to ``|-'' will create another Ruby + * instance as a subprocess. + * + * Ruby will convert pathnames between different operating system + * conventions if possible. For instance, on a Windows system the + * filename ``/gumby/ruby/test.rb'' will be opened as + * ``\gumby\ruby\test.rb''. When specifying a + * Windows-style filename in a Ruby string, remember to escape the + * backslashes: + * + * "c:\\gumby\\ruby\\test.rb" + * + * Our examples here will use the Unix-style forward slashes; + * File::SEPARATOR can be used to get the + * platform-specific separator character. + * + * I/O ports may be opened in any one of several different modes, which + * are shown in this section as mode. The mode may + * either be a Fixnum or a String. If numeric, it should be + * one of the operating system specific constants (O_RDONLY, + * O_WRONLY, O_RDWR, O_APPEND and so on). See man open(2) for + * more information. + * + * If the mode is given as a String, it must be one of the + * values listed in the following table. + * + * Mode | Meaning + * -----+-------------------------------------------------------- + * "r" | Read-only, starts at beginning of file (default mode). + * -----+-------------------------------------------------------- + * "r+" | Read-write, starts at beginning of file. + * -----+-------------------------------------------------------- + * "w" | Write-only, truncates existing file + * | to zero length or creates a new file for writing. 
+ * -----+-------------------------------------------------------- + * "w+" | Read-write, truncates existing file to zero length + * | or creates a new file for reading and writing. + * -----+-------------------------------------------------------- + * "a" | Write-only, starts at end of file if file exists, + * | otherwise creates a new file for writing. + * -----+-------------------------------------------------------- + * "a+" | Read-write, starts at end of file if file exists, + * | otherwise creates a new file for reading and + * | writing. + * -----+-------------------------------------------------------- + * "b" | Binary file mode (may appear with + * | any of the key letters listed above). + * | Suppresses EOL <-> CRLF conversion on Windows. And + * | sets external encoding to ASCII-8BIT unless explicitly + * | specified. + * -----+-------------------------------------------------------- + * "t" | Text file mode (may appear with + * | any of the key letters listed above except "b"). + * + * + * The global constant ARGF (also accessible as $<) provides an + * IO-like stream which allows access to all files mentioned on the + * command line (or STDIN if no files are mentioned). ARGF provides + * the methods #path and #filename to access + * the name of the file currently being read. 
+ */
+
+void
+Init_IO(void)
+{
+    /* Within this function rb_intern() expands to rb_intern_const(). */
+#undef rb_intern
+#define rb_intern(str) rb_intern_const(str)
+
+    VALUE rb_cARGF;
+#ifdef __CYGWIN__
+    /* The header name had been lost from this directive; cygwin_internal()
+     * and struct __cygwin_perfile are declared in <sys/cygwin.h>. */
+#include <sys/cygwin.h>
+    /* Presumably makes O_BINARY the default for every open mode --
+     * confirm against the Cygwin documentation for CW_PERFILE. */
+    static struct __cygwin_perfile pf[] =
+    {
+        {"", O_RDONLY | O_BINARY},
+        {"", O_WRONLY | O_BINARY},
+        {"", O_RDWR | O_BINARY},
+        {"", O_APPEND | O_BINARY},
+        {NULL, 0}
+    };
+    cygwin_internal(CW_PERFILE, pf);
+#endif
+
+    /* Exception classes: EOFError < IOError < StandardError. */
+    rb_eIOError = rb_define_class("IOError", rb_eStandardError);
+    rb_eEOFError = rb_define_class("EOFError", rb_eIOError);
+
+    /* Method IDs cached in file-scope variables. */
+    id_write = rb_intern("write");
+    id_read = rb_intern("read");
+    id_getc = rb_intern("getc");
+    id_flush = rb_intern("flush");
+    id_readpartial = rb_intern("readpartial");
+
+    /* Global (Kernel) functions. */
+    rb_define_global_function("syscall", rb_f_syscall, -1);
+
+    rb_define_global_function("open", rb_f_open, -1);
+    rb_define_global_function("printf", rb_f_printf, -1);
+    rb_define_global_function("print", rb_f_print, -1);
+    rb_define_global_function("putc", rb_f_putc, 1);
+    rb_define_global_function("puts", rb_f_puts, -1);
+    rb_define_global_function("gets", rb_f_gets, -1);
+    rb_define_global_function("readline", rb_f_readline, -1);
+    rb_define_global_function("select", rb_f_select, -1);
+
+    rb_define_global_function("readlines", rb_f_readlines, -1);
+
+    rb_define_global_function("`", rb_f_backquote, 1);
+
+    rb_define_global_function("p", rb_f_p, -1);
+    rb_define_method(rb_mKernel, "display", rb_obj_display, -1);
+
+    /* Class IO, which includes Enumerable. */
+    rb_cIO = rb_define_class("IO", rb_cObject);
+    rb_include_module(rb_cIO, rb_mEnumerable);
+
+#if 0
+    /* This is necessary only for forcing rdoc handle File::open */
+    rb_define_singleton_method(rb_cFile, "open", rb_io_s_open, -1);
+#endif
+
+    /* IO singleton (class-level) methods. */
+    rb_define_alloc_func(rb_cIO, io_alloc);
+    rb_define_singleton_method(rb_cIO, "new", rb_io_s_new, -1);
+    rb_define_singleton_method(rb_cIO, "open", rb_io_s_open, -1);
+    rb_define_singleton_method(rb_cIO, "sysopen", rb_io_s_sysopen, -1);
+    rb_define_singleton_method(rb_cIO, "for_fd", rb_io_s_for_fd, -1);
+    rb_define_singleton_method(rb_cIO, "popen", rb_io_s_popen, 
-1); + rb_define_singleton_method(rb_cIO, "foreach", rb_io_s_foreach, -1); + rb_define_singleton_method(rb_cIO, "readlines", rb_io_s_readlines, -1); + rb_define_singleton_method(rb_cIO, "read", rb_io_s_read, -1); + rb_define_singleton_method(rb_cIO, "binread", rb_io_s_binread, -1); + rb_define_singleton_method(rb_cIO, "select", rb_f_select, -1); + rb_define_singleton_method(rb_cIO, "pipe", rb_io_s_pipe, -1); + rb_define_singleton_method(rb_cIO, "try_convert", rb_io_s_try_convert, 1); + rb_define_singleton_method(rb_cIO, "copy_stream", rb_io_s_copy_stream, -1); + + rb_define_method(rb_cIO, "initialize", rb_io_initialize, -1); + + rb_output_fs = Qnil; + rb_define_hooked_variable("$,", &rb_output_fs, 0, rb_str_setter); + + rb_rs = rb_default_rs = rb_str_new2("\n"); + rb_gc_register_mark_object(rb_default_rs); + rb_output_rs = Qnil; + OBJ_FREEZE(rb_default_rs); /* avoid modifying RS_default */ + rb_define_hooked_variable("$/", &rb_rs, 0, rb_str_setter); + rb_define_hooked_variable("$-0", &rb_rs, 0, rb_str_setter); + rb_define_hooked_variable("$\\", &rb_output_rs, 0, rb_str_setter); + + rb_define_virtual_variable("$_", rb_lastline_get, rb_lastline_set); + + rb_define_method(rb_cIO, "initialize_copy", rb_io_init_copy, 1); + rb_define_method(rb_cIO, "reopen", rb_io_reopen, -1); + + rb_define_method(rb_cIO, "print", rb_io_print, -1); + rb_define_method(rb_cIO, "putc", rb_io_putc, 1); + rb_define_method(rb_cIO, "puts", rb_io_puts, -1); + rb_define_method(rb_cIO, "printf", rb_io_printf, -1); + + rb_define_method(rb_cIO, "each", rb_io_each_line, -1); + rb_define_method(rb_cIO, "each_line", rb_io_each_line, -1); + rb_define_method(rb_cIO, "each_byte", rb_io_each_byte, 0); + rb_define_method(rb_cIO, "each_char", rb_io_each_char, 0); + rb_define_method(rb_cIO, "lines", rb_io_lines, -1); + rb_define_method(rb_cIO, "bytes", rb_io_bytes, 0); + rb_define_method(rb_cIO, "chars", rb_io_chars, 0); + + rb_define_method(rb_cIO, "syswrite", rb_io_syswrite, 1); + rb_define_method(rb_cIO, 
"sysread", rb_io_sysread, -1); + + rb_define_method(rb_cIO, "fileno", rb_io_fileno, 0); + rb_define_alias(rb_cIO, "to_i", "fileno"); + rb_define_method(rb_cIO, "to_io", rb_io_to_io, 0); + + rb_define_method(rb_cIO, "fsync", rb_io_fsync, 0); + rb_define_method(rb_cIO, "sync", rb_io_sync, 0); + rb_define_method(rb_cIO, "sync=", rb_io_set_sync, 1); + + rb_define_method(rb_cIO, "lineno", rb_io_lineno, 0); + rb_define_method(rb_cIO, "lineno=", rb_io_set_lineno, 1); + + rb_define_method(rb_cIO, "readlines", rb_io_readlines, -1); + + rb_define_method(rb_cIO, "read_nonblock", io_read_nonblock, -1); + rb_define_method(rb_cIO, "write_nonblock", rb_io_write_nonblock, 1); + rb_define_method(rb_cIO, "readpartial", io_readpartial, -1); + rb_define_method(rb_cIO, "read", io_read, -1); + rb_define_method(rb_cIO, "write", io_write_m, 1); + rb_define_method(rb_cIO, "gets", rb_io_gets_m, -1); + rb_define_method(rb_cIO, "readline", rb_io_readline, -1); + rb_define_method(rb_cIO, "getc", rb_io_getc, 0); + rb_define_method(rb_cIO, "getbyte", rb_io_getbyte, 0); + rb_define_method(rb_cIO, "readchar", rb_io_readchar, 0); + rb_define_method(rb_cIO, "readbyte", rb_io_readbyte, 0); + rb_define_method(rb_cIO, "ungetbyte",rb_io_ungetbyte, 1); + rb_define_method(rb_cIO, "ungetc",rb_io_ungetc, 1); + rb_define_method(rb_cIO, "<<", rb_io_addstr, 1); + rb_define_method(rb_cIO, "flush", rb_io_flush, 0); + rb_define_method(rb_cIO, "tell", rb_io_tell, 0); + rb_define_method(rb_cIO, "seek", rb_io_seek_m, -1); + rb_define_const(rb_cIO, "SEEK_SET", INT2FIX(SEEK_SET)); + rb_define_const(rb_cIO, "SEEK_CUR", INT2FIX(SEEK_CUR)); + rb_define_const(rb_cIO, "SEEK_END", INT2FIX(SEEK_END)); + rb_define_method(rb_cIO, "rewind", rb_io_rewind, 0); + rb_define_method(rb_cIO, "pos", rb_io_tell, 0); + rb_define_method(rb_cIO, "pos=", rb_io_set_pos, 1); + rb_define_method(rb_cIO, "eof", rb_io_eof, 0); + rb_define_method(rb_cIO, "eof?", rb_io_eof, 0); + + rb_define_method(rb_cIO, "close_on_exec?", rb_io_close_on_exec_p, 
0);
+    rb_define_method(rb_cIO, "close_on_exec=", rb_io_set_close_on_exec, 1);
+
+    rb_define_method(rb_cIO, "close", rb_io_close_m, 0);
+    rb_define_method(rb_cIO, "closed?", rb_io_closed, 0);
+    rb_define_method(rb_cIO, "close_read", rb_io_close_read, 0);
+    rb_define_method(rb_cIO, "close_write", rb_io_close_write, 0);
+
+    rb_define_method(rb_cIO, "isatty", rb_io_isatty, 0);
+    rb_define_method(rb_cIO, "tty?", rb_io_isatty, 0);
+    rb_define_method(rb_cIO, "binmode", rb_io_binmode_m, 0);
+    rb_define_method(rb_cIO, "binmode?", rb_io_binmode_p, 0);
+    rb_define_method(rb_cIO, "sysseek", rb_io_sysseek, -1);
+
+    rb_define_method(rb_cIO, "ioctl", rb_io_ioctl, -1);
+    rb_define_method(rb_cIO, "fcntl", rb_io_fcntl, -1);
+    rb_define_method(rb_cIO, "pid", rb_io_pid, 0);
+    rb_define_method(rb_cIO, "inspect", rb_io_inspect, 0);
+
+    rb_define_method(rb_cIO, "external_encoding", rb_io_external_encoding, 0);
+    rb_define_method(rb_cIO, "internal_encoding", rb_io_internal_encoding, 0);
+    rb_define_method(rb_cIO, "set_encoding", rb_io_set_encoding, -1);
+
+    /*
+     * Standard streams.  The last prep_stdio() argument is the name given
+     * to the stream; the angle-bracketed names had been reduced to empty
+     * strings and are restored here.
+     * NOTE(review): names restored from upstream Ruby io.c -- confirm.
+     */
+    rb_define_variable("$stdin", &rb_stdin);
+    rb_stdin = prep_stdio(stdin, FMODE_READABLE, rb_cIO, "<STDIN>");
+    rb_define_hooked_variable("$stdout", &rb_stdout, 0, stdout_setter);
+    rb_stdout = prep_stdio(stdout, FMODE_WRITABLE, rb_cIO, "<STDOUT>");
+    rb_define_hooked_variable("$stderr", &rb_stderr, 0, stdout_setter);
+    rb_stderr = prep_stdio(stderr, FMODE_WRITABLE|FMODE_SYNC, rb_cIO, "<STDERR>");
+    rb_define_hooked_variable("$>", &rb_stdout, 0, stdout_setter);
+    orig_stdout = rb_stdout;
+    rb_deferr = orig_stderr = rb_stderr;
+
+    /* constants to hold original stdin/stdout/stderr */
+    rb_define_global_const("STDIN", rb_stdin);
+    rb_define_global_const("STDOUT", rb_stdout);
+    rb_define_global_const("STDERR", rb_stderr);
+
+    /* ARGF's class: an anonymous subclass of Object with its class path
+     * set to "ARGF.class"; it includes Enumerable. */
+    rb_cARGF = rb_class_new(rb_cObject);
+    rb_set_class_path(rb_cARGF, rb_cObject, "ARGF.class");
+    rb_define_alloc_func(rb_cARGF, argf_alloc);
+
+    rb_include_module(rb_cARGF, rb_mEnumerable);
+
+    rb_define_method(rb_cARGF, "initialize", 
argf_initialize, -2); + rb_define_method(rb_cARGF, "initialize_copy", argf_initialize_copy, 1); + rb_define_method(rb_cARGF, "to_s", argf_to_s, 0); + rb_define_method(rb_cARGF, "argv", argf_argv, 0); + + rb_define_method(rb_cARGF, "fileno", argf_fileno, 0); + rb_define_method(rb_cARGF, "to_i", argf_fileno, 0); + rb_define_method(rb_cARGF, "to_io", argf_to_io, 0); + rb_define_method(rb_cARGF, "each", argf_each_line, -1); + rb_define_method(rb_cARGF, "each_line", argf_each_line, -1); + rb_define_method(rb_cARGF, "each_byte", argf_each_byte, 0); + rb_define_method(rb_cARGF, "each_char", argf_each_char, 0); + rb_define_method(rb_cARGF, "lines", argf_each_line, -1); + rb_define_method(rb_cARGF, "bytes", argf_each_byte, 0); + rb_define_method(rb_cARGF, "chars", argf_each_char, 0); + + rb_define_method(rb_cARGF, "read", argf_read, -1); + rb_define_method(rb_cARGF, "readpartial", argf_readpartial, -1); + rb_define_method(rb_cARGF, "readlines", argf_readlines, -1); + rb_define_method(rb_cARGF, "to_a", argf_readlines, -1); + rb_define_method(rb_cARGF, "gets", argf_gets, -1); + rb_define_method(rb_cARGF, "readline", argf_readline, -1); + rb_define_method(rb_cARGF, "getc", argf_getc, 0); + rb_define_method(rb_cARGF, "getbyte", argf_getbyte, 0); + rb_define_method(rb_cARGF, "readchar", argf_readchar, 0); + rb_define_method(rb_cARGF, "readbyte", argf_readbyte, 0); + rb_define_method(rb_cARGF, "tell", argf_tell, 0); + rb_define_method(rb_cARGF, "seek", argf_seek_m, -1); + rb_define_method(rb_cARGF, "rewind", argf_rewind, 0); + rb_define_method(rb_cARGF, "pos", argf_tell, 0); + rb_define_method(rb_cARGF, "pos=", argf_set_pos, 1); + rb_define_method(rb_cARGF, "eof", argf_eof, 0); + rb_define_method(rb_cARGF, "eof?", argf_eof, 0); + rb_define_method(rb_cARGF, "binmode", argf_binmode_m, 0); + rb_define_method(rb_cARGF, "binmode?", argf_binmode_p, 0); + + rb_define_method(rb_cARGF, "filename", argf_filename, 0); + rb_define_method(rb_cARGF, "path", argf_filename, 0); + 
rb_define_method(rb_cARGF, "file", argf_file, 0); + rb_define_method(rb_cARGF, "skip", argf_skip, 0); + rb_define_method(rb_cARGF, "close", argf_close_m, 0); + rb_define_method(rb_cARGF, "closed?", argf_closed, 0); + + rb_define_method(rb_cARGF, "lineno", argf_lineno, 0); + rb_define_method(rb_cARGF, "lineno=", argf_set_lineno, 1); + + rb_define_method(rb_cARGF, "inplace_mode", argf_inplace_mode_get, 0); + rb_define_method(rb_cARGF, "inplace_mode=", argf_inplace_mode_set, 1); + + rb_define_method(rb_cARGF, "external_encoding", argf_external_encoding, 0); + rb_define_method(rb_cARGF, "internal_encoding", argf_internal_encoding, 0); + rb_define_method(rb_cARGF, "set_encoding", argf_set_encoding, -1); + + argf = rb_class_new_instance(0, 0, rb_cARGF); + + rb_define_readonly_variable("$<", &argf); + rb_define_global_const("ARGF", argf); + + rb_define_hooked_variable("$.", &argf, argf_lineno_getter, argf_lineno_setter); + rb_define_hooked_variable("$FILENAME", &argf, argf_filename_getter, rb_gvar_readonly_setter); + ARGF.filename = rb_str_new2("-"); + + rb_define_hooked_variable("$-i", &argf, opt_i_get, opt_i_set); + rb_define_hooked_variable("$*", &argf, argf_argv_getter, rb_gvar_readonly_setter); + +#if defined (_WIN32) || defined(__CYGWIN__) + atexit(pipe_atexit); +#endif + + Init_File(); + + rb_define_method(rb_cFile, "initialize", rb_file_initialize, -1); + + rb_file_const("RDONLY", INT2FIX(O_RDONLY)); + rb_file_const("WRONLY", INT2FIX(O_WRONLY)); + rb_file_const("RDWR", INT2FIX(O_RDWR)); + rb_file_const("APPEND", INT2FIX(O_APPEND)); + rb_file_const("CREAT", INT2FIX(O_CREAT)); + rb_file_const("EXCL", INT2FIX(O_EXCL)); +#if defined(O_NDELAY) || defined(O_NONBLOCK) +# ifdef O_NONBLOCK + rb_file_const("NONBLOCK", INT2FIX(O_NONBLOCK)); +# else + rb_file_const("NONBLOCK", INT2FIX(O_NDELAY)); +# endif +#endif + rb_file_const("TRUNC", INT2FIX(O_TRUNC)); +#ifdef O_NOCTTY + rb_file_const("NOCTTY", INT2FIX(O_NOCTTY)); +#endif +#ifdef O_BINARY + rb_file_const("BINARY", 
INT2FIX(O_BINARY));
+#else
+    rb_file_const("BINARY", INT2FIX(0)); /* no O_BINARY on this platform: the constant is defined as 0 */
+#endif
+#ifdef O_SYNC
+    rb_file_const("SYNC", INT2FIX(O_SYNC));
+#endif
+#ifdef O_DSYNC
+    rb_file_const("DSYNC", INT2FIX(O_DSYNC));
+#endif
+#ifdef O_RSYNC
+    rb_file_const("RSYNC", INT2FIX(O_RSYNC));
+#endif
+#ifdef O_NOFOLLOW
+    rb_file_const("NOFOLLOW", INT2FIX(O_NOFOLLOW)); /* FreeBSD, Linux */
+#endif
+
+    sym_mode = ID2SYM(rb_intern("mode")); /* option-key symbols cached in file-scope variables */
+    sym_perm = ID2SYM(rb_intern("perm"));
+    sym_extenc = ID2SYM(rb_intern("external_encoding"));
+    sym_intenc = ID2SYM(rb_intern("internal_encoding"));
+    sym_encoding = ID2SYM(rb_intern("encoding"));
+    sym_open_args = ID2SYM(rb_intern("open_args"));
+    sym_textmode = ID2SYM(rb_intern("textmode"));
+    sym_binmode = ID2SYM(rb_intern("binmode"));
+}
diff --git a/iseq.c b/iseq.c
new file mode 100644
index 0000000..6bf6a0a
--- /dev/null
+++ b/iseq.c
@@ -0,0 +1,1372 @@
+/**********************************************************************
+
+  iseq.c -
+
+  $Author: yugui $
+  created at: 2006-07-11(Tue) 09:00:03 +0900
+
+  Copyright (C) 2006 Koichi Sasada
+
+**********************************************************************/
+
+#include "ruby/ruby.h"
+
+/* #define MARK_FREE_DEBUG 1 */
+#include "gc.h"
+#include "vm_core.h"
+#include "iseq.h"
+
+#include "insns.inc"
+#include "insns_info.inc"
+
+VALUE rb_cISeq;
+/* True for a non-special-constant object whose klass field is zero. */
+#define hidden_obj_p(obj) (!SPECIAL_CONST_P(obj) && !RBASIC(obj)->klass)
+/* Free every storage chunk chained from storage_head, then compile_data itself; NULL is accepted. */
+static void
+compile_data_free(struct iseq_compile_data *compile_data)
+{
+    if (compile_data) {
+        struct iseq_compile_data_storage *cur, *next;
+        cur = compile_data->storage_head;
+        while (cur) {
+            next = cur->next; /* read the link before the chunk is freed */
+            ruby_xfree(cur);
+            cur = next;
+        }
+        ruby_xfree(compile_data);
+    }
+}
+/* Free function passed to Data_Make_Struct() in iseq_alloc(); owned tables are released only when iseq->orig is unset. */
+static void
+iseq_free(void *ptr)
+{
+    rb_iseq_t *iseq;
+    RUBY_FREE_ENTER("iseq");
+
+    if (ptr) {
+        iseq = ptr;
+        if (!iseq->orig) {
+            /* It's possible that strings are freed
+             * GC_INFO("%s @ %s\n", RSTRING_PTR(iseq->name),
+             *                      RSTRING_PTR(iseq->filename));
+             */
+            if (iseq->iseq != 
iseq->iseq_encoded) { /* freed separately only when it is a distinct buffer from iseq->iseq */
+                RUBY_FREE_UNLESS_NULL(iseq->iseq_encoded);
+            }
+
+            RUBY_FREE_UNLESS_NULL(iseq->iseq);
+            RUBY_FREE_UNLESS_NULL(iseq->insn_info_table);
+            RUBY_FREE_UNLESS_NULL(iseq->local_table);
+            RUBY_FREE_UNLESS_NULL(iseq->catch_table);
+            RUBY_FREE_UNLESS_NULL(iseq->arg_opt_table);
+            compile_data_free(iseq->compile_data);
+        }
+        ruby_xfree(ptr); /* the rb_iseq_t itself is always released */
+    }
+    RUBY_FREE_LEAVE("iseq");
+}
+/*
+ * Mark function passed to Data_Make_Struct() in iseq_alloc().  Marks the
+ * VALUE-typed members of the iseq and, while compile_data is still
+ * attached, the arrays and error info held there.
+ */
+static void
+iseq_mark(void *ptr)
+{
+    rb_iseq_t *iseq;
+    RUBY_MARK_ENTER("iseq");
+
+    if (ptr) {
+        iseq = ptr;
+        RUBY_GC_INFO("%s @ %s\n", RSTRING_PTR(iseq->name), RSTRING_PTR(iseq->filename));
+        RUBY_MARK_UNLESS_NULL(iseq->mark_ary);
+        RUBY_MARK_UNLESS_NULL(iseq->name);
+        RUBY_MARK_UNLESS_NULL(iseq->filename);
+        RUBY_MARK_UNLESS_NULL((VALUE)iseq->cref_stack);
+        RUBY_MARK_UNLESS_NULL(iseq->klass);
+        RUBY_MARK_UNLESS_NULL(iseq->coverage);
+/* 	RUBY_MARK_UNLESS_NULL((VALUE)iseq->node); */
+/*	RUBY_MARK_UNLESS_NULL(iseq->cached_special_block); */
+        RUBY_MARK_UNLESS_NULL(iseq->orig);
+
+        if (iseq->compile_data != 0) {
+            RUBY_MARK_UNLESS_NULL(iseq->compile_data->mark_ary);
+            RUBY_MARK_UNLESS_NULL(iseq->compile_data->err_info);
+            RUBY_MARK_UNLESS_NULL(iseq->compile_data->catch_table_ary);
+        }
+    }
+    RUBY_MARK_LEAVE("iseq");
+}
+/*
+ * Allocator: wraps a zero-filled rb_iseq_t in a Data object of +klass+
+ * with iseq_mark / iseq_free as its GC callbacks.
+ */
+static VALUE
+iseq_alloc(VALUE klass)
+{
+    VALUE volatile obj;
+    rb_iseq_t *iseq;
+
+    obj = Data_Make_Struct(klass, rb_iseq_t, iseq_mark, iseq_free, iseq);
+    MEMZERO(iseq, rb_iseq_t, 1);
+    return obj;
+}
+/*
+ * Wire +iseq+ to its surroundings according to iseq->type: sets
+ * cref_stack, local_iseq and (when +parent+ is truthy) parent_iseq.
+ */
+static void
+set_relation(rb_iseq_t *iseq, const VALUE parent)
+{
+    const int type = iseq->type;
+    rb_thread_t *th = GET_THREAD();
+
+    /* set class nest stack */
+    if (type == ISEQ_TYPE_TOP) {
+        /* toplevel is private */
+        iseq->cref_stack = NEW_BLOCK(th->top_wrapper ? 
th->top_wrapper : rb_cObject);
+        iseq->cref_stack->nd_file = 0;
+        iseq->cref_stack->nd_visi = NOEX_PRIVATE;
+    }
+    else if (type == ISEQ_TYPE_METHOD || type == ISEQ_TYPE_CLASS) {
+        iseq->cref_stack = NEW_BLOCK(0); /* place holder */
+        iseq->cref_stack->nd_file = 0;
+    }
+    else if (RTEST(parent)) { /* every other type shares the parent's cref_stack */
+        rb_iseq_t *piseq;
+        GetISeqPtr(parent, piseq);
+        iseq->cref_stack = piseq->cref_stack;
+    }
+
+    if (type == ISEQ_TYPE_TOP ||
+        type == ISEQ_TYPE_METHOD || type == ISEQ_TYPE_CLASS) {
+        iseq->local_iseq = iseq; /* these types are their own local_iseq */
+    }
+    else if (RTEST(parent)) {
+        rb_iseq_t *piseq;
+        GetISeqPtr(parent, piseq);
+        iseq->local_iseq = piseq->local_iseq;
+    }
+
+    if (RTEST(parent)) {
+        rb_iseq_t *piseq;
+        GetISeqPtr(parent, piseq);
+        iseq->parent_iseq = piseq;
+    }
+}
+/*
+ * Initialise a freshly allocated iseq for compilation: freezes and
+ * stores name/filename, resets argument indices, allocates compile_data
+ * with its first storage chunk, links the iseq to +parent+ and looks up
+ * coverage data for +filename+.  +block_opt+ is only referenced from a
+ * commented-out block.  Returns Qtrue.
+ */
+static VALUE
+prepare_iseq_build(rb_iseq_t *iseq,
+                   VALUE name, VALUE filename, VALUE line_no,
+                   VALUE parent, VALUE type, VALUE block_opt,
+                   const rb_compile_option_t *option)
+{
+    OBJ_FREEZE(name);
+    OBJ_FREEZE(filename);
+
+    iseq->name = name;
+    iseq->filename = filename;
+    iseq->line_no = line_no;
+    iseq->defined_method_id = 0;
+    iseq->mark_ary = rb_ary_new();
+    RBASIC(iseq->mark_ary)->klass = 0; /* zero klass: the state hidden_obj_p() tests for */
+
+    iseq->type = type;
+    iseq->arg_rest = -1;
+    iseq->arg_block = -1;
+    iseq->klass = 0;
+
+    /*
+     * iseq->special_block_builder = GC_GUARDED_PTR_REF(block_opt);
+     * iseq->cached_special_block_builder = 0;
+     * iseq->cached_special_block = 0;
+     */
+
+    iseq->compile_data = ALLOC(struct iseq_compile_data);
+    MEMZERO(iseq->compile_data, struct iseq_compile_data, 1);
+    iseq->compile_data->mark_ary = rb_ary_new();
+    RBASIC(iseq->compile_data->mark_ary)->klass = 0;
+
+    iseq->compile_data->storage_head = iseq->compile_data->storage_current =
+        (struct iseq_compile_data_storage *)
+        ALLOC_N(char, INITIAL_ISEQ_COMPILE_DATA_STORAGE_BUFF_SIZE +
+                sizeof(struct iseq_compile_data_storage)); /* header and buffer in one allocation */
+
+    iseq->compile_data->catch_table_ary = rb_ary_new();
+    iseq->compile_data->storage_head->pos = 0;
+    iseq->compile_data->storage_head->next = 0;
+    
iseq->compile_data->storage_head->size = + INITIAL_ISEQ_COMPILE_DATA_STORAGE_BUFF_SIZE; + iseq->compile_data->storage_head->buff = + (char *)(&iseq->compile_data->storage_head->buff + 1); + iseq->compile_data->option = option; + + set_relation(iseq, parent); + + iseq->coverage = Qfalse; + if (!GET_THREAD()->parse_in_eval) { + extern VALUE rb_get_coverages(void); + VALUE coverages = rb_get_coverages(); + if (RTEST(coverages)) { + iseq->coverage = rb_hash_lookup(coverages, filename); + if (NIL_P(iseq->coverage)) iseq->coverage = Qfalse; + } + } + + return Qtrue; +} + +static VALUE +cleanup_iseq_build(rb_iseq_t *iseq) +{ + struct iseq_compile_data *data = iseq->compile_data; + VALUE err = data->err_info; + iseq->compile_data = 0; + compile_data_free(data); + + if (RTEST(err)) { + rb_funcall2(err, rb_intern("set_backtrace"), 1, &iseq->filename); + rb_exc_raise(err); + } + return Qtrue; +} + +static rb_compile_option_t COMPILE_OPTION_DEFAULT = { + OPT_INLINE_CONST_CACHE, /* int inline_const_cache; */ + OPT_PEEPHOLE_OPTIMIZATION, /* int peephole_optimization; */ + OPT_TAILCALL_OPTIMIZATION, /* int tailcall_optimization */ + OPT_SPECIALISED_INSTRUCTION, /* int specialized_instruction; */ + OPT_OPERANDS_UNIFICATION, /* int operands_unification; */ + OPT_INSTRUCTIONS_UNIFICATION, /* int instructions_unification; */ + OPT_STACK_CACHING, /* int stack_caching; */ + OPT_TRACE_INSTRUCTION, /* int trace_instruction */ +}; +static const rb_compile_option_t COMPILE_OPTION_FALSE = {0}; + +static void +make_compile_option(rb_compile_option_t *option, VALUE opt) +{ + if (opt == Qnil) { + *option = COMPILE_OPTION_DEFAULT; + } + else if (opt == Qfalse) { + *option = COMPILE_OPTION_FALSE; + } + else if (opt == Qtrue) { + memset(option, 1, sizeof(rb_compile_option_t)); + } + else if (CLASS_OF(opt) == rb_cHash) { + *option = COMPILE_OPTION_DEFAULT; + +#define SET_COMPILE_OPTION(o, h, mem) \ + { VALUE flag = rb_hash_aref(h, ID2SYM(rb_intern(#mem))); \ + if (flag == Qtrue) { o->mem = 1; } \ 
+          else if (flag == Qfalse)  { o->mem = 0; } \
+        }
+        /* Integer-valued option: reads from the macro's own hash argument
+         * +h+ (it used to ignore +h+ and capture the enclosing +opt+). */
+#define SET_COMPILE_OPTION_NUM(o, h, mem) \
+        { VALUE num = rb_hash_aref(h, ID2SYM(rb_intern(#mem))); \
+          if (!NIL_P(num)) o->mem = NUM2INT(num); \
+        }
+        /* keys that are absent, or hold anything but true/false, keep
+         * the default copied in above */
+        SET_COMPILE_OPTION(option, opt, inline_const_cache);
+        SET_COMPILE_OPTION(option, opt, peephole_optimization);
+        SET_COMPILE_OPTION(option, opt, tailcall_optimization);
+        SET_COMPILE_OPTION(option, opt, specialized_instruction);
+        SET_COMPILE_OPTION(option, opt, operands_unification);
+        SET_COMPILE_OPTION(option, opt, instructions_unification);
+        SET_COMPILE_OPTION(option, opt, stack_caching);
+        SET_COMPILE_OPTION(option, opt, trace_instruction);
+        SET_COMPILE_OPTION_NUM(option, opt, debug_level);
+#undef SET_COMPILE_OPTION
+#undef SET_COMPILE_OPTION_NUM
+    }
+    else {
+        rb_raise(rb_eTypeError, "Compile option must be Hash/true/false/nil");
+    }
+}
+
+/*
+ * Inverse of make_compile_option(): build a Hash keyed by member-name
+ * symbols from *option.
+ */
+static VALUE
+make_compile_option_value(rb_compile_option_t *option)
+{
+    VALUE opt = rb_hash_new();
+#define SET_COMPILE_OPTION(o, h, mem) \
+  rb_hash_aset(h, ID2SYM(rb_intern(#mem)), o->mem ? 
Qtrue : Qfalse)
+#define SET_COMPILE_OPTION_NUM(o, h, mem) \
+  rb_hash_aset(h, ID2SYM(rb_intern(#mem)), INT2NUM(o->mem))
+    {
+        SET_COMPILE_OPTION(option, opt, inline_const_cache);
+        SET_COMPILE_OPTION(option, opt, peephole_optimization);
+        SET_COMPILE_OPTION(option, opt, tailcall_optimization);
+        SET_COMPILE_OPTION(option, opt, specialized_instruction);
+        SET_COMPILE_OPTION(option, opt, operands_unification);
+        SET_COMPILE_OPTION(option, opt, instructions_unification);
+        SET_COMPILE_OPTION(option, opt, stack_caching);
+        /* trace_instruction is accepted by make_compile_option(), so it
+         * is exported here as well to keep the two in step */
+        SET_COMPILE_OPTION(option, opt, trace_instruction);
+        SET_COMPILE_OPTION_NUM(option, opt, debug_level);
+    }
+#undef SET_COMPILE_OPTION
+#undef SET_COMPILE_OPTION_NUM
+    return opt;
+}
+
+/* Compile +node+ into a new iseq of the given type with default options and line number 0. */
+VALUE
+rb_iseq_new(NODE *node, VALUE name, VALUE filename,
+            VALUE parent, VALUE type)
+{
+    return rb_iseq_new_with_opt(node, name, filename, INT2FIX(0), parent, type,
+                                &COMPILE_OPTION_DEFAULT);
+}
+
+/* Same as rb_iseq_new() with the type fixed to ISEQ_TYPE_TOP. */
+VALUE
+rb_iseq_new_top(NODE *node, VALUE name, VALUE filename, VALUE parent)
+{
+    return rb_iseq_new_with_opt(node, name, filename, INT2FIX(0), parent, ISEQ_TYPE_TOP,
+                                &COMPILE_OPTION_DEFAULT);
+}
+
+/*
+ * Compile +node+ as an ISEQ_TYPE_MAIN iseq whose parent is the iseq of
+ * the current thread's base_block.  The name literal had been broken
+ * across a line break with its text lost; it is restored to "<main>".
+ * NOTE(review): literal restored from upstream Ruby iseq.c -- confirm.
+ */
+VALUE
+rb_iseq_new_main(NODE *node, VALUE filename)
+{
+    rb_thread_t *th = GET_THREAD();
+    VALUE parent = th->base_block->iseq->self;
+    return rb_iseq_new_with_opt(node, rb_str_new2("<main>"), filename, INT2FIX(0),
+                                parent, ISEQ_TYPE_MAIN, &COMPILE_OPTION_DEFAULT);
+}
+
+/*
+ * Common constructor: allocate an ISeq object, prepare its build state,
+ * compile +node+ into it and release the build state.  Returns the new
+ * ISeq object; a recorded compile error is raised by cleanup_iseq_build().
+ */
+static VALUE
+rb_iseq_new_with_bopt_and_opt(NODE *node, VALUE name, VALUE filename, VALUE line_no,
+                              VALUE parent, VALUE type, VALUE bopt,
+                              const rb_compile_option_t *option)
+{
+    rb_iseq_t *iseq;
+    VALUE self = iseq_alloc(rb_cISeq);
+
+    GetISeqPtr(self, iseq);
+    iseq->self = self;
+
+    prepare_iseq_build(iseq, name, filename, line_no, parent, type, bopt, option);
+    rb_iseq_compile_node(self, node);
+    cleanup_iseq_build(iseq);
+    return self;
+}
+
+/* Constructor taking explicit compile options; bopt is Qfalse. */
+VALUE
+rb_iseq_new_with_opt(NODE *node, VALUE name, VALUE filename, VALUE line_no,
+                     VALUE parent, VALUE type,
+                     const rb_compile_option_t *option)
+{
+    return rb_iseq_new_with_bopt_and_opt(node, name, filename, line_no, parent, type,
+                                         Qfalse, option);
+}
+
+/* Constructor taking an explicit bopt; compile options are the defaults. */
+VALUE
+rb_iseq_new_with_bopt(NODE *node, VALUE name, VALUE filename, VALUE line_no,
+                      VALUE parent, VALUE type, VALUE bopt)
+{
+    return rb_iseq_new_with_bopt_and_opt(node, name, filename, line_no, parent, type,
+                                         bopt, &COMPILE_OPTION_DEFAULT);
+}
+
+/* Conversion helpers for iseq_load(): each returns the converted value
+ * or raises through rb_convert_type() / NUM2LONG(). */
+#define CHECK_ARRAY(v)   rb_convert_type(v, T_ARRAY, "Array", "to_ary")
+#define CHECK_STRING(v)  rb_convert_type(v, T_STRING, "String", "to_str")
+#define CHECK_SYMBOL(v)  rb_convert_type(v, T_SYMBOL, "Symbol", "to_sym")
+static inline VALUE CHECK_INTEGER(VALUE v) {NUM2LONG(v); return v;}
+/*
+ * Build an iseq of class +self+ from its array representation +data+
+ * (layout described in the comment below).  +parent+ is the parent iseq
+ * or nil/0; +opt+ is a compile option value as accepted by
+ * make_compile_option().
+ */
+static VALUE
+iseq_load(VALUE self, VALUE data, VALUE parent, VALUE opt)
+{
+    VALUE iseqval = iseq_alloc(self);
+
+    VALUE magic, version1, version2, format_type, misc;
+    VALUE name, filename, line_no;
+    VALUE type, body, locals, args, exception;
+
+    VALUE iseq_type;
+    /* static: the symbol -> type table is built on first use (see the
+     * "type_map == 0" test below) and then reused, instead of being
+     * rebuilt and leaked on every call */
+    static struct st_table *type_map = 0;
+    rb_iseq_t *iseq;
+    rb_compile_option_t option;
+    int i = 0;
+
+    /* [magic, major_version, minor_version, format_type, misc,
+     *  name, filename, line_no,
+     *  type, locals, args, exception_table, body]
+     */
+
+    data        = CHECK_ARRAY(data);
+
+    magic       = CHECK_STRING(rb_ary_entry(data, i++));
+    version1    = CHECK_INTEGER(rb_ary_entry(data, i++));
+    version2    = 
CHECK_INTEGER(rb_ary_entry(data, i++));
+    format_type = CHECK_INTEGER(rb_ary_entry(data, i++));
+    misc        = rb_ary_entry(data, i++); /* TODO */
+
+    name        = CHECK_STRING(rb_ary_entry(data, i++));
+    filename    = CHECK_STRING(rb_ary_entry(data, i++));
+    line_no     = CHECK_INTEGER(rb_ary_entry(data, i++));
+
+    type        = CHECK_SYMBOL(rb_ary_entry(data, i++));
+    locals      = CHECK_ARRAY(rb_ary_entry(data, i++));
+
+    /* args may be either a Fixnum or an Array; anything else raises
+     * inside CHECK_ARRAY */
+    args        = rb_ary_entry(data, i++);
+    if (FIXNUM_P(args) || (args = CHECK_ARRAY(args))) {
+        /* */
+    }
+
+    exception   = CHECK_ARRAY(rb_ary_entry(data, i++));
+    body        = CHECK_ARRAY(rb_ary_entry(data, i++));
+
+    GetISeqPtr(iseqval, iseq);
+    iseq->self = iseqval;
+
+    /* symbol -> ISEQ_TYPE_* lookup table, built when not yet present */
+    if (type_map == 0) {
+        type_map = st_init_numtable();
+        st_insert(type_map, ID2SYM(rb_intern("top")), ISEQ_TYPE_TOP);
+        st_insert(type_map, ID2SYM(rb_intern("method")), ISEQ_TYPE_METHOD);
+        st_insert(type_map, ID2SYM(rb_intern("block")), ISEQ_TYPE_BLOCK);
+        st_insert(type_map, ID2SYM(rb_intern("class")), ISEQ_TYPE_CLASS);
+        st_insert(type_map, ID2SYM(rb_intern("rescue")), ISEQ_TYPE_RESCUE);
+        st_insert(type_map, ID2SYM(rb_intern("ensure")), ISEQ_TYPE_ENSURE);
+        st_insert(type_map, ID2SYM(rb_intern("eval")), ISEQ_TYPE_EVAL);
+        st_insert(type_map, ID2SYM(rb_intern("main")), ISEQ_TYPE_MAIN);
+        st_insert(type_map, ID2SYM(rb_intern("defined_guard")), ISEQ_TYPE_DEFINED_GUARD);
+    }
+
+    if (st_lookup(type_map, type, &iseq_type) == 0) {
+        /* +type+ is a Symbol VALUE (see CHECK_SYMBOL above); rb_id2name()
+         * takes an ID, so convert before looking up the name */
+        const char *typename = rb_id2name(SYM2ID(type));
+        if (typename)
+            rb_raise(rb_eTypeError, "unsupport type: :%s", typename);
+        else
+            rb_raise(rb_eTypeError, "unsupport type: %p", (void *)type);
+    }
+
+    if (parent == Qnil) {
+        parent = 0;
+    }
+
+    make_compile_option(&option, opt);
+    /* &option is a stack address; it is stored in compile_data, which
+     * cleanup_iseq_build() releases before this function returns */
+    prepare_iseq_build(iseq, name, filename, line_no,
+                       parent, iseq_type, 0, &option);
+
+    rb_iseq_build_from_ary(iseq, locals, args, exception, body);
+
+    cleanup_iseq_build(iseq);
+    return iseqval;
+}
+
+/*
+ * Singleton method taking (data [, opt]): loads an iseq from its array
+ * form with no parent.
+ */
+static VALUE
+iseq_s_load(int argc, VALUE *argv, VALUE self)
+{
+    VALUE data, opt=Qnil;
+    rb_scan_args(argc, argv, "11", 
&data, &opt); + + return iseq_load(self, data, 0, opt); +} + +VALUE +ruby_iseq_load(VALUE data, VALUE parent, VALUE opt) +{ + return iseq_load(rb_cISeq, data, parent, opt); +} + +static NODE * +parse_string(VALUE str, VALUE file, VALUE line) +{ + VALUE parser = rb_parser_new(); + NODE *node = rb_parser_compile_string(parser, StringValueCStr(file), + str, NUM2INT(line)); + + if (!node) { + rb_exc_raise(GET_THREAD()->errinfo); /* TODO: check err */ + } + return node; +} + +VALUE +rb_iseq_compile_with_option(VALUE src, VALUE file, VALUE line, VALUE opt) +{ + rb_compile_option_t option; + NODE *node = parse_string(StringValue(src), file, line); + rb_thread_t *th = GET_THREAD(); + make_compile_option(&option, opt); + + if (th->base_block && th->base_block->iseq) { + return rb_iseq_new_with_opt(node, th->base_block->iseq->name, + file, line, th->base_block->iseq->self, + ISEQ_TYPE_EVAL, &option); + } + else { + return rb_iseq_new_with_opt(node, rb_str_new2(""), file, line, Qfalse, + ISEQ_TYPE_TOP, &option); + } +} + +VALUE +rb_iseq_compile(VALUE src, VALUE file, VALUE line) +{ + return rb_iseq_compile_with_option(src, file, line, Qnil); +} + +static VALUE +iseq_s_compile(int argc, VALUE *argv, VALUE self) +{ + VALUE src, file = Qnil, line = INT2FIX(1), opt = Qnil; + + rb_secure(1); + + rb_scan_args(argc, argv, "13", &src, &file, &line, &opt); + file = file == Qnil ? rb_str_new2("") : file; + line = line == Qnil ? 
INT2FIX(1) : line; + + return rb_iseq_compile_with_option(src, file, line, opt); +} + +static VALUE +iseq_s_compile_file(int argc, VALUE *argv, VALUE self) +{ + VALUE file, line = INT2FIX(1), opt = Qnil; + VALUE parser; + VALUE f; + NODE *node; + const char *fname; + rb_compile_option_t option; + + rb_secure(1); + rb_scan_args(argc, argv, "11", &file, &opt); + FilePathValue(file); + fname = StringValueCStr(file); + + f = rb_file_open_str(file, "r"); + + parser = rb_parser_new(); + node = rb_parser_compile_file(parser, fname, f, NUM2INT(line)); + make_compile_option(&option, opt); + return rb_iseq_new_with_opt(node, rb_str_new2("
"), file, line, Qfalse, + ISEQ_TYPE_TOP, &option); +} + +static VALUE +iseq_s_compile_option_set(VALUE self, VALUE opt) +{ + rb_compile_option_t option; + rb_secure(1); + make_compile_option(&option, opt); + COMPILE_OPTION_DEFAULT = option; + return opt; +} + +static VALUE +iseq_s_compile_option_get(VALUE self) +{ + return make_compile_option_value(&COMPILE_OPTION_DEFAULT); +} + +static rb_iseq_t * +iseq_check(VALUE val) +{ + rb_iseq_t *iseq; + GetISeqPtr(val, iseq); + if (!iseq->name) { + rb_raise(rb_eTypeError, "uninitialized InstructionSequence"); + } + return iseq; +} + +static VALUE +iseq_eval(VALUE self) +{ + rb_secure(1); + return rb_iseq_eval(self); +} + +static VALUE +iseq_inspect(VALUE self) +{ + rb_iseq_t *iseq; + GetISeqPtr(self, iseq); + if (!iseq->name) { + return rb_sprintf("#<%s: uninitialized>", rb_obj_classname(self)); + } + + return rb_sprintf("<%s:%s@%s>", + rb_obj_classname(self), + RSTRING_PTR(iseq->name), RSTRING_PTR(iseq->filename)); +} + +static +VALUE iseq_data_to_ary(rb_iseq_t *iseq); + +static VALUE +iseq_to_a(VALUE self) +{ + rb_iseq_t *iseq = iseq_check(self); + rb_secure(1); + return iseq_data_to_ary(iseq); +} + +int +rb_iseq_first_lineno(rb_iseq_t *iseq) +{ + return FIX2INT(iseq->line_no); +} + +/* TODO: search algorithm is brute force. + this should be binary search or so. 
*/ + +static struct iseq_insn_info_entry * +get_insn_info(const rb_iseq_t *iseq, const unsigned long pos) +{ + unsigned long i, size = iseq->insn_info_size; + struct iseq_insn_info_entry *table = iseq->insn_info_table; + + for (i = 0; i < size; i++) { + if (table[i].position == pos) { + return &table[i]; + } + } + + return 0; +} + +static unsigned short +find_line_no(rb_iseq_t *iseq, unsigned long pos) +{ + struct iseq_insn_info_entry *entry = get_insn_info(iseq, pos); + if (entry) { + return entry->line_no; + } + else { + return 0; + } +} + +static unsigned short +find_prev_line_no(rb_iseq_t *iseqdat, unsigned long pos) +{ + unsigned long i, size = iseqdat->insn_info_size; + struct iseq_insn_info_entry *iiary = iseqdat->insn_info_table; + + for (i = 0; i < size; i++) { + if (iiary[i].position == pos) { + if (i > 0) { + return iiary[i - 1].line_no; + } + else { + return 0; + } + } + } + + return 0; +} + +static VALUE +insn_operand_intern(rb_iseq_t *iseq, + int insn, int op_no, VALUE op, + int len, int pos, VALUE *pnop, VALUE child) +{ + const char *types = insn_op_types(insn); + char type = types[op_no]; + VALUE ret; + + switch (type) { + case TS_OFFSET: /* LONG */ + ret = rb_sprintf("%ld", pos + len + op); + break; + + case TS_NUM: /* ULONG */ + ret = rb_sprintf("%lu", op); + break; + + case TS_LINDEX: + { + rb_iseq_t *ip = iseq->local_iseq; + int lidx = ip->local_size - op; + const char *name = rb_id2name(ip->local_table[lidx]); + + if (name) { + ret = rb_str_new2(name); + } + else { + ret = rb_str_new2("*"); + } + break; + } + case TS_DINDEX:{ + if (insn == BIN(getdynamic) || insn == BIN(setdynamic)) { + rb_iseq_t *ip = iseq; + int level = *pnop, i; + const char *name; + for (i = 0; i < level; i++) { + ip = ip->parent_iseq; + } + name = rb_id2name(ip->local_table[ip->local_size - op]); + + if (!name) { + name = "*"; + } + ret = rb_str_new2(name); + } + else { + ret = rb_inspect(INT2FIX(op)); + } + break; + } + case TS_ID: /* ID (symbol) */ + op = ID2SYM(op); + + 
case TS_VALUE: /* VALUE */ + if (hidden_obj_p(op)) { + switch (BUILTIN_TYPE(op)) { + case T_STRING: + op = rb_str_replace(rb_str_new(0, 0), op); + break; + case T_ARRAY: + op = rb_ary_replace(rb_ary_new2(0), op); + break; + } + } + ret = rb_inspect(op); + if (CLASS_OF(op) == rb_cISeq) { + rb_ary_push(child, op); + } + break; + + case TS_ISEQ: /* iseq */ + { + rb_iseq_t *iseq = (rb_iseq_t *)op; + if (iseq) { + ret = iseq->name; + if (child) { + rb_ary_push(child, iseq->self); + } + } + else { + ret = rb_str_new2("nil"); + } + break; + } + case TS_GENTRY: + { + struct global_entry *entry = (struct global_entry *)op; + ret = rb_str_dup(rb_id2str(entry->id)); + } + break; + + case TS_IC: + ret = rb_str_new2(""); + break; + + case TS_CDHASH: + ret = rb_str_new2(""); + break; + + case TS_FUNCPTR: + ret = rb_str_new2(""); + break; + + default: + rb_bug("rb_iseq_disasm: unknown operand type: %c", type); + } + return ret; +} + +/** + * Disassemble a instruction + * Iseq -> Iseq inspect object + */ +VALUE +rb_iseq_disasm_insn(VALUE ret, VALUE *iseq, int pos, + rb_iseq_t *iseqdat, VALUE child) +{ + int insn = iseq[pos]; + int len = insn_len(insn); + int j; + const char *types = insn_op_types(insn); + VALUE str = rb_str_new(0, 0); + const char *insn_name_buff; + + insn_name_buff = insn_name(insn); + if (1) { + rb_str_catf(str, "%04d %-16s ", pos, insn_name_buff); + } + else { + rb_str_catf(str, "%04d %-16.*s ", pos, + (int)strcspn(insn_name_buff, "_"), insn_name_buff); + } + + for (j = 0; types[j]; j++) { + const char *types = insn_op_types(insn); + VALUE opstr = insn_operand_intern(iseqdat, insn, j, iseq[pos + j + 1], + len, pos, &iseq[pos + j + 2], + child); + rb_str_concat(str, opstr); + + if (types[j + 1]) { + rb_str_cat2(str, ", "); + } + } + + if (1) { + int line_no = find_line_no(iseqdat, pos); + int prev = find_prev_line_no(iseqdat, pos); + if (line_no && line_no != prev) { + long slen = RSTRING_LEN(str); + slen = (slen > 70) ? 
0 : (70 - slen); + str = rb_str_catf(str, "%*s(%4d)", (int)slen, "", line_no); + } + } + else { + /* for debug */ + struct iseq_insn_info_entry *entry = get_insn_info(iseqdat, pos); + long slen = RSTRING_LEN(str); + slen = (slen > 60) ? 0 : (60 - slen); + str = rb_str_catf(str, "%*s(line: %d, sp: %d)", + (int)slen, "", entry->line_no, entry->sp); + } + + if (ret) { + rb_str_cat2(str, "\n"); + rb_str_concat(ret, str); + } + else { + printf("%s\n", RSTRING_PTR(str)); + } + return len; +} + +static const char * +catch_type(int type) +{ + switch (type) { + case CATCH_TYPE_RESCUE: + return "rescue"; + case CATCH_TYPE_ENSURE: + return "ensure"; + case CATCH_TYPE_RETRY: + return "retry"; + case CATCH_TYPE_BREAK: + return "break"; + case CATCH_TYPE_REDO: + return "redo"; + case CATCH_TYPE_NEXT: + return "next"; + default: + rb_bug("unknown catch type (%d)", type); + return 0; + } +} + +VALUE +rb_iseq_disasm(VALUE self) +{ + rb_iseq_t *iseqdat = iseq_check(self); + VALUE *iseq; + VALUE str = rb_str_new(0, 0); + VALUE child = rb_ary_new(); + unsigned long size; + int i; + long l; + ID *tbl; + enum {header_minlen = 72}; + + rb_secure(1); + + iseq = iseqdat->iseq; + size = iseqdat->iseq_size; + + rb_str_cat2(str, "== disasm: "); + + rb_str_concat(str, iseq_inspect(iseqdat->self)); + if ((l = RSTRING_LEN(str)) < header_minlen) { + rb_str_resize(str, header_minlen); + memset(RSTRING_PTR(str) + l, '=', header_minlen - l); + } + rb_str_cat2(str, "\n"); + + /* show catch table information */ + if (iseqdat->catch_table_size != 0) { + rb_str_cat2(str, "== catch table\n"); + } + for (i = 0; i < iseqdat->catch_table_size; i++) { + struct iseq_catch_table_entry *entry = &iseqdat->catch_table[i]; + rb_str_catf(str, + "| catch type: %-6s st: %04d ed: %04d sp: %04d cont: %04d\n", + catch_type((int)entry->type), (int)entry->start, + (int)entry->end, (int)entry->sp, (int)entry->cont); + if (entry->iseq) { + rb_str_concat(str, rb_iseq_disasm(entry->iseq)); + } + } + if 
(iseqdat->catch_table_size != 0) { + rb_str_cat2(str, "|-------------------------------------" + "-----------------------------------\n"); + } + + /* show local table information */ + tbl = iseqdat->local_table; + + if (tbl) { + rb_str_catf(str, + "local table (size: %d, argc: %d " + "[opts: %d, rest: %d, post: %d, block: %d] s%d)\n", + iseqdat->local_size, iseqdat->argc, + iseqdat->arg_opts, iseqdat->arg_rest, + iseqdat->arg_post_len, iseqdat->arg_block, + iseqdat->arg_simple); + + for (i = 0; i < iseqdat->local_table_size; i++) { + const char *name = rb_id2name(tbl[i]); + char info[0x100]; + char argi[0x100] = ""; + char opti[0x100] = ""; + + if (iseqdat->arg_opts) { + int argc = iseqdat->argc; + int opts = iseqdat->arg_opts; + if (i >= argc && i < argc + opts - 1) { + snprintf(opti, sizeof(opti), "Opt=%ld", + iseqdat->arg_opt_table[i - argc]); + } + } + + snprintf(argi, sizeof(argi), "%s%s%s%s%s", /* arg, opts, rest, post block */ + iseqdat->argc > i ? "Arg" : "", + opti, + iseqdat->arg_rest == i ? "Rest" : "", + (iseqdat->arg_post_start <= i && + i < iseqdat->arg_post_start + iseqdat->arg_post_len) ? "Post" : "", + iseqdat->arg_block == i ? "Block" : ""); + + snprintf(info, sizeof(info), "%s%s%s%s", name ? name : "?", + *argi ? "<" : "", argi, *argi ? 
">" : ""); + + rb_str_catf(str, "[%2d] %-11s", iseqdat->local_size - i, info); + } + rb_str_cat2(str, "\n"); + } + + /* show each line */ + for (i = 0; i < size;) { + i += rb_iseq_disasm_insn(str, iseq, i, iseqdat, child); + } + + for (i = 0; i < RARRAY_LEN(child); i++) { + VALUE isv = rb_ary_entry(child, i); + rb_str_concat(str, rb_iseq_disasm(isv)); + } + + return str; +} + +static VALUE +iseq_s_disasm(VALUE klass, VALUE body) +{ + extern NODE *rb_method_body(VALUE body); + NODE *node; + VALUE ret = Qnil; + + rb_secure(1); + + if ((node = rb_method_body(body)) != 0) { + if (nd_type(node) == RUBY_VM_METHOD_NODE) { + VALUE iseqval = (VALUE)node->nd_body; + ret = rb_iseq_disasm(iseqval); + } + } + + return ret; +} + +const char * +ruby_node_name(int node) +{ + switch (node) { +#include "node_name.inc" + default: + rb_bug("unknown node (%d)", node); + return 0; + } +} + +#define DECL_SYMBOL(name) \ + static VALUE sym_##name + +#define INIT_SYMBOL(name) \ + sym_##name = ID2SYM(rb_intern(#name)) + +static VALUE +register_label(struct st_table *table, int idx) +{ + VALUE sym; + char buff[8 + (sizeof(idx) * CHAR_BIT * 32 / 100)]; + + snprintf(buff, sizeof(buff), "label_%u", idx); + sym = ID2SYM(rb_intern(buff)); + st_insert(table, idx, sym); + return sym; +} + +static VALUE +exception_type2symbol(VALUE type) +{ + ID id; + switch(type) { + case CATCH_TYPE_RESCUE: CONST_ID(id, "rescue"); break; + case CATCH_TYPE_ENSURE: CONST_ID(id, "ensure"); break; + case CATCH_TYPE_RETRY: CONST_ID(id, "retry"); break; + case CATCH_TYPE_BREAK: CONST_ID(id, "break"); break; + case CATCH_TYPE_REDO: CONST_ID(id, "redo"); break; + case CATCH_TYPE_NEXT: CONST_ID(id, "next"); break; + default: + rb_bug("..."); + } + return ID2SYM(id); +} + +static int +cdhash_each(VALUE key, VALUE value, VALUE ary) +{ + rb_ary_push(ary, key); + rb_ary_push(ary, value); + return ST_CONTINUE; +} + +static VALUE +iseq_data_to_ary(rb_iseq_t *iseq) +{ + int i, pos, line = 0; + VALUE *seq; + + VALUE val = 
rb_ary_new(); + VALUE type; /* Symbol */ + VALUE locals = rb_ary_new(); + VALUE args = rb_ary_new(); + VALUE body = rb_ary_new(); /* [[:insn1, ...], ...] */ + VALUE nbody; + VALUE exception = rb_ary_new(); /* [[....]] */ + VALUE misc = rb_hash_new(); + + static VALUE insn_syms[VM_INSTRUCTION_SIZE]; + struct st_table *labels_table = st_init_numtable(); + + DECL_SYMBOL(top); + DECL_SYMBOL(method); + DECL_SYMBOL(block); + DECL_SYMBOL(class); + DECL_SYMBOL(rescue); + DECL_SYMBOL(ensure); + DECL_SYMBOL(eval); + DECL_SYMBOL(main); + DECL_SYMBOL(defined_guard); + + if (sym_top == 0) { + int i; + for (i=0; itype) { + case ISEQ_TYPE_TOP: type = sym_top; break; + case ISEQ_TYPE_METHOD: type = sym_method; break; + case ISEQ_TYPE_BLOCK: type = sym_block; break; + case ISEQ_TYPE_CLASS: type = sym_class; break; + case ISEQ_TYPE_RESCUE: type = sym_rescue; break; + case ISEQ_TYPE_ENSURE: type = sym_ensure; break; + case ISEQ_TYPE_EVAL: type = sym_eval; break; + case ISEQ_TYPE_MAIN: type = sym_main; break; + case ISEQ_TYPE_DEFINED_GUARD: type = sym_defined_guard; break; + default: rb_bug("unsupported iseq type"); + }; + + /* locals */ + for (i=0; ilocal_table_size; i++) { + ID lid = iseq->local_table[i]; + if (lid) { + if (rb_id2str(lid)) rb_ary_push(locals, ID2SYM(lid)); + } + else { + rb_ary_push(locals, ID2SYM(rb_intern("#arg_rest"))); + } + } + + /* args */ + { + /* + * [argc, # argc + * [label1, label2, ...] 
# opts + * rest index, + * post_len + * post_start + * block index, + * simple, + * ] + */ + VALUE arg_opt_labels = rb_ary_new(); + int j; + + for (j=0; jarg_opts; j++) { + rb_ary_push(arg_opt_labels, + register_label(labels_table, iseq->arg_opt_table[j])); + } + + /* commit */ + if (iseq->arg_simple == 1) { + args = INT2FIX(iseq->argc); + } + else { + rb_ary_push(args, INT2FIX(iseq->argc)); + rb_ary_push(args, arg_opt_labels); + rb_ary_push(args, INT2FIX(iseq->arg_post_len)); + rb_ary_push(args, INT2FIX(iseq->arg_post_start)); + rb_ary_push(args, INT2FIX(iseq->arg_rest)); + rb_ary_push(args, INT2FIX(iseq->arg_block)); + rb_ary_push(args, INT2FIX(iseq->arg_simple)); + } + } + + /* body */ + for (seq = iseq->iseq; seq < iseq->iseq + iseq->iseq_size; ) { + VALUE insn = *seq++; + int j, len = insn_len(insn); + VALUE *nseq = seq + len - 1; + VALUE ary = rb_ary_new2(len); + + rb_ary_push(ary, insn_syms[insn]); + for (j=0; jiseq + *seq; + rb_ary_push(ary, register_label(labels_table, idx)); + break; + } + case TS_LINDEX: + case TS_DINDEX: + case TS_NUM: + rb_ary_push(ary, INT2FIX(*seq)); + break; + case TS_VALUE: + rb_ary_push(ary, *seq); + break; + case TS_ISEQ: + { + rb_iseq_t *iseq = (rb_iseq_t *)*seq; + if (iseq) { + VALUE val = iseq_data_to_ary(iseq); + rb_ary_push(ary, val); + } + else { + rb_ary_push(ary, Qnil); + } + } + break; + case TS_GENTRY: + { + struct global_entry *entry = (struct global_entry *)*seq; + rb_ary_push(ary, ID2SYM(entry->id)); + } + break; + case TS_IC: + rb_ary_push(ary, Qnil); + break; + case TS_ID: + rb_ary_push(ary, ID2SYM(*seq)); + break; + case TS_CDHASH: + { + VALUE hash = *seq; + VALUE val = rb_ary_new(); + int i; + + rb_hash_foreach(hash, cdhash_each, val); + + for (i=0; iiseq + pos; + + rb_ary_store(val, i+1, + register_label(labels_table, idx)); + } + rb_ary_push(ary, val); + } + break; + default: + rb_bug("unknown operand: %c", insn_op_type(insn, j)); + } + } + rb_ary_push(body, ary); + } + + nbody = body; + + /* exception */ + for 
(i=0; icatch_table_size; i++) { + VALUE ary = rb_ary_new(); + struct iseq_catch_table_entry *entry = &iseq->catch_table[i]; + rb_ary_push(ary, exception_type2symbol(entry->type)); + if (entry->iseq) { + rb_iseq_t *eiseq; + GetISeqPtr(entry->iseq, eiseq); + rb_ary_push(ary, iseq_data_to_ary(eiseq)); + } + else { + rb_ary_push(ary, Qnil); + } + rb_ary_push(ary, register_label(labels_table, entry->start)); + rb_ary_push(ary, register_label(labels_table, entry->end)); + rb_ary_push(ary, register_label(labels_table, entry->cont)); + rb_ary_push(ary, INT2FIX(entry->sp)); + rb_ary_push(exception, ary); + } + + /* make body with labels and insert line number */ + body = rb_ary_new(); + + for (i=0, pos=0; iinsn_info_table[i].line_no != line) { + line = iseq->insn_info_table[i].line_no; + rb_ary_push(body, INT2FIX(line)); + } + + rb_ary_push(body, ary); + pos += RARRAY_LEN(ary); + } + + st_free_table(labels_table); + + rb_hash_aset(misc, ID2SYM(rb_intern("arg_size")), INT2FIX(iseq->arg_size)); + rb_hash_aset(misc, ID2SYM(rb_intern("local_size")), INT2FIX(iseq->local_size)); + rb_hash_aset(misc, ID2SYM(rb_intern("stack_max")), INT2FIX(iseq->stack_max)); + + /* + * [:magic, :major_version, :minor_version, :format_type, :misc, + * :name, :filename, :line_no, :type, :locals, :args, + * :catch_table, :bytecode] + */ + rb_ary_push(val, rb_str_new2("YARVInstructionSequence/SimpleDataFormat")); + rb_ary_push(val, INT2FIX(1)); + rb_ary_push(val, INT2FIX(1)); + rb_ary_push(val, INT2FIX(1)); + rb_ary_push(val, misc); + rb_ary_push(val, iseq->name); + rb_ary_push(val, iseq->filename); + rb_ary_push(val, iseq->line_no); + rb_ary_push(val, type); + rb_ary_push(val, locals); + rb_ary_push(val, args); + rb_ary_push(val, exception); + rb_ary_push(val, body); + return val; +} + +VALUE +rb_iseq_clone(VALUE iseqval, VALUE newcbase) +{ + VALUE newiseq = iseq_alloc(rb_cISeq); + rb_iseq_t *iseq0, *iseq1; + + GetISeqPtr(iseqval, iseq0); + GetISeqPtr(newiseq, iseq1); + + *iseq1 = *iseq0; + 
iseq1->self = newiseq; + if (!iseq1->orig) { + iseq1->orig = iseqval; + } + if (newcbase) { + iseq1->cref_stack = NEW_BLOCK(newcbase); + if (iseq0->cref_stack->nd_next) { + iseq1->cref_stack->nd_next = iseq0->cref_stack->nd_next; + } + } + + return newiseq; +} + +/* ruby2cext */ + +VALUE +rb_iseq_build_for_ruby2cext( + const rb_iseq_t *iseq_template, + const rb_insn_func_t *func, + const struct iseq_insn_info_entry *insn_info_table, + const char **local_table, + const VALUE *arg_opt_table, + const struct iseq_catch_table_entry *catch_table, + const char *name, + const char *filename, + const unsigned short line_no) +{ + int i; + VALUE iseqval = iseq_alloc(rb_cISeq); + rb_iseq_t *iseq; + GetISeqPtr(iseqval, iseq); + + /* copy iseq */ + *iseq = *iseq_template; + iseq->name = rb_str_new2(name); + iseq->filename = rb_str_new2(filename); + iseq->mark_ary = rb_ary_new(); + iseq->line_no = line_no; + iseq->self = iseqval; + + iseq->iseq = ALLOC_N(VALUE, iseq->iseq_size); + + for (i=0; iiseq_size; i+=2) { + iseq->iseq[i] = BIN(opt_call_c_function); + iseq->iseq[i+1] = (VALUE)func; + } + + rb_iseq_translate_threaded_code(iseq); + +#define ALLOC_AND_COPY(dst, src, type, size) do { \ + if (size) { \ + (dst) = ALLOC_N(type, (size)); \ + MEMCPY((dst), (src), type, (size)); \ + } \ +} while (0) + + ALLOC_AND_COPY(iseq->insn_info_table, insn_info_table, + struct iseq_insn_info_entry, iseq->insn_info_size); + + ALLOC_AND_COPY(iseq->catch_table, catch_table, + struct iseq_catch_table_entry, iseq->catch_table_size); + + ALLOC_AND_COPY(iseq->arg_opt_table, arg_opt_table, + VALUE, iseq->arg_opts); + + set_relation(iseq, 0); + + return iseqval; +} + +void +Init_ISeq(void) +{ + /* declare ::VM::InstructionSequence */ + rb_cISeq = rb_define_class_under(rb_cRubyVM, "InstructionSequence", rb_cObject); + rb_define_alloc_func(rb_cISeq, iseq_alloc); + rb_define_method(rb_cISeq, "inspect", iseq_inspect, 0); + rb_define_method(rb_cISeq, "disasm", rb_iseq_disasm, 0); + rb_define_method(rb_cISeq, 
"disassemble", rb_iseq_disasm, 0); + rb_define_method(rb_cISeq, "to_a", iseq_to_a, 0); + rb_define_method(rb_cISeq, "eval", iseq_eval, 0); + + /* disable this feature because there is no verifier. */ + /* rb_define_singleton_method(rb_cISeq, "load", iseq_s_load, -1); */ + (void)iseq_s_load; + + rb_define_singleton_method(rb_cISeq, "compile", iseq_s_compile, -1); + rb_define_singleton_method(rb_cISeq, "new", iseq_s_compile, -1); + rb_define_singleton_method(rb_cISeq, "compile_file", iseq_s_compile_file, -1); + rb_define_singleton_method(rb_cISeq, "compile_option", iseq_s_compile_option_get, 0); + rb_define_singleton_method(rb_cISeq, "compile_option=", iseq_s_compile_option_set, 1); + rb_define_singleton_method(rb_cISeq, "disasm", iseq_s_disasm, 1); + rb_define_singleton_method(rb_cISeq, "disassemble", iseq_s_disasm, 1); +} + diff --git a/iseq.h b/iseq.h new file mode 100644 index 0000000..952583a --- /dev/null +++ b/iseq.h @@ -0,0 +1,103 @@ +/********************************************************************** + + iseq.h - + + $Author: yugui $ + created at: 04/01/01 23:36:57 JST + + Copyright (C) 2004-2008 Koichi Sasada + +**********************************************************************/ + +#ifndef RUBY_COMPILE_H +#define RUBY_COMPILE_H + +/* compile.c */ +VALUE rb_iseq_compile_node(VALUE self, NODE *node); +int rb_iseq_translate_threaded_code(rb_iseq_t *iseq); +VALUE rb_iseq_build_from_ary(rb_iseq_t *iseq, VALUE locals, VALUE args, + VALUE exception, VALUE body); + +/* iseq.c */ +VALUE ruby_iseq_load(VALUE data, VALUE parent, VALUE opt); +struct st_table *ruby_insn_make_insn_table(void); + +#define ISEQ_TYPE_TOP INT2FIX(1) +#define ISEQ_TYPE_METHOD INT2FIX(2) +#define ISEQ_TYPE_BLOCK INT2FIX(3) +#define ISEQ_TYPE_CLASS INT2FIX(4) +#define ISEQ_TYPE_RESCUE INT2FIX(5) +#define ISEQ_TYPE_ENSURE INT2FIX(6) +#define ISEQ_TYPE_EVAL INT2FIX(7) +#define ISEQ_TYPE_MAIN INT2FIX(8) +#define ISEQ_TYPE_DEFINED_GUARD INT2FIX(9) + +#define CATCH_TYPE_RESCUE INT2FIX(1) 
+#define CATCH_TYPE_ENSURE INT2FIX(2) +#define CATCH_TYPE_RETRY INT2FIX(3) +#define CATCH_TYPE_BREAK INT2FIX(4) +#define CATCH_TYPE_REDO INT2FIX(5) +#define CATCH_TYPE_NEXT INT2FIX(6) + +struct iseq_insn_info_entry { + unsigned short position; + unsigned short line_no; + unsigned short sp; +}; + +struct iseq_catch_table_entry { + VALUE type; + VALUE iseq; + unsigned long start; + unsigned long end; + unsigned long cont; + unsigned long sp; +}; + +#define INITIAL_ISEQ_COMPILE_DATA_STORAGE_BUFF_SIZE (512) + +struct iseq_compile_data_storage { + struct iseq_compile_data_storage *next; + unsigned long pos; + unsigned long size; + char *buff; +}; + +struct iseq_compile_data { + /* GC is needed */ + VALUE err_info; + VALUE mark_ary; + VALUE catch_table_ary; /* Array */ + + /* GC is not needed */ + struct iseq_label_data *start_label; + struct iseq_label_data *end_label; + struct iseq_label_data *redo_label; + VALUE current_block; + VALUE loopval_popped; /* used by NODE_BREAK */ + VALUE ensure_node; + VALUE for_iseq; + struct iseq_compile_data_ensure_node_stack *ensure_node_stack; + int cached_const; + struct iseq_compile_data_storage *storage_head; + struct iseq_compile_data_storage *storage_current; + int last_line; + int flip_cnt; + int label_no; + int node_level; + const rb_compile_option_t *option; +}; + +/* defined? */ +#define DEFINED_IVAR INT2FIX(1) +#define DEFINED_IVAR2 INT2FIX(2) +#define DEFINED_GVAR INT2FIX(3) +#define DEFINED_CVAR INT2FIX(4) +#define DEFINED_CONST INT2FIX(5) +#define DEFINED_METHOD INT2FIX(6) +#define DEFINED_YIELD INT2FIX(7) +#define DEFINED_REF INT2FIX(8) +#define DEFINED_ZSUPER INT2FIX(9) +#define DEFINED_FUNC INT2FIX(10) + +#endif /* RUBY_COMPILE_H */ diff --git a/lex.c.blt b/lex.c.blt new file mode 100644 index 0000000..ea20686 --- /dev/null +++ b/lex.c.blt @@ -0,0 +1,217 @@ +/* C code produced by gperf version 3.0.3 */ +/* Command-line: gperf -C -p -j1 -i 1 -g -o -t -N rb_reserved_word -k'1,3,$' keywords */ + +#if !((' ' == 32) && ('!' 
== 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +error "gperf generated tables don't work with this execution character set. Please report a bug to ." 
+#endif + +#line 1 "keywords" + +struct kwtable {const char *name; int id[2]; enum lex_state_e state;}; +const struct kwtable *rb_reserved_word(const char *, unsigned int); +#ifndef RIPPER +#line 7 "keywords" +struct kwtable; + +#define TOTAL_KEYWORDS 41 +#define MIN_WORD_LENGTH 2 +#define MAX_WORD_LENGTH 12 +#define MIN_HASH_VALUE 8 +#define MAX_HASH_VALUE 50 +/* maximum key range = 43, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +hash (str, len) + register const char *str; + register unsigned int len; +{ + static const unsigned char asso_values[] = + { + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 26, 51, 51, 14, 51, 16, 8, + 11, 13, 51, 51, 51, 51, 10, 51, 13, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 11, 51, 13, 1, 26, + 4, 1, 8, 28, 51, 23, 51, 1, 1, 27, + 5, 19, 21, 51, 8, 3, 3, 11, 51, 21, + 24, 16, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51 + }; + register int hval = len; + + switch (hval) + { + default: + hval += asso_values[(unsigned char)str[2]]; + /*FALLTHROUGH*/ + case 2: + case 1: + hval += asso_values[(unsigned char)str[0]]; + break; + } + return hval + asso_values[(unsigned char)str[len - 1]]; +} + +#ifdef __GNUC__ +__inline 
+#ifdef __GNUC_STDC_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif +const struct kwtable * +rb_reserved_word (str, len) + register const char *str; + register unsigned int len; +{ + static const struct kwtable wordlist[] = + { + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 17 "keywords" + {"break", {keyword_break, keyword_break}, EXPR_MID}, +#line 23 "keywords" + {"else", {keyword_else, keyword_else}, EXPR_BEG}, +#line 33 "keywords" + {"nil", {keyword_nil, keyword_nil}, EXPR_END}, +#line 26 "keywords" + {"ensure", {keyword_ensure, keyword_ensure}, EXPR_BEG}, +#line 25 "keywords" + {"end", {keyword_end, keyword_end}, EXPR_END}, +#line 42 "keywords" + {"then", {keyword_then, keyword_then}, EXPR_BEG}, +#line 34 "keywords" + {"not", {keyword_not, keyword_not}, EXPR_ARG}, +#line 27 "keywords" + {"false", {keyword_false, keyword_false}, EXPR_END}, +#line 40 "keywords" + {"self", {keyword_self, keyword_self}, EXPR_END}, +#line 24 "keywords" + {"elsif", {keyword_elsif, keyword_elsif}, EXPR_VALUE}, +#line 37 "keywords" + {"rescue", {keyword_rescue, modifier_rescue}, EXPR_MID}, +#line 43 "keywords" + {"true", {keyword_true, keyword_true}, EXPR_END}, +#line 46 "keywords" + {"until", {keyword_until, modifier_until}, EXPR_VALUE}, +#line 45 "keywords" + {"unless", {keyword_unless, modifier_unless}, EXPR_VALUE}, +#line 39 "keywords" + {"return", {keyword_return, keyword_return}, EXPR_MID}, +#line 20 "keywords" + {"def", {keyword_def, keyword_def}, EXPR_FNAME}, +#line 15 "keywords" + {"and", {keyword_and, keyword_and}, EXPR_VALUE}, +#line 22 "keywords" + {"do", {keyword_do, keyword_do}, EXPR_BEG}, +#line 49 "keywords" + {"yield", {keyword_yield, keyword_yield}, EXPR_ARG}, +#line 28 "keywords" + {"for", {keyword_for, keyword_for}, EXPR_VALUE}, +#line 44 "keywords" + {"undef", {keyword_undef, keyword_undef}, EXPR_FNAME}, +#line 35 "keywords" + {"or", {keyword_or, keyword_or}, EXPR_VALUE}, +#line 30 "keywords" + {"in", {keyword_in, keyword_in}, EXPR_VALUE}, 
+#line 47 "keywords" + {"when", {keyword_when, keyword_when}, EXPR_VALUE}, +#line 38 "keywords" + {"retry", {keyword_retry, keyword_retry}, EXPR_END}, +#line 29 "keywords" + {"if", {keyword_if, modifier_if}, EXPR_VALUE}, +#line 18 "keywords" + {"case", {keyword_case, keyword_case}, EXPR_VALUE}, +#line 36 "keywords" + {"redo", {keyword_redo, keyword_redo}, EXPR_END}, +#line 32 "keywords" + {"next", {keyword_next, keyword_next}, EXPR_MID}, +#line 41 "keywords" + {"super", {keyword_super, keyword_super}, EXPR_ARG}, +#line 31 "keywords" + {"module", {keyword_module, keyword_module}, EXPR_VALUE}, +#line 16 "keywords" + {"begin", {keyword_begin, keyword_begin}, EXPR_BEG}, +#line 10 "keywords" + {"__LINE__", {keyword__LINE__, keyword__LINE__}, EXPR_END}, +#line 11 "keywords" + {"__FILE__", {keyword__FILE__, keyword__FILE__}, EXPR_END}, +#line 9 "keywords" + {"__ENCODING__", {keyword__ENCODING__, keyword__ENCODING__}, EXPR_END}, +#line 13 "keywords" + {"END", {keyword_END, keyword_END}, EXPR_END}, +#line 14 "keywords" + {"alias", {keyword_alias, keyword_alias}, EXPR_FNAME}, +#line 12 "keywords" + {"BEGIN", {keyword_BEGIN, keyword_BEGIN}, EXPR_END}, +#line 21 "keywords" + {"defined?", {keyword_defined, keyword_defined}, EXPR_ARG}, +#line 19 "keywords" + {"class", {keyword_class, keyword_class}, EXPR_CLASS}, + {""}, {""}, +#line 48 "keywords" + {"while", {keyword_while, modifier_while}, EXPR_VALUE} + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register const char *s = wordlist[key].name; + + if (*str == *s && !strcmp (str + 1, s + 1)) + return &wordlist[key]; + } + } + return 0; +} +#line 50 "keywords" + +#endif diff --git a/lib/English.rb b/lib/English.rb new file mode 100644 index 0000000..1a0e11d --- /dev/null +++ b/lib/English.rb @@ -0,0 +1,155 @@ +# Include the English library file in a Ruby script, and you can +# reference the global variables such as \VAR{\$\_} 
using less +# cryptic names, listed in the following table.% \vref{tab:english}. +# +# Without 'English': +# +# $\ = ' -- ' +# "waterbuffalo" =~ /buff/ +# print $", $', $$, "\n" +# +# With English: +# +# require "English" +# +# $OUTPUT_FIELD_SEPARATOR = ' -- ' +# "waterbuffalo" =~ /buff/ +# print $LOADED_FEATURES, $POSTMATCH, $PID, "\n" + + +# The exception object passed to +raise+. +alias $ERROR_INFO $! + +# The stack backtrace generated by the last +# exception. See Kernel.caller for details. Thread local. +alias $ERROR_POSITION $@ + +# The default separator pattern used by String.split. May be +# set from the command line using the -F flag. +alias $FS $; + +# The default separator pattern used by String.split. May be +# set from the command line using the -F flag. +alias $FIELD_SEPARATOR $; + +# The separator string output between the parameters to methods such +# as Kernel.print and Array.join. Defaults to +nil+, +# which adds no text. +alias $OFS $, + +# The separator string output between the parameters to methods such +# as Kernel.print and Array.join. Defaults to +nil+, +# which adds no text. +alias $OUTPUT_FIELD_SEPARATOR $, + +# The input record separator (newline by default). This is the value +# that routines such as Kernel.gets use to determine record +# boundaries. If set to +nil+, +gets+ will read the entire file. +alias $RS $/ + +# The input record separator (newline by default). This is the value +# that routines such as Kernel.gets use to determine record +# boundaries. If set to +nil+, +gets+ will read the entire file. +alias $INPUT_RECORD_SEPARATOR $/ + +# The string appended to the output of every call to methods such as +# Kernel.print and IO.write. The default value is +# +nil+. +alias $ORS $\ + +# The string appended to the output of every call to methods such as +# Kernel.print and IO.write. The default value is +# +nil+. +alias $OUTPUT_RECORD_SEPARATOR $\ + +# The number of the last line read from the current input file. 
+alias $INPUT_LINE_NUMBER $. + +# The number of the last line read from the current input file. +alias $NR $. + +# The last line read by Kernel.gets or +# Kernel.readline. Many string-related functions in the +# +Kernel+ module operate on $_ by default. The variable is +# local to the current scope. Thread local. +alias $LAST_READ_LINE $_ + +# The destination of output for Kernel.print +# and Kernel.printf. The default value is +# $stdout. +alias $DEFAULT_OUTPUT $> + +# An object that provides access to the concatenation +# of the contents of all the files +# given as command-line arguments, or $stdin +# (in the case where there are no +# arguments). $< supports methods similar to a +# +File+ object: +# +inmode+, +close+, +# closed?, +each+, +# each_byte, each_line, +# +eof+, eof?, +file+, +# +filename+, +fileno+, +# +getc+, +gets+, +lineno+, +# lineno=, +path+, +# +pos+, pos=, +# +read+, +readchar+, +# +readline+, +readlines+, +# +rewind+, +seek+, +skip+, +# +tell+, to_a, to_i, +# to_io, to_s, along with the +# methods in +Enumerable+. The method +file+ +# returns a +File+ object for the file currently +# being read. This may change as $< reads +# through the files on the command line. Read only. +alias $DEFAULT_INPUT $< + +# The process number of the program being executed. Read only. +alias $PID $$ + +# The process number of the program being executed. Read only. +alias $PROCESS_ID $$ + +# The exit status of the last child process to terminate. Read +# only. Thread local. +alias $CHILD_STATUS $? + +# A +MatchData+ object that encapsulates the results of a successful +# pattern match. The variables $&, $`, $', +# and $1 to $9 are all derived from +# $~. Assigning to $~ changes the values of these +# derived variables. This variable is local to the current +# scope. Thread local. 
+alias $LAST_MATCH_INFO $~ + +# If set to any value apart from +nil+ or +false+, all pattern matches +# will be case insensitive, string comparisons will ignore case, and +# string hash values will be case insensitive. Deprecated +alias $IGNORECASE $= + +# An array of strings containing the command-line +# options from the invocation of the program. Options +# used by the Ruby interpreter will have been +# removed. Read only. Also known simply as +ARGV+. +alias $ARGV $* + +# The string matched by the last successful pattern +# match. This variable is local to the current +# scope. Read only. Thread local. +alias $MATCH $& + +# The string preceding the match in the last +# successful pattern match. This variable is local to +# the current scope. Read only. Thread local. +alias $PREMATCH $` + +# The string following the match in the last +# successful pattern match. This variable is local to +# the current scope. Read only. Thread local. +alias $POSTMATCH $' + +# The contents of the highest-numbered group matched in the last +# successful pattern match. Thus, in "cat" =~ /(c|a)(t|z)/, +# $+ will be set to "t". This variable is local to the +# current scope. Read only. Thread local. 
+alias $LAST_PAREN_MATCH $+ diff --git a/lib/README b/lib/README new file mode 100644 index 0000000..ca236d3 --- /dev/null +++ b/lib/README @@ -0,0 +1,93 @@ +English.rb lets Perl'ish global variables have English names +README this file +abbrev.rb abbreviation calculator +base64.rb Base64 de- and encoder +benchmark.rb a benchmark utility +cgi.rb CGI support library +cgi/session.rb CGI session class +cmath.rb math support for complex numbers +complex.rb includes cmath and sets complex arithmetic as default +csv.rb CSV parser/generator +date.rb date object +date/format.rb date parsing and formatting +debug.rb ruby debugger +delegate.rb delegates messages to other object +drb.rb distributed Ruby +e2mmap.rb exception utilities +erb.rb tiny eRuby library +fileutils.rb file utilities +find.rb traverses directory tree +forwardable.rb explicit delegation library +gauntlet_rubygems.rb Gem package validator +getoptlong.rb GNU getoptlong compatible +gserver.rb general TCP server +ipaddr.rb defines the IPAddr class +irb.rb interactive ruby +logger.rb simple logging utility +mathn.rb extended math operation +matrix.rb matrix calculation library +minitest/unit minimal drop-in replacement for test-unit +mkmf.rb Makefile maker +monitor.rb exclusive region monitor for thread +mutex_m.rb mutex mixin +net/ftp.rb ftp access +net/http.rb HTTP access +net/https.rb HTTPS access +net/imap.rb IMAP4 access +net/pop.rb POP3 access +net/protocol.rb abstract class for net library (DO NOT USE) +net/smtp.rb SMTP access +net/telnet.rb telnet library +observer.rb observer design pattern library (provides Observable) +open-uri.rb easy-to-use network interface using URI and Net +open3.rb opens subprocess connection stdin/stdout/stderr +optparse.rb command line option analysis +ostruct.rb python style object +pathname.rb Object-Oriented Pathname Class +pp.rb pretty print objects +prettyprint.rb pretty printing algorithm +prime.rb prime numbers and factorization +profile.rb runs ruby profiler 
+profiler.rb ruby profiler module +pstore.rb persistent object storage using marshal +racc/parser.rb racc (Ruby yACC) runtime +rake.rb Ruby Make +rational.rb rational number support +rdoc source-code documentation tool +resolv-replace.rb replace Socket DNS by resolv.rb +resolv.rb DNS resolver in Ruby +rexml an XML parser for Ruby, in Ruby +rinda/rinda.rb Linda distributed computing paradigm for drb +rinda/ring.rb RingServer for tuplespace +rinda/tuplespace.rb tuplespace for drb +rss.rb RSS parser/generator +rubygems Ruby package management system +scanf.rb scanf for Ruby +securerandom.rb Secure random number generator interface +set.rb defines the Set class +shell.rb runs commands and does pipeline operations like shell +shellwords.rb split into words like shell +singleton.rb singleton design pattern library +sync.rb 2 phase lock +tempfile.rb temporary file with automatic removal +test/unit Ruby Unit Testing Framework +thread.rb thread support +thwait.rb thread synchronization class +time.rb RFC2822, RFC2616, ISO8601 style time formatting/parsing +timeout.rb provides timeout +tmpdir.rb retrieve temporary directory path +tracer.rb execution tracer +tsort.rb topological sorting +ubygems.rb command line shortcut for RubyGems +un.rb Utilities to replace common UNIX commands in Makefiles etc +uri.rb URI support +uri/ftp.rb ftp scheme support +uri/http.rb http scheme support +uri/https.rb https scheme support +uri/ldap.rb ldap scheme support +uri/ldaps.rb ldaps scheme support +uri/mailto.rb mailto scheme support +weakref.rb weak reference class +webrick.rb WEB server toolkit +xmlrpc XML-RPC implementation +yaml.rb YAML implementation diff --git a/lib/abbrev.rb b/lib/abbrev.rb new file mode 100644 index 0000000..338b89f --- /dev/null +++ b/lib/abbrev.rb @@ -0,0 +1,103 @@ +#!/usr/bin/env ruby +=begin +# +# Copyright (c) 2001,2003 Akinori MUSHA +# +# All rights reserved. You can redistribute and/or modify it under +# the same terms as Ruby. 
+# +# $Idaemons: /home/cvs/rb/abbrev.rb,v 1.2 2001/05/30 09:37:45 knu Exp $ +# $RoughId: abbrev.rb,v 1.4 2003/10/14 19:45:42 knu Exp $ +# $Id: abbrev.rb 11708 2007-02-12 23:01:19Z shyouhei $ +=end + +# Calculate the set of unique abbreviations for a given set of strings. +# +# require 'abbrev' +# require 'pp' +# +# pp Abbrev::abbrev(['ruby', 'rules']).sort +# +# Generates: +# +# [["rub", "ruby"], +# ["ruby", "ruby"], +# ["rul", "rules"], +# ["rule", "rules"], +# ["rules", "rules"]] +# +# Also adds an +abbrev+ method to class +Array+. + +module Abbrev + + # Given a set of strings, calculate the set of unambiguous + # abbreviations for those strings, and return a hash where the keys + # are all the possible abbreviations and the values are the full + # strings. Thus, given input of "car" and "cone", the keys pointing + # to "car" would be "ca" and "car", while those pointing to "cone" + # would be "co", "con", and "cone". + # + # The optional +pattern+ parameter is a pattern or a string. Only + # those input strings matching the pattern, or beginning with the string, + # are considered for inclusion in the output hash. + + def abbrev(words, pattern = nil) + table = {} + seen = Hash.new(0) + + if pattern.is_a?(String) + pattern = /^#{Regexp.quote(pattern)}/ # regard as a prefix + end + + words.each do |word| + next if (abbrev = word).empty? + while (len = abbrev.rindex(/[\w\W]\z/)) > 0 + abbrev = word[0,len] + + next if pattern && pattern !~ abbrev + + case seen[abbrev] += 1 + when 1 + table[abbrev] = word + when 2 + table.delete(abbrev) + else + break + end + end + end + + words.each do |word| + next if pattern && pattern !~ word + + table[word] = word + end + + table + end + + module_function :abbrev +end + +class Array + # Calculates the set of unambiguous abbreviations for the strings in + # +self+. If passed a pattern or a string, only the strings matching + # the pattern or starting with the string are considered. 
+ # + # %w{ car cone }.abbrev #=> { "ca" => "car", "car" => "car", + # "co" => "cone", "con" => "cone", + # "cone" => "cone" } + def abbrev(pattern = nil) + Abbrev::abbrev(self, pattern) + end +end + +if $0 == __FILE__ + while line = gets + hash = line.split.abbrev + + hash.sort.each do |k, v| + puts "#{k} => #{v}" + end + end +end diff --git a/lib/base64.rb b/lib/base64.rb new file mode 100644 index 0000000..ebd796e --- /dev/null +++ b/lib/base64.rb @@ -0,0 +1,91 @@ +# +# = base64.rb: methods for base64-encoding and -decoding strings +# + +# The Base64 module provides for the encoding (#encode64, #strict_encode64, +# #urlsafe_encode64) and decoding (#decode64, #strict_decode64, +# #urlsafe_decode64) of binary data using a Base64 representation. +# +# == Example +# +# A simple encoding and decoding. +# +# require "base64" +# +# enc = Base64.encode64('Send reinforcements') +# # -> "U2VuZCByZWluZm9yY2VtZW50cw==\n" +# plain = Base64.decode64(enc) +# # -> "Send reinforcements" +# +# The purpose of using base64 to encode data is that it translates any +# binary data into purely printable characters. + +module Base64 + module_function + + # Returns the Base64-encoded version of +bin+. + # This method complies with RFC 2045. + # Line feeds are added to every 60 encoded characters. + # + # require 'base64' + # Base64.encode64("Now is the time for all good coders\nto learn Ruby") + # + # Generates: + # + # Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g + # UnVieQ== + def encode64(bin) + [bin].pack("m") + end + + # Returns the Base64-decoded version of +str+. + # This method complies with RFC 2045. + # Characters outside the base alphabet are ignored. + # + # require 'base64' + # str = 'VGhpcyBpcyBsaW5lIG9uZQpUaGlzIG' + + # 'lzIGxpbmUgdHdvClRoaXMgaXMgbGlu' + + # 'ZSB0aHJlZQpBbmQgc28gb24uLi4K' + # puts Base64.decode64(str) + # + # Generates: + # + # This is line one + # This is line two + # This is line three + # And so on... 
+ def decode64(str) + str.unpack("m").first + end + + # Returns the Base64-encoded version of +bin+. + # This method complies with RFC 4648. + # No line feeds are added. + def strict_encode64(bin) + [bin].pack("m0") + end + + # Returns the Base64-decoded version of +str+. + # This method complies with RFC 4648. + # ArgumentError is raised if +str+ is incorrectly padded or contains + # non-alphabet characters. Note that CR or LF are also rejected. + def strict_decode64(str) + str.unpack("m0").first + end + + # Returns the Base64-encoded version of +bin+. + # This method complies with ``Base 64 Encoding with URL and Filename Safe + # Alphabet'' in RFC 4648. + # The alphabet uses '-' instead of '+' and '_' instead of '/'. + def urlsafe_encode64(bin) + strict_encode64(bin).tr("+/", "-_") + end + + # Returns the Base64-decoded version of +str+. + # This method complies with ``Base 64 Encoding with URL and Filename Safe + # Alphabet'' in RFC 4648. + # The alphabet uses '-' instead of '+' and '_' instead of '/'. + def urlsafe_decode64(str) + strict_decode64(str.tr("-_", "+/")) + end +end diff --git a/lib/benchmark.rb b/lib/benchmark.rb new file mode 100644 index 0000000..236c0e1 --- /dev/null +++ b/lib/benchmark.rb @@ -0,0 +1,573 @@ +=begin +# +# benchmark.rb - a performance benchmarking library +# +# $Id: benchmark.rb 22530 2009-02-22 12:49:00Z yugui $ +# +# Created by Gotoken (gotoken@notwork.org). +# +# Documentation by Gotoken (original RD), Lyle Johnson (RDoc conversion), and +# Gavin Sinclair (editing). +# +=end + +# == Overview +# +# The Benchmark module provides methods for benchmarking Ruby code, giving +# detailed reports on the time taken for each task. +# + +# The Benchmark module provides methods to measure and report the time +# used to execute Ruby code. 
+# +# * Measure the time to construct the string given by the expression +# "a"*1_000_000: +# +# require 'benchmark' +# +# puts Benchmark.measure { "a"*1_000_000 } +# +# On my machine (FreeBSD 3.2 on P5, 100MHz) this generates: +# +# 1.166667 0.050000 1.216667 ( 0.571355) +# +# This report shows the user CPU time, system CPU time, the sum of +# the user and system CPU times, and the elapsed real time. The unit +# of time is seconds. +# +# * Do some experiments sequentially using the #bm method: +# +# require 'benchmark' +# +# n = 50000 +# Benchmark.bm do |x| +# x.report { for i in 1..n; a = "1"; end } +# x.report { n.times do ; a = "1"; end } +# x.report { 1.upto(n) do ; a = "1"; end } +# end +# +# The result: +# +# user system total real +# 1.033333 0.016667 1.016667 ( 0.492106) +# 1.483333 0.000000 1.483333 ( 0.694605) +# 1.516667 0.000000 1.516667 ( 0.711077) +# +# * Continuing the previous example, put a label in each report: +# +# require 'benchmark' +# +# n = 50000 +# Benchmark.bm(7) do |x| +# x.report("for:") { for i in 1..n; a = "1"; end } +# x.report("times:") { n.times do ; a = "1"; end } +# x.report("upto:") { 1.upto(n) do ; a = "1"; end } +# end +# +# The result: +# +# user system total real +# for: 1.050000 0.000000 1.050000 ( 0.503462) +# times: 1.533333 0.016667 1.550000 ( 0.735473) +# upto: 1.500000 0.016667 1.516667 ( 0.711239) +# +# +# * The times for some benchmarks depend on the order in which items +# are run. These differences are due to the cost of memory +# allocation and garbage collection. To avoid these discrepancies, +# the #bmbm method is provided. For example, to compare ways to +# sort an array of floats: +# +# require 'benchmark' +# +# array = (1..1000000).map { rand } +# +# Benchmark.bmbm do |x| +# x.report("sort!") { array.dup.sort! } +# x.report("sort") { array.dup.sort } +# end +# +# The result: +# +# Rehearsal ----------------------------------------- +# sort! 
11.928000 0.010000 11.938000 ( 12.756000) +# sort 13.048000 0.020000 13.068000 ( 13.857000) +# ------------------------------- total: 25.006000sec +# +# user system total real +# sort! 12.959000 0.010000 12.969000 ( 13.793000) +# sort 12.007000 0.000000 12.007000 ( 12.791000) +# +# +# * Report statistics of sequential experiments with unique labels, +# using the #benchmark method: +# +# require 'benchmark' +# include Benchmark # we need the CAPTION and FMTSTR constants +# +# n = 50000 +# Benchmark.benchmark(" "*7 + CAPTION, 7, FMTSTR, ">total:", ">avg:") do |x| +# tf = x.report("for:") { for i in 1..n; a = "1"; end } +# tt = x.report("times:") { n.times do ; a = "1"; end } +# tu = x.report("upto:") { 1.upto(n) do ; a = "1"; end } +# [tf+tt+tu, (tf+tt+tu)/3] +# end +# +# The result: +# +# user system total real +# for: 1.016667 0.016667 1.033333 ( 0.485749) +# times: 1.450000 0.016667 1.466667 ( 0.681367) +# upto: 1.533333 0.000000 1.533333 ( 0.722166) +# >total: 4.000000 0.033333 4.033333 ( 1.889282) +# >avg: 1.333333 0.011111 1.344444 ( 0.629761) + +module Benchmark + + BENCHMARK_VERSION = "2002-04-25" #:nodoc: + + def Benchmark::times() # :nodoc: + Process::times() + end + + + # Invokes the block with a Benchmark::Report object, which + # may be used to collect and report on the results of individual + # benchmark tests. Reserves _label_width_ leading spaces for + # labels on each line. Prints _caption_ at the top of the + # report, and uses _fmtstr_ to format each line. + # If the block returns an array of + # Benchmark::Tms objects, these will be used to format + # additional lines of output. If _labels_ parameters are + # given, these are used to label these extra lines. + # + # _Note_: Other methods provide a simpler interface to this one, and are + # suitable for nearly all benchmarking requirements. See the examples in + # Benchmark, and the #bm and #bmbm methods. 
+ # + # Example: + # + # require 'benchmark' + # include Benchmark # we need the CAPTION and FMTSTR constants + # + # n = 50000 + # Benchmark.benchmark(" "*7 + CAPTION, 7, FMTSTR, ">total:", ">avg:") do |x| + # tf = x.report("for:") { for i in 1..n; a = "1"; end } + # tt = x.report("times:") { n.times do ; a = "1"; end } + # tu = x.report("upto:") { 1.upto(n) do ; a = "1"; end } + # [tf+tt+tu, (tf+tt+tu)/3] + # end + # + # Generates: + # + # user system total real + # for: 1.016667 0.016667 1.033333 ( 0.485749) + # times: 1.450000 0.016667 1.466667 ( 0.681367) + # upto: 1.533333 0.000000 1.533333 ( 0.722166) + # >total: 4.000000 0.033333 4.033333 ( 1.889282) + # >avg: 1.333333 0.011111 1.344444 ( 0.629761) + # + + def benchmark(caption = "", label_width = nil, fmtstr = nil, *labels) # :yield: report + sync = STDOUT.sync + STDOUT.sync = true + label_width ||= 0 + fmtstr ||= FMTSTR + raise ArgumentError, "no block" unless iterator? + print caption + results = yield(Report.new(label_width, fmtstr)) + Array === results and results.grep(Tms).each {|t| + print((labels.shift || t.label || "").ljust(label_width), + t.format(fmtstr)) + } + STDOUT.sync = sync + end + + + # A simple interface to the #benchmark method, #bm is generates sequential reports + # with labels. The parameters have the same meaning as for #benchmark. 
+ # + # require 'benchmark' + # + # n = 50000 + # Benchmark.bm(7) do |x| + # x.report("for:") { for i in 1..n; a = "1"; end } + # x.report("times:") { n.times do ; a = "1"; end } + # x.report("upto:") { 1.upto(n) do ; a = "1"; end } + # end + # + # Generates: + # + # user system total real + # for: 1.050000 0.000000 1.050000 ( 0.503462) + # times: 1.533333 0.016667 1.550000 ( 0.735473) + # upto: 1.500000 0.016667 1.516667 ( 0.711239) + # + + def bm(label_width = 0, *labels, &blk) # :yield: report + benchmark(" "*label_width + CAPTION, label_width, FMTSTR, *labels, &blk) + end + + + # Sometimes benchmark results are skewed because code executed + # earlier encounters different garbage collection overheads than + # that run later. #bmbm attempts to minimize this effect by running + # the tests twice, the first time as a rehearsal in order to get the + # runtime environment stable, the second time for + # real. GC.start is executed before the start of each of + # the real timings; the cost of this is not included in the + # timings. In reality, though, there's only so much that #bmbm can + # do, and the results are not guaranteed to be isolated from garbage + # collection and other effects. + # + # Because #bmbm takes two passes through the tests, it can + # calculate the required label width. + # + # require 'benchmark' + # + # array = (1..1000000).map { rand } + # + # Benchmark.bmbm do |x| + # x.report("sort!") { array.dup.sort! } + # x.report("sort") { array.dup.sort } + # end + # + # Generates: + # + # Rehearsal ----------------------------------------- + # sort! 11.928000 0.010000 11.938000 ( 12.756000) + # sort 13.048000 0.020000 13.068000 ( 13.857000) + # ------------------------------- total: 25.006000sec + # + # user system total real + # sort! 12.959000 0.010000 12.969000 ( 13.793000) + # sort 12.007000 0.000000 12.007000 ( 12.791000) + # + # #bmbm yields a Benchmark::Job object and returns an array of + # Benchmark::Tms objects. 
+ # + def bmbm(width = 0, &blk) # :yield: job + job = Job.new(width) + yield(job) + width = job.width + sync = STDOUT.sync + STDOUT.sync = true + + # rehearsal + print "Rehearsal " + puts '-'*(width+CAPTION.length - "Rehearsal ".length) + list = [] + job.list.each{|label,item| + print(label.ljust(width)) + res = Benchmark::measure(&item) + print res.format() + list.push res + } + sum = Tms.new; list.each{|i| sum += i} + ets = sum.format("total: %tsec") + printf("%s %s\n\n", + "-"*(width+CAPTION.length-ets.length-1), ets) + + # take + print ' '*width, CAPTION + list = [] + ary = [] + job.list.each{|label,item| + GC::start + print label.ljust(width) + res = Benchmark::measure(&item) + print res.format() + ary.push res + list.push [label, res] + } + + STDOUT.sync = sync + ary + end + + # + # Returns the time used to execute the given block as a + # Benchmark::Tms object. + # + def measure(label = "") # :yield: + t0, r0 = Benchmark.times, Time.now + yield + t1, r1 = Benchmark.times, Time.now + Benchmark::Tms.new(t1.utime - t0.utime, + t1.stime - t0.stime, + t1.cutime - t0.cutime, + t1.cstime - t0.cstime, + r1.to_f - r0.to_f, + label) + end + + # + # Returns the elapsed real time used to execute the given block. + # + def realtime(&blk) # :yield: + r0 = Time.now + yield + r1 = Time.now + r1.to_f - r0.to_f + end + + + + # + # A Job is a sequence of labelled blocks to be processed by the + # Benchmark.bmbm method. It is of little direct interest to the user. + # + class Job # :nodoc: + # + # Returns an initialized Job instance. + # Usually, one doesn't call this method directly, as new + # Job objects are created by the #bmbm method. + # _width_ is a initial value for the label offset used in formatting; + # the #bmbm method passes its _width_ argument to this constructor. + # + def initialize(width) + @width = width + @list = [] + end + + # + # Registers the given label and block pair in the job list. 
+ # + def item(label = "", &blk) # :yield: + raise ArgumentError, "no block" unless block_given? + label += ' ' + w = label.length + @width = w if @width < w + @list.push [label, blk] + self + end + + alias report item + + # An array of 2-element arrays, consisting of label and block pairs. + attr_reader :list + + # Length of the widest label in the #list, plus one. + attr_reader :width + end + + module_function :benchmark, :measure, :realtime, :bm, :bmbm + + + + # + # This class is used by the Benchmark.benchmark and Benchmark.bm methods. + # It is of little direct interest to the user. + # + class Report # :nodoc: + # + # Returns an initialized Report instance. + # Usually, one doesn't call this method directly, as new + # Report objects are created by the #benchmark and #bm methods. + # _width_ and _fmtstr_ are the label offset and + # format string used by Tms#format. + # + def initialize(width = 0, fmtstr = nil) + @width, @fmtstr = width, fmtstr + end + + # + # Prints the _label_ and measured time for the block, + # formatted by _fmt_. See Tms#format for the + # formatting rules. + # + def item(label = "", *fmt, &blk) # :yield: + print label.ljust(@width) + res = Benchmark::measure(&blk) + print res.format(@fmtstr, *fmt) + res + end + + alias report item + end + + + + # + # A data object, representing the times associated with a benchmark + # measurement. 
+ # + class Tms + CAPTION = " user system total real\n" + FMTSTR = "%10.6u %10.6y %10.6t %10.6r\n" + + # User CPU time + attr_reader :utime + + # System CPU time + attr_reader :stime + + # User CPU time of children + attr_reader :cutime + + # System CPU time of children + attr_reader :cstime + + # Elapsed real time + attr_reader :real + + # Total time, that is _utime_ + _stime_ + _cutime_ + _cstime_ + attr_reader :total + + # Label + attr_reader :label + + # + # Returns an initialized Tms object which has + # _u_ as the user CPU time, _s_ as the system CPU time, + # _cu_ as the children's user CPU time, _cs_ as the children's + # system CPU time, _real_ as the elapsed real time and _l_ + # as the label. + # + def initialize(u = 0.0, s = 0.0, cu = 0.0, cs = 0.0, real = 0.0, l = nil) + @utime, @stime, @cutime, @cstime, @real, @label = u, s, cu, cs, real, l + @total = @utime + @stime + @cutime + @cstime + end + + # + # Returns a new Tms object whose times are the sum of the times for this + # Tms object, plus the time required to execute the code block (_blk_). + # + def add(&blk) # :yield: + self + Benchmark::measure(&blk) + end + + # + # An in-place version of #add. + # + def add! + t = Benchmark::measure(&blk) + @utime = utime + t.utime + @stime = stime + t.stime + @cutime = cutime + t.cutime + @cstime = cstime + t.cstime + @real = real + t.real + self + end + + # + # Returns a new Tms object obtained by memberwise summation + # of the individual times for this Tms object with those of the other + # Tms object. + # This method and #/() are useful for taking statistics. + # + def +(other); memberwise(:+, other) end + + # + # Returns a new Tms object obtained by memberwise subtraction + # of the individual times for the other Tms object from those of this + # Tms object. + # + def -(other); memberwise(:-, other) end + + # + # Returns a new Tms object obtained by memberwise multiplication + # of the individual times for this Tms object by _x_. 
+ # + def *(x); memberwise(:*, x) end + + # + # Returns a new Tms object obtained by memberwise division + # of the individual times for this Tms object by _x_. + # This method and #+() are useful for taking statistics. + # + def /(x); memberwise(:/, x) end + + # + # Returns the contents of this Tms object as + # a formatted string, according to a format string + # like that passed to Kernel.format. In addition, #format + # accepts the following extensions: + # + # %u:: Replaced by the user CPU time, as reported by Tms#utime. + # %y:: Replaced by the system CPU time, as reported by #stime (Mnemonic: y of "s*y*stem") + # %U:: Replaced by the children's user CPU time, as reported by Tms#cutime + # %Y:: Replaced by the children's system CPU time, as reported by Tms#cstime + # %t:: Replaced by the total CPU time, as reported by Tms#total + # %r:: Replaced by the elapsed real time, as reported by Tms#real + # %n:: Replaced by the label string, as reported by Tms#label (Mnemonic: n of "*n*ame") + # + # If _fmtstr_ is not given, FMTSTR is used as default value, detailing the + # user, system and real elapsed time. + # + def format(arg0 = nil, *args) + fmtstr = (arg0 || FMTSTR).dup + fmtstr.gsub!(/(%[-+\.\d]*)n/){"#{$1}s" % label} + fmtstr.gsub!(/(%[-+\.\d]*)u/){"#{$1}f" % utime} + fmtstr.gsub!(/(%[-+\.\d]*)y/){"#{$1}f" % stime} + fmtstr.gsub!(/(%[-+\.\d]*)U/){"#{$1}f" % cutime} + fmtstr.gsub!(/(%[-+\.\d]*)Y/){"#{$1}f" % cstime} + fmtstr.gsub!(/(%[-+\.\d]*)t/){"#{$1}f" % total} + fmtstr.gsub!(/(%[-+\.\d]*)r/){"(#{$1}f)" % real} + arg0 ? Kernel::format(fmtstr, *args) : fmtstr + end + + # + # Same as #format. + # + def to_s + format + end + + # + # Returns a new 6-element array, consisting of the + # label, user CPU time, system CPU time, children's + # user CPU time, children's system CPU time and elapsed + # real time. 
+ # + def to_a + [@label, @utime, @stime, @cutime, @cstime, @real] + end + + protected + def memberwise(op, x) + case x + when Benchmark::Tms + Benchmark::Tms.new(utime.__send__(op, x.utime), + stime.__send__(op, x.stime), + cutime.__send__(op, x.cutime), + cstime.__send__(op, x.cstime), + real.__send__(op, x.real) + ) + else + Benchmark::Tms.new(utime.__send__(op, x), + stime.__send__(op, x), + cutime.__send__(op, x), + cstime.__send__(op, x), + real.__send__(op, x) + ) + end + end + end + + # The default caption string (heading above the output times). + CAPTION = Benchmark::Tms::CAPTION + + # The default format string used to display times. See also Benchmark::Tms#format. + FMTSTR = Benchmark::Tms::FMTSTR +end + +if __FILE__ == $0 + include Benchmark + + n = ARGV[0].to_i.nonzero? || 50000 + puts %Q([#{n} times iterations of `a = "1"']) + benchmark(" " + CAPTION, 7, FMTSTR) do |x| + x.report("for:") {for i in 1..n; a = "1"; end} # Benchmark::measure + x.report("times:") {n.times do ; a = "1"; end} + x.report("upto:") {1.upto(n) do ; a = "1"; end} + end + + benchmark do + [ + measure{for i in 1..n; a = "1"; end}, # Benchmark::measure + measure{n.times do ; a = "1"; end}, + measure{1.upto(n) do ; a = "1"; end} + ] + end +end diff --git a/lib/cgi.rb b/lib/cgi.rb new file mode 100644 index 0000000..6acf05b --- /dev/null +++ b/lib/cgi.rb @@ -0,0 +1,274 @@ +# +# cgi.rb - cgi support library +# +# Copyright (C) 2000 Network Applied Communication Laboratory, Inc. +# +# Copyright (C) 2000 Information-technology Promotion Agency, Japan +# +# Author: Wakou Aoyama +# +# Documentation: Wakou Aoyama (RDoc'd and embellished by William Webber) +# +# == Overview +# +# The Common Gateway Interface (CGI) is a simple protocol +# for passing an HTTP request from a web server to a +# standalone program, and returning the output to the web +# browser. 
Basically, a CGI program is called with the +# parameters of the request passed in either in the +# environment (GET) or via $stdin (POST), and everything +# it prints to $stdout is returned to the client. +# +# This file holds the +CGI+ class. This class provides +# functionality for retrieving HTTP request parameters, +# managing cookies, and generating HTML output. See the +# class documentation for more details and examples of use. +# +# The file cgi/session.rb provides session management +# functionality; see that file for more details. +# +# See http://www.w3.org/CGI/ for more information on the CGI +# protocol. + +raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0" + +# CGI class. See documentation for the file cgi.rb for an overview +# of the CGI protocol. +# +# == Introduction +# +# CGI is a large class, providing several categories of methods, many of which +# are mixed in from other modules. Some of the documentation is in this class, +# some in the modules CGI::QueryExtension and CGI::HtmlExtension. See +# CGI::Cookie for specific information on handling cookies, and cgi/session.rb +# (CGI::Session) for information on sessions. +# +# For queries, CGI provides methods to get at environmental variables, +# parameters, cookies, and multipart request data. For responses, CGI provides +# methods for writing output and generating HTML. +# +# Read on for more details. Examples are provided at the bottom. +# +# == Queries +# +# The CGI class dynamically mixes in parameter and cookie-parsing +# functionality, environmental variable access, and support for +# parsing multipart requests (including uploaded files) from the +# CGI::QueryExtension module. +# +# === Environmental Variables +# +# The standard CGI environmental variables are available as read-only +# attributes of a CGI object. 
The following is a list of these variables: +# +# +# AUTH_TYPE HTTP_HOST REMOTE_IDENT +# CONTENT_LENGTH HTTP_NEGOTIATE REMOTE_USER +# CONTENT_TYPE HTTP_PRAGMA REQUEST_METHOD +# GATEWAY_INTERFACE HTTP_REFERER SCRIPT_NAME +# HTTP_ACCEPT HTTP_USER_AGENT SERVER_NAME +# HTTP_ACCEPT_CHARSET PATH_INFO SERVER_PORT +# HTTP_ACCEPT_ENCODING PATH_TRANSLATED SERVER_PROTOCOL +# HTTP_ACCEPT_LANGUAGE QUERY_STRING SERVER_SOFTWARE +# HTTP_CACHE_CONTROL REMOTE_ADDR +# HTTP_FROM REMOTE_HOST +# +# +# For each of these variables, there is a corresponding attribute with the +# same name, except all lower case and without a preceding HTTP_. +# +content_length+ and +server_port+ are integers; the rest are strings. +# +# === Parameters +# +# The method #params() returns a hash of all parameters in the request as +# name/value-list pairs, where the value-list is an Array of one or more +# values. The CGI object itself also behaves as a hash of parameter names +# to values, but only returns a single value (as a String) for each +# parameter name. +# +# For instance, suppose the request contains the parameter +# "favourite_colours" with the multiple values "blue" and "green". The +# following behaviour would occur: +# +# cgi.params["favourite_colours"] # => ["blue", "green"] +# cgi["favourite_colours"] # => "blue" +# +# If a parameter does not exist, the former method will return an empty +# array, the latter an empty string. The simplest way to test for existence +# of a parameter is by the #has_key? method. +# +# === Cookies +# +# HTTP Cookies are automatically parsed from the request. They are available +# from the #cookies() accessor, which returns a hash from cookie name to +# CGI::Cookie object. +# +# === Multipart requests +# +# If a request's method is POST and its content type is multipart/form-data, +# then it may contain uploaded files. These are stored by the QueryExtension +# module in the parameters of the request. 
The parameter name is the name +# attribute of the file input field, as usual. However, the value is not +# a string, but an IO object, either a StringIO for small files, or a +# Tempfile for larger ones. This object also has the additional singleton +# methods: +# +# #local_path():: the path of the uploaded file on the local filesystem +# #original_filename():: the name of the file on the client computer +# #content_type():: the content type of the file +# +# == Responses +# +# The CGI class provides methods for sending header and content output to +# the HTTP client, and mixes in methods for programmatic HTML generation +# from CGI::HtmlExtension and CGI::TagMaker modules. The precise version of HTML +# to use for HTML generation is specified at object creation time. +# +# === Writing output +# +# The simplest way to send output to the HTTP client is using the #out() method. +# This takes the HTTP headers as a hash parameter, and the body content +# via a block. The headers can be generated as a string using the #header() +# method. The output stream can be written directly to using the #print() +# method. +# +# === Generating HTML +# +# Each HTML element has a corresponding method for generating that +# element as a String. The name of this method is the same as that +# of the element, all lowercase. The attributes of the element are +# passed in as a hash, and the body as a no-argument block that evaluates +# to a String. The HTML generation module knows which elements are +# always empty, and silently drops any passed-in body. It also knows +# which elements require matching closing tags and which don't. However, +# it does not know what attributes are legal for which elements. +# +# There are also some additional HTML generation methods mixed in from +# the CGI::HtmlExtension module.
These include individual methods for the +# different types of form inputs, and methods for elements that commonly +# take particular attributes where the attributes can be directly specified +# as arguments, rather than via a hash. +# +# == Examples of use +# +# === Get form values +# +# require "cgi" +# cgi = CGI.new +# value = cgi['field_name'] # <== value string for 'field_name' +# # if not 'field_name' included, then return "". +# fields = cgi.keys # <== array of field names +# +# # returns true if form has 'field_name' +# cgi.has_key?('field_name') +# cgi.key?('field_name') +# cgi.include?('field_name') +# +# CAUTION! cgi['field_name'] returned an Array with the old +# cgi.rb(included in ruby 1.6) +# +# === Get form values as hash +# +# require "cgi" +# cgi = CGI.new +# params = cgi.params +# +# cgi.params is a hash. +# +# cgi.params['new_field_name'] = ["value"] # add new param +# cgi.params['field_name'] = ["new_value"] # change value +# cgi.params.delete('field_name') # delete param +# cgi.params.clear # delete all params +# +# +# === Save form values to file +# +# require "pstore" +# db = PStore.new("query.db") +# db.transaction do +# db["params"] = cgi.params +# end +# +# +# === Restore form values from file +# +# require "pstore" +# db = PStore.new("query.db") +# db.transaction do +# cgi.params = db["params"] +# end +# +# +# === Get multipart form values +# +# require "cgi" +# cgi = CGI.new +# value = cgi['field_name'] # <== value string for 'field_name' +# value.read # <== body of value +# value.local_path # <== path to local file of value +# value.original_filename # <== original filename of value +# value.content_type # <== content_type of value +# +# and value has StringIO or Tempfile class methods. +# +# === Get cookie values +# +# require "cgi" +# cgi = CGI.new +# values = cgi.cookies['name'] # <== array of 'name' +# # if not 'name' included, then return [].
+# names = cgi.cookies.keys # <== array of cookie names +# +# and cgi.cookies is a hash. +# +# === Get cookie objects +# +# require "cgi" +# cgi = CGI.new +# for name, cookie in cgi.cookies +# cookie.expires = Time.now + 30 +# end +# cgi.out("cookie" => cgi.cookies) {"string"} +# +# cgi.cookies # { "name1" => cookie1, "name2" => cookie2, ... } +# +# require "cgi" +# cgi = CGI.new +# cgi.cookies['name'].expires = Time.now + 30 +# cgi.out("cookie" => cgi.cookies['name']) {"string"} +# +# === Print http header and html string to $DEFAULT_OUTPUT ($>) +# +# require "cgi" +# cgi = CGI.new("html3") # add HTML generation methods +# cgi.out() do +# cgi.html() do +# cgi.head{ cgi.title{"TITLE"} } + +# cgi.body() do +# cgi.form() do +# cgi.textarea("get_text") + +# cgi.br + +# cgi.submit +# end + +# cgi.pre() do +# CGI::escapeHTML( +# "params: " + cgi.params.inspect + "\n" + +# "cookies: " + cgi.cookies.inspect + "\n" + +# ENV.collect() do |key, value| +# key + " --> " + value + "\n" +# end.join("") +# ) +# end +# end +# end +# end +# +# # add HTML generation methods +# CGI.new("html3") # html3.2 +# CGI.new("html4") # html4.01 (Strict) +# CGI.new("html4Tr") # html4.01 Transitional +# CGI.new("html4Fr") # html4.01 Frameset +# +require 'cgi/core' +require 'cgi/cookie' +require 'cgi/util' diff --git a/lib/cmath.rb b/lib/cmath.rb new file mode 100644 index 0000000..1b0c65c --- /dev/null +++ b/lib/cmath.rb @@ -0,0 +1,233 @@ +module CMath + + include Math + + alias exp! exp + alias log! log + alias log10! log10 + alias sqrt! sqrt + + alias sin! sin + alias cos! cos + alias tan! tan + + alias sinh! sinh + alias cosh! cosh + alias tanh! tanh + + alias asin! asin + alias acos! acos + alias atan! atan + alias atan2! atan2 + + alias asinh! asinh + alias acosh! acosh + alias atanh! atanh + + def exp(z) + if z.real? + exp!(z) + else + Complex(exp!(z.real) * cos!(z.imag), + exp!(z.real) * sin!(z.imag)) + end + end + + def log(*args) + z, b = args + if z.real? and z >= 0 and (b.nil? 
or b >= 0) + log!(*args) + else + r, theta = z.polar + a = Complex(log!(r.abs), theta) + if b + a /= log(b) + end + a + end + end + + def log10(z) + if z.real? and z >= 0 + log10!(z) + else + log(z) / log!(10) + end + end + + def sqrt(z) + if z.real? + if z < 0 + Complex(0, sqrt!(-z)) + else + sqrt!(z) + end + else + if z.imag < 0 + sqrt(z.conjugate).conjugate + else + r = z.abs + x = z.real + Complex(sqrt!((r + x) / 2), sqrt!((r - x) / 2)) + end + end + end + + def sin(z) + if z.real? + sin!(z) + else + Complex(sin!(z.real) * cosh!(z.imag), + cos!(z.real) * sinh!(z.imag)) + end + end + + def cos(z) + if z.real? + cos!(z) + else + Complex(cos!(z.real) * cosh!(z.imag), + -sin!(z.real) * sinh!(z.imag)) + end + end + + def tan(z) + if z.real? + tan!(z) + else + sin(z)/cos(z) + end + end + + def sinh(z) + if z.real? + sinh!(z) + else + Complex(sinh!(z.real) * cos!(z.imag), + cosh!(z.real) * sin!(z.imag)) + end + end + + def cosh(z) + if z.real? + cosh!(z) + else + Complex(cosh!(z.real) * cos!(z.imag), + sinh!(z.real) * sin!(z.imag)) + end + end + + def tanh(z) + if z.real? + tanh!(z) + else + sinh(z) / cosh(z) + end + end + + def asin(z) + if z.real? and z >= -1 and z <= 1 + asin!(z) + else + Complex(0, -1.0) * log(Complex(0, 1.0) * z + sqrt(1.0 - z * z)) + end + end + + def acos(z) + if z.real? and z >= -1 and z <= 1 + acos!(z) + else + Complex(0, -1.0) * log(z + Complex(0, 1.0) * sqrt(1.0 - z * z)) + end + end + + def atan(z) + if z.real? + atan!(z) + else + Complex(0, 1.0) * log((Complex(0, 1.0) + z) / (Complex(0, 1.0) - z)) / 2.0 + end + end + + def atan2(y,x) + if y.real? and x.real? + atan2!(y,x) + else + Complex(0, -1.0) * log((x + Complex(0, 1.0) * y) / sqrt(x * x + y * y)) + end + end + + def acosh(z) + if z.real? and z >= 1 + acosh!(z) + else + log(z + sqrt(z * z - 1.0)) + end + end + + def asinh(z) + if z.real? + asinh!(z) + else + log(z + sqrt(1.0 + z * z)) + end + end + + def atanh(z) + if z.real? 
and z >= -1 and z <= 1 + atanh!(z) + else + log((1.0 + z) / (1.0 - z)) / 2.0 + end + end + + module_function :exp! + module_function :exp + module_function :log! + module_function :log + module_function :log10! + module_function :log10 + module_function :sqrt! + module_function :sqrt + + module_function :sin! + module_function :sin + module_function :cos! + module_function :cos + module_function :tan! + module_function :tan + + module_function :sinh! + module_function :sinh + module_function :cosh! + module_function :cosh + module_function :tanh! + module_function :tanh + + module_function :asin! + module_function :asin + module_function :acos! + module_function :acos + module_function :atan! + module_function :atan + module_function :atan2! + module_function :atan2 + + module_function :asinh! + module_function :asinh + module_function :acosh! + module_function :acosh + module_function :atanh! + module_function :atanh + + module_function :log2 + module_function :cbrt + module_function :frexp + module_function :ldexp + module_function :hypot + module_function :erf + module_function :erfc + module_function :gamma + module_function :lgamma + +end diff --git a/lib/complex.rb b/lib/complex.rb new file mode 100644 index 0000000..3018791 --- /dev/null +++ b/lib/complex.rb @@ -0,0 +1,24 @@ +require 'cmath' + +unless defined?(Math.exp!) + Object.instance_eval{remove_const :Math} + Math = CMath +end + +def Complex.generic? (other) + other.kind_of?(Integer) || + other.kind_of?(Float) || + other.kind_of?(Rational) +end + +class Complex + + alias image imag + +end + +class Numeric + + def im() Complex(0, self) end + +end diff --git a/lib/csv.rb b/lib/csv.rb new file mode 100644 index 0000000..71ebee8 --- /dev/null +++ b/lib/csv.rb @@ -0,0 +1,2320 @@ +# encoding: US-ASCII +# = csv.rb -- CSV Reading and Writing +# +# Created by James Edward Gray II on 2005-10-31. +# Copyright 2005 James Edward Gray II. You can redistribute or modify this code +# under the terms of Ruby's license. 
+# +# See CSV for documentation. +# +# == Description +# +# Welcome to the new and improved CSV. +# +# This version of the CSV library began its life as FasterCSV. FasterCSV was +# intended as a replacement to Ruby's then standard CSV library. It was +# designed to address concerns users of that library had and it had three +# primary goals: +# +# 1. Be significantly faster than CSV while remaining a pure Ruby library. +# 2. Use a smaller and easier to maintain code base. (FasterCSV eventually +# grew larger, but was also considerably richer in features. The parsing +# core remains quite small.) +# 3. Improve on the CSV interface. +# +# Obviously, the last one is subjective. I did try to defer to the original +# interface whenever I didn't have a compelling reason to change it though, so +# hopefully this won't be too radically different. +# +# We must have met our goals because FasterCSV was renamed to CSV and replaced +# the original library. +# +# == What's Different From the Old CSV? +# +# I'm sure I'll miss something, but I'll try to mention most of the major +# differences I am aware of, to help others quickly get up to speed: +# +# === CSV Parsing +# +# * This parser is m17n aware. See CSV for full details. +# * This library has a stricter parser and will throw MalformedCSVErrors on +# problematic data. +# * This library has a less liberal idea of a line ending than CSV. What you +# set as the :row_sep is law. It can auto-detect your line endings +# though. +# * The old library returned empty lines as [nil]. This library calls +# them []. +# * This library has a much faster parser. +# +# === Interface +# +# * CSV now uses Hash-style parameters to set options. +# * CSV no longer has generate_row() or parse_row(). +# * The old CSV's Reader and Writer classes have been dropped. +# * CSV::open() is now more like Ruby's open(). +# * CSV objects now support most standard IO methods.
+# * CSV now has a new() method used to wrap objects like String and IO for +# reading and writing. +# * CSV::generate() is different from the old method. +# * CSV no longer supports partial reads. It works line-by-line. +# * CSV no longer allows the instance methods to override the separators for +# performance reasons. They must be set in the constructor. +# +# If you use this library and find yourself missing any functionality I have +# trimmed, please {let me know}[mailto:james@grayproductions.net]. +# +# == Documentation +# +# See CSV for documentation. +# +# == What is CSV, really? +# +# CSV maintains a pretty strict definition of CSV taken directly from +# {the RFC}[http://www.ietf.org/rfc/rfc4180.txt]. I relax the rules in only one +# place and that is to make using this library easier. CSV will parse all valid +# CSV. +# +# What you don't want to do is feed CSV invalid data. Because of the way the +# CSV format works, it's common for a parser to need to read until the end of +# the file to be sure a field is invalid. This eats a lot of time and memory. +# +# Luckily, when working with invalid CSV, Ruby's built-in methods will almost +# always be superior in every way. For example, parsing non-quoted fields is as +# easy as: +# +# data.split(",") +# +# == Questions and/or Comments +# +# Feel free to email {James Edward Gray II}[mailto:james@grayproductions.net] +# with any questions. + +require "forwardable" +require "English" +require "date" +require "stringio" + +# +# This class provides a complete interface to CSV files and data. It offers +# tools to enable you to read and write to and from Strings or IO objects, as +# needed. +# +# == Reading +# +# === From a File +# +# ==== A Line at a Time +# +# CSV.foreach("path/to/file.csv") do |row| +# # use row here... +# end +# +# ==== All at Once +# +# arr_of_arrs = CSV.read("path/to/file.csv") +# +# === From a String +# +# ==== A Line at a Time +# +# CSV.parse("CSV,data,String") do |row| +# # use row here... 
+# end +# +# ==== All at Once +# +# arr_of_arrs = CSV.parse("CSV,data,String") +# +# == Writing +# +# === To a File +# +# CSV.open("path/to/file.csv", "wb") do |csv| +# csv << ["row", "of", "CSV", "data"] +# csv << ["another", "row"] +# # ... +# end +# +# === To a String +# +# csv_string = CSV.generate do |csv| +# csv << ["row", "of", "CSV", "data"] +# csv << ["another", "row"] +# # ... +# end +# +# == Convert a Single Line +# +# csv_string = ["CSV", "data"].to_csv # to CSV +# csv_array = "CSV,String".parse_csv # from CSV +# +# == Shortcut Interface +# +# CSV { |csv_out| csv_out << %w{my data here} } # to $stdout +# CSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String +# CSV($stderr) { |csv_err| csv_err << %w{my data here} } # to $stderr +# +# == CSV and Character Encodings (M17n or Multilingualization) +# +# This new CSV parser is m17n savvy. The parser works in the Encoding of the IO +# or String object being read from or written to. Your data is never transcoded +# (unless you ask Ruby to transcode it for you) and will literally be parsed in +# the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the +# Encoding of your data. This is accomplished by transcoding the parser itself +# into your Encoding. +# +# Some transcoding must take place, of course, to accomplish this multiencoding +# support. For example, :col_sep, :row_sep, and +# :quote_char must be transcoded to match your data. Hopefully this +# makes the entire process feel transparent, since CSV's defaults should just +# magically work for your data. However, you can set these values manually in +# the target Encoding to avoid the translation. +# +# It's also important to note that while all of CSV's core parser is now +# Encoding agnostic, some features are not. For example, the built-in +# converters will try to transcode data to UTF-8 before making conversions.
+# Again, you can provide custom converters that are aware of your Encodings to +# avoid this translation. It's just too hard for me to support native +# conversions in all of Ruby's Encodings. +# +# Anyway, the practical side of this is simple: make sure IO and String objects +# passed into CSV have the proper Encoding set and everything should just work. +# CSV methods that allow you to open IO objects (CSV::foreach(), CSV::open(), +# CSV::read(), and CSV::readlines()) do allow you to specify the Encoding. +# +# One minor exception comes when generating CSV into a String with an Encoding +# that is not ASCII compatible. There's no existing data for CSV to use to +# prepare itself and thus you will probably need to manually specify the desired +# Encoding for most of those cases. It will try to guess using the fields in a +# row of output though, when using CSV::generate_line() or Array#to_csv(). +# +# I try to point out any other Encoding issues in the documentation of methods +# as they come up. +# +# This has been tested to the best of my ability with all non-"dummy" Encodings +# Ruby ships with. However, it is brave new code and may have some bugs. +# Please feel free to {report}[mailto:james@grayproductions.net] any issues you +# find with it. +# +class CSV + # The version of the installed library. + VERSION = "2.4.5".freeze + + # + # A CSV::Row is part Array and part Hash. It retains an order for the fields + # and allows duplicates just as an Array would, but also allows you to access + # fields by name just as you could if they were in a Hash. + # + # All rows returned by CSV will be constructed from this class, if header row + # processing is activated. + # + class Row + # + # Construct a new CSV::Row from +headers+ and +fields+, which are expected + # to be Arrays. If one Array is shorter than the other, it will be padded + # with +nil+ objects. 
+ # + # The optional +header_row+ parameter can be set to +true+ to indicate, via + # CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header + # row. Otherwise, the row is assumed to be a field row. + # + # A CSV::Row object supports the following Array methods through delegation: + # + # * empty?() + # * length() + # * size() + # + def initialize(headers, fields, header_row = false) + @header_row = header_row + + # handle extra headers or fields + @row = if headers.size > fields.size + headers.zip(fields) + else + fields.zip(headers).map { |pair| pair.reverse } + end + end + + # Internal data format used to compare equality. + attr_reader :row + protected :row + + ### Array Delegation ### + + extend Forwardable + def_delegators :@row, :empty?, :length, :size + + # Returns +true+ if this is a header row. + def header_row? + @header_row + end + + # Returns +true+ if this is a field row. + def field_row? + not header_row? + end + + # Returns the headers of this row. + def headers + @row.map { |pair| pair.first } + end + + # + # :call-seq: + # field( header ) + # field( header, offset ) + # field( index ) + # + # This method will fetch the field value by +header+ or +index+. If a field + # is not found, +nil+ is returned. + # + # When provided, +offset+ ensures that a header match occurs on or later + # than the +offset+ index. You can use this to find duplicate headers, + # without resorting to hard-coding exact indices. + # + def field(header_or_index, minimum_index = 0) + # locate the pair + finder = header_or_index.is_a?(Integer) ? :[] : :assoc + pair = @row[minimum_index..-1].send(finder, header_or_index) + + # return the field if we have a pair + pair.nil? ? nil : pair.last + end + alias_method :[], :field + + # + # :call-seq: + # []=( header, value ) + # []=( header, offset, value ) + # []=( index, value ) + # + # Looks up the field by the semantics described in CSV::Row.field() and + # assigns the +value+.
+ # + # Assigning past the end of the row with an index will set all pairs between + # to [nil, nil]. Assigning to an unused header appends the new + # pair. + # + def []=(*args) + value = args.pop + + if args.first.is_a? Integer + if @row[args.first].nil? # extending past the end with index + @row[args.first] = [nil, value] + @row.map! { |pair| pair.nil? ? [nil, nil] : pair } + else # normal index assignment + @row[args.first][1] = value + end + else + index = index(*args) + if index.nil? # appending a field + self << [args.first, value] + else # normal header assignment + @row[index][1] = value + end + end + end + + # + # :call-seq: + # <<( field ) + # <<( header_and_field_array ) + # <<( header_and_field_hash ) + # + # If a two-element Array is provided, it is assumed to be a header and field + # and the pair is appended. A Hash works the same way with the key being + # the header and the value being the field. Anything else is assumed to be + # a lone field which is appended with a +nil+ header. + # + # This method returns the row for chaining. + # + def <<(arg) + if arg.is_a?(Array) and arg.size == 2 # appending a header and name + @row << arg + elsif arg.is_a?(Hash) # append header and name pairs + arg.each { |pair| @row << pair } + else # append field value + @row << [nil, arg] + end + + self # for chaining + end + + # + # A shortcut for appending multiple fields. Equivalent to: + # + # args.each { |arg| csv_row << arg } + # + # This method returns the row for chaining. + # + def push(*args) + args.each { |arg| self << arg } + + self # for chaining + end + + # + # :call-seq: + # delete( header ) + # delete( header, offset ) + # delete( index ) + # + # Used to remove a pair from the row by +header+ or +index+. The pair is + # located as described in CSV::Row.field(). The deleted pair is returned, + # or +nil+ if a pair could not be found. + # + def delete(header_or_index, minimum_index = 0) + if header_or_index.is_a? 
Integer # by index + @row.delete_at(header_or_index) + else # by header + @row.delete_at(index(header_or_index, minimum_index)) + end + end + + # + # The provided +block+ is passed a header and field for each pair in the row + # and expected to return +true+ or +false+, depending on whether the pair + # should be deleted. + # + # This method returns the row for chaining. + # + def delete_if(&block) + @row.delete_if(&block) + + self # for chaining + end + + # + # This method accepts any number of arguments which can be headers, indices, + # Ranges of either, or two-element Arrays containing a header and offset. + # Each argument will be replaced with a field lookup as described in + # CSV::Row.field(). + # + # If called with no arguments, all fields are returned. + # + def fields(*headers_and_or_indices) + if headers_and_or_indices.empty? # return all fields--no arguments + @row.map { |pair| pair.last } + else # or work like values_at() + headers_and_or_indices.inject(Array.new) do |all, h_or_i| + all + if h_or_i.is_a? Range + index_begin = h_or_i.begin.is_a?(Integer) ? h_or_i.begin : + index(h_or_i.begin) + index_end = h_or_i.end.is_a?(Integer) ? h_or_i.end : + index(h_or_i.end) + new_range = h_or_i.exclude_end? ? (index_begin...index_end) : + (index_begin..index_end) + fields.values_at(new_range) + else + [field(*Array(h_or_i))] + end + end + end + end + alias_method :values_at, :fields + + # + # :call-seq: + # index( header ) + # index( header, offset ) + # + # This method will return the index of a field with the provided +header+. + # The +offset+ can be used to locate duplicate header names, as described in + # CSV::Row.field(). + # + def index(header, minimum_index = 0) + # find the pair + index = headers[minimum_index..-1].index(header) + # return the index at the right offset, if we found one + index.nil? ? nil : index + minimum_index + end + + # Returns +true+ if +name+ is a header for this row, and +false+ otherwise. + def header?(name) + headers.include? 
name + end + alias_method :include?, :header? + + # + # Returns +true+ if +data+ matches a field in this row, and +false+ + # otherwise. + # + def field?(data) + fields.include? data + end + + include Enumerable + + # + # Yields each pair of the row as header and field tuples (much like + # iterating over a Hash). + # + # Support for Enumerable. + # + # This method returns the row for chaining. + # + def each(&block) + @row.each(&block) + + self # for chaining + end + + # + # Returns +true+ if this row contains the same headers and fields in the + # same order as +other+. + # + def ==(other) + @row == other.row + end + + # + # Collapses the row into a simple Hash. Be warned that this discards field + # order and clobbers duplicate fields. + # + def to_hash + # flatten just one level of the internal Array + Hash[*@row.inject(Array.new) { |ary, pair| ary.push(*pair) }] + end + + # + # Returns the row as a CSV String. Headers are not used. Equivalent to: + # + # csv_row.fields.to_csv( options ) + # + def to_csv(options = Hash.new) + fields.to_csv(options) + end + alias_method :to_s, :to_csv + + # A summary of fields, by header, in an ASCII compatible String. + def inspect + str = ["#<", self.class.to_s] + each do |header, field| + str << " " << (header.is_a?(Symbol) ? header.to_s : header.inspect) << + ":" << field.inspect + end + str << ">" + begin + str.join + rescue # any encoding error + str.map do |s| + e = Encoding::Converter.asciicompat_encoding(s.encoding) + e ? s.encode(e) : s.force_encoding("ASCII-8BIT") + end.join + end + end + end + + # + # A CSV::Table is a two-dimensional data structure for representing CSV + # documents. Tables allow you to work with the data by row or column, + # manipulate the data, and even convert the results back to CSV, if needed. + # + # All tables returned by CSV will be constructed from this class, if header + # row processing is activated.
+ # + class Table + # + # Construct a new CSV::Table from +array_of_rows+, which are expected + # to be CSV::Row objects. All rows are assumed to have the same headers. + # + # A CSV::Table object supports the following Array methods through + # delegation: + # + # * empty?() + # * length() + # * size() + # + def initialize(array_of_rows) + @table = array_of_rows + @mode = :col_or_row + end + + # The current access mode for indexing and iteration. + attr_reader :mode + + # Internal data format used to compare equality. + attr_reader :table + protected :table + + ### Array Delegation ### + + extend Forwardable + def_delegators :@table, :empty?, :length, :size + + # + # Returns a duplicate table object, in column mode. This is handy for + # chaining in a single call without changing the table mode, but be aware + # that this method can consume a fair amount of memory for bigger data sets. + # + # This method returns the duplicate table for chaining. Don't chain + # destructive methods (like []=()) this way though, since you are working + # with a duplicate. + # + def by_col + self.class.new(@table.dup).by_col! + end + + # + # Switches the mode of this table to column mode. All calls to indexing and + # iteration methods will work with columns until the mode is changed again. + # + # This method returns the table and is safe to chain. + # + def by_col! + @mode = :col + + self + end + + # + # Returns a duplicate table object, in mixed mode. This is handy for + # chaining in a single call without changing the table mode, but be aware + # that this method can consume a fair amount of memory for bigger data sets. + # + # This method returns the duplicate table for chaining. Don't chain + # destructive methods (like []=()) this way though, since you are working + # with a duplicate. + # + def by_col_or_row + self.class.new(@table.dup).by_col_or_row! + end + + # + # Switches the mode of this table to mixed mode. 
All calls to indexing and
    # iteration methods will use the default intelligent indexing system until
    # the mode is changed again. In mixed mode an index is assumed to be a row
    # reference while anything else is assumed to be column access by headers.
    #
    # This method returns the table and is safe to chain.
    #
    def by_col_or_row!
      @mode = :col_or_row
      self
    end

    #
    # Returns a duplicate table object, in row mode. This is handy for chaining
    # in a single call without changing the table mode, but be aware that this
    # method can consume a fair amount of memory for bigger data sets.
    #
    # This method returns the duplicate table for chaining. Don't chain
    # destructive methods (like []=()) this way though, since you are working
    # with a duplicate.
    #
    def by_row
      copy = self.class.new(@table.dup)
      copy.by_row!
    end

    #
    # Switches the mode of this table to row mode. All calls to indexing and
    # iteration methods will work with rows until the mode is changed again.
    #
    # This method returns the table and is safe to chain.
    #
    def by_row!
      @mode = :row
      self
    end

    #
    # Returns the headers for the first row of this table (assumed to match all
    # other rows). An empty Array is returned for empty tables.
    #
    def headers
      return Array.new if @table.empty?

      @table.first.headers
    end

    #
    # In the default mixed mode, this method returns rows for index access and
    # columns for header access. You can force the index association by first
    # calling by_col!() or by_row!().
    #
    # Columns are returned as an Array of values. Altering that Array has no
    # effect on the table.
    #
    def [](index_or_header)
      row_access = (@mode == :row) ||
                   (@mode == :col_or_row && index_or_header.is_a?(Integer))
      return @table[index_or_header] if row_access  # by index

      @table.map { |row| row[index_or_header] }     # by header
    end

    #
    # In the default mixed mode, this method assigns rows for index access and
    # columns for header access. You can force the index association by first
    # calling by_col!() or by_row!().
    #
    # Rows may be set to an Array of values (which will inherit the table's
    # headers()) or a CSV::Row.
    #
    # Columns may be set to a single value, which is copied to each row of the
    # column, or an Array of values. Arrays of values are assigned to rows top
    # to bottom in row major order. Excess values are ignored and if the Array
    # does not have a value for each row the extra rows will receive a +nil+.
    #
    # Assigning to an existing column or row clobbers the data. Assigning to
    # new columns creates them at the right end of the table.
    #
    def []=(index_or_header, value)
      row_access = (@mode == :row) ||
                   (@mode == :col_or_row && index_or_header.is_a?(Integer))
      if row_access                      # set row
        @table[index_or_header] =
          value.is_a?(Array) ? Row.new(headers, value) : value
      else                               # set column
        per_row_values = value.is_a?(Array)
        @table.each_with_index do |row, i|
          row[index_or_header] =
            if row.header_row?
              index_or_header            # header rows hold the header itself
            elsif per_row_values
              value[i]                   # one value per row, top to bottom
            else
              value                      # the same value for every row
            end
        end
      end
    end

    #
    # The mixed mode default is to treat a list of indices as row access,
    # returning the rows indicated. Anything else is considered columnar
    # access. For columnar access, the return set has an Array for each row
    # with the values indicated by the headers in each Array. You can force
    # column or row mode using by_col!() or by_row!().
    #
    # You cannot mix column and row access.
    #
    def values_at(*indices_or_headers)
      # an Integer, or a Range running between two Integers
      index_like = lambda do |index|
        index.is_a?(Integer) ||
          (index.is_a?(Range) &&
           index.first.is_a?(Integer) &&
           index.last.is_a?(Integer))
      end

      if @mode == :row ||
         (@mode == :col_or_row && indices_or_headers.all?(&index_like))
        @table.values_at(*indices_or_headers)                    # by indices
      else
        @table.map { |row| row.values_at(*indices_or_headers) }  # by headers
      end
    end

    #
    # Adds a new row to the bottom end of this table. You can provide an Array,
    # which will be converted to a CSV::Row (inheriting the table's headers()),
    # or a CSV::Row.
    #
    # This method returns the table for chaining.
    #
    def <<(row_or_array)
      new_row = if row_or_array.is_a?(Array)
                  Row.new(headers, row_or_array)
                else
                  row_or_array
                end
      @table << new_row

      self  # for chaining
    end

    #
    # A shortcut for appending multiple rows. Equivalent to:
    #
    #   rows.each { |row| self << row }
    #
    # This method returns the table for chaining.
    #
    def push(*rows)
      rows.each do |row|
        self << row
      end

      self  # for chaining
    end

    #
    # Removes and returns the indicated column or row. In the default mixed
    # mode indices refer to rows and everything else is assumed to be a column
    # header. Use by_col!() or by_row!() to force the lookup.
    #
    def delete(index_or_header)
      row_access = (@mode == :row) ||
                   (@mode == :col_or_row && index_or_header.is_a?(Integer))
      return @table.delete_at(index_or_header) if row_access  # by index

      @table.map { |row| row.delete(index_or_header).last }   # by header
    end

    #
    # Removes any column or row for which the block returns +true+. In the
    # default mixed mode or row mode, iteration is the standard row major
    # walking of rows. In column mode, iteration will +yield+ two element
    # tuples containing the column name and an Array of values for that column.
    #
    # This method returns the table for chaining.
+ # + def delete_if(&block) + if @mode == :row or @mode == :col_or_row # by index + @table.delete_if(&block) + else # by header + to_delete = Array.new + headers.each_with_index do |header, i| + to_delete << header if block[[header, self[header]]] + end + to_delete.map { |header| delete(header) } + end + + self # for chaining + end + + include Enumerable + + # + # In the default mixed mode or row mode, iteration is the standard row major + # walking of rows. In column mode, interation will +yield+ two element + # tuples containing the column name and an Array of values for that column. + # + # This method returns the table for chaining. + # + def each(&block) + if @mode == :col + headers.each { |header| block[[header, self[header]]] } + else + @table.each(&block) + end + + self # for chaining + end + + # Returns +true+ if all rows of this table ==() +other+'s rows. + def ==(other) + @table == other.table + end + + # + # Returns the table as an Array of Arrays. Headers will be the first row, + # then all of the field rows will follow. + # + def to_a + @table.inject([headers]) do |array, row| + if row.header_row? + array + else + array + [row.fields] + end + end + end + + # + # Returns the table as a complete CSV String. Headers will be listed first, + # then all of the field rows. + # + def to_csv(options = Hash.new) + @table.inject([headers.to_csv(options)]) do |rows, row| + if row.header_row? + rows + else + rows + [row.fields.to_csv(options)] + end + end.join + end + alias_method :to_s, :to_csv + + # Shows the mode and size of this table in a US-ASCII String. + def inspect + "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>".encode("US-ASCII") + end + end + + # The error thrown when the parser encounters illegal CSV formatting. + class MalformedCSVError < RuntimeError; end + + # + # A FieldInfo Struct contains details about a field's position in the data + # source it was read from. 
CSV will pass this Struct to some blocks that make + # decisions based on field structure. See CSV.convert_fields() for an + # example. + # + # index:: The zero-based index of the field in its row. + # line:: The line of the data source this row is from. + # header:: The header for the column, when available. + # + FieldInfo = Struct.new(:index, :line, :header) + + # A Regexp used to find and convert some common Date formats. + DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} | + \d{4}-\d{2}-\d{2} )\z /x + # A Regexp used to find and convert some common DateTime formats. + DateTimeMatcher = + / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} | + \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} )\z /x + + # The encoding used by all converters. + ConverterEncoding = Encoding.find("UTF-8") + + # + # This Hash holds the built-in converters of CSV that can be accessed by name. + # You can select Converters with CSV.convert() or through the +options+ Hash + # passed to CSV::new(). + # + # :integer:: Converts any field Integer() accepts. + # :float:: Converts any field Float() accepts. + # :numeric:: A combination of :integer + # and :float. + # :date:: Converts any field Date::parse() accepts. + # :date_time:: Converts any field DateTime::parse() accepts. + # :all:: All built-in converters. A combination of + # :date_time and :numeric. + # + # All built-in converters transcode field data to UTF-8 before attempting a + # conversion. If your data cannot be transcoded to UTF-8 the conversion will + # fail and the field will remain unchanged. + # + # This Hash is intentionally left unfrozen and users should feel free to add + # values to it that can be accessed by all CSV objects. + # + # To add a combo field, the value should be an Array of names. Combo fields + # can be nested with other combo fields. 
#
  Converters  = { integer:   lambda { |f|
                    Integer(f.encode(ConverterEncoding)) rescue f
                  },
                  float:     lambda { |f|
                    Float(f.encode(ConverterEncoding)) rescue f
                  },
                  numeric:   [:integer, :float],
                  date:      lambda { |f|
                    begin
                      e = f.encode(ConverterEncoding)
                      e =~ DateMatcher ? Date.parse(e) : f
                    rescue  # encoding conversion or date parse errors
                      f
                    end
                  },
                  date_time: lambda { |f|
                    begin
                      e = f.encode(ConverterEncoding)
                      e =~ DateTimeMatcher ? DateTime.parse(e) : f
                    rescue  # encoding conversion or date parse errors
                      f
                    end
                  },
                  all:       [:date_time, :numeric] }

  #
  # This Hash holds the built-in header converters of CSV that can be accessed
  # by name. You can select HeaderConverters with CSV.header_convert() or
  # through the +options+ Hash passed to CSV::new().
  #
  # :downcase::  Calls downcase() on the header String.
  # :symbol::    The header String is downcased, spaces are
  #              replaced with underscores, non-word characters
  #              are dropped, and finally to_sym() is called.
  #
  # All built-in header converters transcode header data to UTF-8 before
  # attempting a conversion. If your data cannot be transcoded to UTF-8 the
  # conversion will fail and the header will remain unchanged.
  #
  # This Hash is intentionally left unfrozen and users should feel free to add
  # values to it that can be accessed by all CSV objects.
  #
  # To add a combo field, the value should be an Array of names. Combo fields
  # can be nested with other combo fields.
  #
  HeaderConverters = {
    downcase: lambda { |h|
      begin
        h.encode(ConverterEncoding).downcase
      rescue EncodingError  # cannot transcode: leave the header unchanged
        h
      end
    },
    symbol:   lambda { |h|
      begin
        h.encode(ConverterEncoding).downcase.gsub(/\s+/, "_").
                                             gsub(/\W+/, "").to_sym
      rescue EncodingError  # cannot transcode: leave the header unchanged
        h
      end
    }
  }

  #
  # The options used when no overrides are given by calling code. They are:
  #
  # :col_sep::             ","
  # :row_sep::             :auto
  # :quote_char::          '"'
  # :field_size_limit::    +nil+
  # :converters::          +nil+
  # :unconverted_fields::  +nil+
  # :headers::             +false+
  # :return_headers::      +false+
  # :header_converters::   +nil+
  # :skip_blanks::         +false+
  # :force_quotes::        +false+
  #
  DEFAULT_OPTIONS = { col_sep:            ",",
                      row_sep:            :auto,
                      quote_char:         '"',
                      field_size_limit:   nil,
                      converters:         nil,
                      unconverted_fields: nil,
                      headers:            false,
                      return_headers:     false,
                      header_converters:  nil,
                      skip_blanks:        false,
                      force_quotes:       false }.freeze

  #
  # This method will return a CSV instance, just like CSV::new(), but the
  # instance will be cached and returned for all future calls to this method for
  # the same +data+ object (tested by Object#object_id()) with the same
  # +options+.
  #
  # If a block is given, the instance is passed to the block and the return
  # value becomes the return value of the block.
  #
  def self.instance(data = $stdout, options = Hash.new)
    # create a _signature_ for this method call, data object and options
    known = DEFAULT_OPTIONS.keys.sort_by { |sym| sym.to_s }
    # options without a default (such as :write_headers) must also tell
    # instances apart, or differently configured calls share one object
    extra = (options.keys - known).sort_by { |key| key.to_s }
    sig   = [data.object_id] +
            options.values_at(*known) +
            extra.map { |key| [key, options[key]] }

    # fetch or create the instance for this signature
    @@instances ||= Hash.new
    instance    =   (@@instances[sig] ||= new(data, options))

    if block_given?
      yield instance  # run block, if given, returning result
    else
      instance        # or return the instance
    end
  end

  #
  # This method allows you to serialize an Array of Ruby objects to a String or
  # File of CSV data. This is not as powerful as Marshal or YAML, but perhaps
  # useful for spreadsheet and database interaction.
  #
  # Out of the box, this method is intended to work with simple data objects or
  # Structs. It will serialize a list of instance variables and/or
  # Struct.members().
  #
  # If you need more complicated serialization, you can control the process
  # by adding methods to the class to be serialized.
+ # + # A class method csv_meta() is responsible for returning the first row of the + # document (as an Array). This row is considered to be a Hash of the form + # key_1,value_1,key_2,value_2,... CSV::load() expects to find a class key + # with a value of the stringified class name and CSV::dump() will create this, + # if you do not define this method. This method is only called on the first + # object of the Array. + # + # The next method you can provide is an instance method called csv_headers(). + # This method is expected to return the second line of the document (again as + # an Array), which is to be used to give each column a header. By default, + # CSV::load() will set an instance variable if the field header starts with an + # @ character or call send() passing the header as the method name and + # the field value as an argument. This method is only called on the first + # object of the Array. + # + # Finally, you can provide an instance method called csv_dump(), which will + # be passed the headers. This should return an Array of fields that can be + # serialized for this object. This method is called once for every object in + # the Array. + # + # The +io+ parameter can be used to serialize to a File, and +options+ can be + # anything CSV::new() accepts. 
#
  def self.dump(ary_of_objs, io = "", options = Hash.new)
    obj_template = ary_of_objs.first

    csv = new(io, options)

    # write meta information
    begin
      csv << obj_template.class.csv_meta
    rescue NoMethodError
      csv << [:class, obj_template.class]
    end

    # write headers
    begin
      headers = obj_template.csv_headers
    rescue NoMethodError
      headers = obj_template.instance_variables.sort
      if obj_template.class.ancestors.find { |cls| cls.to_s =~ /\AStruct\b/ }
        headers += obj_template.members.map { |mem| "#{mem}=" }.sort
      end
    end
    csv << headers

    # serialize each object
    ary_of_objs.each do |obj|
      begin
        csv << obj.csv_dump(headers)
      rescue NoMethodError
        csv << headers.map do |var|
          if var[0] == ?@
            obj.instance_variable_get(var)
          else
            obj[var[0..-2]]  # strip the trailing "=" of a Struct setter name
          end
        end
      end
    end

    if io.is_a? String
      csv.string
    else
      csv.close
    end
  end

  #
  # This method is the reading counterpart to CSV::dump(). See that method for
  # a detailed description of the process.
  #
  # You can customize loading by adding a class method called csv_load() which
  # will be passed a Hash of meta information, an Array of headers, and an Array
  # of fields for the object the method is expected to return.
  #
  # Remember that all fields will be Strings after this load. If you need
  # something else, use +options+ to setup converters or provide a custom
  # csv_load() implementation.
  #
  def self.load(io_or_str, options = Hash.new)
    csv = new(io_or_str, options)

    # load meta information
    meta = Hash[*csv.shift]
    cls  = meta["class".encode(csv.encoding)].split("::".encode(csv.encoding)).
                                              inject(Object) do |c, const|
      c.const_get(const)
    end

    # load headers
    headers = csv.shift

    # unserialize each object stored in the file
    results = csv.inject(Array.new) do |all, row|
      begin
        obj = cls.csv_load(meta, headers, row)
      rescue NoMethodError
        obj = cls.allocate
        headers.zip(row) do |name, value|
          if name[0] == ?@
            obj.instance_variable_set(name, value)
          else
            obj.send(name, value)
          end
        end
      end
      all << obj
    end

    csv.close unless io_or_str.is_a? String

    results
  end

  #
  # :call-seq:
  #   filter( options = Hash.new ) { |row| ... }
  #   filter( input, options = Hash.new ) { |row| ... }
  #   filter( input, output, options = Hash.new ) { |row| ... }
  #
  # This method is a convenience for building Unix-like filters for CSV data.
  # Each row is yielded to the provided block which can alter it as needed.
  # After the block returns, the row is appended to +output+ altered or not.
  #
  # The +input+ and +output+ arguments can be anything CSV::new() accepts
  # (generally String or IO objects). If not given, they default to
  # ARGF and $stdout.
  #
  # The +options+ parameter is also filtered down to CSV::new() after some
  # clever key parsing. Any key beginning with :in_ or
  # :input_ will have that leading identifier stripped and will only
  # be used in the +options+ Hash for the +input+ object. Keys starting with
  # :out_ or :output_ affect only +output+. All other keys
  # are assigned to both objects.
  #
  # The :output_row_sep +option+ defaults to
  # $INPUT_RECORD_SEPARATOR ($/).
  #
  def self.filter(*args)
    # parse options for input, output, or both
    in_options, out_options = Hash.new, {row_sep: $INPUT_RECORD_SEPARATOR}
    if args.last.is_a? Hash
      args.pop.each do |key, value|
        case key.to_s
        when /\Ain(?:put)?_(.+)\Z/
          in_options[$1.to_sym] = value
        when /\Aout(?:put)?_(.+)\Z/
          out_options[$1.to_sym] = value
        else
          in_options[key]  = value
          out_options[key] = value
        end
      end
    end
    # build input and output wrappers
    input  = new(args.shift || ARGF,    in_options)
    output = new(args.shift || $stdout, out_options)

    # read, yield, write
    input.each do |row|
      yield row
      output << row
    end
  end

  #
  # This method is intended as the primary interface for reading CSV files. You
  # pass a +path+ and any +options+ you wish to set for the read. Each row of
  # file will be passed to the provided +block+ in turn.
  #
  # The +options+ parameter can be anything CSV::new() understands. This method
  # also understands an additional :encoding parameter that you can use
  # to specify the Encoding of the data in the file to be read. You must provide
  # this unless your data is in Encoding::default_external(). CSV will use this
  # to determine how to parse the data. You may provide a second Encoding to
  # have the data transcoded as it is read. For example,
  # encoding: "UTF-32BE:UTF-8" would read UTF-32BE data from the file
  # but transcode it to UTF-8 before CSV parses it.
  #
  # The +options+ Hash you pass is not modified.
  #
  def self.foreach(path, options = Hash.new, &block)
    # work on a copy: deleting :encoding from the caller's Hash would make a
    # second call with the same options silently lose the encoding
    options  = options.dup
    encoding = options.delete(:encoding)
    mode     = "rb"
    mode << ":#{encoding}" if encoding
    open(path, mode, options) do |csv|
      csv.each(&block)
    end
  end

  #
  # :call-seq:
  #   generate( str, options = Hash.new ) { |csv| ... }
  #   generate( options = Hash.new ) { |csv| ... }
  #
  # This method wraps a String you provide, or an empty default String, in a
  # CSV object which is passed to the provided block. You can use the block to
  # append CSV rows to the String and when the block exits, the final String
  # will be returned.
  #
  # Note that a passed String *is* modified by this method. Call dup() before
  # passing if you need a new String.
#
  # The +options+ parameter can be anything CSV::new() understands. This method
  # understands an additional :encoding parameter when not passed a
  # String to set the base Encoding for the output. CSV needs this hint if you
  # plan to output non-ASCII compatible data.
  #
  # The +options+ Hash you pass is not modified.
  #
  def self.generate(*args)
    # add a default empty String, if none was given
    if args.first.is_a? String
      io = StringIO.new(args.shift)
      io.seek(0, IO::SEEK_END)
      args.unshift(io)
    else
      encoding = nil
      if args.last.is_a? Hash
        # swap in a copy so the caller's Hash keeps its :encoding entry
        args[-1] = args.last.dup
        encoding = args.last.delete(:encoding)
      end
      str = ""
      str.encode!(encoding) if encoding
      args.unshift(str)
    end
    csv = new(*args)  # wrap
    yield csv         # yield for appending
    csv.string        # return final String
  end

  #
  # This method is a shortcut for converting a single row (Array) into a CSV
  # String.
  #
  # The +options+ parameter can be anything CSV::new() understands. This method
  # understands an additional :encoding parameter to set the base
  # Encoding for the output. This method will try to guess your Encoding from
  # the first non-+nil+ field in +row+, if possible, but you may need to use
  # this parameter as a backup plan.
  #
  # The :row_sep +option+ defaults to $INPUT_RECORD_SEPARATOR
  # ($/) when calling this method.
  #
  def self.generate_line(row, options = Hash.new)
    # merge() builds a new Hash, so the caller's +options+ stay untouched
    options  = {row_sep: $INPUT_RECORD_SEPARATOR}.merge(options)
    encoding = options.delete(:encoding)
    str      = ""
    if encoding
      str.force_encoding(encoding)
    elsif field = row.find { |f| not f.nil? }
      str.force_encoding(String(field).encoding)
    end
    (new(str, options) << row).string
  end

  #
  # :call-seq:
  #   open( filename, mode = "rb", options = Hash.new ) { |faster_csv| ... }
  #   open( filename, options = Hash.new ) { |faster_csv| ... }
  #   open( filename, mode = "rb", options = Hash.new )
  #   open( filename, options = Hash.new )
  #
  # This method opens an IO object, and wraps that with CSV. This is intended
  # as the primary interface for writing a CSV file.
#
  # You must pass a +filename+ and may optionally add a +mode+ for Ruby's
  # open(). You may also pass an optional Hash containing any +options+
  # CSV::new() understands as the final argument.
  #
  # This method works like Ruby's open() call, in that it will pass a CSV object
  # to a provided block and close it when the block terminates, or it will
  # return the CSV object when no block is provided. (*Note*: This is different
  # from the Ruby 1.8 CSV library which passed rows to the block. Use
  # CSV::foreach() for that behavior.)
  #
  # You must provide a +mode+ with an embedded Encoding designator unless your
  # data is in Encoding::default_external(). CSV will check the Encoding of the
  # underlying IO object (set by the +mode+ you pass) to determine how to parse
  # the data. You may provide a second Encoding to have the data transcoded as
  # it is read just as you can with a normal call to IO::open(). For example,
  # "rb:UTF-32BE:UTF-8" would read UTF-32BE data from the file but
  # transcode it to UTF-8 before CSV parses it.
  #
  # An opened CSV object will delegate to many IO methods for convenience. You
  # may call:
  #
  # * binmode()
  # * binmode?()
  # * close()
  # * close_read()
  # * close_write()
  # * closed?()
  # * eof()
  # * eof?()
  # * external_encoding()
  # * fcntl()
  # * fileno()
  # * flock()
  # * flush()
  # * fsync()
  # * internal_encoding()
  # * ioctl()
  # * isatty()
  # * path()
  # * pid()
  # * pos()
  # * pos=()
  # * reopen()
  # * seek()
  # * stat()
  # * sync()
  # * sync=()
  # * tell()
  # * to_i()
  # * to_io()
  # * truncate()
  # * tty?()
  #
  def self.open(*args)
    # find the +options+ Hash
    options = if args.last.is_a? Hash then args.pop else Hash.new end
    # default to a binary open mode
    args << "rb" if args.size == 1
    # wrap a File opened with the remaining +args+
    file = File.open(*args)
    begin
      csv = new(file, options)
    rescue Exception
      file.close  # don't leak the handle when CSV::new() raises
      raise
    end

    # handle blocks like Ruby's open(), not like the CSV library
    if block_given?
      begin
        yield csv
      ensure
        csv.close
      end
    else
      csv
    end
  end

  #
  # :call-seq:
  #   parse( str, options = Hash.new ) { |row| ... }
  #   parse( str, options = Hash.new )
  #
  # This method can be used to easily parse CSV out of a String. You may either
  # provide a +block+ which will be called with each row of the String in turn,
  # or just use the returned Array of Arrays (when no +block+ is given).
  #
  # You pass your +str+ to read from, and an optional +options+ Hash containing
  # anything CSV::new() understands.
  #
  def self.parse(*args, &block)
    csv = new(*args)
    if block.nil?  # slurp contents, if no block is given
      begin
        csv.read
      ensure
        csv.close
      end
    else           # or pass each row to a provided block
      csv.each(&block)
    end
  end

  #
  # This method is a shortcut for converting a single line of a CSV String into
  # an Array. Note that if +line+ contains multiple rows, anything
  # beyond the first row is ignored.
  #
  # The +options+ parameter can be anything CSV::new() understands.
  #
  def self.parse_line(line, options = Hash.new)
    new(line, options).shift
  end

  #
  # Use to slurp a CSV file into an Array of Arrays. Pass the +path+ to the
  # file and any +options+ CSV::new() understands. This method also understands
  # an additional :encoding parameter that you can use to specify the
  # Encoding of the data in the file to be read. You must provide this unless
  # your data is in Encoding::default_external(). CSV will use this to determine
  # how to parse the data. You may provide a second Encoding to have the data
  # transcoded as it is read. For example,
  # encoding: "UTF-32BE:UTF-8" would read UTF-32BE data from the file
  # but transcode it to UTF-8 before CSV parses it.
  #
  # The +options+ Hash you pass is not modified.
  #
  def self.read(path, options = Hash.new)
    # work on a copy so the caller's Hash keeps its :encoding entry
    options  = options.dup
    encoding = options.delete(:encoding)
    mode     = "rb"
    mode << ":#{encoding}" if encoding
    open(path, mode, options) { |csv| csv.read }
  end

  # Alias for CSV::read().
#
  def self.readlines(*args)
    return read(*args)
  end

  #
  # A shortcut for:
  #
  #   CSV.read( path, { headers:           true,
  #                     converters:        :numeric,
  #                     header_converters: :symbol }.merge(options) )
  #
  def self.table(path, options = Hash.new)
    table_defaults = { headers:           true,
                       converters:        :numeric,
                       header_converters: :symbol }
    read(path, table_defaults.merge(options))
  end

  #
  # This constructor will wrap either a String or IO object passed in +data+ for
  # reading and/or writing. In addition to the CSV instance methods, several IO
  # methods are delegated. (See CSV::open() for a complete list.) If you pass
  # a String for +data+, you can later retrieve it (after writing to it, for
  # example) with CSV.string().
  #
  # Note that a wrapped String will be positioned at the beginning (for
  # reading). If you want it at the end (for writing), use CSV::generate().
  # If you want any other positioning, pass a preset StringIO object instead.
  #
  # You may set any reading and/or writing preferences in the +options+ Hash.
  # Available options are:
  #
  # :col_sep::             The String placed between each field.
  #                        This String will be transcoded into
  #                        the data's Encoding before parsing.
  # :row_sep::             The String appended to the end of each
  #                        row. This can be set to the special
  #                        :auto setting, which requests
  #                        that CSV automatically discover this
  #                        from the data. Auto-discovery reads
  #                        ahead in the data looking for the next
  #                        "\r\n", "\n", or
  #                        "\r" sequence. A sequence
  #                        will be selected even if it occurs in
  #                        a quoted field, assuming that you
  #                        would have the same line endings
  #                        there. If none of those sequences is
  #                        found, +data+ is ARGF,
  #                        STDIN, STDOUT, or
  #                        STDERR, or the stream is only
  #                        available for output, the default
  #                        $INPUT_RECORD_SEPARATOR
  #                        ($/) is used. Obviously,
  #                        discovery takes a little time. Set
  #                        manually if speed is important. Also
  #                        note that IO objects should be opened
  #                        in binary mode on Windows if this
  #                        feature will be used as the
  #                        line-ending translation can cause
  #                        problems with resetting the document
  #                        position to where it was before the
  #                        read ahead. This String will be
  #                        transcoded into the data's Encoding
  #                        before parsing.
  # :quote_char::          The character used to quote fields.
  #                        This has to be a single character
  #                        String. This is useful for
  #                        applications that incorrectly use
  #                        ' as the quote character
  #                        instead of the correct ".
  #                        CSV will always consider a double
  #                        sequence of this character to be an
  #                        escaped quote. This String will be
  #                        transcoded into the data's Encoding
  #                        before parsing.
  # :field_size_limit::    This is a maximum size CSV will read
  #                        ahead looking for the closing quote
  #                        for a field. (In truth, it reads to
  #                        the first line ending beyond this
  #                        size.) If a quote cannot be found
  #                        within the limit CSV will raise a
  #                        MalformedCSVError, assuming the data
  #                        is faulty. You can use this limit to
  #                        prevent what are effectively DoS
  #                        attacks on the parser. However, this
  #                        limit can cause a legitimate parse to
  #                        fail and thus is set to +nil+, or off,
  #                        by default.
  # :converters::          An Array of names from the Converters
  #                        Hash and/or lambdas that handle custom
  #                        conversion. A single converter
  #                        doesn't have to be in an Array. All
  #                        built-in converters try to transcode
  #                        fields to UTF-8 before converting.
  #                        The conversion will fail if the data
  #                        cannot be transcoded, leaving the
  #                        field unchanged.
  # :unconverted_fields::  If set to +true+, an
  #                        unconverted_fields() method will be
  #                        added to all returned rows (Array or
  #                        CSV::Row) that will return the fields
  #                        as they were before conversion. Note
  #                        that :headers supplied by
  #                        Array or String were not fields of the
  #                        document and thus will have an empty
  #                        Array attached.
# :headers::             If set to :first_row or
  #                        +true+, the initial row of the CSV
  #                        file will be treated as a row of
  #                        headers. If set to an Array, the
  #                        contents will be used as the headers.
  #                        If set to a String, the String is run
  #                        through a call of CSV::parse_line()
  #                        with the same :col_sep,
  #                        :row_sep, and
  #                        :quote_char as this instance
  #                        to produce an Array of headers. This
  #                        setting causes CSV#shift() to return
  #                        rows as CSV::Row objects instead of
  #                        Arrays and CSV#read() to return
  #                        CSV::Table objects instead of an Array
  #                        of Arrays.
  # :return_headers::      When +false+, header rows are silently
  #                        swallowed. If set to +true+, header
  #                        rows are returned in a CSV::Row object
  #                        with identical headers and
  #                        fields (save that the fields do not go
  #                        through the converters).
  # :write_headers::       When +true+ and :headers is
  #                        set, a header row will be added to the
  #                        output.
  # :header_converters::   Identical in functionality to
  #                        :converters save that the
  #                        conversions are only made to header
  #                        rows. All built-in converters try to
  #                        transcode headers to UTF-8 before
  #                        converting. The conversion will fail
  #                        if the data cannot be transcoded,
  #                        leaving the header unchanged.
  # :skip_blanks::         When set to a +true+ value, CSV will
  #                        skip over any rows with no content.
  # :force_quotes::        When set to a +true+ value, CSV will
  #                        quote all CSV fields it creates.
  #
  # See CSV::DEFAULT_OPTIONS for the default settings.
  #
  # Options cannot be overridden in the instance methods for performance
  # reasons, so be sure to set what you want here.
  #
  # Raises ArgumentError if any entries are still left in +options+ after the
  # init_*() helpers have run; those entries are reported as unknown options.
  #
  def initialize(data, options = Hash.new)
    # build the options for this read/write
    options = DEFAULT_OPTIONS.merge(options)

    # create the IO object we will read from
    @io       =   if data.is_a? String then StringIO.new(data) else data end
    # honor the IO encoding if we can, otherwise fall back to
    # Encoding.default_internal and then Encoding.default_external
    @encoding =   raw_encoding || Encoding.default_internal || Encoding.default_external
    #
    # prepare for building safe regular expressions in the target encoding,
    # if we can transcode the needed characters
    #
    @re_esc   =   "\\".encode(@encoding) rescue ""
    @re_chars =   %w[ \\ .  [  ]  -  ^  $  ?
                      *  +  {  }  (  )  |  #
                      \  \r \n \t \f \v ].
                  map { |s| s.encode(@encoding) rescue nil }.compact

    # NOTE(review): each init_*() helper is defined outside this view and
    # presumably removes the options it handles from +options+ -- confirm
    init_separators(options)
    init_parsers(options)
    init_converters(options)
    init_headers(options)

    # anything still in +options+ at this point was not recognized
    unless options.empty?
      raise ArgumentError, "Unknown options: #{options.keys.join(', ')}."
    end

    # track our own lineno since IO gets confused about line-ends in CSV fields
    @lineno = 0
  end

  #
  # The encoded :col_sep used in parsing and writing. See CSV::new
  # for details.
  #
  attr_reader :col_sep
  #
  # The encoded :row_sep used in parsing and writing. See CSV::new
  # for details.
  #
  attr_reader :row_sep
  #
  # The encoded :quote_char used in parsing and writing. See CSV::new
  # for details.
  #
  attr_reader :quote_char
  # The limit for field size, if any. See CSV::new for details.
  attr_reader :field_size_limit
  #
  # Returns the current list of converters in effect. See CSV::new for details.
  # Built-in converters will be returned by name, while others will be returned
  # as is.
  #
  def converters
    @converters.map do |converter|
      # rassoc() finds the [name, converter] pair of a built-in, if any
      name = Converters.rassoc(converter)
      name ? name.first : converter
    end
  end
  #
  # Returns +true+ if unconverted_fields() will be added to parsed results.
  # See CSV::new for details.
  #
  def unconverted_fields?() @unconverted_fields end
  #
  # Returns +nil+ if headers will not be used, +true+ if they will but have not
  # yet been read, or the actual headers after they have been read. See
  # CSV::new for details.
#
  def headers
    return nil unless @use_headers

    @headers || true
  end
  #
  # Returns +true+ if headers will be returned as a row of results.
  # See CSV::new for details.
  #
  def return_headers?
    @return_headers
  end
  # Returns +true+ if headers are written in output. See CSV::new for details.
  def write_headers?
    @write_headers
  end
  #
  # Returns the current list of converters in effect for headers. See CSV::new
  # for details. Built-in converters will be returned by name, while others
  # will be returned as is.
  #
  def header_converters
    @header_converters.map do |converter|
      builtin = HeaderConverters.rassoc(converter)
      builtin ? builtin.first : converter
    end
  end
  #
  # Returns +true+ if blank lines are skipped by the parser. See CSV::new
  # for details.
  #
  def skip_blanks?
    @skip_blanks
  end
  # Returns +true+ if all output fields are quoted. See CSV::new for details.
  def force_quotes?
    @force_quotes
  end

  #
  # The Encoding CSV is parsing or writing in. This will be the Encoding you
  # receive parsed data in and/or the Encoding data will be written in.
  #
  attr_reader :encoding

  #
  # The line number of the last row read from this file. Fields with nested
  # line-end characters will not affect this count.
  #
  attr_reader :lineno

  ### IO and StringIO Delegation ###

  extend Forwardable
  def_delegators :@io,
                 :binmode, :binmode?, :close, :close_read, :close_write,
                 :closed?, :eof, :eof?, :external_encoding, :fcntl,
                 :fileno, :flock, :flush, :fsync, :internal_encoding,
                 :ioctl, :isatty, :path, :pid, :pos, :pos=, :reopen,
                 :seek, :stat, :string, :sync, :sync=, :tell, :to_i,
                 :to_io, :truncate, :tty?

  # Rewinds the underlying IO object and resets CSV's lineno() counter.
  def rewind
    @headers = nil
    @lineno  = 0
    @io.rewind
  end

  ### End Delegation ###

  #
  # The primary write method for wrapped Strings and IOs, +row+ (an Array or
  # CSV::Row) is converted to CSV and appended to the data source. When a
  # CSV::Row is passed, only the row's fields() are appended to the output.
  #
  # The data source must be open for writing.
  #
  def <<(row)
    # headers given as an Array or String must be in place before any write
    if header_row? && [Array, String].include?(@use_headers.class)
      parse_headers  # won't read data for Array or String
      self << @headers if @write_headers
    end

    # reduce CSV::Row objects and Hashes to a plain list of fields
    fields = if self.class::Row === row
               row.fields
             elsif Hash === row
               @headers.map { |header| row[header] }
             else
               row
             end

    @headers = fields if header_row?
    @lineno += 1

    # quote and separate
    line = fields.map(&@quote).join(@col_sep) + @row_sep
    @io << line

    self  # for chaining
  end
  alias_method :add_row, :<<
  alias_method :puts,    :<<

  #
  # :call-seq:
  #   convert( name )
  #   convert { |field| ... }
  #   convert { |field, field_info| ... }
  #
  # You can use this method to install a CSV::Converters built-in, or provide a
  # block that handles a custom conversion.
  #
  # If you provide a block that takes one argument, it will be passed the field
  # and is expected to return the converted value or the field itself. If your
  # block takes two arguments, it will also be passed a CSV::FieldInfo Struct,
  # containing details about the field. Again, the block should return a
  # converted field or the field itself.
  #
  def convert(name = nil, &converter)
    builtins = self.class::Converters
    add_converter(:converters, builtins, name, &converter)
  end

  #
  # :call-seq:
  #   header_convert( name )
  #   header_convert { |field| ... }
  #   header_convert { |field, field_info| ... }
  #
  # Identical to CSV#convert(), but for header rows.
  #
  # Note that this method must be called before header rows are read to have any
  # effect.
  #
  def header_convert(name = nil, &converter)
    builtins = self.class::HeaderConverters
    add_converter(:header_converters, builtins, name, &converter)
  end

  include Enumerable

  #
  # Yields each row of the data source in turn.
  #
  # Support for Enumerable.
+  #
+  # The data source must be open for reading.
+  #
+  def each
+    # shift() returns +nil+ once no rows remain, which ends the loop
+    while row = shift
+      yield row
+    end
+  end
+
+  #
+  # Slurps the remaining rows and returns them as an Array of rows.  When
+  # header processing is enabled the rows are wrapped in a Table instead.
+  #
+  # The data source must be open for reading.
+  #
+  def read
+    rows = to_a
+    if @use_headers
+      Table.new(rows)
+    else
+      rows
+    end
+  end
+  alias_method :readlines, :read
+
+  # Returns +true+ if the next row read will be a header row.
+  def header_row?
+    @use_headers and @headers.nil?
+  end
+
+  #
+  # The primary read method for wrapped Strings and IOs, a single row is pulled
+  # from the data source, parsed and returned as an Array of fields (if header
+  # rows are not used) or a CSV::Row (when header rows are used).
+  #
+  # Returns +nil+ when no further data can be read.  Raises MalformedCSVError
+  # for unquoted fields containing \r or \n, unclosed or illegal quoting, and
+  # fields exceeding the configured size limit.
+  #
+  # The data source must be open for reading.
+  #
+  def shift
+    #########################################################################
+    ### This method is purposefully kept a bit long as simple conditional ###
+    ### checks are faster than numerous (expensive) method calls.         ###
+    #########################################################################
+
+    # handle headers not based on document content
+    if header_row? and @return_headers and
+       [Array, String].include? @use_headers.class
+      if @unconverted_fields
+        return add_unconverted_fields(parse_headers, Array.new)
+      else
+        return parse_headers
+      end
+    end
+
+    # begin with a blank line, so we can always add to it
+    line = ""
+
+    #
+    # it can take multiple calls to @io.gets() to get a full line,
+    # because of \r and/or \n characters embedded in quoted fields
+    #
+    loop do
+      # add another read to the line
+      # (any StandardError here, including the TypeError raised when gets()
+      # returns +nil+ at the end of the data, ends the read with +nil+)
+      (line += @io.gets(@row_sep)) rescue return nil
+      # copy the line so we can chop it up in parsing
+      parse = line.dup
+      parse.sub!(@parsers[:line_end], "")
+
+      #
+      # I believe a blank line should be an Array.new, not Ruby 1.8
+      # CSV's [nil]
+      #
+      if parse.empty?
+        @lineno += 1
+        if @skip_blanks
+          line = ""
+          next
+        elsif @unconverted_fields
+          return add_unconverted_fields(Array.new, Array.new)
+        elsif @use_headers
+          return self.class::Row.new(Array.new, Array.new)
+        else
+          return Array.new
+        end
+      end
+
+      #
+      # shave leading empty fields if needed, because the main parser chokes
+      # on these
+      #
+      csv = if parse.sub!(@parsers[:leading_fields], "")
+        # $& is the run of separators just removed; one +nil+ per separator
+        [nil] * ($&.length / @col_sep.length)
+      else
+        Array.new
+      end
+      #
+      # then parse the main fields with a hyper-tuned Regexp from
+      # Mastering Regular Expressions, Second Edition
+      #
+      # ($1 holds the contents of a quoted field, $2 an unquoted field)
+      #
+      parse.gsub!(@parsers[:csv_row]) do
+        csv << if $1.nil?     # we found an unquoted field
+          if $2.empty?        # switch empty unquoted fields to +nil+...
+            nil               # for Ruby 1.8 CSV compatibility
+          else
+            # I decided to take a strict approach to CSV parsing...
+            if $2.count(@parsers[:return_newline]).zero?  # verify correctness
+              $2
+            else
+              # or throw an Exception
+              raise MalformedCSVError, "Unquoted fields do not allow " +
+                                       "\\r or \\n (line #{lineno + 1})."
+            end
+          end
+        else                  # we found a quoted field...
+          $1.gsub(@quote_char * 2, @quote_char)  # unescape contents
+        end
+        ""  # gsub!'s replacement, clear the field
+      end
+
+      # if parse is empty?(), we found all the fields on the line...
+      if parse.empty?
+        @lineno += 1
+
+        # save unconverted fields, if needed...
+        unconverted = csv.dup if @unconverted_fields
+
+        # convert fields, if needed...
+        csv = convert_fields(csv) unless @use_headers or @converters.empty?
+        # parse out header rows and handle CSV::Row conversions...
+        csv = parse_headers(csv) if @use_headers
+
+        # inject unconverted fields and accessor, if requested...
+        if @unconverted_fields and not csv.respond_to? :unconverted_fields
+          add_unconverted_fields(csv, unconverted)
+        end
+
+        # return the results
+        break csv
+      end
+      # if we're not empty?() but at eof?(), a quoted field wasn't closed...
+      if @io.eof?
+        raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}."
+      elsif parse =~ @parsers[:bad_field]
+        raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}."
+      elsif @field_size_limit and parse.length >= @field_size_limit
+        raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
+      end
+      # otherwise, we need to loop and pull some more data to complete the row
+    end
+  end
+  alias_method :gets,     :shift
+  alias_method :readline, :shift
+
+  #
+  # Returns a simplified description of the key CSV attributes in an
+  # ASCII compatible String.
+  #
+  def inspect
+    str = ["<#", self.class.to_s, " io_type:"]
+    # show type of wrapped IO
+    if    @io == $stdout then str << "$stdout"
+    elsif @io == $stdin  then str << "$stdin"
+    elsif @io == $stderr then str << "$stderr"
+    else                      str << @io.class.to_s
+    end
+    # show IO.path(), if available
+    if @io.respond_to?(:path) and (p = @io.path)
+      str << " io_path:" << p.inspect
+    end
+    # show encoding
+    str << " encoding:" << @encoding.name
+    # show other attributes (only those currently set to a truthy value)
+    %w[ lineno     col_sep     row_sep
+        quote_char skip_blanks ].each do |attr_name|
+      if a = instance_variable_get("@#{attr_name}")
+        str << " " << attr_name << ":" << a.inspect
+      end
+    end
+    if @use_headers
+      str << " headers:" << headers.inspect
+    end
+    str << ">"
+    begin
+      str.join
+    rescue  # any encoding error
+      # fall back to joining each piece in an ASCII compatible encoding
+      str.map do |s|
+        e = Encoding::Converter.asciicompat_encoding(s.encoding)
+        e ? s.encode(e) : s.force_encoding("ASCII-8BIT")
+      end.join
+    end
+  end
+
+  private
+
+  #
+  # Stores the indicated separators for later use.
+  #
+  # If auto-discovery was requested for @row_sep, this method will read
+  # ahead in the @io and try to find one.  +ARGF+, +STDIN+, +STDOUT+,
+  # +STDERR+ and any stream open for output only are not read ahead and
+  # instead get a default @row_sep of $INPUT_RECORD_SEPARATOR ($/).
+  #
+  # This method also establishes the quoting rules used for CSV output.
+  #
+  # Consumes the :col_sep, :row_sep, :quote_char and :force_quotes entries of
+  # +options+.  Raises ArgumentError unless :quote_char is exactly one
+  # character long.
+  #
+  def init_separators(options)
+    # store the selected separators
+    @col_sep    = options.delete(:col_sep).to_s.encode(@encoding)
+    @row_sep    = options.delete(:row_sep)  # encode after resolving :auto
+    @quote_char = options.delete(:quote_char).to_s.encode(@encoding)
+
+    if @quote_char.length != 1
+      raise ArgumentError, ":quote_char has to be a single character String"
+    end
+
+    #
+    # automatically discover row separator when requested
+    # (not fully encoding safe)
+    #
+    if @row_sep == :auto
+      if [ARGF, STDIN, STDOUT, STDERR].include?(@io) or
+         (defined?(Zlib) and @io.class == Zlib::GzipWriter)
+        @row_sep = $INPUT_RECORD_SEPARATOR
+      else
+        begin
+          saved_pos = @io.pos  # remember where we were
+          while @row_sep == :auto
+            #
+            # if we run out of data, it's probably a single line
+            # (use a sensible default)
+            #
+            if @io.eof?
+              @row_sep = $INPUT_RECORD_SEPARATOR
+              break
+            end
+
+            # read ahead a bit
+            sample =  read_to_char(1024)
+            # a trailing \r may be the first half of \r\n, so peek one more
+            sample += read_to_char(1) if sample[-1..-1] == encode_str("\r") and
+                                         not @io.eof?
+            # try to find a standard separator
+            if sample =~ encode_re("\r\n?|\n")
+              @row_sep = $&
+              break
+            end
+          end
+          # tricky seek() clone to work around GzipReader's lack of seek()
+          @io.rewind
+          # reset back to the remembered position
+          while saved_pos > 1024  # avoid loading a lot of data into memory
+            @io.read(1024)
+            saved_pos -= 1024
+          end
+          @io.read(saved_pos) if saved_pos.nonzero?
+        rescue IOError  # stream not opened for reading
+          @row_sep = $INPUT_RECORD_SEPARATOR
+        end
+      end
+    end
+    @row_sep = @row_sep.to_s.encode(@encoding)
+
+    # establish quoting rules
+    @force_quotes = options.delete(:force_quotes)
+    # wraps a field in quote characters, doubling any embedded quote
+    do_quote = lambda do |field|
+      @quote_char                                      +
+      String(field).gsub(@quote_char, @quote_char * 2) +
+      @quote_char
+    end
+    # characters whose presence forces a field to be quoted
+    quotable_chars = encode_str("\r\n", @col_sep, @quote_char)
+    @quote = if @force_quotes
+      do_quote
+    else
+      lambda do |field|
+        if field.nil?  # represent +nil+ fields as empty unquoted fields
+          ""
+        else
+          field = String(field)  # Stringify fields
+          # represent empty fields as empty quoted fields
+          if field.empty? or
+             field.count(quotable_chars).nonzero?
+            do_quote.call(field)
+          else
+            field  # unquoted field
+          end
+        end
+      end
+    end
+  end
+
+  #
+  # Pre-compiles parsers and stores them by name for access during reads.
+  #
+  # Consumes the :skip_blanks and :field_size_limit entries of +options+ and
+  # builds @parsers from the already-stored @col_sep, @row_sep and
+  # @quote_char.
+  #
+  def init_parsers(options)
+    # store the parser behaviors
+    @skip_blanks      = options.delete(:skip_blanks)
+    @field_size_limit = options.delete(:field_size_limit)
+
+    # prebuild Regexps for faster parsing
+    esc_col_sep = escape_re(@col_sep)
+    esc_row_sep = escape_re(@row_sep)
+    esc_quote   = escape_re(@quote_char)
+    @parsers = {
+      # for empty leading fields
+      leading_fields: encode_re("\\A(?:", esc_col_sep, ")+"),
+      # The Primary Parser
+      csv_row:        encode_re(
+        "\\G(?:\\A|", esc_col_sep, ")",                # anchor the match
+        "(?:", esc_quote,                              # find quoted fields
+               "((?>[^", esc_quote, "]*)",             # "unrolling the loop"
+               "(?>", esc_quote * 2,                   # double for escaping
+               "[^", esc_quote, "]*)*)",
+               esc_quote,
+               "|",                                    # ... or ...
+               "([^", esc_quote, esc_col_sep, "]*))",  # unquoted fields
+        "(?=", esc_col_sep, "|\\z)"                    # ensure field is ended
+      ),
+      # a test for unescaped quotes
+      bad_field:      encode_re(
+        "\\A", esc_col_sep, "?",                    # an optional comma
+        "(?:", esc_quote,                           # a quoted field
+               "(?>[^", esc_quote, "]*)",           # "unrolling the loop"
+               "(?>", esc_quote * 2,                # double for escaping
+               "[^", esc_quote, "]*)*",
+               esc_quote,                           # the closing quote
+               "[^", esc_quote, "]",                # an extra character
+               "|",                                 # ... or ...
+               "[^", esc_quote, esc_col_sep, "]+",  # an unquoted field
+               esc_quote, ")"                       # an extra quote
+      ),
+      # safer than chomp!()
+      line_end:       encode_re(esc_row_sep, "\\z"),
+      # illegal unquoted characters
+      return_newline: encode_str("\r\n")
+    }
+  end
+
+  #
+  # Loads any converters requested during construction.
+  #
+  # If +field_name+ is set :converters (the default) field converters
+  # are set.
When +field_name+ is :header_converters header converters + # are added instead. + # + # The :unconverted_fields option is also actived for + # :converters calls, if requested. + # + def init_converters(options, field_name = :converters) + if field_name == :converters + @unconverted_fields = options.delete(:unconverted_fields) + end + + instance_variable_set("@#{field_name}", Array.new) + + # find the correct method to add the converters + convert = method(field_name.to_s.sub(/ers\Z/, "")) + + # load converters + unless options[field_name].nil? + # allow a single converter not wrapped in an Array + unless options[field_name].is_a? Array + options[field_name] = [options[field_name]] + end + # load each converter... + options[field_name].each do |converter| + if converter.is_a? Proc # custom code block + convert.call(&converter) + else # by name + convert.call(converter) + end + end + end + + options.delete(field_name) + end + + # Stores header row settings and loads header converters, if needed. + def init_headers(options) + @use_headers = options.delete(:headers) + @return_headers = options.delete(:return_headers) + @write_headers = options.delete(:write_headers) + + # headers must be delayed until shift(), in case they need a row of content + @headers = nil + + init_converters(options, :header_converters) + end + + # + # The actual work method for adding converters, used by both CSV.convert() and + # CSV.header_convert(). + # + # This method requires the +var_name+ of the instance variable to place the + # converters in, the +const+ Hash to lookup named converters in, and the + # normal parameters of the CSV.convert() and CSV.header_convert() methods. + # + def add_converter(var_name, const, name = nil, &converter) + if name.nil? 
# custom converter + instance_variable_get("@#{var_name}") << converter + else # named converter + combo = const[name] + case combo + when Array # combo converter + combo.each do |converter_name| + add_converter(var_name, const, converter_name) + end + else # individual named converter + instance_variable_get("@#{var_name}") << combo + end + end + end + + # + # Processes +fields+ with @converters, or @header_converters + # if +headers+ is passed as +true+, returning the converted field set. Any + # converter that changes the field into something other than a String halts + # the pipeline of conversion for that field. This is primarily an efficiency + # shortcut. + # + def convert_fields(fields, headers = false) + # see if we are converting headers or fields + converters = headers ? @header_converters : @converters + + fields.map.with_index do |field, index| + converters.each do |converter| + field = if converter.arity == 1 # straight field converter + converter[field] + else # FieldInfo converter + header = @use_headers && !headers ? @headers[index] : nil + converter[field, FieldInfo.new(index, lineno, header)] + end + break unless field.is_a? String # short-curcuit pipeline for speed + end + field # final state of each field, converted or original + end + end + + # + # This methods is used to turn a finished +row+ into a CSV::Row. Header rows + # are also dealt with here, either by returning a CSV::Row with identical + # headers and fields (save that the fields do not go through the converters) + # or by reading past them to return a field row. Headers are also saved in + # @headers for use in future rows. + # + # When +nil+, +row+ is assumed to be a header row not based on an actual row + # of the stream. + # + def parse_headers(row = nil) + if @headers.nil? 
# header row + @headers = case @use_headers # save headers + # Array of headers + when Array then @use_headers + # CSV header String + when String + self.class.parse_line( @use_headers, + col_sep: @col_sep, + row_sep: @row_sep, + quote_char: @quote_char ) + # first row is headers + else row + end + + # prepare converted and unconverted copies + row = @headers if row.nil? + @headers = convert_fields(@headers, true) + + if @return_headers # return headers + return self.class::Row.new(@headers, row, true) + elsif not [Array, String].include? @use_headers.class # skip to field row + return shift + end + end + + self.class::Row.new(@headers, convert_fields(row)) # field row + end + + # + # Thiw methods injects an instance variable unconverted_fields into + # +row+ and an accessor method for it called unconverted_fields(). The + # variable is set to the contents of +fields+. + # + def add_unconverted_fields(row, fields) + class << row + attr_reader :unconverted_fields + end + row.instance_eval { @unconverted_fields = fields } + row + end + + # + # This method is an encoding safe version of Regexp::escape(). It will escape + # any characters that would change the meaning of a regular expression in the + # encoding of +str+. Regular expression characters that cannot be transcoded + # to the target encoding will be skipped and no escaping will be performed if + # a backslash cannot be transcoded. + # + def escape_re(str) + str.chars.map { |c| @re_chars.include?(c) ? @re_esc + c : c }.join + end + + # + # Builds a regular expression in @encoding. All +chunks+ will be + # transcoded to that encoding. + # + def encode_re(*chunks) + Regexp.new(encode_str(*chunks)) + end + + # + # Builds a String in @encoding. All +chunks+ will be transcoded to + # that encoding. 
+  #
+  def encode_str(*chunks)
+    chunks.map { |chunk| chunk.encode(@encoding.name) }.join
+  end
+
+  #
+  # Reads at least +bytes+ from @io, but will read up to 10 bytes ahead if
+  # needed to ensure the data read is valid in the encoding of that data.  This
+  # should ensure that it is safe to use regular expressions on the read data,
+  # unless it is actually a broken encoding.  The read data will be returned in
+  # @encoding.
+  #
+  # If the data still is not valid once the read-ahead allowance is used up
+  # (or the input ends), the data is returned as read, without transcoding.
+  #
+  def read_to_char(bytes)
+    return "" if @io.eof?
+    data = read_io(bytes)
+    begin
+      # a bare raise sends control to the rescue clause below
+      raise unless data.valid_encoding?
+      encoded = encode_str(data)
+      raise unless encoded.valid_encoding?
+      return encoded
+    rescue  # encoding error or my invalid data raise
+      if @io.eof? or data.size >= bytes + 10
+        return data
+      else
+        # pull one more unit of input and run the validity checks again
+        data += read_io(1)
+        retry
+      end
+    end
+  end
+
+  private
+  # Picks the encoding to label raw reads with: the IO's internal (or else
+  # external) encoding, the encoding of a StringIO's String, whatever
+  # encoding() reports, or ASCII-8BIT when none of those are available.
+  def raw_encoding
+    if @io.respond_to? :internal_encoding
+      @io.internal_encoding || @io.external_encoding
+    elsif @io.is_a? StringIO
+      @io.string.encoding
+    elsif @io.respond_to? :encoding
+      @io.encoding
+    else
+      Encoding::ASCII_8BIT
+    end
+  end
+
+  # Reads +bytes+ from @io and tags the result with raw_encoding().
+  def read_io(bytes)
+    @io.read(bytes).force_encoding(raw_encoding)
+  end
+end
+
+# Another name for CSV::instance().
+def CSV(*args, &block)
+  CSV.instance(*args, &block)
+end
+
+class Array
+  # Equivalent to CSV::generate_line(self, options).
+  def to_csv(options = Hash.new)
+    CSV.generate_line(self, options)
+  end
+end
+
+class String
+  # Equivalent to CSV::parse_line(self, options).
+  def parse_csv(options = Hash.new)
+    CSV.parse_line(self, options)
+  end
+end
diff --git a/lib/date.rb b/lib/date.rb
new file mode 100644
index 0000000..2c97925
--- /dev/null
+++ b/lib/date.rb
@@ -0,0 +1,1834 @@
+#
+# date.rb - date and time library
+#
+# Author: Tadayoshi Funaba 1998-2008
+#
+# Documentation: William Webber
+#
+#--
+# $Id: date.rb,v 2.37 2008-01-17 20:16:31+09 tadf Exp $
+#++
+#
+# == Overview
+#
+# This file provides two classes for working with
+# dates and times.
+#
+# The first class, Date, represents dates.
+# It works with years, months, weeks, and days. +# See the Date class documentation for more details. +# +# The second, DateTime, extends Date to include hours, +# minutes, seconds, and fractions of a second. It +# provides basic support for time zones. See the +# DateTime class documentation for more details. +# +# === Ways of calculating the date. +# +# In common usage, the date is reckoned in years since or +# before the Common Era (CE/BCE, also known as AD/BC), then +# as a month and day-of-the-month within the current year. +# This is known as the *Civil* *Date*, and abbreviated +# as +civil+ in the Date class. +# +# Instead of year, month-of-the-year, and day-of-the-month, +# the date can also be reckoned in terms of year and +# day-of-the-year. This is known as the *Ordinal* *Date*, +# and is abbreviated as +ordinal+ in the Date class. (Note +# that referring to this as the Julian date is incorrect.) +# +# The date can also be reckoned in terms of year, week-of-the-year, +# and day-of-the-week. This is known as the *Commercial* +# *Date*, and is abbreviated as +commercial+ in the +# Date class. The commercial week runs Monday (day-of-the-week +# 1) to Sunday (day-of-the-week 7), in contrast to the civil +# week which runs Sunday (day-of-the-week 0) to Saturday +# (day-of-the-week 6). The first week of the commercial year +# starts on the Monday on or before January 1, and the commercial +# year itself starts on this Monday, not January 1. +# +# For scientific purposes, it is convenient to refer to a date +# simply as a day count, counting from an arbitrary initial +# day. The date first chosen for this was January 1, 4713 BCE. +# A count of days from this date is the *Julian* *Day* *Number* +# or *Julian* *Date*, which is abbreviated as +jd+ in the +# Date class. This is in local time, and counts from midnight +# on the initial day. The stricter usage is in UTC, and counts +# from midday on the initial day. 
This is referred to in the +# Date class as the *Astronomical* *Julian* *Day* *Number*, and +# abbreviated as +ajd+. In the Date class, the Astronomical +# Julian Day Number includes fractional days. +# +# Another absolute day count is the *Modified* *Julian* *Day* +# *Number*, which takes November 17, 1858 as its initial day. +# This is abbreviated as +mjd+ in the Date class. There +# is also an *Astronomical* *Modified* *Julian* *Day* *Number*, +# which is in UTC and includes fractional days. This is +# abbreviated as +amjd+ in the Date class. Like the Modified +# Julian Day Number (and unlike the Astronomical Julian +# Day Number), it counts from midnight. +# +# Alternative calendars such as the Chinese Lunar Calendar, +# the Islamic Calendar, or the French Revolutionary Calendar +# are not supported by the Date class; nor are calendars that +# are based on an Era different from the Common Era, such as +# the Japanese Imperial Calendar or the Republic of China +# Calendar. +# +# === Calendar Reform +# +# The standard civil year is 365 days long. However, the +# solar year is fractionally longer than this. To account +# for this, a *leap* *year* is occasionally inserted. This +# is a year with 366 days, the extra day falling on February 29. +# In the early days of the civil calendar, every fourth +# year without exception was a leap year. This way of +# reckoning leap years is the *Julian* *Calendar*. +# +# However, the solar year is marginally shorter than 365 1/4 +# days, and so the *Julian* *Calendar* gradually ran slow +# over the centuries. To correct this, every 100th year +# (but not every 400th year) was excluded as a leap year. +# This way of reckoning leap years, which we use today, is +# the *Gregorian* *Calendar*. +# +# The Gregorian Calendar was introduced at different times +# in different regions. The day on which it was introduced +# for a particular region is the *Day* *of* *Calendar* +# *Reform* for that region. 
This is abbreviated as +sg+ +# (for Start of Gregorian calendar) in the Date class. +# +# Two such days are of particular +# significance. The first is October 15, 1582, which was +# the Day of Calendar Reform for Italy and most Catholic +# countries. The second is September 14, 1752, which was +# the Day of Calendar Reform for England and its colonies +# (including what is now the United States). These two +# dates are available as the constants Date::ITALY and +# Date::ENGLAND, respectively. (By comparison, Germany and +# Holland, less Catholic than Italy but less stubborn than +# England, changed over in 1698; Sweden in 1753; Russia not +# till 1918, after the Revolution; and Greece in 1923. Many +# Orthodox churches still use the Julian Calendar. A complete +# list of Days of Calendar Reform can be found at +# http://www.polysyllabic.com/GregConv.html.) +# +# Switching from the Julian to the Gregorian calendar +# involved skipping a number of days to make up for the +# accumulated lag, and the later the switch was (or is) +# done, the more days need to be skipped. So in 1582 in Italy, +# 4th October was followed by 15th October, skipping 10 days; in 1752 +# in England, 2nd September was followed by 14th September, skipping +# 11 days; and if I decided to switch from Julian to Gregorian +# Calendar this midnight, I would go from 27th July 2003 (Julian) +# today to 10th August 2003 (Gregorian) tomorrow, skipping +# 13 days. The Date class is aware of this gap, and a supposed +# date that would fall in the middle of it is regarded as invalid. +# +# The Day of Calendar Reform is relevant to all date representations +# involving years. It is not relevant to the Julian Day Numbers, +# except for converting between them and year-based representations. +# +# In the Date and DateTime classes, the Day of Calendar Reform or +# +sg+ can be specified a number of ways. First, it can be as +# the Julian Day Number of the Day of Calendar Reform. 
Second, +# it can be using the constants Date::ITALY or Date::ENGLAND; these +# are in fact the Julian Day Numbers of the Day of Calendar Reform +# of the respective regions. Third, it can be as the constant +# Date::JULIAN, which means to always use the Julian Calendar. +# Finally, it can be as the constant Date::GREGORIAN, which means +# to always use the Gregorian Calendar. +# +# Note: in the Julian Calendar, New Years Day was March 25. The +# Date class does not follow this convention. +# +# === Time Zones +# +# DateTime objects support a simple representation +# of time zones. Time zones are represented as an offset +# from UTC, as a fraction of a day. This offset is +# how much local time is later (or earlier) than UTC. +# UTC offset 0 is centred on England (also known as GMT). +# As you travel east, the offset increases until you +# reach the dateline in the middle of the Pacific Ocean; +# as you travel west, the offset decreases. This offset +# is abbreviated as +of+ in the Date class. +# +# This simple representation of time zones does not take +# into account the common practice of Daylight Savings +# Time or Summer Time. +# +# Most DateTime methods return the date and the +# time in local time. The two exceptions are +# #ajd() and #amjd(), which return the date and time +# in UTC time, including fractional days. +# +# The Date class does not support time zone offsets, in that +# there is no way to create a Date object with a time zone. +# However, methods of the Date class when used by a +# DateTime instance will use the time zone offset of this +# instance. +# +# == Examples of use +# +# === Print out the date of every Sunday between two dates. +# +# def print_sundays(d1, d2) +# d1 +=1 while (d1.wday != 0) +# d1.step(d2, 7) do |date| +# puts "#{Date::MONTHNAMES[date.mon]} #{date.day}" +# end +# end +# +# print_sundays(Date::civil(2003, 4, 8), Date::civil(2003, 5, 23)) +# +# === Calculate how many seconds to go till midnight on New Year's Day.
+# +# def secs_to_new_year(now = DateTime::now()) +# new_year = DateTime.new(now.year + 1, 1, 1) +# dif = new_year - now +# hours, mins, secs, ignore_fractions = Date::day_fraction_to_time(dif) +# return hours * 60 * 60 + mins * 60 + secs +# end +# +# puts secs_to_new_year() + +require 'date/format' + +# Class representing a date. +# +# See the documentation to the file date.rb for an overview. +# +# Internally, the date is represented as an Astronomical +# Julian Day Number, +ajd+. The Day of Calendar Reform, +sg+, is +# also stored, for conversions to other date formats. (There +# is also an +of+ field for a time zone offset, but this +# is only for the use of the DateTime subclass.) +# +# A new Date object is created using one of the object creation +# class methods named after the corresponding date format, and the +# arguments appropriate to that date format; for instance, +# Date::civil() (aliased to Date::new()) with year, month, +# and day-of-month, or Date::ordinal() with year and day-of-year. +# All of these object creation class methods also take the +# Day of Calendar Reform as an optional argument. +# +# Date objects are immutable once created. +# +# Once a Date has been created, date values +# can be retrieved for the different date formats supported +# using instance methods. For instance, #mon() gives the +# Civil month, #cwday() gives the Commercial day of the week, +# and #yday() gives the Ordinal day of the year. Date values +# can be retrieved in any format, regardless of what format +# was used to create the Date instance. +# +# The Date class includes the Comparable module, allowing +# date objects to be compared and sorted, ranges of dates +# to be created, and so forth. +class Date + + include Comparable + + # Full month names, in English. Months count from 1 to 12; a + # month's numerical representation indexed into this array + # gives the name of that month (hence the first element is nil). 
+ MONTHNAMES = [nil] + %w(January February March April May June July + August September October November December) + + # Full names of days of the week, in English. Days of the week + # count from 0 to 6 (except in the commercial week); a day's numerical + # representation indexed into this array gives the name of that day. + DAYNAMES = %w(Sunday Monday Tuesday Wednesday Thursday Friday Saturday) + + # Abbreviated month names, in English. + ABBR_MONTHNAMES = [nil] + %w(Jan Feb Mar Apr May Jun + Jul Aug Sep Oct Nov Dec) + + # Abbreviated day names, in English. + ABBR_DAYNAMES = %w(Sun Mon Tue Wed Thu Fri Sat) + + [MONTHNAMES, DAYNAMES, ABBR_MONTHNAMES, ABBR_DAYNAMES].each do |xs| + xs.each{|x| x.freeze unless x.nil?}.freeze + end + + class Infinity < Numeric # :nodoc: + + include Comparable + + def initialize(d=1) @d = d <=> 0 end + + def d() @d end + + protected :d + + def zero? () false end + def finite? () false end + def infinite? () d.nonzero? end + def nan? () d.zero? end + + def abs() self.class.new end + + def -@ () self.class.new(-d) end + def +@ () self.class.new(+d) end + + def <=> (other) + case other + when Infinity; return d <=> other.d + when Numeric; return d + else + begin + l, r = other.coerce(self) + return l <=> r + rescue NoMethodError + end + end + nil + end + + def coerce(other) + case other + when Numeric; return -d, d + else + super + end + end + + end + + # The Julian Day Number of the Day of Calendar Reform for Italy + # and the Catholic countries. + ITALY = 2299161 # 1582-10-15 + + # The Julian Day Number of the Day of Calendar Reform for England + # and her Colonies. + ENGLAND = 2361222 # 1752-09-14 + + # A constant used to indicate that a Date should always use the + # Julian calendar. + JULIAN = Infinity.new + + # A constant used to indicate that a Date should always use the + # Gregorian calendar. 
+ GREGORIAN = -Infinity.new + + HALF_DAYS_IN_DAY = Rational(1, 2) # :nodoc: + HOURS_IN_DAY = Rational(1, 24) # :nodoc: + MINUTES_IN_DAY = Rational(1, 1440) # :nodoc: + SECONDS_IN_DAY = Rational(1, 86400) # :nodoc: + MILLISECONDS_IN_DAY = Rational(1, 86400*10**3) # :nodoc: + NANOSECONDS_IN_DAY = Rational(1, 86400*10**9) # :nodoc: + MILLISECONDS_IN_SECOND = Rational(1, 10**3) # :nodoc: + NANOSECONDS_IN_SECOND = Rational(1, 10**9) # :nodoc: + + MJD_EPOCH_IN_AJD = Rational(4800001, 2) # 1858-11-17 # :nodoc: + UNIX_EPOCH_IN_AJD = Rational(4881175, 2) # 1970-01-01 # :nodoc: + MJD_EPOCH_IN_CJD = 2400001 # :nodoc: + UNIX_EPOCH_IN_CJD = 2440588 # :nodoc: + LD_EPOCH_IN_CJD = 2299160 # :nodoc: + + t = Module.new do + + private + + def find_fdoy(y, sg) # :nodoc: + j = nil + 1.upto(31) do |d| + break if j = _valid_civil?(y, 1, d, sg) + end + j + end + + def find_ldoy(y, sg) # :nodoc: + j = nil + 31.downto(1) do |d| + break if j = _valid_civil?(y, 12, d, sg) + end + j + end + + def find_fdom(y, m, sg) # :nodoc: + j = nil + 1.upto(31) do |d| + break if j = _valid_civil?(y, m, d, sg) + end + j + end + + def find_ldom(y, m, sg) # :nodoc: + j = nil + 31.downto(1) do |d| + break if j = _valid_civil?(y, m, d, sg) + end + j + end + + # Convert an Ordinal Date to a Julian Day Number. + # + # +y+ and +d+ are the year and day-of-year to convert. + # +sg+ specifies the Day of Calendar Reform. + # + # Returns the corresponding Julian Day Number. + def ordinal_to_jd(y, d, sg=GREGORIAN) # :nodoc: + find_fdoy(y, sg) + d - 1 + end + + # Convert a Julian Day Number to an Ordinal Date. + # + # +jd+ is the Julian Day Number to convert. + # +sg+ specifies the Day of Calendar Reform. + # + # Returns the corresponding Ordinal Date as + # [year, day_of_year] + def jd_to_ordinal(jd, sg=GREGORIAN) # :nodoc: + y = jd_to_civil(jd, sg)[0] + j = find_fdoy(y, sg) + doy = jd - j + 1 + return y, doy + end + + # Convert a Civil Date to a Julian Day Number. 
+    # +y+, +m+, and +d+ are the year, month, and day of the
+    # month.  +sg+ specifies the Day of Calendar Reform.
+    #
+    # Returns the corresponding Julian Day Number.
+    def civil_to_jd(y, m, d, sg=GREGORIAN) # :nodoc:
+      # January and February are counted as months 13 and 14 of the
+      # previous year
+      if m <= 2
+        y -= 1
+        m += 12
+      end
+      a = (y / 100.0).floor
+      # century-based correction term; it is taken back out below when
+      # the result falls before +sg+
+      b = 2 - a + (a / 4.0).floor
+      jd = (365.25 * (y + 4716)).floor +
+        (30.6001 * (m + 1)).floor +
+        d + b - 1524
+      if jd < sg
+        jd -= b
+      end
+      jd
+    end
+
+    # Convert a Julian Day Number to a Civil Date.  +jd+ is
+    # the Julian Day Number.  +sg+ specifies the Day of
+    # Calendar Reform.
+    #
+    # Returns the corresponding [year, month, day_of_month]
+    # as a three-element array.
+    def jd_to_civil(jd, sg=GREGORIAN) # :nodoc:
+      # day numbers before +sg+ are used as they are; later ones first
+      # receive a century-based adjustment
+      if jd < sg
+        a = jd
+      else
+        x = ((jd - 1867216.25) / 36524.25).floor
+        a = jd + 1 + x - (x / 4.0).floor
+      end
+      b = a + 1524
+      c = ((b - 122.1) / 365.25).floor
+      d = (365.25 * c).floor
+      e = ((b - d) / 30.6001).floor
+      dom = b - d - (30.6001 * e).floor
+      # e runs one ahead of the month number; values above 13 wrap into
+      # January/February of the following year
+      if e <= 13
+        m = e - 1
+        y = c - 4716
+      else
+        m = e - 13
+        y = c - 4715
+      end
+      return y, m, dom
+    end
+
+    # Convert a Commercial Date to a Julian Day Number.
+    #
+    # +y+, +w+, and +d+ are the (commercial) year, week of the year,
+    # and day of the week of the Commercial Date to convert.
+    # +sg+ specifies the Day of Calendar Reform.
+    def commercial_to_jd(y, w, d, sg=GREGORIAN) # :nodoc:
+      # three days past the first day of the civil year
+      j = find_fdoy(y, sg) + 3
+      # step back to the multiple of 7 at or below j (presumably the
+      # Monday starting week 1 -- TODO confirm), then add whole weeks and
+      # the offset of the day within the week
+      (j - (((j - 1) + 1) % 7)) +
+        7 * (w - 1) +
+        (d - 1)
+    end
+
+    # Convert a Julian Day Number to a Commercial Date
+    #
+    # +jd+ is the Julian Day Number to convert.
+    # +sg+ specifies the Day of Calendar Reform.
+    #
+    # Returns the corresponding Commercial Date as
+    # [commercial_year, week_of_year, day_of_week]
+    def jd_to_commercial(jd, sg=GREGORIAN) # :nodoc:
+      a = jd_to_civil(jd - 3, sg)[0]
+      y = if jd >= commercial_to_jd(a + 1, 1, 1, sg) then a + 1 else a end
+      w = 1 + ((jd - commercial_to_jd(y, 1, 1, sg)) / 7).floor
+      d = (jd + 1) % 7
+      d = 7 if d == 0
+      return y, w, d
+    end
+
+    # Convert year +y+, week number +w+ and day-of-week +d+ to a
+    # Julian Day Number. +f+ shifts the weekday arithmetic; callers in
+    # this file pass 0 or 1 (presumably the weekday a week starts on;
+    # confirm against date/format.rb).
+    def weeknum_to_jd(y, w, d, f=0, sg=GREGORIAN) # :nodoc:
+      a = find_fdoy(y, sg) + 6
+      (a - ((a - f) + 1) % 7 - 7) + 7 * w + d
+    end
+
+    # Inverse of weeknum_to_jd: returns [year, week_number, day_of_week]
+    # for +jd+. Only the year is kept from the civil date.
+    def jd_to_weeknum(jd, f=0, sg=GREGORIAN) # :nodoc:
+      y, m, d = jd_to_civil(jd, sg)
+      a = find_fdoy(y, sg) + 6
+      w, d = (jd - (a - ((a - f) + 1) % 7) + 7).divmod(7)
+      return y, w, d
+    end
+
+    # Convert "the +n+-th weekday +k+ of month +m+ in year +y+" to a
+    # Julian Day Number. A positive +n+ counts on from the first day
+    # of the month; any other +n+ is applied relative to the last day
+    # of the month.
+    def nth_kday_to_jd(y, m, n, k, sg=GREGORIAN) # :nodoc:
+      j = if n > 0
+            find_fdom(y, m, sg) - 1
+          else
+            find_ldom(y, m, sg) + 7
+          end
+      (j - (((j - k) + 1) % 7)) + 7 * n
+    end
+
+    # Returns [year, month, n, day_of_week] for +jd+, where +n+ counts
+    # which occurrence of that weekday within the month +jd+ is.
+    def jd_to_nth_kday(jd, sg=GREGORIAN) # :nodoc:
+      y, m, d = jd_to_civil(jd, sg)
+      j = find_fdom(y, m, sg)
+      return y, m, ((jd - j) / 7).floor + 1, jd_to_wday(jd)
+    end
+
+    # Convert an Astronomical Julian Day Number to a (civil) Julian
+    # Day Number.
+    #
+    # +ajd+ is the Astronomical Julian Day Number to convert.
+    # +of+ is the offset from UTC as a fraction of a day (defaults to 0).
+    #
+    # Returns [day_number, fraction]: the quotient and remainder of
+    # (+ajd+ + +of+ + 1/2) divided by 1.
+    def ajd_to_jd(ajd, of=0) (ajd + of + HALF_DAYS_IN_DAY).divmod(1) end # :nodoc:
+
+    # Convert a (civil) Julian Day Number to an Astronomical Julian
+    # Day Number.
+    #
+    # +jd+ is the Julian Day Number to convert, and +fr+ is a
+    # fractional day.
+    # +of+ is the offset from UTC as a fraction of a day (defaults to 0).
+    #
+    # Returns the Astronomical Julian Day Number as a single
+    # numeric value.
+    def jd_to_ajd(jd, fr, of=0) jd + fr - of - HALF_DAYS_IN_DAY end # :nodoc:
+
+    # Convert a fractional day +fr+ to [hours, minutes, seconds,
+    # fraction_of_a_second]
+    def day_fraction_to_time(fr) # :nodoc:
+      ss, fr = fr.divmod(SECONDS_IN_DAY) # 4p
+      h, ss = ss.divmod(3600)
+      min, s = ss.divmod(60)
+      return h, min, s, fr * 86400
+    end
+
+    # Convert an +h+ hour, +min+ minutes, +s+ seconds period
+    # to a fractional day.
+    #
+    # The probe below checks whether Rational() accepts a Rational
+    # argument. If it raises, a fallback is defined that only hands
+    # Integers to Rational() and uses to_r for everything else.
+    begin
+      Rational(Rational(1, 2), 2) # a challenge
+
+      def time_to_day_fraction(h, min, s)
+        Rational(h * 3600 + min * 60 + s, 86400) # 4p
+      end
+    rescue
+      def time_to_day_fraction(h, min, s)
+        if Integer === h && Integer === min && Integer === s
+          Rational(h * 3600 + min * 60 + s, 86400) # 4p
+        else
+          (h * 3600 + min * 60 + s).to_r/86400 # 4p
+        end
+      end
+    end
+
+    # Convert an Astronomical Modified Julian Day Number to an
+    # Astronomical Julian Day Number.
+    def amjd_to_ajd(amjd) amjd + MJD_EPOCH_IN_AJD end # :nodoc:
+
+    # Convert an Astronomical Julian Day Number to an
+    # Astronomical Modified Julian Day Number.
+    def ajd_to_amjd(ajd) ajd - MJD_EPOCH_IN_AJD end # :nodoc:
+
+    # Convert a Modified Julian Day Number to a Julian
+    # Day Number.
+    def mjd_to_jd(mjd) mjd + MJD_EPOCH_IN_CJD end # :nodoc:
+
+    # Convert a Julian Day Number to a Modified Julian Day
+    # Number.
+    def jd_to_mjd(jd) jd - MJD_EPOCH_IN_CJD end # :nodoc:
+
+    # Convert a count of the number of days since the adoption
+    # of the Gregorian Calendar (in Italy) to a Julian Day Number.
+    def ld_to_jd(ld) ld + LD_EPOCH_IN_CJD end # :nodoc:
+
+    # Convert a Julian Day Number to the number of days since
+    # the adoption of the Gregorian Calendar (in Italy).
+    def jd_to_ld(jd) jd - LD_EPOCH_IN_CJD end # :nodoc:
+
+    # Convert a Julian Day Number to the day of the week.
+    #
+    # Sunday is day-of-week 0; Saturday is day-of-week 6.
+    def jd_to_wday(jd) (jd + 1) % 7 end # :nodoc:
+
+    # Is +jd+ a valid Julian Day Number?
+    #
+    # If it is, returns it. In fact, any value is treated as a valid
+    # Julian Day Number.
+    def _valid_jd? (jd, sg=GREGORIAN) jd end # :nodoc:
+
+    # Do the year +y+ and day-of-year +d+ make a valid Ordinal Date?
+    # Returns the corresponding Julian Day Number if they do, or
+    # nil if they don't.
+    #
+    # +d+ can be a negative number, in which case it counts backwards
+    # from the end of the year (-1 being the last day of the year).
+    # No year wraparound is performed, however, so valid values of
+    # +d+ are -365 .. -1, 1 .. 365 on a non-leap-year,
+    # -366 .. -1, 1 .. 366 on a leap year.
+    # A date falling in the period skipped in the Day of Calendar Reform
+    # adjustment is not valid.
+    #
+    # +sg+ specifies the Day of Calendar Reform.
+    def _valid_ordinal? (y, d, sg=GREGORIAN) # :nodoc:
+      if d < 0
+        j = find_ldoy(y, sg)
+        ny, nd = jd_to_ordinal(j + d + 1, sg)
+        return unless ny == y
+        d = nd
+      end
+      jd = ordinal_to_jd(y, d, sg)
+      # Round-trip check: the date is valid only if converting back
+      # yields the same year and day-of-year.
+      return unless [y, d] == jd_to_ordinal(jd, sg)
+      jd
+    end
+
+    # Do year +y+, month +m+, and day-of-month +d+ make a
+    # valid Civil Date? Returns the corresponding Julian
+    # Day Number if they do, nil if they don't.
+    #
+    # +m+ and +d+ can be negative, in which case they count
+    # backwards from the end of the year and the end of the
+    # month respectively. No wraparound is performed, however,
+    # and invalid values make this method return nil (the public
+    # factory methods turn that into an ArgumentError).
+    # A date falling in the period skipped in the Day of Calendar
+    # Reform adjustment is not valid.
+    #
+    # +sg+ specifies the Day of Calendar Reform.
+    def _valid_civil? (y, m, d, sg=GREGORIAN) # :nodoc:
+      if m < 0
+        m += 13
+      end
+      if d < 0
+        j = find_ldom(y, m, sg)
+        ny, nm, nd = jd_to_civil(j + d + 1, sg)
+        return unless [ny, nm] == [y, m]
+        d = nd
+      end
+      jd = civil_to_jd(y, m, d, sg)
+      # Round-trip check: the date is valid only if converting back
+      # yields the same year, month and day.
+      return unless [y, m, d] == jd_to_civil(jd, sg)
+      jd
+    end
+
+    # Do year +y+, week-of-year +w+, and day-of-week +d+ make a
+    # valid Commercial Date? Returns the corresponding Julian
+    # Day Number if they do, nil if they don't.
+    #
+    # Monday is day-of-week 1; Sunday is day-of-week 7.
+    #
+    # +w+ and +d+ can be negative, in which case they count
+    # backwards from the end of the year and the end of the
+    # week respectively. No wraparound is performed, however,
+    # and invalid values make this method return nil (the public
+    # factory methods turn that into an ArgumentError).
+    # A date falling in the period skipped in the Day of Calendar
+    # Reform adjustment is not valid.
+    #
+    # +sg+ specifies the Day of Calendar Reform.
+    def _valid_commercial? (y, w, d, sg=GREGORIAN) # :nodoc:
+      if d < 0
+        d += 8
+      end
+      if w < 0
+        ny, nw, nd =
+          jd_to_commercial(commercial_to_jd(y + 1, 1, 1, sg) + w * 7, sg)
+        return unless ny == y
+        w = nw
+      end
+      jd = commercial_to_jd(y, w, d, sg)
+      return unless [y, w, d] == jd_to_commercial(jd, sg)
+      jd
+    end
+
+    # Do year +y+, week number +w+ and day-of-week +d+ (with week
+    # parameter +f+, see weeknum_to_jd) make a valid date? Returns the
+    # Julian Day Number if they do, nil if they don't. Negative +d+
+    # and +w+ are first mapped onto non-negative equivalents.
+    def _valid_weeknum? (y, w, d, f, sg=GREGORIAN) # :nodoc:
+      if d < 0
+        d += 7
+      end
+      if w < 0
+        ny, nw, nd, nf =
+          jd_to_weeknum(weeknum_to_jd(y + 1, 1, f, f, sg) + w * 7, f, sg)
+        return unless ny == y
+        w = nw
+      end
+      jd = weeknum_to_jd(y, w, d, f, sg)
+      return unless [y, w, d] == jd_to_weeknum(jd, f, sg)
+      jd
+    end
+
+    # Is "the +n+-th weekday +k+ of month +m+ in year +y+" a valid
+    # date? Returns the Julian Day Number if it is, nil if it is not.
+    # Negative +k+ and +n+ are first mapped onto non-negative
+    # equivalents.
+    def _valid_nth_kday? (y, m, n, k, sg=GREGORIAN) # :nodoc:
+      if k < 0
+        k += 7
+      end
+      if n < 0
+        ny, nm = (y * 12 + m).divmod(12)
+        nm, = (nm + 1) .divmod(1)
+        ny, nm, nn, nk =
+          jd_to_nth_kday(nth_kday_to_jd(ny, nm, 1, k, sg) + n * 7, sg)
+        return unless [ny, nm] == [y, m]
+        n = nn
+      end
+      jd = nth_kday_to_jd(y, m, n, k, sg)
+      return unless [y, m, n, k] == jd_to_nth_kday(jd, sg)
+      jd
+    end
+
+    # Do hour +h+, minute +min+, and second +s+ constitute a valid time?
+    #
+    # If they do, returns their value as a fraction of a day. If not,
+    # returns nil.
+    #
+    # The 24-hour clock is used. Negative values of +h+, +min+, and
+    # +s+ are treated as counting backwards from the end of the
+    # next larger unit (e.g. a +min+ of -2 is treated as 58). No
+    # wraparound is performed. 24:00:00 is accepted as well.
+    def _valid_time? (h, min, s) # :nodoc:
+      h += 24 if h < 0
+      min += 60 if min < 0
+      s += 60 if s < 0
+      return unless ((0...24) === h &&
+                     (0...60) === min &&
+                     (0...60) === s) ||
+                    (24 == h &&
+                      0 == min &&
+                      0 == s)
+      time_to_day_fraction(h, min, s)
+    end
+
+  end
+
+  extend t
+  include t
+
+  # Is a year a leap year in the Julian calendar?
+  #
+  # All years divisible by 4 are leap years in the Julian calendar.
+  def self.julian_leap? (y) y % 4 == 0 end
+
+  # Is a year a leap year in the Gregorian calendar?
+  #
+  # All years divisible by 4 are leap years in the Gregorian calendar,
+  # except for years divisible by 100 and not by 400.
+  def self.gregorian_leap? (y) y % 4 == 0 && y % 100 != 0 || y % 400 == 0 end
+
+  class << self; alias_method :leap?, :gregorian_leap? end
+  # Keep the original constructor reachable as new!; new itself is
+  # re-aliased to civil further down.
+  class << self; alias_method :new!, :new end
+
+  # The valid_*? predicates below are boolean wrappers around the
+  # corresponding _valid_*? helpers.
+  def self.valid_jd? (jd, sg=ITALY)
+    !!_valid_jd?(jd, sg)
+  end
+
+  def self.valid_ordinal? (y, d, sg=ITALY)
+    !!_valid_ordinal?(y, d, sg)
+  end
+
+  def self.valid_civil? (y, m, d, sg=ITALY)
+    !!_valid_civil?(y, m, d, sg)
+  end
+
+  class << self; alias_method :valid_date?, :valid_civil? end
+
+  def self.valid_commercial? (y, w, d, sg=ITALY)
+    !!_valid_commercial?(y, w, d, sg)
+  end
+
+  def self.valid_weeknum? (y, w, d, f, sg=ITALY) # :nodoc:
+    !!_valid_weeknum?(y, w, d, f, sg)
+  end
+
+  private_class_method :valid_weeknum?
+
+  def self.valid_nth_kday? (y, m, n, k, sg=ITALY) # :nodoc:
+    !!_valid_nth_kday?(y, m, n, k, sg)
+  end
+
+  private_class_method :valid_nth_kday?
+
+  def self.valid_time? (h, min, s) # :nodoc:
+    !!_valid_time?(h, min, s)
+  end
+
+  private_class_method :valid_time?
+
+  # Create a new Date object from a Julian Day Number.
+  #
+  # +jd+ is the Julian Day Number; if not specified, it defaults to
+  # 0.
+  # +sg+ specifies the Day of Calendar Reform.
+  def self.jd(jd=0, sg=ITALY)
+    jd = _valid_jd?(jd, sg)
+    new!(jd_to_ajd(jd, 0, 0), 0, sg)
+  end
+
+  # Create a new Date object from an Ordinal Date, specified
+  # by year +y+ and day-of-year +d+. +d+ can be negative,
+  # in which case it counts backwards from the end of the year.
+  # No year wraparound is performed, however. An invalid
+  # value for +d+ results in an ArgumentError being raised.
+  #
+  # +y+ defaults to -4712, and +d+ to 1; this is Julian Day
+  # Number day 0.
+  #
+  # +sg+ specifies the Day of Calendar Reform.
+  def self.ordinal(y=-4712, d=1, sg=ITALY)
+    unless jd = _valid_ordinal?(y, d, sg)
+      raise ArgumentError, 'invalid date'
+    end
+    new!(jd_to_ajd(jd, 0, 0), 0, sg)
+  end
+
+  # Create a new Date object for the Civil Date specified by
+  # year +y+, month +m+, and day-of-month +d+.
+  #
+  # +m+ and +d+ can be negative, in which case they count
+  # backwards from the end of the year and the end of the
+  # month respectively. No wraparound is performed, however,
+  # and invalid values cause an ArgumentError to be raised.
+  #
+  # +y+ defaults to -4712, +m+ to 1, and +d+ to 1; this is
+  # Julian Day Number day 0.
+  #
+  # +sg+ specifies the Day of Calendar Reform.
+  def self.civil(y=-4712, m=1, d=1, sg=ITALY)
+    unless jd = _valid_civil?(y, m, d, sg)
+      raise ArgumentError, 'invalid date'
+    end
+    new!(jd_to_ajd(jd, 0, 0), 0, sg)
+  end
+
+  class << self; alias_method :new, :civil end
+
+  # Create a new Date object for the Commercial Date specified by
+  # year +y+, week-of-year +w+, and day-of-week +d+.
+  #
+  # Monday is day-of-week 1; Sunday is day-of-week 7.
+  #
+  # +w+ and +d+ can be negative, in which case they count
+  # backwards from the end of the year and the end of the
+  # week respectively. No wraparound is performed, however,
+  # and invalid values cause an ArgumentError to be raised.
+  #
+  # +y+ defaults to -4712, +w+ to 1, and +d+ to 1; this is
+  # Julian Day Number day 0.
+  #
+  # +sg+ specifies the Day of Calendar Reform.
+  def self.commercial(y=-4712, w=1, d=1, sg=ITALY)
+    unless jd = _valid_commercial?(y, w, d, sg)
+      raise ArgumentError, 'invalid date'
+    end
+    new!(jd_to_ajd(jd, 0, 0), 0, sg)
+  end
+
+  # Create a new Date object from year +y+, week number +w+,
+  # day-of-week +d+ and week parameter +f+ (see weeknum_to_jd).
+  # Raises ArgumentError if the combination is not valid.
+  def self.weeknum(y=-4712, w=0, d=1, f=0, sg=ITALY)
+    unless jd = _valid_weeknum?(y, w, d, f, sg)
+      raise ArgumentError, 'invalid date'
+    end
+    new!(jd_to_ajd(jd, 0, 0), 0, sg)
+  end
+
+  private_class_method :weeknum
+
+  # Create a new Date object for the +n+-th weekday +k+ of month +m+
+  # in year +y+. Raises ArgumentError if the combination is not valid.
+  def self.nth_kday(y=-4712, m=1, n=1, k=1, sg=ITALY)
+    unless jd = _valid_nth_kday?(y, m, n, k, sg)
+      raise ArgumentError, 'invalid date'
+    end
+    new!(jd_to_ajd(jd, 0, 0), 0, sg)
+  end
+
+  private_class_method :nth_kday
+
+  # If +elem+ carries a :seconds entry, replace it with the :jd,
+  # :hour, :min, :sec and :sec_fraction entries derived from it
+  # (counted from UNIX_EPOCH_IN_CJD) and drop :seconds and :offset.
+  # A nil +elem+ is treated as an empty hash. Returns +elem+.
+  def self.rewrite_frags(elem) # :nodoc:
+    elem ||= {}
+    if seconds = elem[:seconds]
+      d, fr = seconds.divmod(86400)
+      h, fr = fr.divmod(3600)
+      min, fr = fr.divmod(60)
+      s, fr = fr.divmod(1)
+      elem[:jd] = UNIX_EPOCH_IN_CJD + d
+      elem[:hour] = h
+      elem[:min] = min
+      elem[:sec] = s
+      elem[:sec_fraction] = fr
+      elem.delete(:seconds)
+      elem.delete(:offset)
+    end
+    elem
+  end
+
+  private_class_method :rewrite_frags
+
+  # Fill in the fragments missing from +elem+.
+  #
+  # Each candidate pattern below is scored by how many of its keys
+  # are present in +elem+; the best-scoring one is kept (on a tie the
+  # pattern listed first is kept, because of the decreasing +i+).
+  # Leading fields missing from that pattern are taken from today's
+  # date, the remaining ones get fixed defaults. Time fields default
+  # to 0 and :sec is capped at 59. Returns +elem+.
+  def self.complete_frags(elem) # :nodoc:
+    i = 0
+    g = [[:time, [:hour, :min, :sec]],
+         [nil, [:jd]],
+         [:ordinal, [:year, :yday, :hour, :min, :sec]],
+         [:civil, [:year, :mon, :mday, :hour, :min, :sec]],
+         [:commercial, [:cwyear, :cweek, :cwday, :hour, :min, :sec]],
+         [:wday, [:wday, :hour, :min, :sec]],
+         [:wnum0, [:year, :wnum0, :wday, :hour, :min, :sec]],
+         [:wnum1, [:year, :wnum1, :wday, :hour, :min, :sec]],
+         [nil, [:cwyear, :cweek, :wday, :hour, :min, :sec]],
+         [nil, [:year, :wnum0, :cwday, :hour, :min, :sec]],
+         [nil, [:year, :wnum1, :cwday, :hour, :min, :sec]]].
+      collect{|k, a| e = elem.values_at(*a).compact; [k, a, e]}.
+      select{|k, a, e| e.size > 0}.
+      sort_by{|k, a, e| [e.size, i -= 1]}.last
+
+    d = nil
+
+    # g is [pattern_name, keys, values_present]; only complete when a
+    # named pattern was chosen and some of its keys are missing.
+    if g && g[0] && (g[1].size - g[2].size) != 0
+      d ||= Date.today
+
+      case g[0]
+      when :ordinal
+        elem[:year] ||= d.year
+        elem[:yday] ||= 1
+      when :civil
+        g[1].each do |e|
+          break if elem[e]
+          elem[e] = d.__send__(e)
+        end
+        elem[:mon] ||= 1
+        elem[:mday] ||= 1
+      when :commercial
+        g[1].each do |e|
+          break if elem[e]
+          elem[e] = d.__send__(e)
+        end
+        elem[:cweek] ||= 1
+        elem[:cwday] ||= 1
+      when :wday
+        elem[:jd] ||= (d - d.wday + elem[:wday]).jd
+      when :wnum0
+        g[1].each do |e|
+          break if elem[e]
+          elem[e] = d.__send__(e)
+        end
+        elem[:wnum0] ||= 0
+        elem[:wday] ||= 0
+      when :wnum1
+        g[1].each do |e|
+          break if elem[e]
+          elem[e] = d.__send__(e)
+        end
+        elem[:wnum1] ||= 0
+        elem[:wday] ||= 0
+      end
+    end
+
+    # A time-only input gets today's date, but only when building a
+    # DateTime (or a subclass of it).
+    if g && g[0] == :time
+      if self <= DateTime
+        d ||= Date.today
+        elem[:jd] ||= d.jd
+      end
+    end
+
+    elem[:hour] ||= 0
+    elem[:min] ||= 0
+    elem[:sec] ||= 0
+    elem[:sec] = [elem[:sec], 59].min
+
+    elem
+  end
+
+  private_class_method :complete_frags
+
+  # Try the date representations found in +elem+ one after another
+  # (jd, ordinal, civil, commercial, week number with f=0, week number
+  # with f=1) and return the Julian Day Number of the first one that
+  # is complete and valid. Returns nil if none qualifies.
+  def self.valid_date_frags?(elem, sg) # :nodoc:
+    catch :jd do
+      a = elem.values_at(:jd)
+      if a.all?
+        if jd = _valid_jd?(*(a << sg))
+          throw :jd, jd
+        end
+      end
+
+      a = elem.values_at(:year, :yday)
+      if a.all?
+        if jd = _valid_ordinal?(*(a << sg))
+          throw :jd, jd
+        end
+      end
+
+      a = elem.values_at(:year, :mon, :mday)
+      if a.all?
+        if jd = _valid_civil?(*(a << sg))
+          throw :jd, jd
+        end
+      end
+
+      a = elem.values_at(:cwyear, :cweek, :cwday)
+      # Fall back to :wday, mapping Sunday (0) to commercial day 7.
+      if a[2].nil? && elem[:wday]
+        a[2] = elem[:wday].nonzero? || 7
+      end
+      if a.all?
+        if jd = _valid_commercial?(*(a << sg))
+          throw :jd, jd
+        end
+      end
+
+      a = elem.values_at(:year, :wnum0, :wday)
+      if a[2].nil? && elem[:cwday]
+        a[2] = elem[:cwday] % 7
+      end
+      if a.all?
+        if jd = _valid_weeknum?(*(a << 0 << sg))
+          throw :jd, jd
+        end
+      end
+
+      a = elem.values_at(:year, :wnum1, :wday)
+      if a[2]
+        a[2] = (a[2] - 1) % 7
+      end
+      if a[2].nil? && elem[:cwday]
+        a[2] = (elem[:cwday] - 1) % 7
+      end
+      if a.all?
+        if jd = _valid_weeknum?(*(a << 1 << sg))
+          throw :jd, jd
+        end
+      end
+    end
+  end
+
+  private_class_method :valid_date_frags?
+
+  # Validate the :hour, :min and :sec entries of +elem+ with
+  # _valid_time? and return its result.
+  def self.valid_time_frags? (elem) # :nodoc:
+    h, min, s = elem.values_at(:hour, :min, :sec)
+    _valid_time?(h, min, s)
+  end
+
+  private_class_method :valid_time_frags?
+
+  # Build a Date from a hash of parsed fragments. Raises ArgumentError
+  # if the fragments do not describe a valid date.
+  def self.new_by_frags(elem, sg) # :nodoc:
+    elem = rewrite_frags(elem)
+    elem = complete_frags(elem)
+    unless jd = valid_date_frags?(elem, sg)
+      raise ArgumentError, 'invalid date'
+    end
+    new!(jd_to_ajd(jd, 0, 0), 0, sg)
+  end
+
+  private_class_method :new_by_frags
+
+  # Create a new Date object by parsing from a String
+  # according to a specified format.
+  #
+  # +str+ is a String holding a date representation.
+  # +fmt+ is the format that the date is in. See
+  # date/format.rb for details on supported formats.
+  #
+  # The default +str+ is '-4712-01-01', and the default
+  # +fmt+ is '%F', which means Year-Month-Day_of_Month.
+  # This gives Julian Day Number day 0.
+  #
+  # +sg+ specifies the Day of Calendar Reform.
+  #
+  # An ArgumentError will be raised if +str+ cannot be
+  # parsed.
+  def self.strptime(str='-4712-01-01', fmt='%F', sg=ITALY)
+    elem = _strptime(str, fmt)
+    new_by_frags(elem, sg)
+  end
+
+  # Create a new Date object by parsing from a String,
+  # without specifying the format.
+  #
+  # +str+ is a String holding a date representation.
+  # +comp+ specifies whether to interpret 2-digit years
+  # as 19XX (>= 69) or 20XX (< 69); the default is to do so.
+  # The method will attempt to parse a date from the String
+  # using various heuristics; see #_parse in date/format.rb
+  # for more details. If parsing fails, an ArgumentError
+  # will be raised.
+  #
+  # The default +str+ is '-4712-01-01'; this is Julian
+  # Day Number day 0.
+  #
+  # +sg+ specifies the Day of Calendar Reform.
+  def self.parse(str='-4712-01-01', comp=true, sg=ITALY)
+    elem = _parse(str, comp)
+    new_by_frags(elem, sg)
+  end
+
+  # The format-specific constructors below hand +str+ to the matching
+  # _xxx parser and build the Date from the fragments it returns.
+  def self.iso8601(str='-4712-01-01', sg=ITALY) # :nodoc:
+    elem = _iso8601(str)
+    new_by_frags(elem, sg)
+  end
+
+  def self.rfc3339(str='-4712-01-01T00:00:00+00:00', sg=ITALY) # :nodoc:
+    elem = _rfc3339(str)
+    new_by_frags(elem, sg)
+  end
+
+  def self.xmlschema(str='-4712-01-01', sg=ITALY) # :nodoc:
+    elem = _xmlschema(str)
+    new_by_frags(elem, sg)
+  end
+
+  def self.rfc2822(str='Mon, 1 Jan -4712 00:00:00 +0000', sg=ITALY) # :nodoc:
+    elem = _rfc2822(str)
+    new_by_frags(elem, sg)
+  end
+
+  class << self; alias_method :rfc822, :rfc2822 end
+
+  def self.httpdate(str='Mon, 01 Jan -4712 00:00:00 GMT', sg=ITALY) # :nodoc:
+    elem = _httpdate(str)
+    new_by_frags(elem, sg)
+  end
+
+  def self.jisx0301(str='-4712-01-01', sg=ITALY) # :nodoc:
+    elem = _jisx0301(str)
+    new_by_frags(elem, sg)
+  end
+
+  class << self
+
+    # Memoize the instance methods named in +ids+.
+    #
+    # Each method is renamed to a private alias and replaced by a
+    # wrapper that stores the result in @__ca__ under the object_id of
+    # the method name. The cache uses ||=, so a nil or false result is
+    # not retained and the original is called again next time.
+    def once(*ids) # :nodoc: -- restricted
+      for id in ids
+        module_eval <<-"end;"
+          alias_method :__#{id.object_id}__, :#{id.to_s}
+          private :__#{id.object_id}__
+          def #{id.to_s}(*args)
+            @__ca__[#{id.object_id}] ||= __#{id.object_id}__(*args)
+          end
+        end;
+      end
+    end
+
+    private :once
+
+  end
+
+  # *NOTE* this is the documentation for the method new!(). If
+  # you are reading this as the documentation for new(), that is
+  # because rdoc doesn't fully support the aliasing of the
+  # initialize() method.
+  # new() is in
+  # fact an alias for #civil(): read the documentation for that
+  # method instead.
+  #
+  # Create a new Date object.
+  #
+  # +ajd+ is the Astronomical Julian Day Number.
+  # +of+ is the offset from UTC as a fraction of a day.
+  # Both default to 0.
+  #
+  # +sg+ specifies the Day of Calendar Reform to use for this
+  # Date object.
+  #
+  # Using one of the factory methods such as Date::civil is
+  # generally easier and safer.
+  def initialize(ajd=0, of=0, sg=ITALY)
+    @ajd, @of, @sg = ajd, of, sg
+    # Per-instance cache used by the wrappers that +once+ generates.
+    @__ca__ = {}
+  end
+
+  # Get the date as an Astronomical Julian Day Number.
+  def ajd() @ajd end
+
+  # Get the date as an Astronomical Modified Julian Day Number.
+  def amjd() ajd_to_amjd(@ajd) end
+
+  once :amjd
+
+  # Get the date as a Julian Day Number.
+  def jd() ajd_to_jd(@ajd, @of)[0] end
+
+  # Get any fractional day part of the date.
+  def day_fraction() ajd_to_jd(@ajd, @of)[1] end
+
+  # Get the date as a Modified Julian Day Number.
+  def mjd() jd_to_mjd(jd) end
+
+  # Get the date as the number of days since the Day of Calendar
+  # Reform (in Italy and the Catholic countries).
+  def ld() jd_to_ld(jd) end
+
+  once :jd, :day_fraction, :mjd, :ld
+
+  # Get the date as a Civil Date, [year, month, day_of_month]
+  def civil() jd_to_civil(jd, @sg) end # :nodoc:
+
+  # Get the date as an Ordinal Date, [year, day_of_year]
+  def ordinal() jd_to_ordinal(jd, @sg) end # :nodoc:
+
+  # Get the date as a Commercial Date, [year, week_of_year, day_of_week]
+  def commercial() jd_to_commercial(jd, @sg) end # :nodoc:
+
+  # Get the date as [year, week_number, day_of_week], computed by
+  # jd_to_weeknum with f=0 and f=1 respectively.
+  def weeknum0() jd_to_weeknum(jd, 0, @sg) end # :nodoc:
+  def weeknum1() jd_to_weeknum(jd, 1, @sg) end # :nodoc:
+
+  once :civil, :ordinal, :commercial, :weeknum0, :weeknum1
+  private :civil, :ordinal, :commercial, :weeknum0, :weeknum1
+
+  # Get the year of this date.
+  def year() civil[0] end
+
+  # Get the day-of-the-year of this date.
+  #
+  # January 1 is day-of-the-year 1
+  def yday() ordinal[1] end
+
+  # Get the month of this date.
+  #
+  # January is month 1.
+  def mon() civil[1] end
+
+  # Get the day-of-the-month of this date.
+  def mday() civil[2] end
+
+  alias_method :month, :mon
+  alias_method :day, :mday
+
+  # Get the week number of this date as computed by weeknum0 and
+  # weeknum1 respectively.
+  def wnum0() weeknum0[1] end # :nodoc:
+  def wnum1() weeknum1[1] end # :nodoc:
+
+  private :wnum0, :wnum1
+
+  # Get the time of this date as [hours, minutes, seconds,
+  # fraction_of_a_second]
+  def time() day_fraction_to_time(day_fraction) end # :nodoc:
+
+  once :time
+  private :time
+
+  # Get the hour of this date.
+  def hour() time[0] end
+
+  # Get the minute of this date.
+  def min() time[1] end
+
+  # Get the second of this date.
+  def sec() time[2] end
+
+  # Get the fraction-of-a-second of this date.
+  def sec_fraction() time[3] end
+
+  alias_method :minute, :min
+  alias_method :second, :sec
+  alias_method :second_fraction, :sec_fraction
+
+  # The time accessors are private on Date.
+  private :hour, :min, :sec, :sec_fraction,
+          :minute, :second, :second_fraction
+
+  # Get the UTC offset of this date as formatted by strftime('%:z').
+  def zone() strftime('%:z') end
+
+  private :zone
+
+  # Get the commercial year of this date. See *Commercial* *Date*
+  # in the introduction for how this differs from the normal year.
+  def cwyear() commercial[0] end
+
+  # Get the commercial week of the year of this date.
+  def cweek() commercial[1] end
+
+  # Get the commercial day of the week of this date. Monday is
+  # commercial day-of-week 1; Sunday is commercial day-of-week 7.
+  def cwday() commercial[2] end
+
+  # Get the week day of this date. Sunday is day-of-week 0;
+  # Saturday is day-of-week 6.
+  def wday() jd_to_wday(jd) end
+
+  once :wday
+
+=begin
+  MONTHNAMES.each_with_index do |n, i|
+    if n
+      define_method(n.downcase + '?'){mon == i}
+    end
+  end
+=end
+
+  # Define one predicate per entry of DAYNAMES, named after the
+  # downcased day name plus '?', that compares wday with the entry's
+  # index.
+  DAYNAMES.each_with_index do |n, i|
+    define_method(n.downcase + '?'){wday == i}
+  end
+
+  # Is this date the +n+-th weekday +k+ of its month?
+  def nth_kday? (n, k)
+    k == wday && jd === nth_kday_to_jd(year, mon, n, k, start)
+  end
+
+  private :nth_kday?
+
+  # Is the current date old-style (Julian Calendar)?
+  def julian? () jd < @sg end
+
+  # Is the current date new-style (Gregorian Calendar)?
+  def gregorian? () !julian? end
+
+  once :julian?, :gregorian?
+
+  # Return the JULIAN or GREGORIAN constant of this object's class,
+  # depending on which side of the Day of Calendar Reform the date
+  # falls.
+  def fix_style # :nodoc:
+    if julian?
+    then self.class::JULIAN
+    else self.class::GREGORIAN end
+  end
+
+  private :fix_style
+
+  # Is this a leap year?
+  #
+  # Checks whether the day before March 1 is the 29th, using the
+  # calendar style returned by fix_style.
+  def leap?
+    jd_to_civil(civil_to_jd(year, 3, 1, fix_style) - 1,
+                fix_style)[-1] == 29
+  end
+
+  once :leap?
+
+  # When is the Day of Calendar Reform for this Date object?
+  def start() @sg end
+
+  # Create a copy of this Date object using a new Day of Calendar Reform.
+  def new_start(sg=self.class::ITALY) self.class.new!(@ajd, @of, sg) end
+
+  # Create a copy of this Date object that uses the Italian/Catholic
+  # Day of Calendar Reform.
+  def italy() new_start(self.class::ITALY) end
+
+  # Create a copy of this Date object that uses the English/Colonial
+  # Day of Calendar Reform.
+  def england() new_start(self.class::ENGLAND) end
+
+  # Create a copy of this Date object that always uses the Julian
+  # Calendar.
+  def julian() new_start(self.class::JULIAN) end
+
+  # Create a copy of this Date object that always uses the Gregorian
+  # Calendar.
+  def gregorian() new_start(self.class::GREGORIAN) end
+
+  # Get the offset from UTC as a fraction of a day.
+  def offset() @of end
+
+  # Create a copy of this object with the offset +of+. A String is
+  # converted with zone_to_diff; if that returns nil, 0 is used.
+  def new_offset(of=0)
+    if String === of
+      of = Rational(zone_to_diff(of) || 0, 86400)
+    end
+    self.class.new!(@ajd, of, @sg)
+  end
+
+  private :offset, :new_offset
+
+  # Return a new Date object that is +n+ days later than the
+  # current one.
+  #
+  # +n+ may be a negative value, in which case the new Date
+  # is earlier than the current one; however, #-() might be
+  # more intuitive.
+  #
+  # If +n+ is not a Numeric, a TypeError will be raised. In
+  # particular, two Dates cannot be added to each other.
+  def + (n)
+    case n
+    when Numeric; return self.class.new!(@ajd + n, @of, @sg)
+    end
+    raise TypeError, 'expected numeric'
+  end
+
+  # If +x+ is a Numeric value, create a new Date object that is
+  # +x+ days earlier than the current one.
+  #
+  # If +x+ is a Date, return the number of days between the
+  # two dates; or, more precisely, how many days later the current
+  # date is than +x+.
+  #
+  # If +x+ is neither Numeric nor a Date, a TypeError is raised.
+  def - (x)
+    case x
+    when Numeric; return self.class.new!(@ajd - x, @of, @sg)
+    when Date; return @ajd - x.ajd
+    end
+    raise TypeError, 'expected numeric or date'
+  end
+
+  # Compare this date with another date.
+  #
+  # +other+ can also be a Numeric value, in which case it is
+  # interpreted as an Astronomical Julian Day Number.
+  #
+  # Comparison is by Astronomical Julian Day Number, including
+  # fractional days. This means that both the time and the
+  # timezone offset are taken into account when comparing
+  # two DateTime instances. When comparing a DateTime instance
+  # with a Date instance, the time of the latter will be
+  # considered as falling on midnight UTC.
+  #
+  # Returns nil if +other+ is neither a Numeric nor a Date.
+  def <=> (other)
+    case other
+    when Numeric; return @ajd <=> other
+    when Date; return @ajd <=> other.ajd
+    end
+    nil
+  end
+
+  # The relationship operator for Date.
+  #
+  # Compares dates by Julian Day Number. When comparing
+  # two DateTime instances, or a DateTime with a Date,
+  # the instances will be regarded as equivalent if they
+  # fall on the same date in local time.
+  #
+  # Returns false if +other+ is neither a Numeric nor a Date.
+  def === (other)
+    case other
+    when Numeric; return jd == other
+    when Date; return jd == other.jd
+    end
+    false
+  end
+
+  # Return a new Date +n+ days after / before this one.
+  def next_day(n=1) self + n end
+  def prev_day(n=1) self - n end
+
+  # Return a new Date one day after this one.
+  def next() next_day end
+
+  alias_method :succ, :next
+
+  # Return a new Date object that is +n+ months later than
+  # the current one.
+  #
+  # If the day-of-the-month of the current Date is greater
+  # than the last day of the target month, the day-of-the-month
+  # of the returned Date will be the last day of the target month.
+  def >> (n)
+    y, m = (year * 12 + (mon - 1) + n).divmod(12)
+    m, = (m + 1) .divmod(1)
+    d = mday
+    # Step the day back until the target month accepts it.
+    d -= 1 until jd2 = _valid_civil?(y, m, d, @sg)
+    self + (jd2 - jd)
+  end
+
+  # Return a new Date object that is +n+ months earlier than
+  # the current one.
+  #
+  # If the day-of-the-month of the current Date is greater
+  # than the last day of the target month, the day-of-the-month
+  # of the returned Date will be the last day of the target month.
+  def << (n) self >> -n end
+
+  # Return a new Date +n+ months later / earlier than this one.
+  def next_month(n=1) self >> n end
+  def prev_month(n=1) self << n end
+
+  # Return a new Date +n+ years later / earlier than this one.
+  def next_year(n=1) self >> n * 12 end
+  def prev_year(n=1) self << n * 12 end
+
+  require 'enumerator'
+
+  # Step the current date forward +step+ days at a
+  # time (or backward, if +step+ is negative) until
+  # we reach +limit+ (inclusive), yielding the resultant
+  # date at each step.
+  #
+  # Without a block, an enumerator is returned instead.
+  def step(limit, step=1) # :yield: date
+=begin
+    if step.zero?
+      raise ArgumentError, "step can't be 0"
+    end
+=end
+    unless block_given?
+      return to_enum(:step, limit, step)
+    end
+    da = self
+    # step <=> 0 indexes the table: 1 selects '<=', -1 selects '>='
+    # (the last entry) and 0 selects '-'. The zero-step guard above is
+    # disabled, so for a zero step the loop condition is the value of
+    # da - limit rather than a comparison.
+    op = %w(- <= >=)[step <=> 0]
+    while da.__send__(op, limit)
+      yield da
+      da += step
+    end
+    self
+  end
+
+  # Step forward one day at a time until we reach +max+
+  # (inclusive), yielding each date as we go.
+  def upto(max, &block) # :yield: date
+    step(max, +1, &block)
+  end
+
+  # Step backward one day at a time until we reach +min+
+  # (inclusive), yielding each date as we go.
+  def downto(min, &block) # :yield: date
+    step(min, -1, &block)
+  end
+
+  # Is this Date equal to +other+?
+  #
+  # +other+ must be a Date object and represent the same date.
+  def eql? (other) Date === other && self == other end
+
+  # Calculate a hash value for this date.
+  def hash() @ajd.hash end
+
+  # Return internal object state as a programmer-readable string.
+  def inspect
+    format('#<%s: %s (%s,%s,%s)>', self.class, to_s, @ajd, @of, @sg)
+  end
+
+  # Return the date as a human-readable string.
+  #
+  # The format used is YYYY-MM-DD.
+  def to_s() format('%.4d-%02d-%02d', year, mon, mday) end # 4p
+
+  # Dump to Marshal format.
+  def marshal_dump() [@ajd, @of, @sg] end
+
+  # Load from Marshal format.
+  def marshal_load(a)
+    @ajd, @of, @sg, = a
+    # The memoization cache is not dumped; start with an empty one.
+    @__ca__ = {}
+  end
+
+end
+
+# Class representing a date and time.
+#
+# See the documentation to the file date.rb for an overview.
+#
+# DateTime objects are immutable once created.
+#
+# == Other methods.
+#
+# The following methods are defined in Date, but declared private
+# there. They are made public in DateTime. They are documented
+# here.
+#
+# === hour()
+#
+# Get the hour-of-the-day of the time. This is given
+# using the 24-hour clock, counting from midnight. The first
+# hour after midnight is hour 0; the last hour of the day is
+# hour 23.
+#
+# === min()
+#
+# Get the minute-of-the-hour of the time.
+#
+# === sec()
+#
+# Get the second-of-the-minute of the time.
+#
+# === sec_fraction()
+#
+# Get the fraction of a second of the time. This is returned as
+# a +Rational+.
+#
+# === zone()
+#
+# Get the time zone as a String. This is a representation of the
+# time offset such as "+1000", not the true time-zone name.
+#
+# === offset()
+#
+# Get the time zone offset as a fraction of a day. This is returned
+# as a +Rational+.
+#
+# === new_offset(of=0)
+#
+# Create a new DateTime object, identical to the current one, except
+# with a new time zone offset of +of+. +of+ is the new offset from
+# UTC as a fraction of a day.
+#
+class DateTime < Date
+
+  # Create a new DateTime object corresponding to the specified
+  # Julian Day Number +jd+ and hour +h+, minute +min+, second +s+.
+  #
+  # The 24-hour clock is used. Negative values of +h+, +min+, and
+  # +s+ are treated as counting backwards from the end of the
+  # next larger unit (e.g. a +min+ of -2 is treated as 58). No
+  # wraparound is performed. If an invalid time portion is specified,
+  # an ArgumentError is raised.
+  #
+  # +of+ is the offset from UTC as a fraction of a day (defaults to 0).
+  # +sg+ specifies the Day of Calendar Reform.
+  #
+  # All day/time values default to 0.
+  def self.jd(jd=0, h=0, min=0, s=0, of=0, sg=ITALY)
+    unless (jd = _valid_jd?(jd, sg)) &&
+           (fr = _valid_time?(h, min, s))
+      raise ArgumentError, 'invalid date'
+    end
+    # A String offset is converted with zone_to_diff; if that returns
+    # nil, 0 is used.
+    if String === of
+      of = Rational(zone_to_diff(of) || 0, 86400)
+    end
+    new!(jd_to_ajd(jd, fr, of), of, sg)
+  end
+
+  # Create a new DateTime object corresponding to the specified
+  # Ordinal Date and hour +h+, minute +min+, second +s+.
+  #
+  # The 24-hour clock is used. Negative values of +h+, +min+, and
+  # +s+ are treated as counting backwards from the end of the
+  # next larger unit (e.g. a +min+ of -2 is treated as 58). No
+  # wraparound is performed. If an invalid time portion is specified,
+  # an ArgumentError is raised.
+  #
+  # +of+ is the offset from UTC as a fraction of a day (defaults to 0).
+  # +sg+ specifies the Day of Calendar Reform.
+  #
+  # +y+ defaults to -4712, and +d+ to 1; this is Julian Day Number
+  # day 0. The time values default to 0.
+  def self.ordinal(y=-4712, d=1, h=0, min=0, s=0, of=0, sg=ITALY)
+    unless (jd = _valid_ordinal?(y, d, sg)) &&
+           (fr = _valid_time?(h, min, s))
+      raise ArgumentError, 'invalid date'
+    end
+    # A String offset is converted with zone_to_diff; if that returns
+    # nil, 0 is used.
+    if String === of
+      of = Rational(zone_to_diff(of) || 0, 86400)
+    end
+    new!(jd_to_ajd(jd, fr, of), of, sg)
+  end
+
+  # Create a new DateTime object corresponding to the specified
+  # Civil Date and hour +h+, minute +min+, second +s+.
+  #
+  # The 24-hour clock is used. Negative values of +h+, +min+, and
+  # +s+ are treated as counting backwards from the end of the
+  # next larger unit (e.g. a +min+ of -2 is treated as 58). No
+  # wraparound is performed. If an invalid time portion is specified,
+  # an ArgumentError is raised.
+  #
+  # +of+ is the offset from UTC as a fraction of a day (defaults to 0).
+  # +sg+ specifies the Day of Calendar Reform.
+  #
+  # +y+ defaults to -4712, +m+ to 1, and +d+ to 1; this is Julian Day
+  # Number day 0. The time values default to 0.
+ def self.civil(y=-4712, m=1, d=1, h=0, min=0, s=0, of=0, sg=ITALY) + unless (jd = _valid_civil?(y, m, d, sg)) && + (fr = _valid_time?(h, min, s)) + raise ArgumentError, 'invalid date' + end + if String === of + of = Rational(zone_to_diff(of) || 0, 86400) + end + new!(jd_to_ajd(jd, fr, of), of, sg) + end + + class << self; alias_method :new, :civil end + + # Create a new DateTime object corresponding to the specified + # Commercial Date and hour +h+, minute +min+, second +s+. + # + # The 24-hour clock is used. Negative values of +h+, +min+, and + # +sec+ are treating as counting backwards from the end of the + # next larger unit (e.g. a +min+ of -2 is treated as 58). No + # wraparound is performed. If an invalid time portion is specified, + # an ArgumentError is raised. + # + # +of+ is the offset from UTC as a fraction of a day (defaults to 0). + # +sg+ specifies the Day of Calendar Reform. + # + # +y+ defaults to -4712, +w+ to 1, and +d+ to 1; this is + # Julian Day Number day 0. + # The time values default to 0. 
+ def self.commercial(y=-4712, w=1, d=1, h=0, min=0, s=0, of=0, sg=ITALY) + unless (jd = _valid_commercial?(y, w, d, sg)) && + (fr = _valid_time?(h, min, s)) + raise ArgumentError, 'invalid date' + end + if String === of + of = Rational(zone_to_diff(of) || 0, 86400) + end + new!(jd_to_ajd(jd, fr, of), of, sg) + end + + def self.weeknum(y=-4712, w=0, d=1, f=0, h=0, min=0, s=0, of=0, sg=ITALY) # :nodoc: + unless (jd = _valid_weeknum?(y, w, d, f, sg)) && + (fr = _valid_time?(h, min, s)) + raise ArgumentError, 'invalid date' + end + if String === of + of = Rational(zone_to_diff(of) || 0, 86400) + end + new!(jd_to_ajd(jd, fr, of), of, sg) + end + + private_class_method :weeknum + + def self.nth_kday(y=-4712, m=1, n=1, k=1, h=0, min=0, s=0, of=0, sg=ITALY) # :nodoc: + unless (jd = _valid_nth_kday?(y, m, n, k, sg)) && + (fr = _valid_time?(h, min, s)) + raise ArgumentError, 'invalid date' + end + if String === of + of = Rational(zone_to_diff(of) || 0, 86400) + end + new!(jd_to_ajd(jd, fr, of), of, sg) + end + + private_class_method :nth_kday + + def self.new_by_frags(elem, sg) # :nodoc: + elem = rewrite_frags(elem) + elem = complete_frags(elem) + unless (jd = valid_date_frags?(elem, sg)) && + (fr = valid_time_frags?(elem)) + raise ArgumentError, 'invalid date' + end + fr += (elem[:sec_fraction] || 0) / 86400 + of = Rational(elem[:offset] || 0, 86400) + new!(jd_to_ajd(jd, fr, of), of, sg) + end + + private_class_method :new_by_frags + + # Create a new DateTime object by parsing from a String + # according to a specified format. + # + # +str+ is a String holding a date-time representation. + # +fmt+ is the format that the date-time is in. See + # date/format.rb for details on supported formats. + # + # The default +str+ is '-4712-01-01T00:00:00+00:00', and the default + # +fmt+ is '%FT%T%z'. This gives midnight on Julian Day Number day 0. + # + # +sg+ specifies the Day of Calendar Reform. + # + # An ArgumentError will be raised if +str+ cannot be + # parsed. 
+  def self.strptime(str='-4712-01-01T00:00:00+00:00', fmt='%FT%T%z', sg=ITALY)
+    elem = _strptime(str, fmt)
+    new_by_frags(elem, sg)
+  end
+
+  # Create a new DateTime object by parsing from a String,
+  # without specifying the format.
+  #
+  # +str+ is a String holding a date-time representation.
+  # +comp+ specifies whether to interpret 2-digit years
+  # as 19XX (>= 69) or 20XX (< 69); the default (true) is to do so.
+  # The method will attempt to parse a date-time from the String
+  # using various heuristics; see #_parse in date/format.rb
+  # for more details. If parsing fails, an ArgumentError
+  # will be raised.
+  #
+  # The default +str+ is '-4712-01-01T00:00:00+00:00'; this is Julian
+  # Day Number day 0.
+  #
+  # +sg+ specifies the Day of Calendar Reform.
+  def self.parse(str='-4712-01-01T00:00:00+00:00', comp=true, sg=ITALY)
+    elem = _parse(str, comp)
+    new_by_frags(elem, sg)
+  end
+
+  def self.iso8601(str='-4712-01-01T00:00:00+00:00', sg=ITALY) # :nodoc:
+    elem = _iso8601(str)
+    new_by_frags(elem, sg)
+  end
+
+  def self.rfc3339(str='-4712-01-01T00:00:00+00:00', sg=ITALY) # :nodoc:
+    elem = _rfc3339(str)
+    new_by_frags(elem, sg)
+  end
+
+  def self.xmlschema(str='-4712-01-01T00:00:00+00:00', sg=ITALY) # :nodoc:
+    elem = _xmlschema(str)
+    new_by_frags(elem, sg)
+  end
+
+  def self.rfc2822(str='Mon, 1 Jan -4712 00:00:00 +0000', sg=ITALY) # :nodoc:
+    elem = _rfc2822(str)
+    new_by_frags(elem, sg)
+  end
+
+  class << self; alias_method :rfc822, :rfc2822 end # DateTime.rfc822 is DateTime.rfc2822
+
+  def self.httpdate(str='Mon, 01 Jan -4712 00:00:00 GMT', sg=ITALY) # :nodoc:
+    elem = _httpdate(str)
+    new_by_frags(elem, sg)
+  end
+
+  def self.jisx0301(str='-4712-01-01T00:00:00+00:00', sg=ITALY) # :nodoc:
+    elem = _jisx0301(str)
+    new_by_frags(elem, sg)
+  end
+
+  public :hour, :min, :sec, :sec_fraction, :zone, :offset, :new_offset,
+         :minute, :second, :second_fraction
+
+  def to_s # 4p
+    format('%.4d-%02d-%02dT%02d:%02d:%02d%s',
+           year, mon, mday, hour, min, sec, zone)
+  end
+
+end
+
+class Time
+
+  def to_time() getlocal end
+
+  def to_date
+    jd = Date.__send__(:civil_to_jd, year, mon, mday, Date::ITALY)
+    Date.new!(Date.__send__(:jd_to_ajd, jd, 0, 0), 0, Date::ITALY)
+  end
+
+  def to_datetime
+    jd = DateTime.__send__(:civil_to_jd, year, mon, mday, DateTime::ITALY)
+    fr = DateTime.__send__(:time_to_day_fraction, hour, min, [sec, 59].min) + # sec is capped at 59
+         Rational(nsec, 86400_000_000_000)
+    of = Rational(utc_offset, 86400)
+    DateTime.new!(DateTime.__send__(:jd_to_ajd, jd, fr, of),
+                  of, DateTime::ITALY)
+  end
+
+end
+
+class Date
+
+  def to_time() Time.local(year, mon, mday) end
+  def to_date() self end
+  def to_datetime() DateTime.new!(jd_to_ajd(jd, 0, 0), @of, @sg) end
+
+  # Create a new Date object representing today.
+  #
+  # +sg+ specifies the Day of Calendar Reform.
+  def self.today(sg=ITALY)
+    t = Time.now
+    jd = civil_to_jd(t.year, t.mon, t.mday, sg)
+    new!(jd_to_ajd(jd, 0, 0), 0, sg)
+  end
+
+  # Create a new DateTime object representing the current time.
+  #
+  # +sg+ specifies the Day of Calendar Reform.
+  def self.now(sg=ITALY)
+    t = Time.now
+    jd = civil_to_jd(t.year, t.mon, t.mday, sg)
+    fr = time_to_day_fraction(t.hour, t.min, [t.sec, 59].min) + # sec is capped at 59
+         Rational(t.nsec, 86400_000_000_000)
+    of = Rational(t.utc_offset, 86400)
+    new!(jd_to_ajd(jd, fr, of), of, sg)
+  end
+
+  private_class_method :now # private on Date; DateTime makes it public again
+
+end
+
+class DateTime < Date
+
+  def to_time
+    d = new_offset(0) # same instant at offset 0, so Time.utc can be used
+    d.instance_eval do
+      Time.utc(year, mon, mday, hour, min, sec +
+               sec_fraction)
+    end.
+        getlocal
+  end
+
+  def to_date() Date.new!(jd_to_ajd(jd, 0, 0), 0, @sg) end
+  def to_datetime() self end
+
+  private_class_method :today
+  public_class_method :now
+
+end
diff --git a/lib/date/format.rb b/lib/date/format.rb
new file mode 100644
index 0000000..a83b298
--- /dev/null
+++ b/lib/date/format.rb
@@ -0,0 +1,1313 @@
+# format.rb: Written by Tadayoshi Funaba 1999-2008
+# $Id: format.rb,v 2.43 2008-01-17 20:16:31+09 tadf Exp $
+
+class Date
+
+  module Format # :nodoc:
+
+    MONTHS = {
+      'january' => 1, 'february' => 2, 'march' => 3, 'april' => 4,
+      'may' => 5, 'june' => 6, 'july' => 7, 'august' => 8,
+      'september'=> 9, 'october' =>10, 'november' =>11, 'december' =>12
+    }
+
+    DAYS = {
+      'sunday' => 0, 'monday' => 1, 'tuesday' => 2, 'wednesday'=> 3,
+      'thursday' => 4, 'friday' => 5, 'saturday' => 6
+    }
+
+    ABBR_MONTHS = {
+      'jan' => 1, 'feb' => 2, 'mar' => 3, 'apr' => 4,
+      'may' => 5, 'jun' => 6, 'jul' => 7, 'aug' => 8,
+      'sep' => 9, 'oct' =>10, 'nov' =>11, 'dec' =>12
+    }
+
+    ABBR_DAYS = {
+      'sun' => 0, 'mon' => 1, 'tue' => 2, 'wed' => 3,
+      'thu' => 4, 'fri' => 5, 'sat' => 6
+    }
+
+    ZONES = { # lower-case zone name => offset from UTC in seconds
+      'ut' => 0*3600, 'gmt' => 0*3600, 'est' => -5*3600, 'edt' => -4*3600,
+      'cst' => -6*3600, 'cdt' => -5*3600, 'mst' => -7*3600, 'mdt' => -6*3600,
+      'pst' => -8*3600, 'pdt' => -7*3600,
+      'a' => 1*3600, 'b' => 2*3600, 'c' => 3*3600, 'd' => 4*3600,
+      'e' => 5*3600, 'f' => 6*3600, 'g' => 7*3600, 'h' => 8*3600,
+      'i' => 9*3600, 'k' => 10*3600, 'l' => 11*3600, 'm' => 12*3600,
+      'n' => -1*3600, 'o' => -2*3600, 'p' => -3*3600, 'q' => -4*3600,
+      'r' => -5*3600, 's' => -6*3600, 't' => -7*3600, 'u' => -8*3600,
+      'v' => -9*3600, 'w' =>-10*3600, 'x' =>-11*3600, 'y' =>-12*3600,
+      'z' => 0*3600,
+
+      'utc' => 0*3600, 'wet' => 0*3600,
+      'at' => -2*3600, 'brst'=> -2*3600, 'ndt' => -(2*3600+1800),
+      'art' => -3*3600, 'adt' => -3*3600, 'brt' => -3*3600, 'clst'=> -3*3600,
+      'nst' => -(3*3600+1800),
+      'ast' => -4*3600, 'clt' => -4*3600,
+      'akdt'=> -8*3600, 'ydt' => -8*3600,
+      'akst'=> -9*3600, 'hadt'=> -9*3600, 'hdt' => -9*3600, 'yst' => -9*3600,
+      'ahst'=>-10*3600, 'cat' =>-10*3600, 'hast'=>-10*3600, 'hst' =>-10*3600,
+      'nt' =>-11*3600,
+      'idlw'=>-12*3600,
+      'bst' => 1*3600, 'cet' => 1*3600, 'fwt' => 1*3600, 'met' => 1*3600,
+      'mewt'=> 1*3600, 'mez' => 1*3600, 'swt' => 1*3600, 'wat' => 1*3600,
+      'west'=> 1*3600,
+      'cest'=> 2*3600, 'eet' => 2*3600, 'fst' => 2*3600, 'mest'=> 2*3600,
+      'mesz'=> 2*3600, 'sast'=> 2*3600, 'sst' => 2*3600,
+      'bt' => 3*3600, 'eat' => 3*3600, 'eest'=> 3*3600, 'msk' => 3*3600,
+      'msd' => 4*3600, 'zp4' => 4*3600,
+      'zp5' => 5*3600, 'ist' => (5*3600+1800),
+      'zp6' => 6*3600,
+      'wast'=> 7*3600,
+      'cct' => 8*3600, 'sgt' => 8*3600, 'wadt'=> 8*3600,
+      'jst' => 9*3600, 'kst' => 9*3600,
+      'east'=> 10*3600, 'gst' => 10*3600,
+      'eadt'=> 11*3600,
+      'idle'=> 12*3600, 'nzst'=> 12*3600, 'nzt' => 12*3600,
+      'nzdt'=> 13*3600,
+
+      'afghanistan' => 16200, 'alaskan' => -32400,
+      'arab' => 10800, 'arabian' => 14400,
+      'arabic' => 10800, 'atlantic' => -14400,
+      'aus central' => 34200, 'aus eastern' => 36000,
+      'azores' => -3600, 'canada central' => -21600,
+      'cape verde' => -3600, 'caucasus' => 14400,
+      'cen. australia' => 34200, 'central america' => -21600,
+      'central asia' => 21600, 'central europe' => 3600,
+      'central european' => 3600, 'central pacific' => 39600,
+      'central' => -21600, 'china' => 28800,
+      'dateline' => -43200, 'e. africa' => 10800,
+      'e. australia' => 36000, 'e. europe' => 7200,
+      'e. south america' => -10800, 'eastern' => -18000,
+      'egypt' => 7200, 'ekaterinburg' => 18000,
+      'fiji' => 43200, 'fle' => 7200,
+      'greenland' => -10800, 'greenwich' => 0,
+      'gtb' => 7200, 'hawaiian' => -36000,
+      'india' => 19800, 'iran' => 12600,
+      'jerusalem' => 7200, 'korea' => 32400,
+      'mexico' => -21600, 'mid-atlantic' => -7200,
+      'mountain' => -25200, 'myanmar' => 23400,
+      'n. central asia' => 21600, 'nepal' => 20700,
+      'new zealand' => 43200, 'newfoundland' => -12600,
+      'north asia east' => 28800, 'north asia' => 25200,
+      'pacific sa' => -14400, 'pacific' => -28800,
+      'romance' => 3600, 'russian' => 10800,
+      'sa eastern' => -10800, 'sa pacific' => -18000,
+      'sa western' => -14400, 'samoa' => -39600,
+      'se asia' => 25200, 'malay peninsula' => 28800,
+      'south africa' => 7200, 'sri lanka' => 21600,
+      'taipei' => 28800, 'tasmania' => 36000,
+      'tokyo' => 32400, 'tonga' => 46800,
+      'us eastern' => -18000, 'us mountain' => -25200,
+      'vladivostok' => 36000, 'w. australia' => 28800,
+      'w. central africa' => 3600, 'w. europe' => 3600,
+      'west asia' => 18000, 'west pacific' => 36000,
+      'yakutsk' => 32400
+    }
+
+    [MONTHS, DAYS, ABBR_MONTHS, ABBR_DAYS, ZONES].each do |x|
+      x.freeze
+    end
+
+    class Bag # :nodoc:
+
+      def initialize
+        @elem = {}
+      end
+
+      def method_missing(t, *args, &block)
+        t = t.to_s
+        set = t.chomp!('=') # non-nil only when called as a setter (name=)
+        t = t.intern
+        if set
+          @elem[t] = args[0]
+        else
+          @elem[t]
+        end
+      end
+
+      def to_hash
+        @elem.reject{|k, v| /\A_/ =~ k.to_s || v.nil?} # drop _-prefixed and nil entries
+      end
+
+    end
+
+  end
+
+  def emit(e, f) # :nodoc:
+    case e
+    when Numeric
+      sign = %w(+ + -)[e <=> 0] # index 0 or 1 gives '+', index -1 gives '-'
+      e = e.abs
+    end
+
+    s = e.to_s
+
+    if f[:s] && f[:p] == '0'
+      f[:w] -= 1 # leave one column for the sign
+    end
+
+    if f[:s] && f[:p] == "\s"
+      s[0,0] = sign # space padding: sign goes next to the digits
+    end
+
+    if f[:p] != '-'
+      s = s.rjust(f[:w], f[:p])
+    end
+
+    if f[:s] && f[:p] != "\s"
+      s[0,0] = sign # other padding: sign goes in front of the padding
+    end
+
+    s = s.upcase if f[:u]
+    s = s.downcase if f[:d]
+    s
+  end
+
+  def emit_w(e, w, f) # :nodoc:
+    f[:w] = [f[:w], w].compact.max
+    emit(e, f)
+  end
+
+  def emit_n(e, w, f) # :nodoc:
+    f[:p] ||= '0'
+    emit_w(e, w, f)
+  end
+
+  def emit_sn(e, w, f) # :nodoc:
+    if e < 0
+      w += 1
+      f[:s] = true
+    end
+    emit_n(e, w, f)
+  end
+
+  def emit_z(e, w, f) # :nodoc:
+    w += 1
+    f[:s] = true
+    emit_n(e, w, f)
+  end
+
+  def emit_a(e, w, f) # :nodoc:
+    f[:p] ||= "\s"
+    emit_w(e, w, f)
+  end
+
+  def emit_ad(e, w, f) # :nodoc:
+    if f[:x]
+      f[:u] = true
+      f[:d] = false
+    end
+    emit_a(e, w, f)
+  end
+
+  def emit_au(e, w, f) # :nodoc:
+    if f[:x]
+      f[:u] = false
+      f[:d] = true
+    end
+    emit_a(e, w, f)
+  end
+
+  private :emit, :emit_w, :emit_n, :emit_sn, :emit_z,
+          :emit_a, :emit_ad, :emit_au
+
+  def strftime(fmt='%F')
+    fmt.gsub(/%([-_0^#]+)?(\d+)?([EO]?(?::{1,3}z|.))/m) do
+      f = {} # per-directive options: :p pad, :w width, :s sign, :u/:d case, :x '#' flag
+      m = $&
+      s, w, c = $1, $2, $3 # flags, width, conversion
+      if s
+        s.scan(/./) do |k|
+          case k
+          when '-'; f[:p] = '-'
+          when '_'; f[:p] = "\s"
+          when '0'; f[:p] = '0'
+          when '^'; f[:u] = true
+          when '#'; f[:x] = true
+          end
+        end
+      end
+      if w
+        f[:w] = w.to_i
+      end
+      case c
+      when 'A'; emit_ad(DAYNAMES[wday], 0, f)
+      when 'a'; emit_ad(ABBR_DAYNAMES[wday], 0, f)
+      when 'B'; emit_ad(MONTHNAMES[mon], 0, f)
+      when 'b'; emit_ad(ABBR_MONTHNAMES[mon], 0, f)
+      when 'C', 'EC'; emit_sn((year / 100).floor, 2, f)
+      when 'c', 'Ec'; emit_a(strftime('%a %b %e %H:%M:%S %Y'), 0, f)
+      when 'D'; emit_a(strftime('%m/%d/%y'), 0, f)
+      when 'd', 'Od'; emit_n(mday, 2, f)
+      when 'e', 'Oe'; emit_a(mday, 2, f)
+      when 'F'
+        if m == '%F'
+          format('%.4d-%02d-%02d', year, mon, mday) # 4p
+        else
+          emit_a(strftime('%Y-%m-%d'), 0, f)
+        end
+      when 'G'; emit_sn(cwyear, 4, f)
+      when 'g'; emit_n(cwyear % 100, 2, f)
+      when 'H', 'OH'; emit_n(hour, 2, f)
+      when 'h'; emit_ad(strftime('%b'), 0, f)
+      when 'I', 'OI'; emit_n((hour % 12).nonzero? || 12, 2, f)
+      when 'j'; emit_n(yday, 3, f)
+      when 'k'; emit_a(hour, 2, f)
+      when 'L'
+        f[:p] = nil
+        w = f[:w] || 3
+        u = 10**w
+        emit_n((sec_fraction * u).floor, w, f)
+      when 'l'; emit_a((hour % 12).nonzero? || 12, 2, f)
+      when 'M', 'OM'; emit_n(min, 2, f)
+      when 'm', 'Om'; emit_n(mon, 2, f)
+      when 'N'
+        f[:p] = nil
+        w = f[:w] || 9
+        u = 10**w
+        emit_n((sec_fraction * u).floor, w, f)
+      when 'n'; emit_a("\n", 0, f)
+      when 'P'; emit_ad(strftime('%p').downcase, 0, f)
+      when 'p'; emit_au(if hour < 12 then 'AM' else 'PM' end, 0, f)
+      when 'Q'
+        s = ((ajd - UNIX_EPOCH_IN_AJD) / MILLISECONDS_IN_DAY).round
+        emit_sn(s, 1, f)
+      when 'R'; emit_a(strftime('%H:%M'), 0, f)
+      when 'r'; emit_a(strftime('%I:%M:%S %p'), 0, f)
+      when 'S', 'OS'; emit_n(sec, 2, f)
+      when 's'
+        s = ((ajd - UNIX_EPOCH_IN_AJD) / SECONDS_IN_DAY).round
+        emit_sn(s, 1, f)
+      when 'T'
+        if m == '%T'
+          format('%02d:%02d:%02d', hour, min, sec) # 4p
+        else
+          emit_a(strftime('%H:%M:%S'), 0, f)
+        end
+      when 't'; emit_a("\t", 0, f)
+      when 'U', 'W', 'OU', 'OW'
+        emit_n(if c[-1,1] == 'U' then wnum0 else wnum1 end, 2, f)
+      when 'u', 'Ou'; emit_n(cwday, 1, f)
+      when 'V', 'OV'; emit_n(cweek, 2, f)
+      when 'v'; emit_a(strftime('%e-%b-%Y'), 0, f)
+      when 'w', 'Ow'; emit_n(wday, 1, f)
+      when 'X', 'EX'; emit_a(strftime('%H:%M:%S'), 0, f)
+      when 'x', 'Ex'; emit_a(strftime('%m/%d/%y'), 0, f)
+      when 'Y', 'EY'; emit_sn(year, 4, f)
+      when 'y', 'Ey', 'Oy'; emit_n(year % 100, 2, f)
+      when 'Z'; emit_au(strftime('%:z'), 0, f)
+      when /\A(:{0,3})z/
+        t = $1.size # number of colons in the directive
+        sign = if offset < 0 then -1 else +1 end
+        fr = offset.abs
+        ss = fr.div(SECONDS_IN_DAY) # 4p
+        hh, ss = ss.divmod(3600)
+        mm, ss = ss.divmod(60)
+        if t == 3 # %:::z picks the shortest form that loses nothing
+          if ss.nonzero? then t = 2
+          elsif mm.nonzero? then t = 1
+          else t = -1
+          end
+        end
+        case t
+        when -1
+          tail = []
+          sep = ''
+        when 0
+          f[:w] -= 2 if f[:w]
+          tail = ['%02d' % mm]
+          sep = ''
+        when 1
+          f[:w] -= 3 if f[:w]
+          tail = ['%02d' % mm]
+          sep = ':'
+        when 2
+          f[:w] -= 6 if f[:w]
+          tail = ['%02d' % mm, '%02d' % ss]
+          sep = ':'
+        end
+        ([emit_z(sign * hh, 2, f)] + tail).join(sep)
+      when '%'; emit_a('%', 0, f)
+      when '+'; emit_a(strftime('%a %b %e %H:%M:%S %Z %Y'), 0, f)
+      else
+        m # unknown directive: emitted unchanged
+      end
+    end
+  end
+
+# alias_method :format, :strftime
+
+  def asctime() strftime('%c') end
+
+  alias_method :ctime, :asctime
+
+  def iso8601() strftime('%F') end
+
+  def rfc3339() iso8601 end
+
+  def xmlschema() iso8601 end # :nodoc:
+
+  def rfc2822() strftime('%a, %-d %b %Y %T %z') end
+
+  alias_method :rfc822, :rfc2822
+
+  def httpdate() new_offset(0).strftime('%a, %d %b %Y %T GMT') end # :nodoc:
+
+  def jisx0301
+    if jd < 2405160
+      iso8601
+    else
+      case jd
+      when 2405160...2419614
+        g = 'M%02d' % (year - 1867)
+      when 2419614...2424875
+        g = 'T%02d' % (year - 1911)
+      when 2424875...2447535
+        g = 'S%02d' % (year - 1925)
+      else
+        g = 'H%02d' % (year - 1988)
+      end
+      g + strftime('.%m.%d')
+    end
+  end
+
+=begin
+  def beat(n=0)
+    i, f = (new_offset(HOURS_IN_DAY).day_fraction * 1000).divmod(1)
+    ('@%03d' % i) +
+      if n < 1
+        ''
+      else
+        '.%0*d' % [n, (f / Rational(1, 10**n)).round]
+      end
+  end
+=end
+
+  def self.num_pattern? (s) # :nodoc:
+    /\A%[EO]?[CDdeFGgHIjkLlMmNQRrSsTUuVvWwXxYy\d]/ =~ s || /\A\d/ =~ s
+  end
+
+  private_class_method :num_pattern?
+ + def self._strptime_i(str, fmt, e) # :nodoc: + fmt.scan(/%([EO]?(?::{1,3}z|.))|(.)/m) do |s, c| + a = $& + if s + case s + when 'A', 'a' + return unless str.sub!(/\A(#{Format::DAYS.keys.join('|')})/io, '') || + str.sub!(/\A(#{Format::ABBR_DAYS.keys.join('|')})/io, '') + val = Format::DAYS[$1.downcase] || Format::ABBR_DAYS[$1.downcase] + return unless val + e.wday = val + when 'B', 'b', 'h' + return unless str.sub!(/\A(#{Format::MONTHS.keys.join('|')})/io, '') || + str.sub!(/\A(#{Format::ABBR_MONTHS.keys.join('|')})/io, '') + val = Format::MONTHS[$1.downcase] || Format::ABBR_MONTHS[$1.downcase] + return unless val + e.mon = val + when 'C', 'EC' + return unless str.sub!(if num_pattern?($') + then /\A([-+]?\d{1,2})/ + else /\A([-+]?\d{1,})/ + end, '') + val = $1.to_i + e._cent = val + when 'c', 'Ec' + return unless _strptime_i(str, '%a %b %e %H:%M:%S %Y', e) + when 'D' + return unless _strptime_i(str, '%m/%d/%y', e) + when 'd', 'e', 'Od', 'Oe' + return unless str.sub!(/\A( \d|\d{1,2})/, '') + val = $1.to_i + return unless (1..31) === val + e.mday = val + when 'F' + return unless _strptime_i(str, '%Y-%m-%d', e) + when 'G' + return unless str.sub!(if num_pattern?($') + then /\A([-+]?\d{1,4})/ + else /\A([-+]?\d{1,})/ + end, '') + val = $1.to_i + e.cwyear = val + when 'g' + return unless str.sub!(/\A(\d{1,2})/, '') + val = $1.to_i + return unless (0..99) === val + e.cwyear = val + e._cent ||= if val >= 69 then 19 else 20 end + when 'H', 'k', 'OH' + return unless str.sub!(/\A( \d|\d{1,2})/, '') + val = $1.to_i + return unless (0..24) === val + e.hour = val + when 'I', 'l', 'OI' + return unless str.sub!(/\A( \d|\d{1,2})/, '') + val = $1.to_i + return unless (1..12) === val + e.hour = val + when 'j' + return unless str.sub!(/\A(\d{1,3})/, '') + val = $1.to_i + return unless (1..366) === val + e.yday = val + when 'L' + return unless str.sub!(if num_pattern?($') + then /\A([-+]?\d{1,3})/ + else /\A([-+]?\d{1,})/ + end, '') +# val = Rational($1.to_i, 10**3) + val = 
Rational($1.to_i, 10**$1.size) + e.sec_fraction = val + when 'M', 'OM' + return unless str.sub!(/\A(\d{1,2})/, '') + val = $1.to_i + return unless (0..59) === val + e.min = val + when 'm', 'Om' + return unless str.sub!(/\A(\d{1,2})/, '') + val = $1.to_i + return unless (1..12) === val + e.mon = val + when 'N' + return unless str.sub!(if num_pattern?($') + then /\A([-+]?\d{1,9})/ + else /\A([-+]?\d{1,})/ + end, '') +# val = Rational($1.to_i, 10**9) + val = Rational($1.to_i, 10**$1.size) + e.sec_fraction = val + when 'n', 't' + return unless _strptime_i(str, "\s", e) + when 'P', 'p' + return unless str.sub!(/\A([ap])(?:m\b|\.m\.)/i, '') + e._merid = if $1.downcase == 'a' then 0 else 12 end + when 'Q' + return unless str.sub!(/\A(-?\d{1,})/, '') + val = Rational($1.to_i, 10**3) + e.seconds = val + when 'R' + return unless _strptime_i(str, '%H:%M', e) + when 'r' + return unless _strptime_i(str, '%I:%M:%S %p', e) + when 'S', 'OS' + return unless str.sub!(/\A(\d{1,2})/, '') + val = $1.to_i + return unless (0..60) === val + e.sec = val + when 's' + return unless str.sub!(/\A(-?\d{1,})/, '') + val = $1.to_i + e.seconds = val + when 'T' + return unless _strptime_i(str, '%H:%M:%S', e) + when 'U', 'W', 'OU', 'OW' + return unless str.sub!(/\A(\d{1,2})/, '') + val = $1.to_i + return unless (0..53) === val + e.__send__(if s[-1,1] == 'U' then :wnum0= else :wnum1= end, val) + when 'u', 'Ou' + return unless str.sub!(/\A(\d{1})/, '') + val = $1.to_i + return unless (1..7) === val + e.cwday = val + when 'V', 'OV' + return unless str.sub!(/\A(\d{1,2})/, '') + val = $1.to_i + return unless (1..53) === val + e.cweek = val + when 'v' + return unless _strptime_i(str, '%e-%b-%Y', e) + when 'w' + return unless str.sub!(/\A(\d{1})/, '') + val = $1.to_i + return unless (0..6) === val + e.wday = val + when 'X', 'EX' + return unless _strptime_i(str, '%H:%M:%S', e) + when 'x', 'Ex' + return unless _strptime_i(str, '%m/%d/%y', e) + when 'Y', 'EY' + return unless str.sub!(if num_pattern?($') + 
then /\A([-+]?\d{1,4})/ + else /\A([-+]?\d{1,})/ + end, '') + val = $1.to_i + e.year = val + when 'y', 'Ey', 'Oy' + return unless str.sub!(/\A(\d{1,2})/, '') + val = $1.to_i + return unless (0..99) === val + e.year = val + e._cent ||= if val >= 69 then 19 else 20 end + when 'Z', /\A:{0,3}z/ + return unless str.sub!(/\A((?:gmt|utc?)?[-+]\d+(?:[,.:]\d+(?::\d+)?)? + |[[:alpha:].\s]+(?:standard|daylight)\s+time\b + |[[:alpha:]]+(?:\s+dst)?\b + )/ix, '') + val = $1 + e.zone = val + offset = zone_to_diff(val) + e.offset = offset + when '%' + return unless str.sub!(/\A%/, '') + when '+' + return unless _strptime_i(str, '%a %b %e %H:%M:%S %Z %Y', e) + else + return unless str.sub!(Regexp.new('\\A' + Regexp.quote(a)), '') + end + else + case c + when /\A[\s\v]/ + str.sub!(/\A[\s\v]+/, '') + else + return unless str.sub!(Regexp.new('\\A' + Regexp.quote(a)), '') + end + end + end + end + + private_class_method :_strptime_i + + def self._strptime(str, fmt='%F') + str = str.dup + e = Format::Bag.new + return unless _strptime_i(str, fmt, e) + + if e._cent + if e.cwyear + e.cwyear += e._cent * 100 + end + if e.year + e. year += e._cent * 100 + end + end + + if e._merid + if e.hour + e.hour %= 12 + e.hour += e._merid + end + end + + unless str.empty? 
+ e.leftover = str + end + + e.to_hash + end + + def self.s3e(e, y, m, d, bc=false) + unless String === m + m = m.to_s + end + + if y && m && !d + y, m, d = d, y, m + end + + if y == nil + if d && d.size > 2 + y = d + d = nil + end + if d && d[0,1] == "'" + y = d + d = nil + end + end + + if y + y.scan(/(\d+)(.+)?/) + if $2 + y, d = d, $1 + end + end + + if m + if m[0,1] == "'" || m.size > 2 + y, m, d = m, d, y # us -> be + end + end + + if d + if d[0,1] == "'" || d.size > 2 + y, d = d, y + end + end + + if y + y =~ /([-+])?(\d+)/ + if $1 || $2.size > 2 + c = false + end + iy = $&.to_i + if bc + iy = -iy + 1 + end + e.year = iy + end + + if m + m =~ /\d+/ + e.mon = $&.to_i + end + + if d + d =~ /\d+/ + e.mday = $&.to_i + end + + if c != nil + e._comp = c + end + + end + + private_class_method :s3e + + def self._parse_day(str, e) # :nodoc: + if str.sub!(/\b(#{Format::ABBR_DAYS.keys.join('|')})[^-\d\s]*/io, ' ') + e.wday = Format::ABBR_DAYS[$1.downcase] + true +=begin + elsif str.sub!(/\b(?!\dth)(su|mo|tu|we|th|fr|sa)\b/i, ' ') + e.wday = %w(su mo tu we th fr sa).index($1.downcase) + true +=end + end + end + + def self._parse_time(str, e) # :nodoc: + if str.sub!( + /( + (?: + \d+\s*:\s*\d+ + (?: + \s*:\s*\d+(?:[,.]\d*)? + )? + | + \d+\s*h(?:\s*\d+m?(?:\s*\d+s?)?)? + ) + (?: + \s* + [ap](?:m\b|\.m\.) + )? + | + \d+\s*[ap](?:m\b|\.m\.) + ) + (?: + \s* + ( + (?:gmt|utc?)?[-+]\d+(?:[,.:]\d+(?::\d+)?)? + | + [[:alpha:].\s]+(?:standard|daylight)\stime\b + | + [[:alpha:]]+(?:\sdst)?\b + ) + )? + /ix, + ' ') + + t = $1 + e.zone = $2 if $2 + + t =~ /\A(\d+)h? + (?:\s*:?\s*(\d+)m? + (?: + \s*:?\s*(\d+)(?:[,.](\d+))?s? + )? + )? 
+ (?:\s*([ap])(?:m\b|\.m\.))?/ix + + e.hour = $1.to_i + e.min = $2.to_i if $2 + e.sec = $3.to_i if $3 + e.sec_fraction = Rational($4.to_i, 10**$4.size) if $4 + + if $5 + e.hour %= 12 + if $5.downcase == 'p' + e.hour += 12 + end + end + true + end + end + +=begin + def self._parse_beat(str, e) # :nodoc: + if str.sub!(/@\s*(\d+)(?:[,.](\d*))?/, ' ') + beat = Rational($1.to_i) + beat += Rational($2.to_i, 10**$2.size) if $2 + secs = Rational(beat, 1000) + h, min, s, fr = self.day_fraction_to_time(secs) + e.hour = h + e.min = min + e.sec = s + e.sec_fraction = fr * 86400 + e.zone = '+01:00' + true + end + end +=end + + def self._parse_eu(str, e) # :nodoc: + if str.sub!( + /'?(\d+)[^-\d\s]* + \s* + (#{Format::ABBR_MONTHS.keys.join('|')})[^-\d\s']* + (?: + \s* + (c(?:e|\.e\.)|b(?:ce|\.c\.e\.)|a(?:d|\.d\.)|b(?:c|\.c\.))? + \s* + ('?-?\d+(?:(?:st|nd|rd|th)\b)?) + )? + /iox, + ' ') # ' + s3e(e, $4, Format::ABBR_MONTHS[$2.downcase], $1, + $3 && $3[0,1].downcase == 'b') + true + end + end + + def self._parse_us(str, e) # :nodoc: + if str.sub!( + /\b(#{Format::ABBR_MONTHS.keys.join('|')})[^-\d\s']* + \s* + ('?\d+)[^-\d\s']* + (?: + \s* + (c(?:e|\.e\.)|b(?:ce|\.c\.e\.)|a(?:d|\.d\.)|b(?:c|\.c\.))? + \s* + ('?-?\d+) + )? 
+ /iox, + ' ') # ' + s3e(e, $4, Format::ABBR_MONTHS[$1.downcase], $2, + $3 && $3[0,1].downcase == 'b') + true + end + end + + def self._parse_iso(str, e) # :nodoc: + if str.sub!(/('?[-+]?\d+)-(\d+)-('?-?\d+)/, ' ') + s3e(e, $1, $2, $3) + true + end + end + + def self._parse_iso2(str, e) # :nodoc: + if str.sub!(/\b(\d{2}|\d{4})?-?w(\d{2})(?:-?(\d))?\b/i, ' ') + e.cwyear = $1.to_i if $1 + e.cweek = $2.to_i + e.cwday = $3.to_i if $3 + true + elsif str.sub!(/-w-(\d)\b/i, ' ') + e.cwday = $1.to_i + true + elsif str.sub!(/--(\d{2})?-(\d{2})\b/, ' ') + e.mon = $1.to_i if $1 + e.mday = $2.to_i + true + elsif str.sub!(/--(\d{2})(\d{2})?\b/, ' ') + e.mon = $1.to_i + e.mday = $2.to_i if $2 + true + elsif /[,.](\d{2}|\d{4})-\d{3}\b/ !~ str && + str.sub!(/\b(\d{2}|\d{4})-(\d{3})\b/, ' ') + e.year = $1.to_i + e.yday = $2.to_i + true + elsif /\d-\d{3}\b/ !~ str && + str.sub!(/\b-(\d{3})\b/, ' ') + e.yday = $1.to_i + true + end + end + + def self._parse_jis(str, e) # :nodoc: + if str.sub!(/\b([mtsh])(\d+)\.(\d+)\.(\d+)/i, ' ') + era = { 'm'=>1867, + 't'=>1911, + 's'=>1925, + 'h'=>1988 + }[$1.downcase] + e.year = $2.to_i + era + e.mon = $3.to_i + e.mday = $4.to_i + true + end + end + + def self._parse_vms(str, e) # :nodoc: + if str.sub!(/('?-?\d+)-(#{Format::ABBR_MONTHS.keys.join('|')})[^-]* + -('?-?\d+)/iox, ' ') + s3e(e, $3, Format::ABBR_MONTHS[$2.downcase], $1) + true + elsif str.sub!(/\b(#{Format::ABBR_MONTHS.keys.join('|')})[^-]* + -('?-?\d+)(?:-('?-?\d+))?/iox, ' ') + s3e(e, $3, Format::ABBR_MONTHS[$1.downcase], $2) + true + end + end + + def self._parse_sla(str, e) # :nodoc: + if str.sub!(%r|('?-?\d+)/\s*('?\d+)(?:\D\s*('?-?\d+))?|, ' ') # ' + s3e(e, $1, $2, $3) + true + end + end + + def self._parse_dot(str, e) # :nodoc: + if str.sub!(%r|('?-?\d+)\.\s*('?\d+)\.\s*('?-?\d+)|, ' ') # ' + s3e(e, $1, $2, $3) + true + end + end + + def self._parse_year(str, e) # :nodoc: + if str.sub!(/'(\d+)\b/, ' ') + e.year = $1.to_i + true + end + end + + def self._parse_mon(str, e) # :nodoc: 
+ if str.sub!(/\b(#{Format::ABBR_MONTHS.keys.join('|')})\S*/io, ' ') + e.mon = Format::ABBR_MONTHS[$1.downcase] + true + end + end + + def self._parse_mday(str, e) # :nodoc: + if str.sub!(/(\d+)(st|nd|rd|th)\b/i, ' ') + e.mday = $1.to_i + true + end + end + + def self._parse_ddd(str, e) # :nodoc: + if str.sub!( + /([-+]?)(\d{2,14}) + (?: + \s* + t? + \s* + (\d{2,6})?(?:[,.](\d*))? + )? + (?: + \s* + ( + z\b + | + [-+]\d{1,4}\b + | + \[[-+]?\d[^\]]*\] + ) + )? + /ix, + ' ') + case $2.size + when 2 + if $3.nil? && $4 + e.sec = $2[-2, 2].to_i + else + e.mday = $2[ 0, 2].to_i + end + when 4 + if $3.nil? && $4 + e.sec = $2[-2, 2].to_i + e.min = $2[-4, 2].to_i + else + e.mon = $2[ 0, 2].to_i + e.mday = $2[ 2, 2].to_i + end + when 6 + if $3.nil? && $4 + e.sec = $2[-2, 2].to_i + e.min = $2[-4, 2].to_i + e.hour = $2[-6, 2].to_i + else + e.year = ($1 + $2[ 0, 2]).to_i + e.mon = $2[ 2, 2].to_i + e.mday = $2[ 4, 2].to_i + end + when 8, 10, 12, 14 + if $3.nil? && $4 + e.sec = $2[-2, 2].to_i + e.min = $2[-4, 2].to_i + e.hour = $2[-6, 2].to_i + e.mday = $2[-8, 2].to_i + if $2.size >= 10 + e.mon = $2[-10, 2].to_i + end + if $2.size == 12 + e.year = ($1 + $2[-12, 2]).to_i + end + if $2.size == 14 + e.year = ($1 + $2[-14, 4]).to_i + e._comp = false + end + else + e.year = ($1 + $2[ 0, 4]).to_i + e.mon = $2[ 4, 2].to_i + e.mday = $2[ 6, 2].to_i + e.hour = $2[ 8, 2].to_i if $2.size >= 10 + e.min = $2[10, 2].to_i if $2.size >= 12 + e.sec = $2[12, 2].to_i if $2.size >= 14 + e._comp = false + end + when 3 + if $3.nil? && $4 + e.sec = $2[-2, 2].to_i + e.min = $2[-3, 1].to_i + else + e.yday = $2[ 0, 3].to_i + end + when 5 + if $3.nil? && $4 + e.sec = $2[-2, 2].to_i + e.min = $2[-4, 2].to_i + e.hour = $2[-5, 1].to_i + else + e.year = ($1 + $2[ 0, 2]).to_i + e.yday = $2[ 2, 3].to_i + end + when 7 + if $3.nil? 
&& $4 + e.sec = $2[-2, 2].to_i + e.min = $2[-4, 2].to_i + e.hour = $2[-6, 2].to_i + e.mday = $2[-7, 1].to_i + else + e.year = ($1 + $2[ 0, 4]).to_i + e.yday = $2[ 4, 3].to_i + end + end + if $3 + if $4 + case $3.size + when 2, 4, 6 + e.sec = $3[-2, 2].to_i + e.min = $3[-4, 2].to_i if $3.size >= 4 + e.hour = $3[-6, 2].to_i if $3.size >= 6 + end + else + case $3.size + when 2, 4, 6 + e.hour = $3[ 0, 2].to_i + e.min = $3[ 2, 2].to_i if $3.size >= 4 + e.sec = $3[ 4, 2].to_i if $3.size >= 6 + end + end + end + if $4 + e.sec_fraction = Rational($4.to_i, 10**$4.size) + end + if $5 + e.zone = $5 + if e.zone[0,1] == '[' + o, n, = e.zone[1..-2].split(':') + e.zone = n || o + if /\A\d/ =~ o + o = format('+%s', o) + end + e.offset = zone_to_diff(o) + end + end + true + end + end + + private_class_method :_parse_day, :_parse_time, # :_parse_beat, + :_parse_eu, :_parse_us, :_parse_iso, :_parse_iso2, + :_parse_jis, :_parse_vms, :_parse_sla, :_parse_dot, + :_parse_year, :_parse_mon, :_parse_mday, :_parse_ddd + + def self._parse(str, comp=true) + str = str.dup + + e = Format::Bag.new + + e._comp = comp + + str.gsub!(/[^-+',.\/:@[:alnum:]\[\]]+/, ' ') + + _parse_time(str, e) # || _parse_beat(str, e) + _parse_day(str, e) + + _parse_eu(str, e) || + _parse_us(str, e) || + _parse_iso(str, e) || + _parse_jis(str, e) || + _parse_vms(str, e) || + _parse_sla(str, e) || + _parse_dot(str, e) || + _parse_iso2(str, e) || + _parse_year(str, e) || + _parse_mon(str, e) || + _parse_mday(str, e) || + _parse_ddd(str, e) + + if str.sub!(/\b(bc\b|bce\b|b\.c\.|b\.c\.e\.)/i, ' ') + if e.year + e.year = -e.year + 1 + end + end + + if str.sub!(/\A\s*(\d{1,2})\s*\z/, ' ') + if e.hour && !e.mday + v = $1.to_i + if (1..31) === v + e.mday = v + end + end + if e.mday && !e.hour + v = $1.to_i + if (0..24) === v + e.hour = v + end + end + end + + if e._comp + if e.cwyear + if e.cwyear >= 0 && e.cwyear <= 99 + e.cwyear += if e.cwyear >= 69 + then 1900 else 2000 end + end + end + if e.year + if e.year >= 0 && 
e.year <= 99 + e.year += if e.year >= 69 + then 1900 else 2000 end + end + end + end + + e.offset ||= zone_to_diff(e.zone) if e.zone + + e.to_hash + end + + def self._iso8601(str) # :nodoc: + if /\A\s*(([-+]?\d{2,}|-)-\d{2}-\d{2}| + ([-+]?\d{2,})?-\d{3}| + (\d{2}|\d{4})?-w\d{2}-\d| + -w-\d) + (t + \d{2}:\d{2}(:\d{2}([,.]\d+)?)? + (z|[-+]\d{2}(:?\d{2})?)?)?\s*\z/ix =~ str + _parse(str) + elsif /\A\s*(([-+]?(\d{2}|\d{4})|--)\d{2}\d{2}| + ([-+]?(\d{2}|\d{4}))?\d{3}|-\d{3}| + (\d{2}|\d{4})?w\d{2}\d) + (t? + \d{2}\d{2}(\d{2}([,.]\d+)?)? + (z|[-+]\d{2}(\d{2})?)?)?\s*\z/ix =~ str + _parse(str) + elsif /\A\s*(\d{2}:\d{2}(:\d{2}([,.]\d+)?)? + (z|[-+]\d{2}(:?\d{2})?)?)?\s*\z/ix =~ str + _parse(str) + elsif /\A\s*(\d{2}\d{2}(\d{2}([,.]\d+)?)? + (z|[-+]\d{2}(\d{2})?)?)?\s*\z/ix =~ str + _parse(str) + end + end + + def self._rfc3339(str) # :nodoc: + if /\A\s*-?\d{4}-\d{2}-\d{2} # allow minus, anyway + (t|\s) + \d{2}:\d{2}:\d{2}(\.\d+)? + (z|[-+]\d{2}:\d{2})\s*\z/ix =~ str + _parse(str) + end + end + + def self._xmlschema(str) # :nodoc: + if /\A\s*(-?\d{4,})(?:-(\d{2})(?:-(\d{2}))?)? + (?:t + (\d{2}):(\d{2}):(\d{2})(?:\.(\d+))?)? + (z|[-+]\d{2}:\d{2})?\s*\z/ix =~ str + e = Format::Bag.new + e.year = $1.to_i + e.mon = $2.to_i if $2 + e.mday = $3.to_i if $3 + e.hour = $4.to_i if $4 + e.min = $5.to_i if $5 + e.sec = $6.to_i if $6 + e.sec_fraction = Rational($7.to_i, 10**$7.size) if $7 + if $8 + e.zone = $8 + e.offset = zone_to_diff($8) + end + e.to_hash + elsif /\A\s*(\d{2}):(\d{2}):(\d{2})(?:\.(\d+))? 
+ (z|[-+]\d{2}:\d{2})?\s*\z/ix =~ str + e = Format::Bag.new + e.hour = $1.to_i if $1 + e.min = $2.to_i if $2 + e.sec = $3.to_i if $3 + e.sec_fraction = Rational($4.to_i, 10**$4.size) if $4 + if $5 + e.zone = $5 + e.offset = zone_to_diff($5) + end + e.to_hash + elsif /\A\s*(?:--(\d{2})(?:-(\d{2}))?|---(\d{2})) + (z|[-+]\d{2}:\d{2})?\s*\z/ix =~ str + e = Format::Bag.new + e.mon = $1.to_i if $1 + e.mday = $2.to_i if $2 + e.mday = $3.to_i if $3 + if $4 + e.zone = $4 + e.offset = zone_to_diff($4) + end + e.to_hash + end + end + + def self._rfc2822(str) # :nodoc: + if /\A\s*(?:(?:#{Format::ABBR_DAYS.keys.join('|')})\s*,\s+)? + \d{1,2}\s+ + (?:#{Format::ABBR_MONTHS.keys.join('|')})\s+ + -?(\d{2,})\s+ # allow minus, anyway + \d{2}:\d{2}(:\d{2})?\s* + (?:[-+]\d{4}|ut|gmt|e[sd]t|c[sd]t|m[sd]t|p[sd]t|[a-ik-z])\s*\z/iox =~ str + e = _parse(str, false) + if $1.size < 4 + if e[:year] < 50 + e[:year] += 2000 + elsif e[:year] < 1000 + e[:year] += 1900 + end + end + e + end + end + + class << self; alias_method :_rfc822, :_rfc2822 end + + def self._httpdate(str) # :nodoc: + if /\A\s*(#{Format::ABBR_DAYS.keys.join('|')})\s*,\s+ + \d{2}\s+ + (#{Format::ABBR_MONTHS.keys.join('|')})\s+ + -?\d{4}\s+ # allow minus, anyway + \d{2}:\d{2}:\d{2}\s+ + gmt\s*\z/iox =~ str + _rfc2822(str) + elsif /\A\s*(#{Format::DAYS.keys.join('|')})\s*,\s+ + \d{2}\s*-\s* + (#{Format::ABBR_MONTHS.keys.join('|')})\s*-\s* + \d{2}\s+ + \d{2}:\d{2}:\d{2}\s+ + gmt\s*\z/iox =~ str + _parse(str) + elsif /\A\s*(#{Format::ABBR_DAYS.keys.join('|')})\s+ + (#{Format::ABBR_MONTHS.keys.join('|')})\s+ + \d{1,2}\s+ + \d{2}:\d{2}:\d{2}\s+ + \d{4}\s*\z/iox =~ str + _parse(str) + end + end + + def self._jisx0301(str) # :nodoc: + if /\A\s*[mtsh]?\d{2}\.\d{2}\.\d{2} + (t + (\d{2}:\d{2}(:\d{2}([,.]\d*)?)? 
+ (z|[-+]\d{2}(:?\d{2})?)?)?)?\s*\z/ix =~ str + if /\A\s*\d/ =~ str + _parse(str.sub(/\A\s*(\d)/, 'h\1')) + else + _parse(str) + end + else + _iso8601(str) + end + end + + t = Module.new do + + private + + def zone_to_diff(zone) # :nodoc: + zone = zone.downcase + if zone.sub!(/\s+(standard|daylight)\s+time\z/, '') + dst = $1 == 'daylight' + else + dst = zone.sub!(/\s+dst\z/, '') + end + if Format::ZONES.include?(zone) + offset = Format::ZONES[zone] + offset += 3600 if dst + elsif zone.sub!(/\A(?:gmt|utc?)?([-+])/, '') + sign = $1 + if zone.include?(':') + hour, min, sec, = zone.split(':') + elsif zone.include?(',') || zone.include?('.') + hour, fr, = zone.split(/[,.]/) + min = Rational(fr.to_i, 10**fr.size) * 60 + else + case zone.size + when 3 + hour = zone[0,1] + min = zone[1,2] + else + hour = zone[0,2] + min = zone[2,2] + sec = zone[4,2] + end + end + offset = hour.to_i * 3600 + min.to_i * 60 + sec.to_i + offset *= -1 if sign == '-' + end + offset + end + + end + + extend t + include t + +end + +class DateTime < Date + + def strftime(fmt='%FT%T%:z') + super(fmt) + end + + def self._strptime(str, fmt='%FT%T%z') + super(str, fmt) + end + + def iso8601_timediv(n) # :nodoc: + strftime('T%T' + + if n < 1 + '' + else + '.%0*d' % [n, (sec_fraction / Rational(1, 10**n)).round] + end + + '%:z') + end + + private :iso8601_timediv + + def iso8601(n=0) + super() + iso8601_timediv(n) + end + + def rfc3339(n=0) iso8601(n) end + + def xmlschema(n=0) iso8601(n) end # :nodoc: + + def jisx0301(n=0) + super() + iso8601_timediv(n) + end + +end diff --git a/lib/debug.rb b/lib/debug.rb new file mode 100644 index 0000000..7bb1450 --- /dev/null +++ b/lib/debug.rb @@ -0,0 +1,907 @@ +# Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+# Copyright (C) 2000 Information-technology Promotion Agency, Japan +# Copyright (C) 2000-2003 NAKAMURA, Hiroshi + +require 'continuation' + +if $SAFE > 0 + STDERR.print "-r debug.rb is not available in safe mode\n" + exit 1 +end + +require 'tracer' +require 'pp' + +class Tracer + def Tracer.trace_func(*vars) + Single.trace_func(*vars) + end +end + +SCRIPT_LINES__ = {} unless defined? SCRIPT_LINES__ + +class DEBUGGER__ +MUTEX = Mutex.new + +class Context + DEBUG_LAST_CMD = [] + + begin + require 'readline' + def readline(prompt, hist) + Readline::readline(prompt, hist) + end + rescue LoadError + def readline(prompt, hist) + STDOUT.print prompt + STDOUT.flush + line = STDIN.gets + exit unless line + line.chomp! + line + end + USE_READLINE = false + end + + def initialize + if Thread.current == Thread.main + @stop_next = 1 + else + @stop_next = 0 + end + @last_file = nil + @file = nil + @line = nil + @no_step = nil + @frames = [] + @finish_pos = 0 + @trace = false + @catch = "StandardError" + @suspend_next = false + end + + def stop_next(n=1) + @stop_next = n + end + + def set_suspend + @suspend_next = true + end + + def clear_suspend + @suspend_next = false + end + + def suspend_all + DEBUGGER__.suspend + end + + def resume_all + DEBUGGER__.resume + end + + def check_suspend + while MUTEX.synchronize { + if @suspend_next + DEBUGGER__.waiting.push Thread.current + @suspend_next = false + true + end + } + end + end + + def trace? 
+ @trace + end + + def set_trace(arg) + @trace = arg + end + + def stdout + DEBUGGER__.stdout + end + + def break_points + DEBUGGER__.break_points + end + + def display + DEBUGGER__.display + end + + def context(th) + DEBUGGER__.context(th) + end + + def set_trace_all(arg) + DEBUGGER__.set_trace(arg) + end + + def set_last_thread(th) + DEBUGGER__.set_last_thread(th) + end + + def debug_eval(str, binding) + begin + val = eval(str, binding) + rescue StandardError, ScriptError => e + at = eval("caller(1)", binding) + stdout.printf "%s:%s\n", at.shift, e.to_s.sub(/\(eval\):1:(in `.*?':)?/, '') + for i in at + stdout.printf "\tfrom %s\n", i + end + throw :debug_error + end + end + + def debug_silent_eval(str, binding) + begin + eval(str, binding) + rescue StandardError, ScriptError + nil + end + end + + def var_list(ary, binding) + ary.sort! + for v in ary + stdout.printf " %s => %s\n", v, eval(v, binding).inspect + end + end + + def debug_variable_info(input, binding) + case input + when /^\s*g(?:lobal)?\s*$/ + var_list(global_variables, binding) + + when /^\s*l(?:ocal)?\s*$/ + var_list(eval("local_variables", binding), binding) + + when /^\s*i(?:nstance)?\s+/ + obj = debug_eval($', binding) + var_list(obj.instance_variables, obj.instance_eval{binding()}) + + when /^\s*c(?:onst(?:ant)?)?\s+/ + obj = debug_eval($', binding) + unless obj.kind_of? Module + stdout.print "Should be Class/Module: ", $', "\n" + else + var_list(obj.constants, obj.module_eval{binding()}) + end + end + end + + def debug_method_info(input, binding) + case input + when /^i(:?nstance)?\s+/ + obj = debug_eval($', binding) + + len = 0 + for v in obj.methods.sort + len += v.size + 1 + if len > 70 + len = v.size + 1 + stdout.print "\n" + end + stdout.print v, " " + end + stdout.print "\n" + + else + obj = debug_eval(input, binding) + unless obj.kind_of? 
Module + stdout.print "Should be Class/Module: ", input, "\n" + else + len = 0 + for v in obj.instance_methods(false).sort + len += v.size + 1 + if len > 70 + len = v.size + 1 + stdout.print "\n" + end + stdout.print v, " " + end + stdout.print "\n" + end + end + end + + def thnum + num = DEBUGGER__.instance_eval{@thread_list[Thread.current]} + unless num + DEBUGGER__.make_thread_list + num = DEBUGGER__.instance_eval{@thread_list[Thread.current]} + end + num + end + + def debug_command(file, line, id, binding) + MUTEX.lock + unless defined?($debugger_restart) and $debugger_restart + callcc{|c| $debugger_restart = c} + end + set_last_thread(Thread.current) + frame_pos = 0 + binding_file = file + binding_line = line + previous_line = nil + if ENV['EMACS'] + stdout.printf "\032\032%s:%d:\n", binding_file, binding_line + else + stdout.printf "%s:%d:%s", binding_file, binding_line, + line_at(binding_file, binding_line) + end + @frames[0] = [binding, file, line, id] + display_expressions(binding) + prompt = true + while prompt and input = readline("(rdb:%d) "%thnum(), true) + catch(:debug_error) do + if input == "" + next unless DEBUG_LAST_CMD[0] + input = DEBUG_LAST_CMD[0] + stdout.print input, "\n" + else + DEBUG_LAST_CMD[0] = input + end + + case input + when /^\s*tr(?:ace)?(?:\s+(on|off))?(?:\s+(all))?$/ + if defined?( $2 ) + if $1 == 'on' + set_trace_all true + else + set_trace_all false + end + elsif defined?( $1 ) + if $1 == 'on' + set_trace true + else + set_trace false + end + end + if trace? 
+ stdout.print "Trace on.\n" + else + stdout.print "Trace off.\n" + end + + when /^\s*b(?:reak)?\s+(?:(.+):)?([^.:]+)$/ + pos = $2 + if $1 + klass = debug_silent_eval($1, binding) + file = $1 + end + if pos =~ /^\d+$/ + pname = pos + pos = pos.to_i + else + pname = pos = pos.intern.id2name + end + break_points.push [true, 0, klass || file, pos] + stdout.printf "Set breakpoint %d at %s:%s\n", break_points.size, klass || file, pname + + when /^\s*b(?:reak)?\s+(.+)[#.]([^.:]+)$/ + pos = $2.intern.id2name + klass = debug_eval($1, binding) + break_points.push [true, 0, klass, pos] + stdout.printf "Set breakpoint %d at %s.%s\n", break_points.size, klass, pos + + when /^\s*wat(?:ch)?\s+(.+)$/ + exp = $1 + break_points.push [true, 1, exp] + stdout.printf "Set watchpoint %d:%s\n", break_points.size, exp + + when /^\s*b(?:reak)?$/ + if break_points.find{|b| b[1] == 0} + n = 1 + stdout.print "Breakpoints:\n" + break_points.each do |b| + if b[0] and b[1] == 0 + stdout.printf " %d %s:%s\n", n, b[2], b[3] + end + n += 1 + end + end + if break_points.find{|b| b[1] == 1} + n = 1 + stdout.print "\n" + stdout.print "Watchpoints:\n" + for b in break_points + if b[0] and b[1] == 1 + stdout.printf " %d %s\n", n, b[2] + end + n += 1 + end + end + if break_points.size == 0 + stdout.print "No breakpoints\n" + else + stdout.print "\n" + end + + when /^\s*del(?:ete)?(?:\s+(\d+))?$/ + pos = $1 + unless pos + input = readline("Clear all breakpoints? 
(y/n) ", false) + if input == "y" + for b in break_points + b[0] = false + end + end + else + pos = pos.to_i + if break_points[pos-1] + break_points[pos-1][0] = false + else + stdout.printf "Breakpoint %d is not defined\n", pos + end + end + + when /^\s*disp(?:lay)?\s+(.+)$/ + exp = $1 + display.push [true, exp] + stdout.printf "%d: ", display.size + display_expression(exp, binding) + + when /^\s*disp(?:lay)?$/ + display_expressions(binding) + + when /^\s*undisp(?:lay)?(?:\s+(\d+))?$/ + pos = $1 + unless pos + input = readline("Clear all expressions? (y/n) ", false) + if input == "y" + for d in display + d[0] = false + end + end + else + pos = pos.to_i + if display[pos-1] + display[pos-1][0] = false + else + stdout.printf "Display expression %d is not defined\n", pos + end + end + + when /^\s*c(?:ont)?$/ + prompt = false + + when /^\s*s(?:tep)?(?:\s+(\d+))?$/ + if $1 + lev = $1.to_i + else + lev = 1 + end + @stop_next = lev + prompt = false + + when /^\s*n(?:ext)?(?:\s+(\d+))?$/ + if $1 + lev = $1.to_i + else + lev = 1 + end + @stop_next = lev + @no_step = @frames.size - frame_pos + prompt = false + + when /^\s*w(?:here)?$/, /^\s*f(?:rame)?$/ + display_frames(frame_pos) + + when /^\s*l(?:ist)?(?:\s+(.+))?$/ + if not $1 + b = previous_line ? previous_line + 10 : binding_line - 5 + e = b + 9 + elsif $1 == '-' + b = previous_line ? 
previous_line - 10 : binding_line - 5 + e = b + 9 + else + b, e = $1.split(/[-,]/) + if e + b = b.to_i + e = e.to_i + else + b = b.to_i - 5 + e = b + 9 + end + end + previous_line = b + display_list(b, e, binding_file, binding_line) + + when /^\s*up(?:\s+(\d+))?$/ + previous_line = nil + if $1 + lev = $1.to_i + else + lev = 1 + end + frame_pos += lev + if frame_pos >= @frames.size + frame_pos = @frames.size - 1 + stdout.print "At toplevel\n" + end + binding, binding_file, binding_line = @frames[frame_pos] + stdout.print format_frame(frame_pos) + + when /^\s*down(?:\s+(\d+))?$/ + previous_line = nil + if $1 + lev = $1.to_i + else + lev = 1 + end + frame_pos -= lev + if frame_pos < 0 + frame_pos = 0 + stdout.print "At stack bottom\n" + end + binding, binding_file, binding_line = @frames[frame_pos] + stdout.print format_frame(frame_pos) + + when /^\s*fin(?:ish)?$/ + if frame_pos == @frames.size + stdout.print "\"finish\" not meaningful in the outermost frame.\n" + else + @finish_pos = @frames.size - frame_pos + frame_pos = 0 + prompt = false + end + + when /^\s*cat(?:ch)?(?:\s+(.+))?$/ + if $1 + excn = $1 + if excn == 'off' + @catch = nil + stdout.print "Clear catchpoint.\n" + else + @catch = excn + stdout.printf "Set catchpoint %s.\n", @catch + end + else + if @catch + stdout.printf "Catchpoint %s.\n", @catch + else + stdout.print "No catchpoint.\n" + end + end + + when /^\s*q(?:uit)?$/ + input = readline("Really quit? (y/n) ", false) + if input == "y" + exit! # exit -> exit!: No graceful way to stop threads... 
+ end + + when /^\s*v(?:ar)?\s+/ + debug_variable_info($', binding) + + when /^\s*m(?:ethod)?\s+/ + debug_method_info($', binding) + + when /^\s*th(?:read)?\s+/ + if DEBUGGER__.debug_thread_info($', binding) == :cont + prompt = false + end + + when /^\s*pp\s+/ + PP.pp(debug_eval($', binding), stdout) + + when /^\s*p\s+/ + stdout.printf "%s\n", debug_eval($', binding).inspect + + when /^\s*r(?:estart)?$/ + $debugger_restart.call + + when /^\s*h(?:elp)?$/ + debug_print_help() + + else + v = debug_eval(input, binding) + stdout.printf "%s\n", v.inspect + end + end + end + MUTEX.unlock + resume_all + end + + def debug_print_help + stdout.print < + b[reak] [class.] + set breakpoint to some position + wat[ch] set watchpoint to some expression + cat[ch] (|off) set catchpoint to an exception + b[reak] list breakpoints + cat[ch] show catchpoint + del[ete][ nnn] delete some or all breakpoints + disp[lay] add expression into display expression list + undisp[lay][ nnn] delete one particular or all display expressions + c[ont] run until program ends or hit breakpoint + s[tep][ nnn] step (into methods) one line or till line nnn + n[ext][ nnn] go over one line or till line nnn + w[here] display frames + f[rame] alias for where + l[ist][ (-|nn-mm)] list program, - lists backwards + nn-mm lists given lines + up[ nn] move to higher frame + down[ nn] move to lower frame + fin[ish] return to outer frame + tr[ace] (on|off) set trace mode of current thread + tr[ace] (on|off) all set trace mode of all threads + q[uit] exit from debugger + v[ar] g[lobal] show global variables + v[ar] l[ocal] show local variables + v[ar] i[nstance] show instance variables of object + v[ar] c[onst] show constants of object + m[ethod] i[nstance] show methods of object + m[ethod] show instance methods of class or module + th[read] l[ist] list all threads + th[read] c[ur[rent]] show current thread + th[read] [sw[itch]] switch thread context to nnn + th[read] stop stop thread nnn + th[read] resume resume thread 
nnn + p expression evaluate expression and print its value + h[elp] print this help + evaluate +EOHELP + end + + def display_expressions(binding) + n = 1 + for d in display + if d[0] + stdout.printf "%d: ", n + display_expression(d[1], binding) + end + n += 1 + end + end + + def display_expression(exp, binding) + stdout.printf "%s = %s\n", exp, debug_silent_eval(exp, binding).to_s + end + + def frame_set_pos(file, line) + if @frames[0] + @frames[0][1] = file + @frames[0][2] = line + end + end + + def display_frames(pos) + 0.upto(@frames.size - 1) do |n| + if n == pos + stdout.print "--> " + else + stdout.print " " + end + stdout.print format_frame(n) + end + end + + def format_frame(pos) + bind, file, line, id = @frames[pos] + sprintf "#%d %s:%s%s\n", pos + 1, file, line, + (id ? ":in `#{id.id2name}'" : "") + end + + def display_list(b, e, file, line) + stdout.printf "[%d, %d] in %s\n", b, e, file + if lines = SCRIPT_LINES__[file] and lines != true + b.upto(e) do |n| + if n > 0 && lines[n-1] + if n == line + stdout.printf "=> %d %s\n", n, lines[n-1].chomp + else + stdout.printf " %d %s\n", n, lines[n-1].chomp + end + end + end + else + stdout.printf "No sourcefile available for %s\n", file + end + end + + def line_at(file, line) + lines = SCRIPT_LINES__[file] + if lines + return "\n" if lines == true + line = lines[line-1] + return "\n" unless line + return line + end + return "\n" + end + + def debug_funcname(id) + if id.nil? + "toplevel" + else + id.id2name + end + end + + def check_break_points(file, klass, pos, binding, id) + return false if break_points.empty? 
+ n = 1 + for b in break_points + if b[0] # valid + if b[1] == 0 # breakpoint + if (b[2] == file and b[3] == pos) or + (klass and b[2] == klass and b[3] == pos) + stdout.printf "Breakpoint %d, %s at %s:%s\n", n, debug_funcname(id), file, pos + return true + end + elsif b[1] == 1 # watchpoint + if debug_silent_eval(b[2], binding) + stdout.printf "Watchpoint %d, %s at %s:%s\n", n, debug_funcname(id), file, pos + return true + end + end + end + n += 1 + end + return false + end + + def excn_handle(file, line, id, binding) + if $!.class <= SystemExit + set_trace_func nil + exit + end + + if @catch and ($!.class.ancestors.find { |e| e.to_s == @catch }) + stdout.printf "%s:%d: `%s' (%s)\n", file, line, $!, $!.class + fs = @frames.size + tb = caller(0)[-fs..-1] + if tb + for i in tb + stdout.printf "\tfrom %s\n", i + end + end + suspend_all + debug_command(file, line, id, binding) + end + end + + def trace_func(event, file, line, id, binding, klass) + Tracer.trace_func(event, file, line, id, binding, klass) if trace? + context(Thread.current).check_suspend + @file = file + @line = line + case event + when 'line' + frame_set_pos(file, line) + if !@no_step or @frames.size == @no_step + @stop_next -= 1 + @stop_next = -1 if @stop_next < 0 + elsif @frames.size < @no_step + @stop_next = 0 # break here before leaving... + else + # nothing to do. skipped. 
+ end + if @stop_next == 0 or check_break_points(file, nil, line, binding, id) + @no_step = nil + suspend_all + debug_command(file, line, id, binding) + end + + when 'call' + @frames.unshift [binding, file, line, id] + if check_break_points(file, klass, id.id2name, binding, id) + suspend_all + debug_command(file, line, id, binding) + end + + when 'c-call' + frame_set_pos(file, line) + + when 'class' + @frames.unshift [binding, file, line, id] + + when 'return', 'end' + if @frames.size == @finish_pos + @stop_next = 1 + @finish_pos = 0 + end + @frames.shift + + when 'raise' + excn_handle(file, line, id, binding) + + end + @last_file = file + end +end + +trap("INT") { DEBUGGER__.interrupt } +@last_thread = Thread::main +@max_thread = 1 +@thread_list = {Thread::main => 1} +@break_points = [] +@display = [] +@waiting = [] +@stdout = STDOUT + +class << DEBUGGER__ + def stdout + @stdout + end + + def stdout=(s) + @stdout = s + end + + def display + @display + end + + def break_points + @break_points + end + + def waiting + @waiting + end + + def set_trace( arg ) + MUTEX.synchronize do + make_thread_list + for th, in @thread_list + context(th).set_trace arg + end + end + arg + end + + def set_last_thread(th) + @last_thread = th + end + + def suspend + MUTEX.synchronize do + make_thread_list + for th, in @thread_list + next if th == Thread.current + context(th).set_suspend + end + end + # Schedule other threads to suspend as soon as possible. + Thread.pass + end + + def resume + MUTEX.synchronize do + make_thread_list + @thread_list.each do |th,| + next if th == Thread.current + context(th).clear_suspend + end + waiting.each do |th| + th.run + end + waiting.clear + end + # Schedule other threads to restart as soon as possible. 
+ Thread.pass + end + + def context(thread=Thread.current) + c = thread[:__debugger_data__] + unless c + thread[:__debugger_data__] = c = Context.new + end + c + end + + def interrupt + context(@last_thread).stop_next + end + + def get_thread(num) + th = @thread_list.key(num) + unless th + @stdout.print "No thread ##{num}\n" + throw :debug_error + end + th + end + + def thread_list(num) + th = get_thread(num) + if th == Thread.current + @stdout.print "+" + else + @stdout.print " " + end + @stdout.printf "%d ", num + @stdout.print th.inspect, "\t" + file = context(th).instance_eval{@file} + if file + @stdout.print file,":",context(th).instance_eval{@line} + end + @stdout.print "\n" + end + + def thread_list_all + for th in @thread_list.values.sort + thread_list(th) + end + end + + def make_thread_list + hash = {} + for th in Thread::list + if @thread_list.key? th + hash[th] = @thread_list[th] + else + @max_thread += 1 + hash[th] = @max_thread + end + end + @thread_list = hash + end + + def debug_thread_info(input, binding) + case input + when /^l(?:ist)?/ + make_thread_list + thread_list_all + + when /^c(?:ur(?:rent)?)?$/ + make_thread_list + thread_list(@thread_list[Thread.current]) + + when /^(?:sw(?:itch)?\s+)?(\d+)/ + make_thread_list + th = get_thread($1.to_i) + if th == Thread.current + @stdout.print "It's the current thread.\n" + else + thread_list(@thread_list[th]) + context(th).stop_next + th.run + return :cont + end + + when /^stop\s+(\d+)/ + make_thread_list + th = get_thread($1.to_i) + if th == Thread.current + @stdout.print "It's the current thread.\n" + elsif th.stop? + @stdout.print "Already stopped.\n" + else + thread_list(@thread_list[th]) + context(th).suspend + end + + when /^resume\s+(\d+)/ + make_thread_list + th = get_thread($1.to_i) + if th == Thread.current + @stdout.print "It's the current thread.\n" + elsif !th.stop? + @stdout.print "Already running." 
+ else + thread_list(@thread_list[th]) + th.run + end + end + end +end + +stdout.printf "Debug.rb\n" +stdout.printf "Emacs support available.\n\n" +RubyVM::InstructionSequence.compile_option = { + trace_instruction: true +} +set_trace_func proc { |event, file, line, id, binding, klass, *rest| + DEBUGGER__.context.trace_func event, file, line, id, binding, klass +} +end diff --git a/lib/delegate.rb b/lib/delegate.rb new file mode 100644 index 0000000..cb16adb --- /dev/null +++ b/lib/delegate.rb @@ -0,0 +1,311 @@ +# = delegate -- Support for the Delegation Pattern +# +# Documentation by James Edward Gray II and Gavin Sinclair +# +# == Introduction +# +# This library provides three different ways to delegate method calls to an +# object. The easiest to use is SimpleDelegator. Pass an object to the +# constructor and all methods supported by the object will be delegated. This +# object can be changed later. +# +# Going a step further, the top level DelegateClass method allows you to easily +# setup delegation through class inheritance. This is considerably more +# flexible and thus probably the most common use for this library. +# +# Finally, if you need full control over the delegation scheme, you can inherit +# from the abstract class Delegator and customize as needed. (If you find +# yourself needing this control, have a look at _forwardable_, also in the +# standard library. It may suit your needs better.) +# +# == Notes +# +# Be advised, RDoc will not detect delegated methods. +# +# delegate.rb provides full-class delegation via the +# DelegateClass() method. For single-method delegation via +# def_delegator(), see forwardable.rb. +# +# == Examples +# +# === SimpleDelegator +# +# Here's a simple example that takes advantage of the fact that +# SimpleDelegator's delegation object can be changed at any time. 
+# +# class Stats +# def initialize +# @source = SimpleDelegator.new([]) +# end +# +# def stats( records ) +# @source.__setobj__(records) +# +# "Elements: #{@source.size}\n" + +# " Non-Nil: #{@source.compact.size}\n" + +# " Unique: #{@source.uniq.size}\n" +# end +# end +# +# s = Stats.new +# puts s.stats(%w{James Edward Gray II}) +# puts +# puts s.stats([1, 2, 3, nil, 4, 5, 1, 2]) +# +# Prints: +# +# Elements: 4 +# Non-Nil: 4 +# Unique: 4 +# +# Elements: 8 +# Non-Nil: 7 +# Unique: 6 +# +# === DelegateClass() +# +# Here's a sample of use from tempfile.rb. +# +# A _Tempfile_ object is really just a _File_ object with a few special rules +# about storage location and/or when the File should be deleted. That makes for +# an almost textbook perfect example of how to use delegation. +# +# class Tempfile < DelegateClass(File) +# # constant and class member data initialization... +# +# def initialize(basename, tmpdir=Dir::tmpdir) +# # build up file path/name in var tmpname... +# +# @tmpfile = File.open(tmpname, File::RDWR|File::CREAT|File::EXCL, 0600) +# +# # ... +# +# super(@tmpfile) +# +# # below this point, all methods of File are supported... +# end +# +# # ... +# end +# +# === Delegator +# +# SimpleDelegator's implementation serves as a nice example here. +# +# class SimpleDelegator < Delegator +# def initialize(obj) +# super # pass obj to Delegator constructor, required +# @delegate_sd_obj = obj # store obj for future use +# end +# +# def __getobj__ +# @delegate_sd_obj # return object we are delegating to, required +# end +# +# def __setobj__(obj) +# @delegate_sd_obj = obj # change delegation object, a feature we're providing +# end +# +# # ... +# end + +# +# Delegator is an abstract class used to build delegator pattern objects from +# subclasses. Subclasses should redefine \_\_getobj\_\_. For a concrete +# implementation, see SimpleDelegator. 
+# +class Delegator + [:to_s,:inspect,:=~,:!~,:===].each do |m| + undef_method m + end + + # + # Pass in the _obj_ to delegate method calls to. All methods supported by + # _obj_ will be delegated to. + # + def initialize(obj) + __setobj__(obj) + end + + # Handles the magic of delegation through \_\_getobj\_\_. + def method_missing(m, *args, &block) + begin + target = self.__getobj__ + unless target.respond_to?(m) + super(m, *args, &block) + else + target.__send__(m, *args, &block) + end + rescue Exception + $@.delete_if{|s| %r"\A#{Regexp.quote(__FILE__)}:\d+:in `method_missing'\z"o =~ s} + ::Kernel::raise + end + end + + # + # Checks for a method provided by this the delegate object by fowarding the + # call through \_\_getobj\_\_. + # + def respond_to?(m, include_private = false) + return true if super + return self.__getobj__.respond_to?(m, include_private) + end + + # + # Returns true if two objects are considered same. + # + def ==(obj) + return true if obj.equal?(self) + self.__getobj__ == obj + end + + # + # This method must be overridden by subclasses and should return the object + # method calls are being delegated to. + # + def __getobj__ + raise NotImplementedError, "need to define `__getobj__'" + end + + # + # This method must be overridden by subclasses and change the object delegate + # to _obj_. + # + def __setobj__(obj) + raise NotImplementedError, "need to define `__setobj__'" + end + + # Serialization support for the object returned by \_\_getobj\_\_. + def marshal_dump + __getobj__ + end + # Reinitializes delegation from a serialized object. + def marshal_load(obj) + __setobj__(obj) + end + + # Clone support for the object returned by \_\_getobj\_\_. + def clone + new = super + new.__setobj__(__getobj__.clone) + new + end + # Duplication support for the object returned by \_\_getobj\_\_. 
+ def dup + new = super + new.__setobj__(__getobj__.dup) + new + end +end + +# +# A concrete implementation of Delegator, this class provides the means to +# delegate all supported method calls to the object passed into the constructor +# and even to change the object being delegated to at a later time with +# \_\_setobj\_\_ . +# +class SimpleDelegator Edward + # names.__setobj__(%w{Gavin Sinclair}) + # puts names[1] # => Sinclair + # + def __setobj__(obj) + raise ArgumentError, "cannot delegate to self" if self.equal?(obj) + @delegate_sd_obj = obj + end +end + +# :stopdoc: +def Delegator.delegating_block(mid) + lambda do |*args, &block| + begin + __getobj__.__send__(mid, *args, &block) + rescue + re = /\A#{Regexp.quote(__FILE__)}:#{__LINE__-2}:/o + $!.backtrace.delete_if {|t| re =~ t} + raise + end + end +end +# :startdoc: + +# +# The primary interface to this library. Use to setup delegation when defining +# your class. +# +# class MyClass < DelegateClass( ClassToDelegateTo ) # Step 1 +# def initialize +# super(obj_of_ClassToDelegateTo) # Step 2 +# end +# end +# +def DelegateClass(superclass) + klass = Class.new(Delegator) + methods = superclass.public_instance_methods(true) + methods -= ::Delegator.public_instance_methods + methods -= [:to_s,:inspect,:=~,:!~,:===] + klass.module_eval { + def __getobj__ # :nodoc: + @delegate_dc_obj + end + def __setobj__(obj) # :nodoc: + raise ArgumentError, "cannot delegate to self" if self.equal?(obj) + @delegate_dc_obj = obj + end + } + klass.module_eval do + methods.each do |method| + define_method(method, Delegator.delegating_block(method)) + end + end + return klass +end + +# :enddoc: + +if __FILE__ == $0 + class ExtArray true + p foo2.iter{[55,true]} # => true + foo2.error # raise error! 
+end diff --git a/lib/drb.rb b/lib/drb.rb new file mode 100644 index 0000000..93cc811 --- /dev/null +++ b/lib/drb.rb @@ -0,0 +1,2 @@ +require 'drb/drb' + diff --git a/lib/e2mmap.rb b/lib/e2mmap.rb new file mode 100644 index 0000000..b8d1d44 --- /dev/null +++ b/lib/e2mmap.rb @@ -0,0 +1,172 @@ +# +# e2mmap.rb - for ruby 1.1 +# $Release Version: 2.0$ +# $Revision: 1.10 $ +# by Keiju ISHITSUKA +# +# -- +# Usage: +# +# U1) +# class Foo +# extend Exception2MessageMapper +# def_e2message ExistingExceptionClass, "message..." +# def_exception :NewExceptionClass, "message..."[, superclass] +# ... +# end +# +# U2) +# module Error +# extend Exception2MessageMapper +# def_e2meggage ExistingExceptionClass, "message..." +# def_exception :NewExceptionClass, "message..."[, superclass] +# ... +# end +# class Foo +# include Error +# ... +# end +# +# foo = Foo.new +# foo.Fail .... +# +# U3) +# module Error +# extend Exception2MessageMapper +# def_e2message ExistingExceptionClass, "message..." +# def_exception :NewExceptionClass, "message..."[, superclass] +# ... +# end +# class Foo +# extend Exception2MessageMapper +# include Error +# ... +# end +# +# Foo.Fail NewExceptionClass, arg... +# Foo.Fail ExistingExceptionClass, arg... +# +# +module Exception2MessageMapper + @RCS_ID='-$Id: e2mmap.rb,v 1.10 1999/02/17 12:33:17 keiju Exp keiju $-' + + E2MM = Exception2MessageMapper + + def E2MM.extend_object(cl) + super + cl.bind(self) unless cl < E2MM + end + + def bind(cl) + self.module_eval %[ + def Raise(err = nil, *rest) + Exception2MessageMapper.Raise(self.class, err, *rest) + end + alias Fail Raise + + def self.included(mod) + mod.extend Exception2MessageMapper + end + ] + end + + # Fail(err, *rest) + # err: exception + # rest: message arguments + # + def Raise(err = nil, *rest) + E2MM.Raise(self, err, *rest) + end + alias Fail Raise + alias fail Raise + + # def_e2message(c, m) + # c: exception + # m: message_form + # define exception c with message m. 
+ # + def def_e2message(c, m) + E2MM.def_e2message(self, c, m) + end + + # def_exception(n, m, s) + # n: exception_name + # m: message_form + # s: superclass(default: StandardError) + # define exception named ``c'' with message m. + # + def def_exception(n, m, s = StandardError) + E2MM.def_exception(self, n, m, s) + end + + # + # Private definitions. + # + # {[class, exp] => message, ...} + @MessageMap = {} + + # E2MM.def_e2message(k, e, m) + # k: class to define exception under. + # e: exception + # m: message_form + # define exception c with message m. + # + def E2MM.def_e2message(k, c, m) + E2MM.instance_eval{@MessageMap[[k, c]] = m} + c + end + + # E2MM.def_exception(k, n, m, s) + # k: class to define exception under. + # n: exception_name + # m: message_form + # s: superclass(default: StandardError) + # define exception named ``c'' with message m. + # + def E2MM.def_exception(k, n, m, s = StandardError) + n = n.id2name if n.kind_of?(Fixnum) + e = Class.new(s) + E2MM.instance_eval{@MessageMap[[k, e]] = m} + k.const_set(n, e) + end + + # Fail(klass, err, *rest) + # klass: class to define exception under. + # err: exception + # rest: message arguments + # + def E2MM.Raise(klass = E2MM, err = nil, *rest) + if form = e2mm_message(klass, err) + b = $@.nil? ? caller(1) : $@ + #p $@ + #p __FILE__ + b.shift if b[0] =~ /^#{Regexp.quote(__FILE__)}:/ + raise err, sprintf(form, *rest), b + else + E2MM.Fail E2MM, ErrNotRegisteredException, err.inspect + end + end + class < + EOF + puts template.result(binding) + +Prints: The value of x is: 42 + +More complex examples are given below. 
+ + +== Recognized Tags + +ERB recognizes certain tags in the provided template and converts them based +on the rules below: + + <% Ruby code -- inline with output %> + <%= Ruby expression -- replace with result %> + <%# comment -- ignored -- useful in testing %> + % a line of Ruby code -- treated as <% line %> (optional -- see ERB.new) + %% replaced with % if first thing on a line and % processing is used + <%% or %%> -- replace with <% or %> respectively + +All other text is passed through ERB filtering unchanged. + + +== Options + +There are several settings you can change when you use ERB: +* the nature of the tags that are recognized; +* the value of $SAFE under which the template is run; +* the binding used to resolve local variables in the template. + +See the ERB.new and ERB#result methods for more detail. + +== Character encodings + +ERB (or ruby code generated by ERB) returns a string in the same +character encoding as the input string. When the input string has +a magic comment, however, it returns a string in the encoding specified +by the magic comment. + + # -*- coding: UTF-8 -*- + require 'erb' + + template = ERB.new < + \_\_ENCODING\_\_ is <%= \_\_ENCODING\_\_ %>. + EOF + puts template.result + +Prints: \_\_ENCODING\_\_ is Big5. + + +== Examples + +=== Plain Text + +ERB is useful for any generic templating situation. Note that in this example, we use the +convenient "% at start of line" tag, and we quote the template literally with +%q{...} to avoid trouble with the backslash. + + require "erb" + + # Create template. + template = %q{ + From: James Edward Gray II + To: <%= to %> + Subject: Addressing Needs + + <%= to[/\w+/] %>: + + Just wanted to send a quick note assuring that your needs are being + addressed. + + I want you to know that my team will keep working on the issues, + especially: + + <%# ignore numerous minor requests -- focus on priorities %> + % priorities.each do |priority| + * <%= priority %> + % end + + Thanks for your patience. 
+ + James Edward Gray II + }.gsub(/^ /, '') + + message = ERB.new(template, 0, "%<>") + + # Set up template data. + to = "Community Spokesman " + priorities = [ "Run Ruby Quiz", + "Document Modules", + "Answer Questions on Ruby Talk" ] + + # Produce result. + email = message.result + puts email + +Generates: + + From: James Edward Gray II + To: Community Spokesman + Subject: Addressing Needs + + Community: + + Just wanted to send a quick note assuring that your needs are being addressed. + + I want you to know that my team will keep working on the issues, especially: + + * Run Ruby Quiz + * Document Modules + * Answer Questions on Ruby Talk + + Thanks for your patience. + + James Edward Gray II + +=== Ruby in HTML + +ERB is often used in .rhtml files (HTML with embedded Ruby). Notice the need in +this example to provide a special binding when the template is run, so that the instance +variables in the Product object can be resolved. + + require "erb" + + # Build template data class. + class Product + def initialize( code, name, desc, cost ) + @code = code + @name = name + @desc = desc + @cost = cost + + @features = [ ] + end + + def add_feature( feature ) + @features << feature + end + + # Support templating of member data. + def get_binding + binding + end + + # ... + end + + # Create template. + template = %{ + + Ruby Toys -- <%= @name %> + + +

<%= @name %> (<%= @code %>)

+

<%= @desc %>

+ +
    + <% @features.each do |f| %> +
  • <%= f %>
  • + <% end %> +
+ +

+ <% if @cost < 10 %> + Only <%= @cost %>!!! + <% else %> + Call for a price, today! + <% end %> +

+ + + + }.gsub(/^ /, '') + + rhtml = ERB.new(template) + + # Set up template data. + toy = Product.new( "TZ-1002", + "Rubysapien", + "Geek's Best Friend! Responds to Ruby commands...", + 999.95 ) + toy.add_feature("Listens for verbal commands in the Ruby language!") + toy.add_feature("Ignores Perl, Java, and all C variants.") + toy.add_feature("Karate-Chop Action!!!") + toy.add_feature("Matz signature on left leg.") + toy.add_feature("Gem studded eyes... Rubies, of course!") + + # Produce result. + rhtml.run(toy.get_binding) + +Generates (some blank lines removed): + + + Ruby Toys -- Rubysapien + + +

Rubysapien (TZ-1002)

+

Geek's Best Friend! Responds to Ruby commands...

+ +
    +
  • Listens for verbal commands in the Ruby language!
  • +
  • Ignores Perl, Java, and all C variants.
  • +
  • Karate-Chop Action!!!
  • +
  • Matz signature on left leg.
  • +
  • Gem studded eyes... Rubies, of course!
  • +
+ +

+ Call for a price, today! +

+
+
+
+
+
+== Notes
+
+There are a variety of templating solutions available in various Ruby projects:
+* ERB's big brother, eRuby, works the same but is written in C for speed;
+* Amrita (smart at producing HTML/XML);
+* cs/Template (written in C for speed);
+* RDoc, distributed with Ruby, uses its own template engine, which can be reused elsewhere;
+* and others; search the RAA.
+
+Rails, the web application framework, uses ERB to create views.
+=end
+class ERB
+  Revision = '$Date:: 2009-01-17 21:20:08 +0900#$' #'
+
+  # Returns revision information for the erb.rb module.
+  def self.version
+    "erb.rb [2.1.0 #{ERB::Revision.split[1]}]"
+  end
+end
+
+#--
+# ERB::Compiler
+class ERB
+  # Translates a template string into Ruby source. A Scanner splits the
+  # template into tokens, and #compile turns those tokens into statements
+  # that are collected by a Buffer.
+  class Compiler # :nodoc:
+    # Wraps the Ruby code found on a "%"-prefixed template line so that
+    # #compile can tell it apart from ordinary String tokens.
+    class PercentLine # :nodoc:
+      def initialize(str)
+        @value = str
+      end
+      attr_reader :value
+      alias :to_s :value
+
+      def empty?
+        @value.empty?
+      end
+    end
+
+    # Base class of the tokenizers. Subclasses are registered per
+    # [trim_mode, percent] pair; make_scanner instantiates the registered
+    # class, or the default scanner when no class is registered for the pair.
+    class Scanner # :nodoc:
+      @scanner_map = {}
+      # Registers +klass+ for the given [trim_mode, percent] pair. A later
+      # registration for the same pair replaces the earlier one.
+      def self.regist_scanner(klass, trim_mode, percent)
+        @scanner_map[[trim_mode, percent]] = klass
+      end
+
+      def self.default_scanner=(klass)
+        @default_scanner = klass
+      end
+
+      def self.make_scanner(src, trim_mode, percent)
+        klass = @scanner_map.fetch([trim_mode, percent], @default_scanner)
+        klass.new(src, trim_mode, percent)
+      end
+
+      # +trim_mode+ and +percent+ are accepted for interface compatibility
+      # but are not stored by the base class.
+      def initialize(src, trim_mode, percent)
+        @src = src
+        @stag = nil
+      end
+      # The start tag currently open ('<%', '<%=' or '<%#'), or nil when the
+      # scanner is in plain text. It is assigned by Compiler#compile.
+      attr_accessor :stag
+
+      def scan; end
+    end
+
+    # Default scanner. It selects one of four line-scanning methods according
+    # to the trim mode and optionally handles "%" lines.
+    class TrimScanner < Scanner # :nodoc:
+      def initialize(src, trim_mode, percent)
+        super
+        @trim_mode = trim_mode
+        @percent = percent
+        if @trim_mode == '>'
+          @scan_line = self.method(:trim_line1)
+        elsif @trim_mode == '<>'
+          @scan_line = self.method(:trim_line2)
+        elsif @trim_mode == '-'
+          @scan_line = self.method(:explicit_trim_line)
+        else
+          @scan_line = self.method(:scan_line)
+        end
+      end
+      attr_accessor :stag
+
+      # Yields every token of the template to the block and returns nil.
+      # With percent processing enabled the source is fed line by line
+      # through #percent_line; otherwise the whole source is handed to the
+      # selected line scanner at once.
+      def scan(&block)
+        @stag = nil
+        if @percent
+          @src.each_line do |line|
+            percent_line(line, &block)
+          end
+        else
+          @scan_line.call(@src, &block)
+        end
+        nil
+      end
+
+      # Handles one line when percent processing is enabled. A line inside an
+      # open tag, or one not starting with "%", is scanned normally. A line
+      # starting with "%%" is scanned as text with the first "%" removed.
+      # Any other "%" line is yielded as a PercentLine without its newline.
+      def percent_line(line, &block)
+        if @stag || line[0] != ?%
+          return @scan_line.call(line, &block)
+        end
+
+        # Drops the leading "%"; this modifies +line+ in place.
+        line[0] = ''
+        if line[0] == ?%
+          @scan_line.call(line, &block)
+        else
+          yield(PercentLine.new(line.chomp))
+        end
+      end
+
+      # No trimming: yields each non-empty piece of text, tag and newline.
+      def scan_line(line)
+        line.scan(/(.*?)(<%%|%%>|<%=|<%#|<%|%>|\n|\z)/m) do |tokens|
+          tokens.each do |token|
+            next if token.empty?
+            yield(token)
+          end
+        end
+      end
+
+      # Trim mode '>': a "%>" immediately followed by a newline is yielded
+      # as '%>' and :cr instead of '%>' and "\n".
+      def trim_line1(line)
+        line.scan(/(.*?)(<%%|%%>|<%=|<%#|<%|%>\n|%>|\n|\z)/m) do |tokens|
+          tokens.each do |token|
+            next if token.empty?
+            if token == "%>\n"
+              yield('%>')
+              yield(:cr)
+            else
+              yield(token)
+            end
+          end
+        end
+      end
+
+      # Trim mode '<>': like #trim_line1, but :cr replaces the newline only
+      # when the first token of the current line (+head+) was an ERB start
+      # tag; otherwise the newline is yielded as "\n".
+      def trim_line2(line)
+        head = nil
+        line.scan(/(.*?)(<%%|%%>|<%=|<%#|<%|%>\n|%>|\n|\z)/m) do |tokens|
+          tokens.each do |token|
+            next if token.empty?
+            head = token unless head
+            if token == "%>\n"
+              yield('%>')
+              if is_erb_stag?(head)
+                yield(:cr)
+              else
+                yield("\n")
+              end
+              head = nil
+            else
+              yield(token)
+              head = nil if token == "\n"
+            end
+          end
+        end
+      end
+
+      # Trim mode '-': outside a tag, "<%-" (with any blanks or tabs before
+      # it at the start of a line) is yielded as '<%'. Inside a tag, "-%>"
+      # is yielded as '%>', followed by :cr when a newline came right after.
+      def explicit_trim_line(line)
+        line.scan(/(.*?)(^[ \t]*<%\-|<%\-|<%%|%%>|<%=|<%#|<%|-%>\n|-%>|%>|\z)/m) do |tokens|
+          tokens.each do |token|
+            next if token.empty?
+            if @stag.nil? && /[ \t]*<%-/ =~ token
+              yield('<%')
+            elsif @stag && token == "-%>\n"
+              yield('%>')
+              yield(:cr)
+            elsif @stag && token == '-%>'
+              yield('%>')
+            else
+              yield(token)
+            end
+          end
+        end
+      end
+
+      ERB_STAG = %w(<%= <%# <%)
+      def is_erb_stag?(s)
+        ERB_STAG.member?(s)
+      end
+    end
+
+    Scanner.default_scanner = TrimScanner
+
+    # Scanner for the [nil, false] pair (no trimming, no percent lines),
+    # implemented with String#scan.
+    class SimpleScanner < Scanner # :nodoc:
+      def scan
+        @src.scan(/(.*?)(<%%|%%>|<%=|<%#|<%|%>|\n|\z)/m) do |tokens|
+          tokens.each do |token|
+            next if token.empty?
+            yield(token)
+          end
+        end
+      end
+    end
+
+    Scanner.regist_scanner(SimpleScanner, nil, false)
+
+    # StringScanner-based variants. They are defined and registered only when
+    # 'strscan' can be loaded; the LoadError rescue below leaves the
+    # registrations made so far untouched.
+    begin
+      require 'strscan'
+      # Registered for [nil, false] below, replacing SimpleScanner. It may
+      # yield nil or empty tokens; Compiler#compile skips those.
+      class SimpleScanner2 < Scanner # :nodoc:
+        def scan
+          stag_reg = /(.*?)(<%%|<%=|<%#|<%|\z)/m
+          etag_reg = /(.*?)(%%>|%>|\z)/m
+          scanner = StringScanner.new(@src)
+          while ! scanner.eos?
+            # Look for an end tag while a start tag is open, else a start tag.
+            scanner.scan(@stag ? etag_reg : stag_reg)
+            yield(scanner[1])
+            yield(scanner[2])
+          end
+        end
+      end
+      Scanner.regist_scanner(SimpleScanner2, nil, false)
+
+      # StringScanner counterpart of TrimScanner#explicit_trim_line,
+      # registered for trim mode '-' without percent processing.
+      class ExplicitScanner < Scanner # :nodoc:
+        def scan
+          stag_reg = /(.*?)(^[ \t]*<%-|<%%|<%=|<%#|<%-|<%|\z)/m
+          etag_reg = /(.*?)(%%>|-%>|%>|\z)/m
+          scanner = StringScanner.new(@src)
+          while ! scanner.eos?
+            scanner.scan(@stag ? etag_reg : stag_reg)
+            yield(scanner[1])
+
+            elem = scanner[2]
+            if /[ \t]*<%-/ =~ elem
+              yield('<%')
+            elsif elem == '-%>'
+              yield('%>')
+              # Consume the newline (or end of input) that follows "-%>".
+              yield(:cr) if scanner.scan(/(\n|\z)/)
+            else
+              yield(elem)
+            end
+          end
+        end
+      end
+      Scanner.regist_scanner(ExplicitScanner, '-', false)
+
+    rescue LoadError
+    end
+
+    # Collects the generated script. Commands pushed for the current output
+    # line are joined with '; ' when the line is finished by #cr or #close.
+    class Buffer # :nodoc:
+      # Starts the script with a "#coding:" magic comment when +enc+ is
+      # given, then queues the compiler's pre_cmd statements.
+      def initialize(compiler, enc=nil)
+        @compiler = compiler
+        @line = []
+        @script = enc ? "#coding:#{enc.to_s}\n" : ""
+        @compiler.pre_cmd.each do |x|
+          push(x)
+        end
+      end
+      attr_reader :script
+
+      def push(cmd)
+        @line << cmd
+      end
+
+      # Finishes the current script line and starts a new one.
+      def cr
+        @script << (@line.join('; '))
+        @line = []
+        @script << "\n"
+      end
+
+      # Appends the compiler's post_cmd statements and flushes the last
+      # line. @line becomes nil afterwards, so a second call does nothing.
+      def close
+        return unless @line
+        @compiler.post_cmd.each do |x|
+          push(x)
+        end
+        @script << (@line.join('; '))
+        @line = nil
+      end
+    end
+
+    # Returns +s+ as a dumped String literal followed by one real newline per
+    # newline contained in +s+.
+    # NOTE(review): presumably this keeps line numbers of the generated
+    # script aligned with the template -- confirm.
+    def content_dump(s)
+      n = s.count("\n")
+      if n > 0
+        s.dump + "\n" * n
+      else
+        s.dump
+      end
+    end
+
+    # Compiles template +s+ and returns the pair [script, encoding].
+    # Raises ArgumentError when the encoding of +s+ is a dummy encoding.
+    # The encoding is taken from a magic comment if one is detected,
+    # otherwise from +s+ itself.
+    def compile(s)
+      enc = s.encoding
+      raise ArgumentError, "#{enc} is not ASCII compatible" if enc.dummy?
+      s = s.dup.force_encoding("ASCII-8BIT") # don't use constant Encoding::ASCII_8BIT for miniruby
+      enc = detect_magic_comment(s) || enc
+      out = Buffer.new(self, enc)
+
+      # Text or code accumulated since the last emitted statement.
+      content = ''
+      scanner = make_scanner(s)
+      scanner.scan do |token|
+        next if token.nil?
+        next if token == ''
+        if scanner.stag.nil?
+          # Outside a tag: accumulate literal text.
+          case token
+          when PercentLine
+            out.push("#{@put_cmd} #{content_dump(content)}") if content.size > 0
+            content = ''
+            out.push(token.to_s)
+            out.cr
+          when :cr
+            out.cr
+          when '<%', '<%=', '<%#'
+            scanner.stag = token
+            out.push("#{@put_cmd} #{content_dump(content)}") if content.size > 0
+            content = ''
+          when "\n"
+            content << "\n"
+            out.push("#{@put_cmd} #{content_dump(content)}")
+            content = ''
+          when '<%%'
+            # Escaped start tag: emit a literal "<%".
+            content << '<%'
+          else
+            content << token
+          end
+        else
+          # Inside a tag: accumulate Ruby code until the end tag.
+          case token
+          when '%>'
+            case scanner.stag
+            when '<%'
+              if content[-1] == ?\n
+                content.chop!
+                out.push(content)
+                out.cr
+              else
+                out.push(content)
+              end
+            when '<%='
+              out.push("#{@insert_cmd}((#{content}).to_s)")
+            when '<%#'
+              # Comment tag: nothing is emitted.
+              # out.push("# #{content_dump(content)}")
+            end
+            scanner.stag = nil
+            content = ''
+          when '%%>'
+            # Escaped end tag: emit a literal "%>".
+            content << '%>'
+          else
+            content << token
+          end
+        end
+      end
+      out.push("#{@put_cmd} #{content_dump(content)}") if content.size > 0
+      out.close
+      return out.script, enc
+    end
+
+    # Normalizes the user-supplied trim mode into the pair
+    # [percent, trim_mode]. The Integers 1 and 2 select '>' and '<>'.
+    # A String is searched for '%', '-', '<>' and '>', with '-' taking
+    # precedence over '<>' and '<>' over '>'. Anything else disables both.
+    def prepare_trim_mode(mode)
+      case mode
+      when 1
+        return [false, '>']
+      when 2
+        return [false, '<>']
+      when 0
+        return [false, nil]
+      when String
+        perc = mode.include?('%')
+        if mode.include?('-')
+          return [perc, '-']
+        elsif mode.include?('<>')
+          return [perc, '<>']
+        elsif mode.include?('>')
+          return [perc, '>']
+        else
+          [perc, nil]
+        end
+      else
+        return [false, nil]
+      end
+    end
+
+    def make_scanner(src)
+      Scanner.make_scanner(src, @trim_mode, @percent)
+    end
+
+    # By default both text and expression results are emitted with 'print',
+    # and no statements are added before or after the generated code.
+    def initialize(trim_mode)
+      @percent, @trim_mode = prepare_trim_mode(trim_mode)
+      @put_cmd = 'print'
+      @insert_cmd = @put_cmd
+      @pre_cmd = []
+      @post_cmd = []
+    end
+    attr_reader :percent, :trim_mode
+    attr_accessor :put_cmd, :insert_cmd, :pre_cmd, :post_cmd
+
+    private
+    # Looks for a "coding" magic comment in a leading <%# ... %> tag or,
+    # with percent processing enabled, in a leading "%#" line. Returns the
+    # Encoding found by Encoding.find, or nil when there is no such comment.
+    # NOTE(review): in the second pattern the trailing "-*-" is not escaped,
+    # so it matches a run of "-" rather than a literal "-*-" -- confirm that
+    # this is intended.
+    def detect_magic_comment(s)
+      if /\A<%#(.*)%>/ =~ s or (@percent and /\A%#(.*)/ =~ s)
+        comment = $1
+        comment = $1 if comment[/-\*-\s*(.*?)\s*-*-$/]
+        if %r"coding\s*[=:]\s*([[:alnum:]\-_]+)" =~ comment
+          # Drop an Emacs-style end-of-line suffix such as "-unix".
+          enc = $1.sub(/-(?:mac|dos|unix)/i, '')
+          enc = Encoding.find(enc)
+        end
+      end
+    end
+  end
+end
+
+#--
+# ERB
+class ERB
+  #
+  # Constructs a new ERB object with the template specified in _str_.
+  #
+  # An ERB object works by building a chunk of Ruby code that will output
+  # the completed template when run. If _safe_level_ is set to a non-nil value,
+  # ERB code will be run in a separate thread with <b>$SAFE</b> set to the
+  # provided level.
+  #
+  # If _trim_mode_ is passed a String containing one or more of the following
+  # modifiers, ERB will adjust its code generation as listed:
+  #
+  #     %  enables Ruby code processing for lines beginning with %
+  #     <> omit newline for lines starting with <% and ending in %>
+  #     >  omit newline for lines ending in %>
+  #     -  omit blanks before <%- at line start and the newline after -%>
+  #
+  # _eoutvar_ can be used to set the name of the variable ERB will build up
+  # its output in. This is useful when you need to run multiple ERB
+  # templates through the same binding and/or when you want to control where
+  # output ends up. Pass the name of the variable to be used inside a String.
+  #
+  # === Example
+  #
+  #  require "erb"
+  #
+  #  # build data class
+  #  class Listings
+  #    PRODUCT = { :name => "Chicken Fried Steak",
+  #                :desc => "A well messages pattie, breaded and fried.",
+  #                :cost => 9.95 }
+  #
+  #    attr_reader :product, :price
+  #
+  #    def initialize( product = "", price = "" )
+  #      @product = product
+  #      @price = price
+  #    end
+  #
+  #    def build
+  #      b = binding
+  #      # create and run templates, filling member data variables
+  #      ERB.new(<<-'END_PRODUCT'.gsub(/^\s+/, ""), 0, "", "@product").result b
+  #        <%= PRODUCT[:name] %>
+  #        <%= PRODUCT[:desc] %>
+  #      END_PRODUCT
+  #      ERB.new(<<-'END_PRICE'.gsub(/^\s+/, ""), 0, "", "@price").result b
+  #        <%= PRODUCT[:name] %> -- <%= PRODUCT[:cost] %>
+  #        <%= PRODUCT[:desc] %>
+  #      END_PRICE
+  #    end
+  #  end
+  #
+  #  # setup template data
+  #  listings = Listings.new
+  #  listings.build
+  #
+  #  puts listings.product + "\n" + listings.price
+  #
+  # _Generates_
+  #
+  #  Chicken Fried Steak
+  #  A well messages pattie, breaded and fried.
+ # + # Chicken Fried Steak -- 9.95 + # A well messages pattie, breaded and fried. + # + def initialize(str, safe_level=nil, trim_mode=nil, eoutvar='_erbout') + @safe_level = safe_level + compiler = ERB::Compiler.new(trim_mode) + set_eoutvar(compiler, eoutvar) + @src, @enc = *compiler.compile(str) + @filename = nil + end + + # The Ruby code generated by ERB + attr_reader :src + + # The optional _filename_ argument passed to Kernel#eval when the ERB code + # is run + attr_accessor :filename + + # + # Can be used to set _eoutvar_ as described in ERB#new. It's probably easier + # to just use the constructor though, since calling this method requires the + # setup of an ERB _compiler_ object. + # + def set_eoutvar(compiler, eoutvar = '_erbout') + compiler.put_cmd = "#{eoutvar}.concat" + compiler.insert_cmd = "#{eoutvar}.concat" + + cmd = [] + cmd.push "#{eoutvar} = ''" + + compiler.pre_cmd = cmd + + cmd = [] + cmd.push("#{eoutvar}.force_encoding(__ENCODING__)") + + compiler.post_cmd = cmd + end + + # Generate results and print them. (see ERB#result) + def run(b=TOPLEVEL_BINDING) + print self.result(b) + end + + # + # Executes the generated ERB code to produce a completed template, returning + # the results of that code. (See ERB#new for details on how this process can + # be affected by _safe_level_.) + # + # _b_ accepts a Binding or Proc object which is used to set the context of + # code evaluation. + # + def result(b=TOPLEVEL_BINDING) + if @safe_level + proc { + $SAFE = @safe_level + eval(@src, b, (@filename || '(erb)'), 0) + }.call + else + eval(@src, b, (@filename || '(erb)'), 0) + end + end + + # Define _methodname_ as instance method of _mod_ from compiled ruby source. 
+ # + # example: + # filename = 'example.rhtml' # 'arg1' and 'arg2' are used in example.rhtml + # erb = ERB.new(File.read(filename)) + # erb.def_method(MyClass, 'render(arg1, arg2)', filename) + # print MyClass.new.render('foo', 123) + def def_method(mod, methodname, fname='(ERB)') + src = self.src + magic_comment = "#coding:#{@enc}\n" + mod.module_eval do + eval(magic_comment + "def #{methodname}\n" + src + "\nend\n", binding, fname, -2) + end + end + + # Create unnamed module, define _methodname_ as instance method of it, and return it. + # + # example: + # filename = 'example.rhtml' # 'arg1' and 'arg2' are used in example.rhtml + # erb = ERB.new(File.read(filename)) + # erb.filename = filename + # MyModule = erb.def_module('render(arg1, arg2)') + # class MyClass + # include MyModule + # end + def def_module(methodname='erb') + mod = Module.new + def_method(mod, methodname, @filename || '(ERB)') + mod + end + + # Define unnamed class which has _methodname_ as instance method, and return it. + # + # example: + # class MyClass_ + # def initialize(arg1, arg2) + # @arg1 = arg1; @arg2 = arg2 + # end + # end + # filename = 'example.rhtml' # @arg1 and @arg2 are used in example.rhtml + # erb = ERB.new(File.read(filename)) + # erb.filename = filename + # MyClass = erb.def_class(MyClass_, 'render()') + # print MyClass.new('foo', 123).render() + def def_class(superklass=Object, methodname='result') + cls = Class.new(superklass) + def_method(cls, methodname, @filename || '(ERB)') + cls + end +end + +#-- +# ERB::Util +class ERB + # A utility module for conversion routines, often handy in HTML generation. + module Util + public + # + # A utility method for escaping HTML tag characters in _s_. + # + # require "erb" + # include ERB::Util + # + # puts html_escape("is a > 0 & a < 10?") + # + # _Generates_ + # + # is a > 0 & a < 10? 
+ # + def html_escape(s) + s.to_s.gsub(/&/, "&").gsub(/\"/, """).gsub(/>/, ">").gsub(/ + # <%= item %> + # <% end %> + # + # example.rb: + # require 'erb' + # class MyClass + # extend ERB::DefMethod + # def_erb_method('render()', 'example.rhtml') + # def initialize(items) + # @items = items + # end + # end + # print MyClass.new([10,20,30]).render() + # + # result: + # + # 10 + # + # 20 + # + # 30 + # + module DefMethod + public + # define _methodname_ as instance method of current module, using ERB object or eRuby file + def def_erb_method(methodname, erb_or_fname) + if erb_or_fname.kind_of? String + fname = erb_or_fname + erb = ERB.new(File.read(fname)) + erb.def_method(self, methodname, fname) + else + erb = erb_or_fname + erb.def_method(self, methodname, erb.filename || '(ERB)') + end + end + module_function :def_erb_method + end +end diff --git a/lib/fileutils.rb b/lib/fileutils.rb new file mode 100644 index 0000000..0a3fdc4 --- /dev/null +++ b/lib/fileutils.rb @@ -0,0 +1,1592 @@ +# +# = fileutils.rb +# +# Copyright (c) 2000-2007 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the same terms of ruby. +# +# == module FileUtils +# +# Namespace for several file utility methods for copying, moving, removing, etc. +# +# === Module Functions +# +# cd(dir, options) +# cd(dir, options) {|dir| .... 
} +# pwd() +# mkdir(dir, options) +# mkdir(list, options) +# mkdir_p(dir, options) +# mkdir_p(list, options) +# rmdir(dir, options) +# rmdir(list, options) +# ln(old, new, options) +# ln(list, destdir, options) +# ln_s(old, new, options) +# ln_s(list, destdir, options) +# ln_sf(src, dest, options) +# cp(src, dest, options) +# cp(list, dir, options) +# cp_r(src, dest, options) +# cp_r(list, dir, options) +# mv(src, dest, options) +# mv(list, dir, options) +# rm(list, options) +# rm_r(list, options) +# rm_rf(list, options) +# install(src, dest, mode = <src's>, options) +# chmod(mode, list, options) +# chmod_R(mode, list, options) +# chown(user, group, list, options) +# chown_R(user, group, list, options) +# touch(list, options) +# +# The options parameter is a hash of options, taken from the list +# :force, :noop, :preserve, and :verbose. +# :noop means that no changes are made. The other three are obvious. +# Each method documents the options that it honours. +# +# All methods that have the concept of a "source" file or directory can take +# either one file or a list of files in that argument. See the method +# documentation for examples. +# +# There are some `low level' methods, which do not accept any option: +# +# copy_entry(src, dest, preserve = false, dereference_root = false, remove_destination = false) +# copy_file(src, dest, preserve = false, dereference = true) +# copy_stream(srcstream, deststream) +# remove_entry(path, force = false) +# remove_entry_secure(path, force = false) +# remove_file(path, force = false) +# compare_file(path_a, path_b) +# compare_stream(stream_a, stream_b) +# uptodate?(file, cmp_list) +# +# == module FileUtils::Verbose +# +# This module has all methods of FileUtils module, but it outputs messages +# before acting. This equates to passing the :verbose flag to methods +# in FileUtils. +# +# == module FileUtils::NoWrite +# +# This module has all methods of FileUtils module, but never changes +# files/directories. 
This equates to passing the :noop flag to methods +# in FileUtils. +# +# == module FileUtils::DryRun +# +# This module has all methods of FileUtils module, but never changes +# files/directories. This equates to passing the :noop and +# :verbose flags to methods in FileUtils. +# + +module FileUtils + + def self.private_module_function(name) #:nodoc: + module_function name + private_class_method name + end + + # This hash table holds command options. + OPT_TABLE = {} #:nodoc: internal use only + + # + # Options: (none) + # + # Returns the name of the current directory. + # + def pwd + Dir.pwd + end + module_function :pwd + + alias getwd pwd + module_function :getwd + + # + # Options: verbose + # + # Changes the current directory to the directory +dir+. + # + # If this method is called with block, resumes to the old + # working directory after the block execution finished. + # + # FileUtils.cd('/', :verbose => true) # chdir and report it + # + def cd(dir, options = {}, &block) # :yield: dir + fu_check_options options, OPT_TABLE['cd'] + fu_output_message "cd #{dir}" if options[:verbose] + Dir.chdir(dir, &block) + fu_output_message 'cd -' if options[:verbose] and block + end + module_function :cd + + alias chdir cd + module_function :chdir + + OPT_TABLE['cd'] = + OPT_TABLE['chdir'] = [:verbose] + + # + # Options: (none) + # + # Returns true if +newer+ is newer than all +old_list+. + # Non-existent files are older than any file. + # + # FileUtils.uptodate?('hello.o', %w(hello.c hello.h)) or \ + # system 'make hello.o' + # + def uptodate?(new, old_list, options = nil) + raise ArgumentError, 'uptodate? does not accept any option' if options + + return false unless File.exist?(new) + new_time = File.mtime(new) + old_list.each do |old| + if File.exist?(old) + return false unless new_time > File.mtime(old) + end + end + true + end + module_function :uptodate? + + # + # Options: mode noop verbose + # + # Creates one or more directories. 
+ # + # FileUtils.mkdir 'test' + # FileUtils.mkdir %w( tmp data ) + # FileUtils.mkdir 'notexist', :noop => true # Does not really create. + # FileUtils.mkdir 'tmp', :mode => 0700 + # + def mkdir(list, options = {}) + fu_check_options options, OPT_TABLE['mkdir'] + list = fu_list(list) + fu_output_message "mkdir #{options[:mode] ? ('-m %03o ' % options[:mode]) : ''}#{list.join ' '}" if options[:verbose] + return if options[:noop] + + list.each do |dir| + fu_mkdir dir, options[:mode] + end + end + module_function :mkdir + + OPT_TABLE['mkdir'] = [:mode, :noop, :verbose] + + # + # Options: mode noop verbose + # + # Creates a directory and all its parent directories. + # For example, + # + # FileUtils.mkdir_p '/usr/local/lib/ruby' + # + # causes to make following directories, if it does not exist. + # * /usr + # * /usr/local + # * /usr/local/lib + # * /usr/local/lib/ruby + # + # You can pass several directories at a time in a list. + # + def mkdir_p(list, options = {}) + fu_check_options options, OPT_TABLE['mkdir_p'] + list = fu_list(list) + fu_output_message "mkdir -p #{options[:mode] ? 
('-m %03o ' % options[:mode]) : ''}#{list.join ' '}" if options[:verbose] + return *list if options[:noop] + + list.map {|path| path.sub(%r, '') }.each do |path| + # optimize for the most common case + begin + fu_mkdir path, options[:mode] + next + rescue SystemCallError + next if File.directory?(path) + end + + stack = [] + until path == stack.last # dirname("/")=="/", dirname("C:/")=="C:/" + stack.push path + path = File.dirname(path) + end + stack.reverse_each do |dir| + begin + fu_mkdir dir, options[:mode] + rescue SystemCallError => err + raise unless File.directory?(dir) + end + end + end + + return *list + end + module_function :mkdir_p + + alias mkpath mkdir_p + alias makedirs mkdir_p + module_function :mkpath + module_function :makedirs + + OPT_TABLE['mkdir_p'] = + OPT_TABLE['mkpath'] = + OPT_TABLE['makedirs'] = [:mode, :noop, :verbose] + + def fu_mkdir(path, mode) #:nodoc: + path = path.sub(%r, '') + if mode + Dir.mkdir path, mode + File.chmod mode, path + else + Dir.mkdir path + end + end + private_module_function :fu_mkdir + + # + # Options: noop, verbose + # + # Removes one or more directories. + # + # FileUtils.rmdir 'somedir' + # FileUtils.rmdir %w(somedir anydir otherdir) + # # Does not really remove directory; outputs message. + # FileUtils.rmdir 'somedir', :verbose => true, :noop => true + # + def rmdir(list, options = {}) + fu_check_options options, OPT_TABLE['rmdir'] + list = fu_list(list) + parents = options[:parents] + fu_output_message "rmdir #{parents ? '-p ' : ''}#{list.join ' '}" if options[:verbose] + return if options[:noop] + list.each do |dir| + begin + Dir.rmdir(dir = dir.sub(%r, '')) + if parents + until (parent = File.dirname(dir)) == '.' 
or parent == dir + Dir.rmdir(dir) + end + end + rescue Errno::ENOTEMPTY, Errno::ENOENT + end + end + end + module_function :rmdir + + OPT_TABLE['rmdir'] = [:parents, :noop, :verbose] + + # + # Options: force noop verbose + # + # ln(old, new, options = {}) + # + # Creates a hard link +new+ which points to +old+. + # If +new+ already exists and it is a directory, creates a link +new/old+. + # If +new+ already exists and it is not a directory, raises Errno::EEXIST. + # But if :force option is set, overwrite +new+. + # + # FileUtils.ln 'gcc', 'cc', :verbose => true + # FileUtils.ln '/usr/bin/emacs21', '/usr/bin/emacs' + # + # ln(list, destdir, options = {}) + # + # Creates several hard links in a directory, with each one pointing to the + # item in +list+. If +destdir+ is not a directory, raises Errno::ENOTDIR. + # + # include FileUtils + # cd '/sbin' + # FileUtils.ln %w(cp mv mkdir), '/bin' # Now /sbin/cp and /bin/cp are linked. + # + def ln(src, dest, options = {}) + fu_check_options options, OPT_TABLE['ln'] + fu_output_message "ln#{options[:force] ? ' -f' : ''} #{[src,dest].flatten.join ' '}" if options[:verbose] + return if options[:noop] + fu_each_src_dest0(src, dest) do |s,d| + remove_file d, true if options[:force] + File.link s, d + end + end + module_function :ln + + alias link ln + module_function :link + + OPT_TABLE['ln'] = + OPT_TABLE['link'] = [:force, :noop, :verbose] + + # + # Options: force noop verbose + # + # ln_s(old, new, options = {}) + # + # Creates a symbolic link +new+ which points to +old+. If +new+ already + # exists and it is a directory, creates a symbolic link +new/old+. If +new+ + # already exists and it is not a directory, raises Errno::EEXIST. But if + # :force option is set, overwrite +new+. 
+ # + # FileUtils.ln_s '/usr/bin/ruby', '/usr/local/bin/ruby' + # FileUtils.ln_s 'verylongsourcefilename.c', 'c', :force => true + # + # ln_s(list, destdir, options = {}) + # + # Creates several symbolic links in a directory, with each one pointing to the + # item in +list+. If +destdir+ is not a directory, raises Errno::ENOTDIR. + # + # If +destdir+ is not a directory, raises Errno::ENOTDIR. + # + # FileUtils.ln_s Dir.glob('bin/*.rb'), '/home/aamine/bin' + # + def ln_s(src, dest, options = {}) + fu_check_options options, OPT_TABLE['ln_s'] + fu_output_message "ln -s#{options[:force] ? 'f' : ''} #{[src,dest].flatten.join ' '}" if options[:verbose] + return if options[:noop] + fu_each_src_dest0(src, dest) do |s,d| + remove_file d, true if options[:force] + File.symlink s, d + end + end + module_function :ln_s + + alias symlink ln_s + module_function :symlink + + OPT_TABLE['ln_s'] = + OPT_TABLE['symlink'] = [:force, :noop, :verbose] + + # + # Options: noop verbose + # + # Same as + # #ln_s(src, dest, :force) + # + def ln_sf(src, dest, options = {}) + fu_check_options options, OPT_TABLE['ln_sf'] + options = options.dup + options[:force] = true + ln_s src, dest, options + end + module_function :ln_sf + + OPT_TABLE['ln_sf'] = [:noop, :verbose] + + # + # Options: preserve noop verbose + # + # Copies a file content +src+ to +dest+. If +dest+ is a directory, + # copies +src+ to +dest/src+. + # + # If +src+ is a list of files, then +dest+ must be a directory. + # + # FileUtils.cp 'eval.c', 'eval.c.org' + # FileUtils.cp %w(cgi.rb complex.rb date.rb), '/usr/lib/ruby/1.6' + # FileUtils.cp %w(cgi.rb complex.rb date.rb), '/usr/lib/ruby/1.6', :verbose => true + # FileUtils.cp 'symlink', 'dest' # copy content, "dest" is not a symlink + # + def cp(src, dest, options = {}) + fu_check_options options, OPT_TABLE['cp'] + fu_output_message "cp#{options[:preserve] ? 
' -p' : ''} #{[src,dest].flatten.join ' '}" if options[:verbose] + return if options[:noop] + fu_each_src_dest(src, dest) do |s, d| + copy_file s, d, options[:preserve] + end + end + module_function :cp + + alias copy cp + module_function :copy + + OPT_TABLE['cp'] = + OPT_TABLE['copy'] = [:preserve, :noop, :verbose] + + # + # Options: preserve noop verbose dereference_root remove_destination + # + # Copies +src+ to +dest+. If +src+ is a directory, this method copies + # all its contents recursively. If +dest+ is a directory, copies + # +src+ to +dest/src+. + # + # +src+ can be a list of files. + # + # # Installing ruby library "mylib" under the site_ruby + # FileUtils.rm_r site_ruby + '/mylib', :force + # FileUtils.cp_r 'lib/', site_ruby + '/mylib' + # + # # Examples of copying several files to target directory. + # FileUtils.cp_r %w(mail.rb field.rb debug/), site_ruby + '/tmail' + # FileUtils.cp_r Dir.glob('*.rb'), '/home/aamine/lib/ruby', :noop => true, :verbose => true + # + # # If you want to copy all contents of a directory instead of the + # # directory itself, c.f. src/x -> dest/x, src/y -> dest/y, + # # use following code. + # FileUtils.cp_r 'src/.', 'dest' # cp_r('src', 'dest') makes src/dest, + # # but this doesn't. + # + def cp_r(src, dest, options = {}) + fu_check_options options, OPT_TABLE['cp_r'] + fu_output_message "cp -r#{options[:preserve] ? 'p' : ''}#{options[:remove_destination] ? ' --remove-destination' : ''} #{[src,dest].flatten.join ' '}" if options[:verbose] + return if options[:noop] + fu_each_src_dest(src, dest) do |s, d| + copy_entry s, d, options[:preserve], options[:dereference_root], options[:remove_destination] + end + end + module_function :cp_r + + OPT_TABLE['cp_r'] = [:preserve, :noop, :verbose, + :dereference_root, :remove_destination] + + # + # Copies a file system entry +src+ to +dest+. + # If +src+ is a directory, this method copies its contents recursively. + # This method preserves file types, c.f. symlink, directory... 
+ # (FIFO, device files and etc. are not supported yet) + # + # Both of +src+ and +dest+ must be a path name. + # +src+ must exist, +dest+ must not exist. + # + # If +preserve+ is true, this method preserves owner, group, permissions + # and modified time. + # + # If +dereference_root+ is true, this method dereference tree root. + # + # If +remove_destination+ is true, this method removes each destination file before copy. + # + def copy_entry(src, dest, preserve = false, dereference_root = false, remove_destination = false) + Entry_.new(src, nil, dereference_root).traverse do |ent| + destent = Entry_.new(dest, ent.rel, false) + File.unlink destent.path if remove_destination && File.file?(destent.path) + ent.copy destent.path + ent.copy_metadata destent.path if preserve + end + end + module_function :copy_entry + + # + # Copies file contents of +src+ to +dest+. + # Both of +src+ and +dest+ must be a path name. + # + def copy_file(src, dest, preserve = false, dereference = true) + ent = Entry_.new(src, nil, dereference) + ent.copy_file dest + ent.copy_metadata dest if preserve + end + module_function :copy_file + + # + # Copies stream +src+ to +dest+. + # +src+ must respond to #read(n) and + # +dest+ must respond to #write(str). + # + def copy_stream(src, dest) + IO.copy_stream(src, dest) + end + module_function :copy_stream + + # + # Options: force noop verbose + # + # Moves file(s) +src+ to +dest+. If +file+ and +dest+ exist on the different + # disk partition, the file is copied then the original file is removed. + # + # FileUtils.mv 'badname.rb', 'goodname.rb' + # FileUtils.mv 'stuff.rb', '/notexist/lib/ruby', :force => true # no error + # + # FileUtils.mv %w(junk.txt dust.txt), '/home/aamine/.trash/' + # FileUtils.mv Dir.glob('test*.rb'), 'test', :noop => true, :verbose => true + # + def mv(src, dest, options = {}) + fu_check_options options, OPT_TABLE['mv'] + fu_output_message "mv#{options[:force] ? 
' -f' : ''} #{[src,dest].flatten.join ' '}" if options[:verbose] + return if options[:noop] + fu_each_src_dest(src, dest) do |s, d| + destent = Entry_.new(d, nil, true) + begin + if destent.exist? + if destent.directory? + raise Errno::EEXIST, dest + else + destent.remove_file if rename_cannot_overwrite_file? + end + end + begin + File.rename s, d + rescue Errno::EXDEV + copy_entry s, d, true + if options[:secure] + remove_entry_secure s, options[:force] + else + remove_entry s, options[:force] + end + end + rescue SystemCallError + raise unless options[:force] + end + end + end + module_function :mv + + alias move mv + module_function :move + + OPT_TABLE['mv'] = + OPT_TABLE['move'] = [:force, :noop, :verbose, :secure] + + def rename_cannot_overwrite_file? #:nodoc: + /cygwin|mswin|mingw|bccwin|emx/ =~ RUBY_PLATFORM + end + private_module_function :rename_cannot_overwrite_file? + + # + # Options: force noop verbose + # + # Remove file(s) specified in +list+. This method cannot remove directories. + # All StandardErrors are ignored when the :force option is set. + # + # FileUtils.rm %w( junk.txt dust.txt ) + # FileUtils.rm Dir.glob('*.so') + # FileUtils.rm 'NotExistFile', :force => true # never raises exception + # + def rm(list, options = {}) + fu_check_options options, OPT_TABLE['rm'] + list = fu_list(list) + fu_output_message "rm#{options[:force] ? 
' -f' : ''} #{list.join ' '}" if options[:verbose] + return if options[:noop] + + list.each do |path| + remove_file path, options[:force] + end + end + module_function :rm + + alias remove rm + module_function :remove + + OPT_TABLE['rm'] = + OPT_TABLE['remove'] = [:force, :noop, :verbose] + + # + # Options: noop verbose + # + # Equivalent to + # + # #rm(list, :force => true) + # + def rm_f(list, options = {}) + fu_check_options options, OPT_TABLE['rm_f'] + options = options.dup + options[:force] = true + rm list, options + end + module_function :rm_f + + alias safe_unlink rm_f + module_function :safe_unlink + + OPT_TABLE['rm_f'] = + OPT_TABLE['safe_unlink'] = [:noop, :verbose] + + # + # Options: force noop verbose secure + # + # remove files +list+[0] +list+[1]... If +list+[n] is a directory, + # removes its all contents recursively. This method ignores + # StandardError when :force option is set. + # + # FileUtils.rm_r Dir.glob('/tmp/*') + # FileUtils.rm_r '/', :force => true # :-) + # + # WARNING: This method causes local vulnerability + # if one of parent directories or removing directory tree are world + # writable (including /tmp, whose permission is 1777), and the current + # process has strong privilege such as Unix super user (root), and the + # system has symbolic link. For secure removing, read the documentation + # of #remove_entry_secure carefully, and set :secure option to true. + # Default is :secure=>false. + # + # NOTE: This method calls #remove_entry_secure if :secure option is set. + # See also #remove_entry_secure. + # + def rm_r(list, options = {}) + fu_check_options options, OPT_TABLE['rm_r'] + # options[:secure] = true unless options.key?(:secure) + list = fu_list(list) + fu_output_message "rm -r#{options[:force] ? 
'f' : ''} #{list.join ' '}" if options[:verbose] + return if options[:noop] + list.each do |path| + if options[:secure] + remove_entry_secure path, options[:force] + else + remove_entry path, options[:force] + end + end + end + module_function :rm_r + + OPT_TABLE['rm_r'] = [:force, :noop, :verbose, :secure] + + # + # Options: noop verbose secure + # + # Equivalent to + # + # #rm_r(list, :force => true) + # + # WARNING: This method causes local vulnerability. + # Read the documentation of #rm_r first. + # + def rm_rf(list, options = {}) + fu_check_options options, OPT_TABLE['rm_rf'] + options = options.dup + options[:force] = true + rm_r list, options + end + module_function :rm_rf + + alias rmtree rm_rf + module_function :rmtree + + OPT_TABLE['rm_rf'] = + OPT_TABLE['rmtree'] = [:noop, :verbose, :secure] + + # + # This method removes a file system entry +path+. +path+ shall be a + # regular file, a directory, or something. If +path+ is a directory, + # remove it recursively. This method is required to avoid TOCTTOU + # (time-of-check-to-time-of-use) local security vulnerability of #rm_r. + # #rm_r causes security hole when: + # + # * Parent directory is world writable (including /tmp). + # * Removing directory tree includes world writable directory. + # * The system has symbolic link. + # + # To avoid this security hole, this method applies special preprocess. + # If +path+ is a directory, this method chown(2) and chmod(2) all + # removing directories. This requires the current process is the + # owner of the removing whole directory tree, or is the super user (root). + # + # WARNING: You must ensure that *ALL* parent directories are not + # world writable. Otherwise this method does not work. + # Only exception is temporary directory like /tmp and /var/tmp, + # whose permission is 1777. + # + # WARNING: Only the owner of the removing directory tree, or Unix super + # user (root) should invoke this method. Otherwise this method does not + # work. 
  #
  # For details of this security vulnerability, see Perl's case:
  #
  #   http://www.cve.mitre.org/cgi-bin/cvename.cgi?name=CAN-2005-0448
  #   http://www.cve.mitre.org/cgi-bin/cvename.cgi?name=CAN-2004-0452
  #
  # For fileutils.rb, this vulnerability is reported in [ruby-dev:26100].
  #
  # When +force+ is true, StandardErrors raised while removing are swallowed.
  # Raises ArgumentError when the parent directory is world writable but
  # not sticky.
  #
  def remove_entry_secure(path, force = false)
    # Without symlink support, fall back to the plain removal.
    unless fu_have_symlink?
      remove_entry path, force
      return
    end
    fullpath = File.expand_path(path)
    st = File.lstat(fullpath)
    # Anything that is not a directory (lstat does not follow links) is
    # simply unlinked.
    unless st.directory?
      File.unlink fullpath
      return
    end
    # is a directory.
    parent_st = File.stat(File.dirname(fullpath))
    unless parent_st.world_writable?
      remove_entry path, force
      return
    end
    unless parent_st.sticky?
      raise ArgumentError, "parent directory is world writable, FileUtils#remove_entry_secure does not work; abort: #{path.inspect} (parent directory mode #{'%o' % parent_st.mode})"
    end
    # freeze tree root
    euid = Process.euid
    File.open(fullpath + '/.') {|f|
      # Compare the opened handle against the earlier lstat; a mismatch means
      # the entry was replaced between the two calls.
      unless fu_stat_identical_entry?(st, f.stat)
        # symlink (TOC-to-TOU attack?)
        File.unlink fullpath
        return
      end
      f.chown euid, -1
      f.chmod 0700
    }
    # ---- tree root is frozen ----
    root = Entry_.new(path)
    # First pass: take ownership of every directory and restrict it to 0700.
    root.preorder_traverse do |ent|
      if ent.directory?
        ent.chown euid, -1
        ent.chmod 0700
      end
    end
    # Second pass: remove children before their parents.
    root.postorder_traverse do |ent|
      begin
        ent.remove
      rescue
        raise unless force
      end
    end
  rescue
    raise unless force
  end
  module_function :remove_entry_secure

  # Probes File.symlink with nil arguments: NotImplementedError means the
  # platform has no symlink support; any other StandardError is taken to
  # mean the call is implemented.
  def fu_have_symlink?   #:nodoc:
    File.symlink nil, nil
  rescue NotImplementedError
    return false
  rescue
    return true
  end
  private_module_function :fu_have_symlink?

  # True when the two stat objects +a+ and +b+ have the same device and
  # inode numbers.
  def fu_stat_identical_entry?(a, b)   #:nodoc:
    a.dev == b.dev and a.ino == b.ino
  end
  private_module_function :fu_stat_identical_entry?

  #
  # This method removes a file system entry +path+.
  # +path+ might be a regular file, a directory, or something.
  # If +path+ is a directory, remove it recursively.
  #
  # See also #remove_entry_secure.
+ # + def remove_entry(path, force = false) + Entry_.new(path).postorder_traverse do |ent| + begin + ent.remove + rescue + raise unless force + end + end + rescue + raise unless force + end + module_function :remove_entry + + # + # Removes a file +path+. + # This method ignores StandardError if +force+ is true. + # + def remove_file(path, force = false) + Entry_.new(path).remove_file + rescue + raise unless force + end + module_function :remove_file + + # + # Removes a directory +dir+ and its contents recursively. + # This method ignores StandardError if +force+ is true. + # + def remove_dir(path, force = false) + remove_entry path, force # FIXME?? check if it is a directory + end + module_function :remove_dir + + # + # Returns true if the contents of a file A and a file B are identical. + # + # FileUtils.compare_file('somefile', 'somefile') #=> true + # FileUtils.compare_file('/bin/cp', '/bin/mv') #=> maybe false + # + def compare_file(a, b) + return false unless File.size(a) == File.size(b) + File.open(a, 'rb') {|fa| + File.open(b, 'rb') {|fb| + return compare_stream(fa, fb) + } + } + end + module_function :compare_file + + alias identical? compare_file + alias cmp compare_file + module_function :identical? + module_function :cmp + + # + # Returns true if the contents of a stream +a+ and +b+ are identical. + # + def compare_stream(a, b) + bsize = fu_stream_blksize(a, b) + sa = sb = nil + while sa == sb + sa = a.read(bsize) + sb = b.read(bsize) + unless sa and sb + if sa.nil? and sb.nil? + return true + end + end + end + false + end + module_function :compare_stream + + # + # Options: mode preserve noop verbose + # + # If +src+ is not same as +dest+, copies it and changes the permission + # mode to +mode+. If +dest+ is a directory, destination is +dest+/+src+. + # This method removes destination before copy. 
+ # + # FileUtils.install 'ruby', '/usr/local/bin/ruby', :mode => 0755, :verbose => true + # FileUtils.install 'lib.rb', '/usr/local/lib/ruby/site_ruby', :verbose => true + # + def install(src, dest, options = {}) + fu_check_options options, OPT_TABLE['install'] + fu_output_message "install -c#{options[:preserve] && ' -p'}#{options[:mode] ? (' -m 0%o' % options[:mode]) : ''} #{[src,dest].flatten.join ' '}" if options[:verbose] + return if options[:noop] + fu_each_src_dest(src, dest) do |s, d| + unless File.exist?(d) and compare_file(s, d) + remove_file d, true + st = File.stat(s) if options[:preserve] + copy_file s, d + File.utime st.atime, st.mtime, d if options[:preserve] + File.chmod options[:mode], d if options[:mode] + end + end + end + module_function :install + + OPT_TABLE['install'] = [:mode, :preserve, :noop, :verbose] + + # + # Options: noop verbose + # + # Changes permission bits on the named files (in +list+) to the bit pattern + # represented by +mode+. + # + # FileUtils.chmod 0755, 'somecommand' + # FileUtils.chmod 0644, %w(my.rb your.rb his.rb her.rb) + # FileUtils.chmod 0755, '/usr/bin/ruby', :verbose => true + # + def chmod(mode, list, options = {}) + fu_check_options options, OPT_TABLE['chmod'] + list = fu_list(list) + fu_output_message sprintf('chmod %o %s', mode, list.join(' ')) if options[:verbose] + return if options[:noop] + list.each do |path| + Entry_.new(path).chmod mode + end + end + module_function :chmod + + OPT_TABLE['chmod'] = [:noop, :verbose] + + # + # Options: noop verbose force + # + # Changes permission bits on the named files (in +list+) + # to the bit pattern represented by +mode+. + # + # FileUtils.chmod_R 0700, "/tmp/app.#{$$}" + # + def chmod_R(mode, list, options = {}) + fu_check_options options, OPT_TABLE['chmod_R'] + list = fu_list(list) + fu_output_message sprintf('chmod -R%s %o %s', + (options[:force] ? 
'f' : ''), + mode, list.join(' ')) if options[:verbose] + return if options[:noop] + list.each do |root| + Entry_.new(root).traverse do |ent| + begin + ent.chmod mode + rescue + raise unless options[:force] + end + end + end + end + module_function :chmod_R + + OPT_TABLE['chmod_R'] = [:noop, :verbose, :force] + + # + # Options: noop verbose + # + # Changes owner and group on the named files (in +list+) + # to the user +user+ and the group +group+. +user+ and +group+ + # may be an ID (Integer/String) or a name (String). + # If +user+ or +group+ is nil, this method does not change + # the attribute. + # + # FileUtils.chown 'root', 'staff', '/usr/local/bin/ruby' + # FileUtils.chown nil, 'bin', Dir.glob('/usr/bin/*'), :verbose => true + # + def chown(user, group, list, options = {}) + fu_check_options options, OPT_TABLE['chown'] + list = fu_list(list) + fu_output_message sprintf('chown %s%s', + [user,group].compact.join(':') + ' ', + list.join(' ')) if options[:verbose] + return if options[:noop] + uid = fu_get_uid(user) + gid = fu_get_gid(group) + list.each do |path| + Entry_.new(path).chown uid, gid + end + end + module_function :chown + + OPT_TABLE['chown'] = [:noop, :verbose] + + # + # Options: noop verbose force + # + # Changes owner and group on the named files (in +list+) + # to the user +user+ and the group +group+ recursively. + # +user+ and +group+ may be an ID (Integer/String) or + # a name (String). If +user+ or +group+ is nil, this + # method does not change the attribute. + # + # FileUtils.chown_R 'www', 'www', '/var/www/htdocs' + # FileUtils.chown_R 'cvs', 'cvs', '/var/cvs', :verbose => true + # + def chown_R(user, group, list, options = {}) + fu_check_options options, OPT_TABLE['chown_R'] + list = fu_list(list) + fu_output_message sprintf('chown -R%s %s%s', + (options[:force] ? 
'f' : ''), + [user,group].compact.join(':') + ' ', + list.join(' ')) if options[:verbose] + return if options[:noop] + uid = fu_get_uid(user) + gid = fu_get_gid(group) + return unless uid or gid + list.each do |root| + Entry_.new(root).traverse do |ent| + begin + ent.chown uid, gid + rescue + raise unless options[:force] + end + end + end + end + module_function :chown_R + + OPT_TABLE['chown_R'] = [:noop, :verbose, :force] + + begin + require 'etc' + + def fu_get_uid(user) #:nodoc: + return nil unless user + user = user.to_s + if /\A\d+\z/ =~ user + then user.to_i + else Etc.getpwnam(user).uid + end + end + private_module_function :fu_get_uid + + def fu_get_gid(group) #:nodoc: + return nil unless group + group = group.to_s + if /\A\d+\z/ =~ group + then group.to_i + else Etc.getgrnam(group).gid + end + end + private_module_function :fu_get_gid + + rescue LoadError + # need Win32 support??? + + def fu_get_uid(user) #:nodoc: + user # FIXME + end + private_module_function :fu_get_uid + + def fu_get_gid(group) #:nodoc: + group # FIXME + end + private_module_function :fu_get_gid + end + + # + # Options: noop verbose + # + # Updates modification time (mtime) and access time (atime) of file(s) in + # +list+. Files are created if they don't exist. + # + # FileUtils.touch 'timestamp' + # FileUtils.touch Dir.glob('*.c'); system 'make' + # + def touch(list, options = {}) + fu_check_options options, OPT_TABLE['touch'] + list = fu_list(list) + created = nocreate = options[:nocreate] + t = options[:mtime] + if options[:verbose] + fu_output_message "touch #{nocreate ? ' -c' : ''}#{t ? 
t.strftime(' -t %Y%m%d%H%M.%S') : ''}#{list.join ' '}" + end + return if options[:noop] + list.each do |path| + created = nocreate + begin + File.utime(t, t, path) + rescue Errno::ENOENT + raise if created + File.open(path, 'a') { + ; + } + created = true + retry if t + end + end + end + module_function :touch + + OPT_TABLE['touch'] = [:noop, :verbose, :mtime, :nocreate] + + private + + module StreamUtils_ + private + + def fu_windows? + /mswin|mingw|bccwin|emx/ =~ RUBY_PLATFORM + end + + def fu_copy_stream0(src, dest, blksize = nil) #:nodoc: + IO.copy_stream(src, dest) + end + + def fu_stream_blksize(*streams) + streams.each do |s| + next unless s.respond_to?(:stat) + size = fu_blksize(s.stat) + return size if size + end + fu_default_blksize() + end + + def fu_blksize(st) + s = st.blksize + return nil unless s + return nil if s == 0 + s + end + + def fu_default_blksize + 1024 + end + end + + include StreamUtils_ + extend StreamUtils_ + + class Entry_ #:nodoc: internal use only + include StreamUtils_ + + def initialize(a, b = nil, deref = false) + @prefix = @rel = @path = nil + if b + @prefix = a + @rel = b + else + @path = a + end + @deref = deref + @stat = nil + @lstat = nil + end + + def inspect + "\#<#{self.class} #{path()}>" + end + + def path + if @path + File.path(@path) + else + join(@prefix, @rel) + end + end + + def prefix + @prefix || @path + end + + def rel + @rel + end + + def dereference? + @deref + end + + def exist? + lstat! ? true : false + end + + def file? + s = lstat! + s and s.file? + end + + def directory? + s = lstat! + s and s.directory? + end + + def symlink? + s = lstat! + s and s.symlink? + end + + def chardev? + s = lstat! + s and s.chardev? + end + + def blockdev? + s = lstat! + s and s.blockdev? + end + + def socket? + s = lstat! + s and s.socket? + end + + def pipe? + s = lstat! + s and s.pipe? + end + + S_IF_DOOR = 0xD000 + + def door? + s = lstat! 
+ s and (s.mode & 0xF000 == S_IF_DOOR) + end + + def entries + Dir.entries(path())\ + .reject {|n| n == '.' or n == '..' }\ + .map {|n| Entry_.new(prefix(), join(rel(), n.untaint)) } + end + + def stat + return @stat if @stat + if lstat() and lstat().symlink? + @stat = File.stat(path()) + else + @stat = lstat() + end + @stat + end + + def stat! + return @stat if @stat + if lstat! and lstat!.symlink? + @stat = File.stat(path()) + else + @stat = lstat! + end + @stat + rescue SystemCallError + nil + end + + def lstat + if dereference? + @lstat ||= File.stat(path()) + else + @lstat ||= File.lstat(path()) + end + end + + def lstat! + lstat() + rescue SystemCallError + nil + end + + def chmod(mode) + if symlink? + File.lchmod mode, path() if have_lchmod? + else + File.chmod mode, path() + end + end + + def chown(uid, gid) + if symlink? + File.lchown uid, gid, path() if have_lchown? + else + File.chown uid, gid, path() + end + end + + def copy(dest) + case + when file? + copy_file dest + when directory? + if !File.exist?(dest) and /^#{Regexp.quote(path)}/ =~ File.dirname(dest) + raise ArgumentError, "cannot copy directory %s to itself %s" % [path, dest] + end + begin + Dir.mkdir dest + rescue + raise unless File.directory?(dest) + end + when symlink? + File.symlink File.readlink(path()), dest + when chardev? + raise "cannot handle device file" unless File.respond_to?(:mknod) + mknod dest, ?c, 0666, lstat().rdev + when blockdev? + raise "cannot handle device file" unless File.respond_to?(:mknod) + mknod dest, ?b, 0666, lstat().rdev + when socket? + raise "cannot handle socket" unless File.respond_to?(:mknod) + mknod dest, nil, lstat().mode, 0 + when pipe? + raise "cannot handle FIFO" unless File.respond_to?(:mkfifo) + mkfifo dest, 0666 + when door? 
+ raise "cannot handle door: #{path()}" + else + raise "unknown file type: #{path()}" + end + end + + def copy_file(dest) + File.open(dest, 'wb') do |f| + IO.copy_stream(path(), f) + end + end + + def copy_metadata(path) + st = lstat() + File.utime st.atime, st.mtime, path + begin + File.chown st.uid, st.gid, path + rescue Errno::EPERM + # clear setuid/setgid + File.chmod st.mode & 01777, path + else + File.chmod st.mode, path + end + end + + def remove + if directory? + remove_dir1 + else + remove_file + end + end + + def remove_dir1 + platform_support { + Dir.rmdir path().sub(%r, '') + } + end + + def remove_file + platform_support { + File.unlink path + } + end + + def platform_support + return yield unless fu_windows? + first_time_p = true + begin + yield + rescue Errno::ENOENT + raise + rescue => err + if first_time_p + first_time_p = false + begin + File.chmod 0700, path() # Windows does not have symlink + retry + rescue SystemCallError + end + end + raise err + end + end + + def preorder_traverse + stack = [self] + while ent = stack.pop + yield ent + stack.concat ent.entries.reverse if ent.directory? + end + end + + alias traverse preorder_traverse + + def postorder_traverse + if directory? + entries().each do |ent| + ent.postorder_traverse do |e| + yield e + end + end + end + yield self + end + + private + + $fileutils_rb_have_lchmod = nil + + def have_lchmod? + # This is not MT-safe, but it does not matter. + if $fileutils_rb_have_lchmod == nil + $fileutils_rb_have_lchmod = check_have_lchmod? + end + $fileutils_rb_have_lchmod + end + + def check_have_lchmod? + return false unless File.respond_to?(:lchmod) + File.lchmod 0 + return true + rescue NotImplementedError + return false + end + + $fileutils_rb_have_lchown = nil + + def have_lchown? + # This is not MT-safe, but it does not matter. + if $fileutils_rb_have_lchown == nil + $fileutils_rb_have_lchown = check_have_lchown? + end + $fileutils_rb_have_lchown + end + + def check_have_lchown? 
+ return false unless File.respond_to?(:lchown) + File.lchown nil, nil + return true + rescue NotImplementedError + return false + end + + def join(dir, base) + return File.path(dir) if not base or base == '.' + return File.path(base) if not dir or dir == '.' + File.join(dir, base) + end + end # class Entry_ + + def fu_list(arg) #:nodoc: + [arg].flatten.map {|path| File.path(path) } + end + private_module_function :fu_list + + def fu_each_src_dest(src, dest) #:nodoc: + fu_each_src_dest0(src, dest) do |s, d| + raise ArgumentError, "same file: #{s} and #{d}" if fu_same?(s, d) + yield s, d + end + end + private_module_function :fu_each_src_dest + + def fu_each_src_dest0(src, dest) #:nodoc: + if tmp = Array.try_convert(src) + tmp.each do |s| + s = File.path(s) + yield s, File.join(dest, File.basename(s)) + end + else + src = File.path(src) + if File.directory?(dest) + yield src, File.join(dest, File.basename(src)) + else + yield src, File.path(dest) + end + end + end + private_module_function :fu_each_src_dest0 + + def fu_same?(a, b) #:nodoc: + if fu_have_st_ino? + st1 = File.stat(a) + st2 = File.stat(b) + st1.dev == st2.dev and st1.ino == st2.ino + else + File.expand_path(a) == File.expand_path(b) + end + rescue Errno::ENOENT + return false + end + private_module_function :fu_same? + + def fu_have_st_ino? #:nodoc: + not fu_windows? + end + private_module_function :fu_have_st_ino? + + def fu_check_options(options, optdecl) #:nodoc: + h = options.dup + optdecl.each do |opt| + h.delete opt + end + raise ArgumentError, "no such option: #{h.keys.join(' ')}" unless h.empty? 
+ end + private_module_function :fu_check_options + + def fu_update_option(args, new) #:nodoc: + if tmp = Hash.try_convert(args.last) + args[-1] = tmp.dup.update(new) + else + args.push new + end + args + end + private_module_function :fu_update_option + + @fileutils_output = $stderr + @fileutils_label = '' + + def fu_output_message(msg) #:nodoc: + @fileutils_output ||= $stderr + @fileutils_label ||= '' + @fileutils_output.puts @fileutils_label + msg + end + private_module_function :fu_output_message + + # + # Returns an Array of method names which have any options. + # + # p FileUtils.commands #=> ["chmod", "cp", "cp_r", "install", ...] + # + def FileUtils.commands + OPT_TABLE.keys + end + + # + # Returns an Array of option names. + # + # p FileUtils.options #=> ["noop", "force", "verbose", "preserve", "mode"] + # + def FileUtils.options + OPT_TABLE.values.flatten.uniq.map {|sym| sym.to_s } + end + + # + # Returns true if the method +mid+ have an option +opt+. + # + # p FileUtils.have_option?(:cp, :noop) #=> true + # p FileUtils.have_option?(:rm, :force) #=> true + # p FileUtils.have_option?(:rm, :perserve) #=> false + # + def FileUtils.have_option?(mid, opt) + li = OPT_TABLE[mid.to_s] or raise ArgumentError, "no such method: #{mid}" + li.include?(opt) + end + + # + # Returns an Array of option names of the method +mid+. + # + # p FileUtils.options(:rm) #=> ["noop", "verbose", "force"] + # + def FileUtils.options_of(mid) + OPT_TABLE[mid.to_s].map {|sym| sym.to_s } + end + + # + # Returns an Array of method names which have the option +opt+. + # + # p FileUtils.collect_method(:preserve) #=> ["cp", "cp_r", "copy", "install"] + # + def FileUtils.collect_method(opt) + OPT_TABLE.keys.select {|m| OPT_TABLE[m].include?(opt) } + end + + METHODS = singleton_methods() - [:private_module_function, + :commands, :options, :have_option?, :options_of, :collect_method] + + # + # This module has all methods of FileUtils module, but it outputs messages + # before acting. 
  # This equates to passing the :verbose flag to
  # methods in FileUtils.
  #
  module Verbose
    include FileUtils
    @fileutils_output = $stderr
    @fileutils_label  = ''
    # Wrap every FileUtils method that accepts :verbose so it always passes
    # :verbose => true on to the original implementation via super.
    ::FileUtils.collect_method(:verbose).each do |name|
      module_eval(<<-EOS, __FILE__, __LINE__ + 1)
        def #{name}(*args)
          super(*fu_update_option(args, :verbose => true))
        end
        private :#{name}
      EOS
    end
    extend self
    # Make the methods listed in FileUtils::METHODS public on the module itself.
    class << self
      ::FileUtils::METHODS.each do |m|
        public m
      end
    end
  end

  #
  # This module has all methods of FileUtils module, but never changes
  # files/directories.  This equates to passing the :noop flag
  # to methods in FileUtils.
  #
  module NoWrite
    include FileUtils
    @fileutils_output = $stderr
    @fileutils_label  = ''
    # Wrap every FileUtils method that accepts :noop so it always passes
    # :noop => true on to the original implementation via super.
    ::FileUtils.collect_method(:noop).each do |name|
      module_eval(<<-EOS, __FILE__, __LINE__ + 1)
        def #{name}(*args)
          super(*fu_update_option(args, :noop => true))
        end
        private :#{name}
      EOS
    end
    extend self
    # Make the methods listed in FileUtils::METHODS public on the module itself.
    class << self
      ::FileUtils::METHODS.each do |m|
        public m
      end
    end
  end

  #
  # This module has all methods of FileUtils module, but never changes
  # files/directories, with printing message before acting.
  # This equates to passing the :noop and :verbose flag
  # to methods in FileUtils.
  #
  module DryRun
    include FileUtils
    @fileutils_output = $stderr
    @fileutils_label  = ''
    # Only methods accepting :noop are wrapped; both :noop and :verbose are
    # forced on for them.
    ::FileUtils.collect_method(:noop).each do |name|
      module_eval(<<-EOS, __FILE__, __LINE__ + 1)
        def #{name}(*args)
          super(*fu_update_option(args, :noop => true, :verbose => true))
        end
        private :#{name}
      EOS
    end
    extend self
    # Make the methods listed in FileUtils::METHODS public on the module itself.
    class << self
      ::FileUtils::METHODS.each do |m|
        public m
      end
    end
  end

end
diff --git a/lib/find.rb b/lib/find.rb
new file mode 100644
index 0000000..79ff7c1
--- /dev/null
+++ b/lib/find.rb
@@ -0,0 +1,81 @@
#
# find.rb: the Find module for processing all files under a given directory.
#

#
# The +Find+ module supports the top-down traversal of a set of file paths.
+# +# For example, to total the size of all files under your home directory, +# ignoring anything in a "dot" directory (e.g. $HOME/.ssh): +# +# require 'find' +# +# total_size = 0 +# +# Find.find(ENV["HOME"]) do |path| +# if FileTest.directory?(path) +# if File.basename(path)[0] == ?. +# Find.prune # Don't look any further into this directory. +# else +# next +# end +# else +# total_size += FileTest.size(path) +# end +# end +# +module Find + + # + # Calls the associated block with the name of every file and directory listed + # as arguments, then recursively on their subdirectories, and so on. + # + # See the +Find+ module documentation for an example. + # + def find(*paths) # :yield: path + block_given? or return enum_for(__method__, *paths) + + paths.collect!{|d| raise Errno::ENOENT unless File.exist?(d); d.dup} + while file = paths.shift + catch(:prune) do + yield file.dup.taint + next unless File.exist? file + begin + if File.lstat(file).directory? then + d = Dir.open(file) + begin + for f in d + next if f == "." or f == ".." + if File::ALT_SEPARATOR and file =~ /^(?:[\/\\]|[A-Za-z]:[\/\\]?)$/ then + f = file + f + elsif file == "/" then + f = "/" + f + else + f = File.join(file, f) + end + paths.unshift f.untaint + end + ensure + d.close + end + end + rescue Errno::ENOENT, Errno::EACCES + end + end + end + end + + # + # Skips the current file or directory, restarting the loop with the next + # entry. If the current file is a directory, that directory will not be + # recursively entered. Meaningful only within the block associated with + # Find::find. + # + # See the +Find+ module documentation for an example. 
+ # + def prune + throw :prune + end + + module_function :find, :prune +end diff --git a/lib/forwardable.rb b/lib/forwardable.rb new file mode 100644 index 0000000..7b6b1dd --- /dev/null +++ b/lib/forwardable.rb @@ -0,0 +1,270 @@ +# +# forwardable.rb - +# $Release Version: 1.1$ +# $Revision: 24174 $ +# by Keiju ISHITSUKA(keiju@ishitsuka.com) +# original definition by delegator.rb +# Revised by Daniel J. Berger with suggestions from Florian Gross. +# +# Documentation by James Edward Gray II and Gavin Sinclair +# +# == Introduction +# +# This library allows you delegate method calls to an object, on a method by +# method basis. +# +# == Notes +# +# Be advised, RDoc will not detect delegated methods. +# +# forwardable.rb provides single-method delegation via the +# def_delegator() and def_delegators() methods. For full-class +# delegation via DelegateClass(), see delegate.rb. +# +# == Examples +# +# === Forwardable +# +# Forwardable makes building a new class based on existing work, with a proper +# interface, almost trivial. We want to rely on what has come before obviously, +# but with delegation we can take just the methods we need and even rename them +# as appropriate. In many cases this is preferable to inheritance, which gives +# us the entire old interface, even if much of it isn't needed. +# +# class Queue +# extend Forwardable +# +# def initialize +# @q = [ ] # prepare delegate object +# end +# +# # setup preferred interface, enq() and deq()... 
#     def_delegator :@q, :push, :enq
#     def_delegator :@q, :shift, :deq
#
#     # support some general Array methods that fit Queues well
#     def_delegators :@q, :clear, :first, :push, :shift, :size
#   end
#
#   q = Queue.new
#   q.enq 1, 2, 3, 4, 5
#   q.push 6
#
#   q.shift    # => 1
#   while q.size > 0
#     puts q.deq
#   end
#
#   q.enq "Ruby", "Perl", "Python"
#   puts q.first
#   q.clear
#   puts q.first
#
# Prints:
#
#   2
#   3
#   4
#   5
#   6
#   Ruby
#   nil
#
# SingleForwardable can be used to set up delegation at the object level as well.
#
#    printer = String.new
#    printer.extend SingleForwardable        # prepare object for delegation
#    printer.def_delegator "STDOUT", "puts"  # add delegation for STDOUT.puts()
#    printer.puts "Howdy!"
#
# SingleForwardable can also be used with a Class or Module.
#
#   module Facade
#     extend SingleForwardable
#     def_delegator :Implementation, :service
#
#     class Implementation
#       def service...
#     end
#   end
#
# If you want to use both Forwardable and SingleForwardable, you can
# use methods def_instance_delegator and def_single_delegator, etc.
#
# If the object is neither a Module nor a Class, you can also extend it
# with the Forwardable module.
#   printer = String.new
#   printer.extend Forwardable              # prepare object for delegation
#   printer.def_delegator "STDOUT", "puts"  # add delegation for STDOUT.puts()
#   printer.puts "Howdy!"
#
# Prints:
#
#    Howdy!

#
# The Forwardable module provides delegation of specified
# methods to a designated object, using the methods #def_delegator
# and #def_delegators.
#
# For example, say you have a class RecordCollection which
# contains an array @records.
You could provide the lookup method +# #record_number(), which simply calls #[] on the @records +# array, like this: +# +# class RecordCollection +# extend Forwardable +# def_delegator :@records, :[], :record_number +# end +# +# Further, if you wish to provide the methods #size, #<<, and #map, +# all of which delegate to @records, this is how you can do it: +# +# class RecordCollection +# # extend Forwardable, but we did that above +# def_delegators :@records, :size, :<<, :map +# end +# f = Foo.new +# f.printf ... +# f.gets +# f.content_at(1) +# +# Also see the example at forwardable.rb. + +module Forwardable + FORWARDABLE_VERSION = "1.1.0" + + @debug = nil + class< accessor + # delegate [method, method, ...] => accessor + # + def instance_delegate(hash) + hash.each{ |methods, accessor| + methods = methods.to_s unless methods.respond_to?(:each) + methods.each{ |method| + def_instance_delegator(accessor, method) + } + } + end + + # + # Shortcut for defining multiple delegator methods, but with no + # provision for using a different name. 
The following two code + # samples have the same effect: + # + # def_delegators :@records, :size, :<<, :map + # + # def_delegator :@records, :size + # def_delegator :@records, :<< + # def_delegator :@records, :map + # + def def_instance_delegators(accessor, *methods) + methods.delete("__send__") + methods.delete("__id__") + for method in methods + def_instance_delegator(accessor, method) + end + end + + def def_instance_delegator(accessor, method, ali = method) + line_no = __LINE__; str = %{ + def #{ali}(*args, &block) + begin + #{accessor}.__send__(:#{method}, *args, &block) + rescue Exception + $@.delete_if{|s| %r"#{Regexp.quote(__FILE__)}"o =~ s} unless Forwardable::debug + ::Kernel::raise + end + end + } + # If it's not a class or module, it's an instance + begin + module_eval(str, __FILE__, line_no) + rescue + instance_eval(str, __FILE__, line_no) + end + + end + + alias delegate instance_delegate + alias def_delegators def_instance_delegators + alias def_delegator def_instance_delegator +end + +# +# Usage of The SingleForwardable is like Fowadable module. +# +module SingleForwardable + # Takes a hash as its argument. The key is a symbol or an array of + # symbols. These symbols correspond to method names. The value is + # the accessor to which the methods will be delegated. + # + # :call-seq: + # delegate method => accessor + # delegate [method, method, ...] => accessor + # + def single_delegate(hash) + hash.each{ |methods, accessor| + methods = methods.to_s unless methods.respond_to?(:each) + methods.each{ |method| + def_single_delegator(accessor, method) + } + } + end + + # + # Shortcut for defining multiple delegator methods, but with no + # provision for using a different name. 
The following two code + # samples have the same effect: + # + # def_delegators :@records, :size, :<<, :map + # + # def_delegator :@records, :size + # def_delegator :@records, :<< + # def_delegator :@records, :map + # + def def_single_delegators(accessor, *methods) + methods.delete("__send__") + methods.delete("__id__") + for method in methods + def_single_delegator(accessor, method) + end + end + + # + # Defines a method _method_ which delegates to _obj_ (i.e. it calls + # the method of the same name in _obj_). If _new_name_ is + # provided, it is used as the name for the delegate method. + # + def def_single_delegator(accessor, method, ali = method) + line_no = __LINE__; str = %{ + def #{ali}(*args, &block) + begin + #{accessor}.__send__(:#{method}, *args, &block) + rescue Exception + $@.delete_if{|s| %r"#{Regexp.quote(__FILE__)}"o =~ s} unless Forwardable::debug + ::Kernel::raise + end + end + } + + instance_eval(str, __FILE__, __LINE__) + end + + alias delegate single_delegate + alias def_delegators def_single_delegators + alias def_delegator def_single_delegator +end + + + + diff --git a/lib/getoptlong.rb b/lib/getoptlong.rb new file mode 100644 index 0000000..4cfb5fb --- /dev/null +++ b/lib/getoptlong.rb @@ -0,0 +1,610 @@ +# +# GetoptLong for Ruby +# +# Copyright (C) 1998, 1999, 2000 Motoyuki Kasahara. +# +# You may redistribute and/or modify this library under the same license +# terms as Ruby. +# +# See GetoptLong for documentation. +# +# Additional documents and the latest version of `getoptlong.rb' can be +# found at http://www.sra.co.jp/people/m-kasahr/ruby/getoptlong/ + +# The GetoptLong class allows you to parse command line options similarly to +# the GNU getopt_long() C library call. Note, however, that GetoptLong is a +# pure Ruby implementation. +# +# GetoptLong allows for POSIX-style options like --file as well +# as single letter options like -f +# +# The empty option -- (two minus symbols) is used to end option +# processing. 
This can be particularly important if options have optional +# arguments. +# +# Here is a simple example of usage: +# +# require 'getoptlong' +# require 'rdoc/usage' +# +# opts = GetoptLong.new( +# [ '--help', '-h', GetoptLong::NO_ARGUMENT ], +# [ '--repeat', '-n', GetoptLong::REQUIRED_ARGUMENT ], +# [ '--name', GetoptLong::OPTIONAL_ARGUMENT ] +# ) +# +# dir = nil +# name = nil +# repetitions = 1 +# opts.each do |opt, arg| +# case opt +# when '--help' +# puts <<-EOF +# hello [OPTION] ... DIR +# +# -h, --help: +# show help +# +# --repeat x, -n x: +# repeat x times +# +# --name [name]: +# greet user by name, if name not supplied default is John +# +# DIR: The directory in which to issue the greeting. +# EOF +# when '--repeat' +# repetitions = arg.to_i +# when '--name' +# if arg == '' +# name = 'John' +# else +# name = arg +# end +# end +# end +# +# if ARGV.length != 1 +# puts "Missing dir argument (try --help)" +# exit 0 +# end +# +# dir = ARGV.shift +# +# Dir.chdir(dir) +# for i in (1..repetitions) +# print "Hello" +# if name +# print ", #{name}" +# end +# puts +# end +# +# Example command line: +# +# hello -n 6 --name -- /tmp +# +class GetoptLong + # + # Orderings. + # + ORDERINGS = [REQUIRE_ORDER = 0, PERMUTE = 1, RETURN_IN_ORDER = 2] + + # + # Argument flags. + # + ARGUMENT_FLAGS = [NO_ARGUMENT = 0, REQUIRED_ARGUMENT = 1, + OPTIONAL_ARGUMENT = 2] + + # + # Status codes. + # + STATUS_YET, STATUS_STARTED, STATUS_TERMINATED = 0, 1, 2 + + # + # Error types. + # + class Error < StandardError; end + class AmbiguousOption < Error; end + class NeedlessArgument < Error; end + class MissingArgument < Error; end + class InvalidOption < Error; end + + # + # Set up option processing. + # + # The options to support are passed to new() as an array of arrays. + # Each sub-array contains any number of String option names which carry + # the same meaning, and one of the following flags: + # + # GetoptLong::NO_ARGUMENT :: Option does not take an argument. 
+ # + # GetoptLong::REQUIRED_ARGUMENT :: Option always takes an argument. + # + # GetoptLong::OPTIONAL_ARGUMENT :: Option may or may not take an argument. + # + # The first option name is considered to be the preferred (canonical) name. + # Other than that, the elements of each sub-array can be in any order. + # + def initialize(*arguments) + # + # Current ordering. + # + if ENV.include?('POSIXLY_CORRECT') + @ordering = REQUIRE_ORDER + else + @ordering = PERMUTE + end + + # + # Hash table of option names. + # Keys of the table are option names, and their values are canonical + # names of the options. + # + @canonical_names = Hash.new + + # + # Hash table of argument flags. + # Keys of the table are option names, and their values are argument + # flags of the options. + # + @argument_flags = Hash.new + + # + # Whether error messages are output to $stderr. + # + @quiet = FALSE + + # + # Status code. + # + @status = STATUS_YET + + # + # Error code. + # + @error = nil + + # + # Error message. + # + @error_message = nil + + # + # Rest of catenated short options. + # + @rest_singles = '' + + # + # List of non-option-arguments. + # Append them to ARGV when option processing is terminated. + # + @non_option_arguments = Array.new + + if 0 < arguments.length + set_options(*arguments) + end + end + + # + # Set the handling of the ordering of options and arguments. + # A RuntimeError is raised if option processing has already started. + # + # The supplied value must be a member of GetoptLong::ORDERINGS. It alters + # the processing of options as follows: + # + # REQUIRE_ORDER : + # + # Options are required to occur before non-options. + # + # Processing of options ends as soon as a word is encountered that has not + # been preceded by an appropriate option flag. 
+ # + # For example, if -a and -b are options which do not take arguments, + # parsing command line arguments of '-a one -b two' would result in + # 'one', '-b', 'two' being left in ARGV, and only ('-a', '') being + # processed as an option/arg pair. + # + # This is the default ordering, if the environment variable + # POSIXLY_CORRECT is set. (This is for compatibility with GNU getopt_long.) + # + # PERMUTE : + # + # Options can occur anywhere in the command line parsed. This is the + # default behavior. + # + # Every sequence of words which can be interpreted as an option (with or + # without argument) is treated as an option; non-option words are skipped. + # + # For example, if -a does not require an argument and -b optionally takes + # an argument, parsing '-a one -b two three' would result in ('-a','') and + # ('-b', 'two') being processed as option/arg pairs, and 'one','three' + # being left in ARGV. + # + # If the ordering is set to PERMUTE but the environment variable + # POSIXLY_CORRECT is set, REQUIRE_ORDER is used instead. This is for + # compatibility with GNU getopt_long. + # + # RETURN_IN_ORDER : + # + # All words on the command line are processed as options. Words not + # preceded by a short or long option flag are passed as arguments + # with an option of '' (empty string). + # + # For example, if -a requires an argument but -b does not, a command line + # of '-a one -b two three' would result in option/arg pairs of ('-a', 'one') + # ('-b', ''), ('', 'two'), ('', 'three') being processed. + # + def ordering=(ordering) + # + # The method is failed if option processing has already started. + # + if @status != STATUS_YET + set_error(ArgumentError, "argument error") + raise RuntimeError, + "invoke ordering=, but option processing has already started" + end + + # + # Check ordering. 
+ # + if !ORDERINGS.include?(ordering) + raise ArgumentError, "invalid ordering `#{ordering}'" + end + if ordering == PERMUTE && ENV.include?('POSIXLY_CORRECT') + @ordering = REQUIRE_ORDER + else + @ordering = ordering + end + end + + # + # Return ordering. + # + attr_reader :ordering + + # + # Set options. Takes the same argument as GetoptLong.new. + # + # Raises a RuntimeError if option processing has already started. + # + def set_options(*arguments) + # + # The method fails if option processing has already started. + # + if @status != STATUS_YET + raise RuntimeError, + "invoke set_options, but option processing has already started" + end + + # + # Clear tables of option names and argument flags. + # + @canonical_names.clear + @argument_flags.clear + + arguments.each do |*arg| + arg = arg.first # TODO: YARV Hack + # + # Find an argument flag and set it to `argument_flag'. + # + argument_flag = nil + arg.each do |i| + if ARGUMENT_FLAGS.include?(i) + if argument_flag != nil + raise ArgumentError, "too many argument-flags" + end + argument_flag = i + end + end + + raise ArgumentError, "no argument-flag" if argument_flag == nil + + canonical_name = nil + arg.each do |i| + # + # Check an option name. + # + next if i == argument_flag + begin + if !i.is_a?(String) || i !~ /^-([^-]|-.+)$/ + raise ArgumentError, "an invalid option `#{i}'" + end + if (@canonical_names.include?(i)) + raise ArgumentError, "option redefined `#{i}'" + end + rescue + @canonical_names.clear + @argument_flags.clear + raise + end + + # + # Register the option (`i') to the `@canonical_names' and + # `@argument_flags' Hashes. + # + if canonical_name == nil + canonical_name = i + end + @canonical_names[i] = canonical_name + @argument_flags[i] = argument_flag + end + raise ArgumentError, "no option name" if canonical_name == nil + end + return self + end + + # + # Set/Unset `quiet' mode. + # + attr_writer :quiet + + # + # Return the flag of `quiet' mode.
+ # + attr_reader :quiet + + # + # `quiet?' is an alias of `quiet'. + # + alias quiet? quiet + + # + # Explicitly terminate option processing. + # + def terminate + return nil if @status == STATUS_TERMINATED + raise RuntimeError, "an error has occured" if @error != nil + + @status = STATUS_TERMINATED + @non_option_arguments.reverse_each do |argument| + ARGV.unshift(argument) + end + + @canonical_names = nil + @argument_flags = nil + @rest_singles = nil + @non_option_arguments = nil + + return self + end + + # + # Returns true if option processing has terminated, false otherwise. + # + def terminated? + return @status == STATUS_TERMINATED + end + + # + # Set an error (a protected method). + # + def set_error(type, message) + $stderr.print("#{$0}: #{message}\n") if !@quiet + + @error = type + @error_message = message + @canonical_names = nil + @argument_flags = nil + @rest_singles = nil + @non_option_arguments = nil + + raise type, message + end + protected :set_error + + # + # Examine whether an option processing is failed. + # + attr_reader :error + + # + # `error?' is an alias of `error'. + # + alias error? error + + # Return the appropriate error message in POSIX-defined format. + # If no error has occurred, returns nil. + # + def error_message + return @error_message + end + + # + # Get next option name and its argument, as an Array of two elements. + # + # The option name is always converted to the first (preferred) + # name given in the original options to GetoptLong.new. + # + # Example: ['--option', 'value'] + # + # Returns nil if the processing is complete (as determined by + # STATUS_TERMINATED). + # + def get + option_name, option_argument = nil, '' + + # + # Check status. + # + return nil if @error != nil + case @status + when STATUS_YET + @status = STATUS_STARTED + when STATUS_TERMINATED + return nil + end + + # + # Get next option argument. 
+ # + if 0 < @rest_singles.length + argument = '-' + @rest_singles + elsif (ARGV.length == 0) + terminate + return nil + elsif @ordering == PERMUTE + while 0 < ARGV.length && ARGV[0] !~ /^-./ + @non_option_arguments.push(ARGV.shift) + end + if ARGV.length == 0 + terminate + return nil + end + argument = ARGV.shift + elsif @ordering == REQUIRE_ORDER + if (ARGV[0] !~ /^-./) + terminate + return nil + end + argument = ARGV.shift + else + argument = ARGV.shift + end + + # + # Check the special argument `--'. + # `--' indicates the end of the option list. + # + if argument == '--' && @rest_singles.length == 0 + terminate + return nil + end + + # + # Check for long and short options. + # + if argument =~ /^(--[^=]+)/ && @rest_singles.length == 0 + # + # This is a long style option, which start with `--'. + # + pattern = $1 + if @canonical_names.include?(pattern) + option_name = pattern + else + # + # The option `option_name' is not registered in `@canonical_names'. + # It may be an abbreviated. + # + matches = [] + @canonical_names.each_key do |key| + if key.index(pattern) == 0 + option_name = key + matches << key + end + end + if 2 <= matches.length + set_error(AmbiguousOption, "option `#{argument}' is ambiguous between #{matches.join(', ')}") + elsif matches.length == 0 + set_error(InvalidOption, "unrecognized option `#{argument}'") + end + end + + # + # Check an argument to the option. 
+ # + if @argument_flags[option_name] == REQUIRED_ARGUMENT + if argument =~ /=(.*)$/ + option_argument = $1 + elsif 0 < ARGV.length + option_argument = ARGV.shift + else + set_error(MissingArgument, + "option `#{argument}' requires an argument") + end + elsif @argument_flags[option_name] == OPTIONAL_ARGUMENT + if argument =~ /=(.*)$/ + option_argument = $1 + elsif 0 < ARGV.length && ARGV[0] !~ /^-./ + option_argument = ARGV.shift + else + option_argument = '' + end + elsif argument =~ /=(.*)$/ + set_error(NeedlessArgument, + "option `#{option_name}' doesn't allow an argument") + end + + elsif argument =~ /^(-(.))(.*)/ + # + # This is a short style option, which start with `-' (not `--'). + # Short options may be catenated (e.g. `-l -g' is equivalent to + # `-lg'). + # + option_name, ch, @rest_singles = $1, $2, $3 + + if @canonical_names.include?(option_name) + # + # The option `option_name' is found in `@canonical_names'. + # Check its argument. + # + if @argument_flags[option_name] == REQUIRED_ARGUMENT + if 0 < @rest_singles.length + option_argument = @rest_singles + @rest_singles = '' + elsif 0 < ARGV.length + option_argument = ARGV.shift + else + # 1003.2 specifies the format of this message. + set_error(MissingArgument, "option requires an argument -- #{ch}") + end + elsif @argument_flags[option_name] == OPTIONAL_ARGUMENT + if 0 < @rest_singles.length + option_argument = @rest_singles + @rest_singles = '' + elsif 0 < ARGV.length && ARGV[0] !~ /^-./ + option_argument = ARGV.shift + else + option_argument = '' + end + end + else + # + # This is an invalid option. + # 1003.2 specifies the format of this message. + # + if ENV.include?('POSIXLY_CORRECT') + set_error(InvalidOption, "invalid option -- #{ch}") + else + set_error(InvalidOption, "invalid option -- #{ch}") + end + end + else + # + # This is a non-option argument. + # Only RETURN_IN_ORDER falled into here. 
+ # + return '', argument + end + + return @canonical_names[option_name], option_argument + end + + # + # `get_option' is an alias of `get'. + # + alias get_option get + + # Iterator version of `get'. + # + # The block is called repeatedly with two arguments: + # The first is the option name. + # The second is the argument which followed it (if any). + # Example: ('--opt', 'value') + # + # The option name is always converted to the first (preferred) + # name given in the original options to GetoptLong.new. + # + def each + loop do + option_name, option_argument = get_option + break if option_name == nil + yield option_name, option_argument + end + end + + # + # `each_option' is an alias of `each'. + # + alias each_option each +end diff --git a/lib/gserver.rb b/lib/gserver.rb new file mode 100644 index 0000000..592e866 --- /dev/null +++ b/lib/gserver.rb @@ -0,0 +1,253 @@ +# +# Copyright (C) 2001 John W. Small All Rights Reserved +# +# Author:: John W. Small +# Documentation:: Gavin Sinclair +# Licence:: Freeware. +# +# See the class GServer for documentation. +# + +require "socket" +require "thread" + +# +# GServer implements a generic server, featuring thread pool management, +# simple logging, and multi-server management. See HttpServer in +# xmlrpc/httpserver.rb in the Ruby standard library for an example of +# GServer in action. +# +# Any kind of application-level server can be implemented using this class. +# It accepts multiple simultaneous connections from clients, up to an optional +# maximum number. Several _services_ (i.e. one service per TCP port) can be +# run simultaneously, and stopped at any time through the class method +# GServer.stop(port). All the threading issues are handled, saving +# you the effort. All events are optionally logged, but you can provide your +# own event handlers if you wish. +# +# === Example +# +# Using GServer is simple. Below we implement a simple time server, run it, +# query it, and shut it down. 
Try this code in +irb+: +# +# require 'gserver' +# +# # +# # A server that returns the time in seconds since 1970. +# # +# class TimeServer < GServer +# def initialize(port=10001, *args) +# super(port, *args) +# end +# def serve(io) +# io.puts(Time.now.to_s) +# end +# end +# +# # Run the server with logging enabled (it's a separate thread). +# server = TimeServer.new +# server.audit = true # Turn logging on. +# server.start +# +# # *** Now point your browser to http://localhost:10001 to see it working *** +# +# # See if it's still running. +# GServer.in_service?(10001) # -> true +# server.stopped? # -> false +# +# # Shut the server down gracefully. +# server.shutdown +# +# # Alternatively, stop it immediately. +# GServer.stop(10001) +# # or, of course, "server.stop". +# +# All the business of accepting connections and exception handling is taken +# care of. All we have to do is implement the method that actually serves the +# client. +# +# === Advanced +# +# As the example above shows, the way to use GServer is to subclass it to +# create a specific server, overriding the +serve+ method. You can override +# other methods as well if you wish, perhaps to collect statistics, or emit +# more detailed logging. +# +# connecting +# disconnecting +# starting +# stopping +# +# The above methods are only called if auditing is enabled. +# +# You can also override +log+ and +error+ if, for example, you wish to use a +# more sophisticated logging system. +# +class GServer + + DEFAULT_HOST = "127.0.0.1" + + def serve(io) + end + + @@services = {} # Hash of opened ports, i.e. 
services + @@servicesMutex = Mutex.new + + def GServer.stop(port, host = DEFAULT_HOST) + @@servicesMutex.synchronize { + @@services[host][port].stop + } + end + + def GServer.in_service?(port, host = DEFAULT_HOST) + @@services.has_key?(host) and + @@services[host].has_key?(port) + end + + def stop + @connectionsMutex.synchronize { + if @tcpServerThread + @tcpServerThread.raise "stop" + end + } + end + + def stopped? + @tcpServerThread == nil + end + + def shutdown + @shutdown = true + end + + def connections + @connections.size + end + + def join + @tcpServerThread.join if @tcpServerThread + end + + attr_reader :port, :host, :maxConnections + attr_accessor :stdlog, :audit, :debug + + def connecting(client) + addr = client.peeraddr + log("#{self.class.to_s} #{@host}:#{@port} client:#{addr[1]} " + + "#{addr[2]}<#{addr[3]}> connect") + true + end + + def disconnecting(clientPort) + log("#{self.class.to_s} #{@host}:#{@port} " + + "client:#{clientPort} disconnect") + end + + protected :connecting, :disconnecting + + def starting() + log("#{self.class.to_s} #{@host}:#{@port} start") + end + + def stopping() + log("#{self.class.to_s} #{@host}:#{@port} stop") + end + + protected :starting, :stopping + + def error(detail) + log(detail.backtrace.join("\n")) + end + + def log(msg) + if @stdlog + @stdlog.puts("[#{Time.new.ctime}] %s" % msg) + @stdlog.flush + end + end + + protected :error, :log + + def initialize(port, host = DEFAULT_HOST, maxConnections = 4, + stdlog = $stderr, audit = false, debug = false) + @tcpServerThread = nil + @port = port + @host = host + @maxConnections = maxConnections + @connections = [] + @connectionsMutex = Mutex.new + @connectionsCV = ConditionVariable.new + @stdlog = stdlog + @audit = audit + @debug = debug + end + + def start(maxConnections = -1) + raise "running" if !stopped? 
+ @shutdown = false + @maxConnections = maxConnections if maxConnections > 0 + @@servicesMutex.synchronize { + if GServer.in_service?(@port,@host) + raise "Port already in use: #{host}:#{@port}!" + end + @tcpServer = TCPServer.new(@host,@port) + @port = @tcpServer.addr[1] + @@services[@host] = {} unless @@services.has_key?(@host) + @@services[@host][@port] = self; + } + @tcpServerThread = Thread.new { + begin + starting if @audit + while !@shutdown + @connectionsMutex.synchronize { + while @connections.size >= @maxConnections + @connectionsCV.wait(@connectionsMutex) + end + } + client = @tcpServer.accept + @connections << Thread.new(client) { |myClient| + begin + myPort = myClient.peeraddr[1] + serve(myClient) if !@audit or connecting(myClient) + rescue => detail + error(detail) if @debug + ensure + begin + myClient.close + rescue + end + @connectionsMutex.synchronize { + @connections.delete(Thread.current) + @connectionsCV.signal + } + disconnecting(myPort) if @audit + end + } + end + rescue => detail + error(detail) if @debug + ensure + begin + @tcpServer.close + rescue + end + if @shutdown + @connectionsMutex.synchronize { + while @connections.size > 0 + @connectionsCV.wait(@connectionsMutex) + end + } + else + @connections.each { |c| c.raise "stop" } + end + @tcpServerThread = nil + @@servicesMutex.synchronize { + @@services[@host].delete(@port) + } + stopping if @audit + end + } + self + end + +end diff --git a/lib/ipaddr.rb b/lib/ipaddr.rb new file mode 100644 index 0000000..a62cc46 --- /dev/null +++ b/lib/ipaddr.rb @@ -0,0 +1,813 @@ +# +# ipaddr.rb - A class to manipulate an IP address +# +# Copyright (c) 2002 Hajimu UMEMOTO . +# Copyright (c) 2007 Akinori MUSHA . +# All rights reserved. +# +# You can redistribute and/or modify it under the same terms as Ruby. +# +# $Id: ipaddr.rb 19504 2008-09-23 21:39:21Z ryan $ +# +# Contact: +# - Akinori MUSHA (current maintainer) +# +# TODO: +# - scope_id support +# +require 'socket' + +unless Socket.const_defined? 
"AF_INET6" + class Socket + AF_INET6 = Object.new + end + + class << IPSocket + def valid_v4?(addr) + if /\A(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})\Z/ =~ addr + return $~.captures.all? {|i| i.to_i < 256} + end + return false + end + + def valid_v6?(addr) + # IPv6 (normal) + return true if /\A[\dA-Fa-f]{1,4}(:[\dA-Fa-f]{1,4})*\Z/ =~ addr + return true if /\A[\dA-Fa-f]{1,4}(:[\dA-Fa-f]{1,4})*::([\dA-Fa-f]{1,4}(:[\dA-Fa-f]{1,4})*)?\Z/ =~ addr + return true if /\A::([\dA-Fa-f]{1,4}(:[\dA-Fa-f]{1,4})*)?\Z/ =~ addr + # IPv6 (IPv4 compat) + return true if /\A[\dA-Fa-f]{1,4}(:[\dA-Fa-f]{1,4})*:/ =~ addr && valid_v4?($') + return true if /\A[\dA-Fa-f]{1,4}(:[\dA-Fa-f]{1,4})*::([\dA-Fa-f]{1,4}(:[\dA-Fa-f]{1,4})*:)?/ =~ addr && valid_v4?($') + return true if /\A::([\dA-Fa-f]{1,4}(:[\dA-Fa-f]{1,4})*:)?/ =~ addr && valid_v4?($') + + false + end + + def valid?(addr) + valid_v4?(addr) || valid_v6?(addr) + end + + alias getaddress_orig getaddress + def getaddress(s) + if valid?(s) + s + elsif /\A[-A-Za-z\d.]+\Z/ =~ s + getaddress_orig(s) + else + raise ArgumentError, "invalid address" + end + end + end +end + +# IPAddr provides a set of methods to manipulate an IP address. Both IPv4 and +# IPv6 are supported. +# +# == Example +# +# require 'ipaddr' +# +# ipaddr1 = IPAddr.new "3ffe:505:2::1" +# +# p ipaddr1 #=> # +# +# p ipaddr1.to_s #=> "3ffe:505:2::1" +# +# ipaddr2 = ipaddr1.mask(48) #=> # +# +# p ipaddr2.to_s #=> "3ffe:505:2::" +# +# ipaddr3 = IPAddr.new "192.168.2.0/24" +# +# p ipaddr3 #=> # + +class IPAddr + + IN4MASK = 0xffffffff + IN6MASK = 0xffffffffffffffffffffffffffffffff + IN6FORMAT = (["%.4x"] * 8).join(':') + + # Returns the address family of this IP address. + attr_reader :family + + # Creates a new ipaddr containing the given network byte ordered + # string form of an IP address. + def IPAddr::new_ntoh(addr) + return IPAddr.new(IPAddr::ntop(addr)) + end + + # Convert a network byte ordered string form of an IP address into + # human readable form. 
+ def IPAddr::ntop(addr) + case addr.size + when 4 + s = addr.unpack('C4').join('.') + when 16 + s = IN6FORMAT % addr.unpack('n8') + else + raise ArgumentError, "unsupported address family" + end + return s + end + + # Returns a new ipaddr built by bitwise AND. + def &(other) + return self.clone.set(@addr & coerce_other(other).to_i) + end + + # Returns a new ipaddr built by bitwise OR. + def |(other) + return self.clone.set(@addr | coerce_other(other).to_i) + end + + # Returns a new ipaddr built by bitwise right-shift. + def >>(num) + return self.clone.set(@addr >> num) + end + + # Returns a new ipaddr built by bitwise left shift. + def <<(num) + return self.clone.set(addr_mask(@addr << num)) + end + + # Returns a new ipaddr built by bitwise negation. + def ~ + return self.clone.set(addr_mask(~@addr)) + end + + # Returns true if two ipaddrs are equal. + def ==(other) + other = coerce_other(other) + return @family == other.family && @addr == other.to_i + end + + # Returns a new ipaddr built by masking IP address with the given + # prefixlen/netmask. (e.g. 8, 64, "255.255.255.0", etc.) + def mask(prefixlen) + return self.clone.mask!(prefixlen) + end + + # Returns true if the given ipaddr is in the range. + # + # e.g.: + # require 'ipaddr' + # net1 = IPAddr.new("192.168.2.0/24") + # net2 = IPAddr.new("192.168.2.100") + # net3 = IPAddr.new("192.168.3.0") + # p net1.include?(net2) #=> true + # p net1.include?(net3) #=> false + def include?(other) + other = coerce_other(other) + if ipv4_mapped? + if (@mask_addr >> 32) != 0xffffffffffffffffffffffff + return false + end + mask_addr = (@mask_addr & IN4MASK) + addr = (@addr & IN4MASK) + family = Socket::AF_INET + else + mask_addr = @mask_addr + addr = @addr + family = @family + end + if other.ipv4_mapped? 
+ other_addr = (other.to_i & IN4MASK) + other_family = Socket::AF_INET + else + other_addr = other.to_i + other_family = other.family + end + + if family != other_family + return false + end + return ((addr & mask_addr) == (other_addr & mask_addr)) + end + alias === include? + + # Returns the integer representation of the ipaddr. + def to_i + return @addr + end + + # Returns a string containing the IP address representation. + def to_s + str = to_string + return str if ipv4? + + str.gsub!(/\b0{1,3}([\da-f]+)\b/i, '\1') + loop do + break if str.sub!(/\A0:0:0:0:0:0:0:0\Z/, '::') + break if str.sub!(/\b0:0:0:0:0:0:0\b/, ':') + break if str.sub!(/\b0:0:0:0:0:0\b/, ':') + break if str.sub!(/\b0:0:0:0:0\b/, ':') + break if str.sub!(/\b0:0:0:0\b/, ':') + break if str.sub!(/\b0:0:0\b/, ':') + break if str.sub!(/\b0:0\b/, ':') + break + end + str.sub!(/:{3,}/, '::') + + if /\A::(ffff:)?([\da-f]{1,4}):([\da-f]{1,4})\Z/i =~ str + str = sprintf('::%s%d.%d.%d.%d', $1, $2.hex / 256, $2.hex % 256, $3.hex / 256, $3.hex % 256) + end + + str + end + + # Returns a string containing the IP address representation in + # canonical form. + def to_string + return _to_string(@addr) + end + + # Returns a network byte ordered string form of the IP address. + def hton + case @family + when Socket::AF_INET + return [@addr].pack('N') + when Socket::AF_INET6 + return (0..7).map { |i| + (@addr >> (112 - 16 * i)) & 0xffff + }.pack('n8') + else + raise "unsupported address family" + end + end + + # Returns true if the ipaddr is an IPv4 address. + def ipv4? + return @family == Socket::AF_INET + end + + # Returns true if the ipaddr is an IPv6 address. + def ipv6? + return @family == Socket::AF_INET6 + end + + # Returns true if the ipaddr is an IPv4-mapped IPv6 address. + def ipv4_mapped? + return ipv6? && (@addr >> 32) == 0xffff + end + + # Returns true if the ipaddr is an IPv4-compatible IPv6 address. + def ipv4_compat? + if !ipv6? 
|| (@addr >> 32) != 0 + return false + end + a = (@addr & IN4MASK) + return a != 0 && a != 1 + end + + # Returns a new ipaddr built by converting the native IPv4 address + # into an IPv4-mapped IPv6 address. + def ipv4_mapped + if !ipv4? + raise ArgumentError, "not an IPv4 address" + end + return self.clone.set(@addr | 0xffff00000000, Socket::AF_INET6) + end + + # Returns a new ipaddr built by converting the native IPv4 address + # into an IPv4-compatible IPv6 address. + def ipv4_compat + if !ipv4? + raise ArgumentError, "not an IPv4 address" + end + return self.clone.set(@addr, Socket::AF_INET6) + end + + # Returns a new ipaddr built by converting the IPv6 address into a + # native IPv4 address. If the IP address is not an IPv4-mapped or + # IPv4-compatible IPv6 address, returns self. + def native + if !ipv4_mapped? && !ipv4_compat? + return self + end + return self.clone.set(@addr & IN4MASK, Socket::AF_INET) + end + + # Returns a string for DNS reverse lookup. It returns a string in + # RFC3172 form for an IPv6 address. + def reverse + case @family + when Socket::AF_INET + return _reverse + ".in-addr.arpa" + when Socket::AF_INET6 + return ip6_arpa + else + raise "unsupported address family" + end + end + + # Returns a string for DNS reverse lookup compatible with RFC3172. + def ip6_arpa + if !ipv6? + raise ArgumentError, "not an IPv6 address" + end + return _reverse + ".ip6.arpa" + end + + # Returns a string for DNS reverse lookup compatible with RFC1886. + def ip6_int + if !ipv6? + raise ArgumentError, "not an IPv6 address" + end + return _reverse + ".ip6.int" + end + + # Returns the successor to the ipaddr. + def succ + return self.clone.set(@addr + 1, @family) + end + + # Compares the ipaddr with another. + def <=>(other) + other = coerce_other(other) + + return nil if other.family != @family + + return @addr <=> other.to_i + end + include Comparable + + # Creates a Range object for the network address. 
+ def to_range + begin_addr = (@addr & @mask_addr) + + case @family + when Socket::AF_INET + end_addr = (@addr | (IN4MASK ^ @mask_addr)) + when Socket::AF_INET6 + end_addr = (@addr | (IN6MASK ^ @mask_addr)) + else + raise "unsupported address family" + end + + return clone.set(begin_addr, @family)..clone.set(end_addr, @family) + end + + # Returns a string containing a human-readable representation of the + # ipaddr. ("#") + def inspect + case @family + when Socket::AF_INET + af = "IPv4" + when Socket::AF_INET6 + af = "IPv6" + else + raise "unsupported address family" + end + return sprintf("#<%s: %s:%s/%s>", self.class.name, + af, _to_string(@addr), _to_string(@mask_addr)) + end + + protected + + def set(addr, *family) + case family[0] ? family[0] : @family + when Socket::AF_INET + if addr < 0 || addr > IN4MASK + raise ArgumentError, "invalid address" + end + when Socket::AF_INET6 + if addr < 0 || addr > IN6MASK + raise ArgumentError, "invalid address" + end + else + raise ArgumentError, "unsupported address family" + end + @addr = addr + if family[0] + @family = family[0] + end + return self + end + + def mask!(mask) + if mask.kind_of?(String) + if mask =~ /^\d+$/ + prefixlen = mask.to_i + else + m = IPAddr.new(mask) + if m.family != @family + raise ArgumentError, "address family is not same" + end + @mask_addr = m.to_i + @addr &= @mask_addr + return self + end + else + prefixlen = mask + end + case @family + when Socket::AF_INET + if prefixlen < 0 || prefixlen > 32 + raise ArgumentError, "invalid length" + end + masklen = 32 - prefixlen + @mask_addr = ((IN4MASK >> masklen) << masklen) + when Socket::AF_INET6 + if prefixlen < 0 || prefixlen > 128 + raise ArgumentError, "invalid length" + end + masklen = 128 - prefixlen + @mask_addr = ((IN6MASK >> masklen) << masklen) + else + raise "unsupported address family" + end + @addr = ((@addr >> masklen) << masklen) + return self + end + + private + + # Creates a new ipaddr object either from a human readable IP + # 
address representation in string, or from a packed in_addr value + # followed by an address family. + # + # In the former case, the following are the valid formats that will + # be recognized: "address", "address/prefixlen" and "address/mask", + # where an IPv6 address may be enclosed in square brackets (`[' and + # `]'). If a prefixlen or a mask is specified, it returns a masked + # IP address. Although the address family is determined + # automatically from a specified string, you can specify one + # explicitly by the optional second argument. + # + # Otherwise an IP address is generated from a packed in_addr value + # and an address family. + # + # The IPAddr class defines many methods and operators, and some of + # those, such as &, |, include? and ==, accept a string, or a packed + # in_addr value instead of an IPAddr object. + def initialize(addr = '::', family = Socket::AF_UNSPEC) + if !addr.kind_of?(String) + case family + when Socket::AF_INET, Socket::AF_INET6 + set(addr.to_i, family) + @mask_addr = (family == Socket::AF_INET) ? IN4MASK : IN6MASK + return + when Socket::AF_UNSPEC + raise ArgumentError, "address family must be specified" + else + raise ArgumentError, "unsupported address family: #{family}" + end + end + prefix, prefixlen = addr.split('/') + if prefix =~ /^\[(.*)\]$/i + prefix = $1 + family = Socket::AF_INET6 + end + # It seems AI_NUMERICHOST doesn't do the job.
+ #Socket.getaddrinfo(left, nil, Socket::AF_INET6, Socket::SOCK_STREAM, nil, + # Socket::AI_NUMERICHOST) + begin + IPSocket.getaddress(prefix) # test if address is vaild + rescue + raise ArgumentError, "invalid address" + end + @addr = @family = nil + if family == Socket::AF_UNSPEC || family == Socket::AF_INET + @addr = in_addr(prefix) + if @addr + @family = Socket::AF_INET + end + end + if !@addr && (family == Socket::AF_UNSPEC || family == Socket::AF_INET6) + @addr = in6_addr(prefix) + @family = Socket::AF_INET6 + end + if family != Socket::AF_UNSPEC && @family != family + raise ArgumentError, "address family mismatch" + end + if prefixlen + mask!(prefixlen) + else + @mask_addr = (@family == Socket::AF_INET) ? IN4MASK : IN6MASK + end + end + + def coerce_other(other) + case other + when IPAddr + other + when String + self.class.new(other) + else + self.class.new(other, @family) + end + end + + def in_addr(addr) + if addr =~ /^\d+\.\d+\.\d+\.\d+$/ + return addr.split('.').inject(0) { |i, s| + i << 8 | s.to_i + } + end + return nil + end + + def in6_addr(left) + case left + when /^::ffff:(\d+\.\d+\.\d+\.\d+)$/i + return in_addr($1) + 0xffff00000000 + when /^::(\d+\.\d+\.\d+\.\d+)$/i + return in_addr($1) + when /[^0-9a-f:]/i + raise ArgumentError, "invalid address" + when /^(.*)::(.*)$/ + left, right = $1, $2 + else + right = '' + end + l = left.split(':') + r = right.split(':') + rest = 8 - l.size - r.size + if rest < 0 + return nil + end + return (l + Array.new(rest, '0') + r).inject(0) { |i, s| + i << 16 | s.hex + } + end + + def addr_mask(addr) + case @family + when Socket::AF_INET + return addr & IN4MASK + when Socket::AF_INET6 + return addr & IN6MASK + else + raise "unsupported address family" + end + end + + def _reverse + case @family + when Socket::AF_INET + return (0..3).map { |i| + (@addr >> (8 * i)) & 0xff + }.join('.') + when Socket::AF_INET6 + return ("%.32x" % @addr).reverse!.gsub!(/.(?!$)/, '\&.') + else + raise "unsupported address family" + end + 
end + + def _to_string(addr) + case @family + when Socket::AF_INET + return (0..3).map { |i| + (addr >> (24 - 8 * i)) & 0xff + }.join('.') + when Socket::AF_INET6 + return (("%.32x" % addr).gsub!(/.{4}(?!$)/, '\&:')) + else + raise "unsupported address family" + end + end + +end + +if $0 == __FILE__ + eval DATA.read, nil, $0, __LINE__+4 +end + +__END__ + +require 'test/unit' + +class TC_IPAddr < Test::Unit::TestCase + def test_s_new + assert_nothing_raised { + IPAddr.new("3FFE:505:ffff::/48") + IPAddr.new("0:0:0:1::") + IPAddr.new("2001:200:300::/48") + } + + a = IPAddr.new + assert_equal("::", a.to_s) + assert_equal("0000:0000:0000:0000:0000:0000:0000:0000", a.to_string) + assert_equal(Socket::AF_INET6, a.family) + + a = IPAddr.new("0123:4567:89ab:cdef:0ABC:DEF0:1234:5678") + assert_equal("123:4567:89ab:cdef:abc:def0:1234:5678", a.to_s) + assert_equal("0123:4567:89ab:cdef:0abc:def0:1234:5678", a.to_string) + assert_equal(Socket::AF_INET6, a.family) + + a = IPAddr.new("3ffe:505:2::/48") + assert_equal("3ffe:505:2::", a.to_s) + assert_equal("3ffe:0505:0002:0000:0000:0000:0000:0000", a.to_string) + assert_equal(Socket::AF_INET6, a.family) + assert_equal(false, a.ipv4?) + assert_equal(true, a.ipv6?) + assert_equal("#", a.inspect) + + a = IPAddr.new("3ffe:505:2::/ffff:ffff:ffff::") + assert_equal("3ffe:505:2::", a.to_s) + assert_equal("3ffe:0505:0002:0000:0000:0000:0000:0000", a.to_string) + assert_equal(Socket::AF_INET6, a.family) + + a = IPAddr.new("0.0.0.0") + assert_equal("0.0.0.0", a.to_s) + assert_equal("0.0.0.0", a.to_string) + assert_equal(Socket::AF_INET, a.family) + + a = IPAddr.new("192.168.1.2") + assert_equal("192.168.1.2", a.to_s) + assert_equal("192.168.1.2", a.to_string) + assert_equal(Socket::AF_INET, a.family) + assert_equal(true, a.ipv4?) + assert_equal(false, a.ipv6?) 
+ + a = IPAddr.new("192.168.1.2/24") + assert_equal("192.168.1.0", a.to_s) + assert_equal("192.168.1.0", a.to_string) + assert_equal(Socket::AF_INET, a.family) + assert_equal("#", a.inspect) + + a = IPAddr.new("192.168.1.2/255.255.255.0") + assert_equal("192.168.1.0", a.to_s) + assert_equal("192.168.1.0", a.to_string) + assert_equal(Socket::AF_INET, a.family) + + assert_equal("0:0:0:1::", IPAddr.new("0:0:0:1::").to_s) + assert_equal("2001:200:300::", IPAddr.new("2001:200:300::/48").to_s) + + assert_equal("2001:200:300::", IPAddr.new("[2001:200:300::]/48").to_s) + + [ + ["fe80::1%fxp0"], + ["::1/255.255.255.0"], + ["::1:192.168.1.2/120"], + [IPAddr.new("::1").to_i], + ["::ffff:192.168.1.2/120", Socket::AF_INET], + ["[192.168.1.2]/120"], + ].each { |args| + assert_raises(ArgumentError) { + IPAddr.new(*args) + } + } + end + + def test_s_new_ntoh + addr = '' + IPAddr.new("1234:5678:9abc:def0:1234:5678:9abc:def0").hton.each_byte { |c| + addr += sprintf("%02x", c) + } + assert_equal("123456789abcdef0123456789abcdef0", addr) + addr = '' + IPAddr.new("123.45.67.89").hton.each_byte { |c| + addr += sprintf("%02x", c) + } + assert_equal(sprintf("%02x%02x%02x%02x", 123, 45, 67, 89), addr) + a = IPAddr.new("3ffe:505:2::") + assert_equal("3ffe:505:2::", IPAddr.new_ntoh(a.hton).to_s) + a = IPAddr.new("192.168.2.1") + assert_equal("192.168.2.1", IPAddr.new_ntoh(a.hton).to_s) + end + + def test_ipv4_compat + a = IPAddr.new("::192.168.1.2") + assert_equal("::192.168.1.2", a.to_s) + assert_equal("0000:0000:0000:0000:0000:0000:c0a8:0102", a.to_string) + assert_equal(Socket::AF_INET6, a.family) + assert_equal(true, a.ipv4_compat?) + b = a.native + assert_equal("192.168.1.2", b.to_s) + assert_equal(Socket::AF_INET, b.family) + assert_equal(false, b.ipv4_compat?) 
+
+    a = IPAddr.new("192.168.1.2")
+    b = a.ipv4_compat
+    assert_equal("::192.168.1.2", b.to_s)
+    assert_equal(Socket::AF_INET6, b.family)
+  end
+
+  # Round trip between an IPv4-mapped IPv6 address and its native IPv4 form.
+  def test_ipv4_mapped
+    a = IPAddr.new("::ffff:192.168.1.2")
+    assert_equal("::ffff:192.168.1.2", a.to_s)
+    assert_equal("0000:0000:0000:0000:0000:ffff:c0a8:0102", a.to_string)
+    assert_equal(Socket::AF_INET6, a.family)
+    assert_equal(true, a.ipv4_mapped?)
+    b = a.native
+    assert_equal("192.168.1.2", b.to_s)
+    assert_equal(Socket::AF_INET, b.family)
+    assert_equal(false, b.ipv4_mapped?)
+
+    a = IPAddr.new("192.168.1.2")
+    b = a.ipv4_mapped
+    assert_equal("::ffff:192.168.1.2", b.to_s)
+    assert_equal(Socket::AF_INET6, b.family)
+  end
+
+  def test_reverse
+    assert_equal("f.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.2.0.0.0.5.0.5.0.e.f.f.3.ip6.arpa", IPAddr.new("3ffe:505:2::f").reverse)
+    assert_equal("1.2.168.192.in-addr.arpa", IPAddr.new("192.168.2.1").reverse)
+  end
+
+  # ip6_arpa is expected to raise ArgumentError for an IPv4 address.
+  def test_ip6_arpa
+    assert_equal("f.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.2.0.0.0.5.0.5.0.e.f.f.3.ip6.arpa", IPAddr.new("3ffe:505:2::f").ip6_arpa)
+    assert_raises(ArgumentError) {
+      IPAddr.new("192.168.2.1").ip6_arpa
+    }
+  end
+
+  # ip6_int is expected to raise ArgumentError for an IPv4 address.
+  def test_ip6_int
+    assert_equal("f.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.2.0.0.0.5.0.5.0.e.f.f.3.ip6.int", IPAddr.new("3ffe:505:2::f").ip6_int)
+    assert_raises(ArgumentError) {
+      IPAddr.new("192.168.2.1").ip6_int
+    }
+  end
+
+  # to_string gives the full form, to_s the compressed form.
+  def test_to_s
+    assert_equal("3ffe:0505:0002:0000:0000:0000:0000:0001", IPAddr.new("3ffe:505:2::1").to_string)
+    assert_equal("3ffe:505:2::1", IPAddr.new("3ffe:505:2::1").to_s)
+  end
+end
+
+# Tests for the operators of IPAddr.  Each test also checks that the receiver
+# (@a or @in6_addr_any) is left unmodified by the operation.
+class TC_Operator < Test::Unit::TestCase
+
+  IN6MASK32  = "ffff:ffff::"
+  IN6MASK128 = "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"
+
+  def setup
+    @in6_addr_any = IPAddr.new()
+    @a = IPAddr.new("3ffe:505:2::/48")
+    @b = IPAddr.new("0:0:0:1::")
+    @c = IPAddr.new(IN6MASK32)
+  end
+  alias set_up setup
+
+  def test_or
+    assert_equal("3ffe:505:2:1::", (@a | @b).to_s)
+    a = @a
+    a |= @b
+    assert_equal("3ffe:505:2:1::", a.to_s)
+    assert_equal("3ffe:505:2::", @a.to_s)
+    # The right-hand side may also be a plain integer.
+    assert_equal("3ffe:505:2:1::",
+                 (@a | 0x00000000000000010000000000000000).to_s)
+  end
+
+  def test_and
+    assert_equal("3ffe:505::", (@a & @c).to_s)
+    a = @a
+    a &= @c
+    assert_equal("3ffe:505::", a.to_s)
+    assert_equal("3ffe:505:2::", @a.to_s)
+    # The right-hand side may also be a plain integer.
+    assert_equal("3ffe:505::", (@a & 0xffffffff000000000000000000000000).to_s)
+  end
+
+  def test_shift_right
+    assert_equal("0:3ffe:505:2::", (@a >> 16).to_s)
+    a = @a
+    a >>= 16
+    assert_equal("0:3ffe:505:2::", a.to_s)
+    assert_equal("3ffe:505:2::", @a.to_s)
+  end
+
+  def test_shift_left
+    assert_equal("505:2::", (@a << 16).to_s)
+    a = @a
+    a <<= 16
+    assert_equal("505:2::", a.to_s)
+    assert_equal("3ffe:505:2::", @a.to_s)
+  end
+
+  # Exercises the unary ~ operator (bitwise complement).
+  def test_carrot
+    a = ~@in6_addr_any
+    assert_equal(IN6MASK128, a.to_s)
+    assert_equal("::", @in6_addr_any.to_s)
+  end
+
+  def test_equal
+    assert_equal(true, @a == IPAddr.new("3ffe:505:2::"))
+    assert_equal(false, @a == IPAddr.new("3ffe:505:3::"))
+    assert_equal(true, @a != IPAddr.new("3ffe:505:3::"))
+    assert_equal(false, @a != IPAddr.new("3ffe:505:2::"))
+  end
+
+  def test_mask
+    a = @a.mask(32)
+    assert_equal("3ffe:505::", a.to_s)
+    assert_equal("3ffe:505:2::", @a.to_s)
+  end
+
+  def test_include?
+    assert_equal(true, @a.include?(IPAddr.new("3ffe:505:2::")))
+    assert_equal(true, @a.include?(IPAddr.new("3ffe:505:2::1")))
+    assert_equal(false, @a.include?(IPAddr.new("3ffe:505:3::")))
+    net1 = IPAddr.new("192.168.2.0/24")
+    assert_equal(true, net1.include?(IPAddr.new("192.168.2.0")))
+    assert_equal(true, net1.include?(IPAddr.new("192.168.2.255")))
+    assert_equal(false, net1.include?(IPAddr.new("192.168.3.0")))
+    # test with integer parameter
+    int = (192 << 24) + (168 << 16) + (2 << 8) + 13
+
+    assert_equal(true, net1.include?(int))
+    assert_equal(false, net1.include?(int+255))
+
+  end
+
+end
diff --git a/lib/irb.rb b/lib/irb.rb
new file mode 100644
index 0000000..3ca5ff8
--- /dev/null
+++ b/lib/irb.rb
@@ -0,0 +1,354 @@
+#
+#   irb.rb - irb main module
+#   	$Release Version: 0.9.5 $
+#   	$Revision: 24294 $
+#   	by Keiju ISHITSUKA(keiju@ruby-lang.org)
+#
+# --
+#
+#
+#
+require "e2mmap"
+
+require "irb/init"
+require "irb/context"
+require "irb/extend-command"
+#require "irb/workspace"
+
+require "irb/ruby-lex"
+require "irb/input-method"
+require "irb/locale"
+
+STDOUT.sync = true
+
+module IRB
+  @RCS_ID='-$Id: irb.rb 24294 2009-07-26 15:33:29Z yugui $-'
+
+  # Exception raised by IRB.irb_abort unless another class is given.
+  class Abort < Exception;end
+
+  #
+  @CONF = {}
+
+  # Returns the module-level configuration hash.
+  def IRB.conf
+    @CONF
+  end
+
+  # IRB version method
+  #
+  # Returns the cached @CONF[:VERSION] if present; otherwise loads
+  # irb/version, builds the version string and caches it.
+  def IRB.version
+    if v = @CONF[:VERSION] then return v end
+
+    require "irb/version"
+    rv = @RELEASE_VERSION.sub(/\.0/, "")
+    @CONF[:VERSION] = format("irb %s(%s)", rv, @LAST_UPDATE_DATE)
+  end
+
+  # Returns the context stored under @CONF[:MAIN_CONTEXT].
+  def IRB.CurrentContext
+    IRB.conf[:MAIN_CONTEXT]
+  end
+
+  # initialize IRB and start TOP_LEVEL irb
+  def IRB.start(ap_path = nil)
+    $0 = File::basename(ap_path, ".rb") if ap_path
+
+    IRB.setup(ap_path)
+
+    if @CONF[:SCRIPT]
+      irb = Irb.new(nil, @CONF[:SCRIPT])
+    else
+      irb = Irb.new
+    end
+
+    @CONF[:IRB_RC].call(irb.context) if @CONF[:IRB_RC]
+    @CONF[:MAIN_CONTEXT] = irb.context
+
+    trap("SIGINT") do
+      irb.signal_handle
+    end
+
+    # eval_input runs until :IRB_EXIT is thrown (see IRB.irb_exit); the
+    # at-exit hooks run whichever way the loop is left.
+    begin
+      catch(:IRB_EXIT) do
+        irb.eval_input
+      end
+    ensure
+      irb_at_exit
+    end
+#    print "\n"
+  end
+
+  # Calls every hook stored in @CONF[:AT_EXIT].
+  def IRB.irb_at_exit
+    @CONF[:AT_EXIT].each{|hook| hook.call}
+  end
+
+  # Leaves the catch(:IRB_EXIT) block of IRB.start with +ret+ as its value.
+  def IRB.irb_exit(irb, ret)
+    throw :IRB_EXIT, ret
+  end
+
+  # Raises +exception+ in the thread of +irb+'s context when Thread is
+  # defined, otherwise in the current flow of control.
+  def IRB.irb_abort(irb, exception = Abort)
+    if defined? Thread
+      irb.context.thread.raise exception, "abort then interrupt!!"
+    else
+      raise exception, "abort then interrupt!!"
+    end
+  end
+
+  #
+  # irb interpreter main routine
+  #
+  class Irb
+    # Builds the Context for this interpreter, extends its main object with
+    # ExtendCommandBundle and creates the lexer used to read statements.
+    def initialize(workspace = nil, input_method = nil, output_method = nil)
+      @context = Context.new(self, workspace, input_method, output_method)
+      @context.main.extend ExtendCommandBundle
+      @signal_status = :IN_IRB
+
+      @scanner = RubyLex.new
+      @scanner.exception_on_syntax_error = false
+    end
+    attr_reader :context
+    attr_accessor :scanner
+
+    # Read-eval-print loop: installs the prompt and input callbacks on the
+    # lexer, then evaluates each top-level statement it yields and reports
+    # any exception raised by the evaluation.
+    def eval_input
+      @scanner.set_prompt do
+        |ltype, indent, continue, line_no|
+        # Pick the prompt format for the current lexer state.
+        if ltype
+          f = @context.prompt_s
+        elsif continue
+          f = @context.prompt_c
+        elsif indent > 0
+          f = @context.prompt_n
+        else
+          f = @context.prompt_i
+        end
+        f = "" unless f
+        if @context.prompting?
+          @context.io.prompt = p = prompt(f, ltype, indent, line_no)
+        else
+          @context.io.prompt = p = ""
+        end
+        if @context.auto_indent_mode
+          unless ltype
+            # Pad the prompt with spaces according to the indent level.
+            ind = prompt(@context.prompt_i, ltype, indent, line_no)[/.*\z/].size +
+              indent * 2 - p.size
+            ind += 2 if continue
+            @context.io.prompt = p + " " * ind if ind > 0
+          end
+        end
+      end
+
+      @scanner.set_input(@context.io) do
+        signal_status(:IN_INPUT) do
+          if l = @context.io.gets
+            print l if @context.verbose?
+          else
+            # End of input: substitute an empty line when EOF is to be ignored.
+            if @context.ignore_eof? and @context.io.readable_atfer_eof?
+              l = "\n"
+              if @context.verbose?
+                printf "Use \"exit\" to leave %s\n", @context.ap_name
+              end
+            end
+          end
+          l
+        end
+      end
+
+      @scanner.each_top_level_statement do |line, line_no|
+        signal_status(:IN_EVAL) do
+          begin
+            line.untaint
+            @context.evaluate(line, line_no)
+            output_value if @context.echo?
+            exc = nil
+          rescue Interrupt => exc
+          rescue SystemExit, SignalException
+            # Let these terminate the loop instead of being reported.
+            raise
+          rescue Exception => exc
+          end
+          if exc
+            print exc.class, ": ", exc, "\n"
+            # A backtrace whose first frame matches an irb file name, for an
+            # exception that is neither IRB's own nor a SyntaxError, is
+            # reported as a possible irb bug and printed unfiltered.
+            if exc.backtrace[0] =~ /irb(2)?(\/.*|-.*|\.rb)?:/ && exc.class.to_s !~ /^IRB/ &&
+                !(SyntaxError === exc)
+              irb_bug = true
+            else
+              irb_bug = false
+            end
+
+            # Keep the first back_trace_limit frames in +messages+ and the
+            # last back_trace_limit frames in +lasts+; +levels+ counts the
+            # frames dropped in between.
+            messages = []
+            lasts = []
+            levels = 0
+            for m in exc.backtrace
+              m = @context.workspace.filter_backtrace(m) unless irb_bug
+              if m
+                if messages.size < @context.back_trace_limit
+                  messages.push "\tfrom "+m
+                else
+                  lasts.push "\tfrom "+m
+                  if lasts.size > @context.back_trace_limit
+                    lasts.shift
+                    levels += 1
+                  end
+                end
+              end
+            end
+            print messages.join("\n"), "\n"
+            unless lasts.empty?
+              printf "... %d levels...\n", levels if levels > 0
+              print lasts.join("\n")
+            end
+            print "Maybe IRB bug!!\n" if irb_bug
+          end
+          if $SAFE > 2
+            abort "Error: irb does not work for $SAFE level higher than 2"
+          end
+        end
+      end
+    end
+
+    # Runs the block with the context's irb_path / irb_name replaced by the
+    # given values (only those that are non-nil), yielding the previous
+    # values and restoring them afterwards.
+    def suspend_name(path = nil, name = nil)
+      @context.irb_path, back_path = path, @context.irb_path if path
+      @context.irb_name, back_name = name, @context.irb_name if name
+      begin
+        yield back_path, back_name
+      ensure
+        @context.irb_path = back_path if path
+        @context.irb_name = back_name if name
+      end
+    end
+
+    # Runs the block with +workspace+ installed in the context, yielding the
+    # previous workspace and restoring it afterwards.
+    def suspend_workspace(workspace)
+      @context.workspace, back_workspace = workspace, @context.workspace
+      begin
+        yield back_workspace
+      ensure
+        @context.workspace = back_workspace
+      end
+    end
+
+    # Runs the block with +input_method+ as the context's @io, yielding the
+    # previous io and restoring it afterwards.  @io is assigned through
+    # instance_eval rather than through a writer method.
+    def suspend_input_method(input_method)
+      back_io = @context.io
+      @context.instance_eval{@io = input_method}
+      begin
+        yield back_io
+      ensure
+        @context.instance_eval{@io = back_io}
+      end
+    end
+
+    # Runs the block with +context+ as this interpreter's context, yielding
+    # the previous context and restoring it afterwards.
+    def suspend_context(context)
+      @context, back_context = context, @context
+      begin
+        yield back_context
+      ensure
+        @context = back_context
+      end
+    end
+
+    # SIGINT handler (installed by IRB.start).  Exits unless the context is
+    # configured to ignore SIGINT; otherwise reacts according to
+    # @signal_status.
+    def signal_handle
+      unless @context.ignore_sigint?
+        print "\nabort!!\n" if @context.verbose?
+ exit + end + + case @signal_status + when :IN_INPUT + print "^C\n" + raise RubyLex::TerminateLineInput + when :IN_EVAL + IRB.irb_abort(self) + when :IN_LOAD + IRB.irb_abort(self, LoadAbort) + when :IN_IRB + # ignore + else + # ignore other cases as well + end + end + + def signal_status(status) + return yield if @signal_status == :IN_LOAD + + signal_status_back = @signal_status + @signal_status = status + begin + yield + ensure + @signal_status = signal_status_back + end + end + + def prompt(prompt, ltype, indent, line_no) + p = prompt.dup + p.gsub!(/%([0-9]+)?([a-zA-Z])/) do + case $2 + when "N" + @context.irb_name + when "m" + @context.main.to_s + when "M" + @context.main.inspect + when "l" + ltype + when "i" + if $1 + format("%" + $1 + "d", indent) + else + indent.to_s + end + when "n" + if $1 + format("%" + $1 + "d", line_no) + else + line_no.to_s + end + when "%" + "%" + end + end + p + end + + def output_value + if @context.inspect? + printf @context.return_format, @context.last_value.inspect + else + printf @context.return_format, @context.last_value + end + end + + def inspect + ary = [] + for iv in instance_variables + case (iv = iv.to_s) + when "@signal_status" + ary.push format("%s=:%s", iv, @signal_status.id2name) + when "@context" + ary.push format("%s=%s", iv, eval(iv).__to_s__) + else + ary.push format("%s=%s", iv, eval(iv)) + end + end + format("#<%s: %s>", self.class, ary.join(", ")) + end + end + + # Singleton method + def @CONF.inspect + IRB.version unless self[:VERSION] + + array = [] + for k, v in sort{|a1, a2| a1[0].id2name <=> a2[0].id2name} + case k + when :MAIN_CONTEXT, :__TMP__EHV__ + array.push format("CONF[:%s]=...myself...", k.id2name) + when :PROMPT + s = v.collect{ + |kk, vv| + ss = vv.collect{|kkk, vvv| ":#{kkk.id2name}=>#{vvv.inspect}"} + format(":%s=>{%s}", kk.id2name, ss.join(", ")) + } + array.push format("CONF[:%s]={%s}", k.id2name, s.join(", ")) + else + array.push format("CONF[:%s]=%s", k.id2name, v.inspect) + end + end + 
array.join("\n") + end +end diff --git a/lib/logger.rb b/lib/logger.rb new file mode 100644 index 0000000..c8d9cfc --- /dev/null +++ b/lib/logger.rb @@ -0,0 +1,732 @@ +# logger.rb - simple logging utility +# Copyright (C) 2000-2003, 2005 NAKAMURA, Hiroshi . + +require 'monitor' + +# = logger.rb +# +# Simple logging utility. +# +# Author:: NAKAMURA, Hiroshi +# Documentation:: NAKAMURA, Hiroshi and Gavin Sinclair +# License:: +# You can redistribute it and/or modify it under the same terms of Ruby's +# license; either the dual license version in 2003, or any later version. +# Revision:: $Id: logger.rb 20321 2008-11-22 14:52:06Z yugui $ +# +# See Logger for documentation. +# + + +# +# == Description +# +# The Logger class provides a simple but sophisticated logging utility that +# anyone can use because it's included in the Ruby 1.8.x standard library. +# +# The HOWTOs below give a code-based overview of Logger's usage, but the basic +# concept is as follows. You create a Logger object (output to a file or +# elsewhere), and use it to log messages. The messages will have varying +# levels (+info+, +error+, etc), reflecting their varying importance. The +# levels, and their meanings, are: +# +# +FATAL+:: an unhandleable error that results in a program crash +# +ERROR+:: a handleable error condition +# +WARN+:: a warning +# +INFO+:: generic (useful) information about system operation +# +DEBUG+:: low-level information for developers +# +# So each message has a level, and the Logger itself has a level, which acts +# as a filter, so you can control the amount of information emitted from the +# logger without having to remove actual messages. +# +# For instance, in a production system, you may have your logger(s) set to +# +INFO+ (or +WARN+ if you don't want the log files growing large with +# repetitive information). When you are developing it, though, you probably +# want to know about the program's internal state, and would set them to +# +DEBUG+. 
+#
+# === Example
+#
+# A simple example demonstrates the above explanation:
+#
+#   log = Logger.new(STDOUT)
+#   log.level = Logger::WARN
+#
+#   log.debug("Created logger")
+#   log.info("Program started")
+#   log.warn("Nothing to do!")
+#
+#   begin
+#     File.foreach(path) do |line|
+#       unless line =~ /^(\w+) = (.*)$/
+#         log.error("Line in wrong format: #{line}")
+#       end
+#     end
+#   rescue => err
+#     log.fatal("Caught exception; exiting")
+#     log.fatal(err)
+#   end
+#
+# Because the Logger's level is set to +WARN+, only the warning, error, and
+# fatal messages are recorded.  The debug and info messages are silently
+# discarded.
+#
+# === Features
+#
+# There are several interesting features that Logger provides, like
+# auto-rolling of log files, setting the format of log messages, and
+# specifying a program name in conjunction with the message.  The next section
+# shows you how to achieve these things.
+#
+#
+# == HOWTOs
+#
+# === How to create a logger
+#
+# The options below give you various choices, in more or less increasing
+# complexity.
+#
+# 1. Create a logger which logs messages to STDERR/STDOUT.
+#
+#      logger = Logger.new(STDERR)
+#      logger = Logger.new(STDOUT)
+#
+# 2. Create a logger for the file which has the specified name.
+#
+#      logger = Logger.new('logfile.log')
+#
+# 3. Create a logger for the specified file.
+#
+#      file = File.open('foo.log', File::WRONLY | File::APPEND)
+#      # To create new (and to remove old) logfile, add File::CREAT like:
+#      #   file = open('foo.log', File::WRONLY | File::APPEND | File::CREAT)
+#      logger = Logger.new(file)
+#
+# 4. Create a logger which ages logfile once it reaches a certain size.  Leave
+#    10 "old log files" and each file is about 1,024,000 bytes.
+#
+#      logger = Logger.new('foo.log', 10, 1024000)
+#
+# 5. Create a logger which ages logfile daily/weekly/monthly.
+#
+#      logger = Logger.new('foo.log', 'daily')
+#      logger = Logger.new('foo.log', 'weekly')
+#      logger = Logger.new('foo.log', 'monthly')
+#
+# === How to log a message
+#
+# Notice the different methods (+fatal+, +error+, +info+) being used to log
+# messages of various levels.  Other methods in this family are +warn+ and
+# +debug+.  +add+ is used below to log a message of an arbitrary (perhaps
+# dynamic) level.
+#
+# 1. Message in block.
+#
+#      logger.fatal { "Argument 'foo' not given." }
+#
+# 2. Message as a string.
+#
+#      logger.error "Argument #{ @foo } mismatch."
+#
+# 3. With progname.
+#
+#      logger.info('initialize') { "Initializing..." }
+#
+# 4. With severity.
+#
+#      logger.add(Logger::FATAL) { 'Fatal error!' }
+#
+# === How to close a logger
+#
+#      logger.close
+#
+# === Setting severity threshold
+#
+# 1. Original interface.
+#
+#      logger.sev_threshold = Logger::WARN
+#
+# 2. Log4r (somewhat) compatible interface.
+#
+#      logger.level = Logger::INFO
+#
+#      DEBUG < INFO < WARN < ERROR < FATAL < UNKNOWN
+#
+#
+# == Format
+#
+# Log messages are rendered in the output stream in a certain format by
+# default.  The default format and a sample are shown below:
+#
+# Log format:
+#   SeverityID, [Date Time mSec #pid] SeverityLabel -- ProgName: message
+#
+# Log sample:
+#   I, [Wed Mar 03 02:34:24 JST 1999 895701 #19074]  INFO -- Main: info.
+#
+# You may change the date and time format in this manner:
+#
+#   logger.datetime_format = "%Y-%m-%d %H:%M:%S"
+#         # e.g. "2004-01-03 00:54:26"
+#
+# You may change the overall format with Logger#formatter= method.
+#
+#   logger.formatter = proc { |severity, datetime, progname, msg|
+#     "#{datetime}: #{msg}\n"
+#   }
+#         # e.g. "Thu Sep 22 08:51:08 GMT+9:00 2005: hello world"
+#
+
+
+class Logger
+  VERSION = "1.2.6"
+  # %w$...$ splits the expanded RCS keyword into words: the keyword tag, the
+  # file name and the revision come first, the remaining words are dropped.
+  id, name, rev = %w$Id: logger.rb 20321 2008-11-22 14:52:06Z yugui $
+  if name
+    name = name.chomp(",v")
+  else
+    name = File.basename(__FILE__)
+  end
+  rev ||= "v#{VERSION}"
+  ProgName = "#{name}/#{rev}"
+
+  class Error < RuntimeError; end
+  class ShiftingError < Error; end
+
+  # Logging severity.
+  module Severity
+    DEBUG = 0
+    INFO = 1
+    WARN = 2
+    ERROR = 3
+    FATAL = 4
+    UNKNOWN = 5
+  end
+  include Severity
+
+  # Logging severity threshold (e.g. <tt>Logger::INFO</tt>).
+  attr_accessor :level
+
+  # Logging program name.
+  attr_accessor :progname
+
+  # Logging date-time format (string passed to +strftime+).
+  # The value is stored on the default formatter.
+  def datetime_format=(datetime_format)
+    @default_formatter.datetime_format = datetime_format
+  end
+
+  # Returns the date-time format held by the default formatter.
+  def datetime_format
+    @default_formatter.datetime_format
+  end
+
+  # Logging formatter.  formatter#call is invoked with 4 arguments; severity,
+  # time, progname and msg for each log.  Bear in mind that time is a Time and
+  # msg is an Object that user passed and it might not be a String.  It is
+  # expected to return a logdev#write-able Object.  Default formatter is used
+  # when no formatter is set.
+  attr_accessor :formatter
+
+  alias sev_threshold level
+  alias sev_threshold= level=
+
+  # Returns +true+ iff the current severity level allows for the printing of
+  # +DEBUG+ messages.
+  def debug?; @level <= DEBUG; end
+
+  # Returns +true+ iff the current severity level allows for the printing of
+  # +INFO+ messages.
+  def info?; @level <= INFO; end
+
+  # Returns +true+ iff the current severity level allows for the printing of
+  # +WARN+ messages.
+  def warn?; @level <= WARN; end
+
+  # Returns +true+ iff the current severity level allows for the printing of
+  # +ERROR+ messages.
+  def error?; @level <= ERROR; end
+
+  # Returns +true+ iff the current severity level allows for the printing of
+  # +FATAL+ messages.
+  def fatal?; @level <= FATAL; end
+
+  #
+  # === Synopsis
+  #
+  #   Logger.new(logdev, shift_age = 0, shift_size = 1048576)
+  #   Logger.new(logdev, shift_age = 'weekly')
+  #
+  # === Args
+  #
+  # +logdev+::
+  #   The log device.  This is a filename (String) or IO object (typically
+  #   +STDOUT+, +STDERR+, or an open file).
+  # +shift_age+::
+  #   Number of old log files to keep, *or* frequency of rotation (+daily+,
+  #   +weekly+ or +monthly+).
+  # +shift_size+::
+  #   Maximum logfile size (only applies when +shift_age+ is a number).
+  #
+  # === Description
+  #
+  # Create an instance.  The level starts at +DEBUG+; when +logdev+ is nil or
+  # false no log device is created.
+  #
+  def initialize(logdev, shift_age = 0, shift_size = 1048576)
+    @progname = nil
+    @level = DEBUG
+    @default_formatter = Formatter.new
+    @formatter = nil
+    @logdev = nil
+    if logdev
+      @logdev = LogDevice.new(logdev, :shift_age => shift_age,
+        :shift_size => shift_size)
+    end
+  end
+
+  #
+  # === Synopsis
+  #
+  #   Logger#add(severity, message = nil, progname = nil) { ... }
+  #
+  # === Args
+  #
+  # +severity+::
+  #   Severity.  Constants are defined in Logger namespace: +DEBUG+, +INFO+,
+  #   +WARN+, +ERROR+, +FATAL+, or +UNKNOWN+.
+  # +message+::
+  #   The log message.  A String or Exception.
+  # +progname+::
+  #   Program name string.  Can be omitted.  Treated as a message if no +message+ and
+  #   +block+ are given.
+  # +block+::
+  #   Can be omitted.  Called to get a message string if +message+ is nil.
+  #
+  # === Return
+  #
+  # +true+ if successful, +false+ otherwise.
+  #
+  # When the given severity is not high enough (for this particular logger), log
+  # no message, and return +true+.
+  #
+  # === Description
+  #
+  # Log a message if the given severity is high enough.  This is the generic
+  # logging method.  Users will be more inclined to use #debug, #info, #warn,
+  # #error, and #fatal.
+  #
+  # <b>Message format</b>: +message+ can be any object, but it has to be
+  # converted to a String in order to log it.  Generally, +inspect+ is used
+  # if the given object is not a String.
+  # A special case is an +Exception+ object, which will be printed in detail,
+  # including message, class, and backtrace.  See #msg2str for the
+  # implementation if required.
+  #
+  # === Bugs
+  #
+  # * Logfile is not locked.
+  # * Append open does not need to lock file.
+  # * But on an OS which supports multi I/O, records may possibly be mixed.
+  #
+  def add(severity, message = nil, progname = nil, &block)
+    severity ||= UNKNOWN
+    # Nothing to do without a log device or below the threshold.
+    if @logdev.nil? or severity < @level
+      return true
+    end
+    progname ||= @progname
+    if message.nil?
+      if block_given?
+        message = yield
+      else
+        # No message and no block: the progname argument is the message.
+        message = progname
+        progname = @progname
+      end
+    end
+    @logdev.write(
+      format_message(format_severity(severity), Time.now, progname, message))
+    true
+  end
+  alias log add
+
+  #
+  # Dump given message to the log device without any formatting.  If no log
+  # device exists, return +nil+.
+  #
+  def <<(msg)
+    unless @logdev.nil?
+      @logdev.write(msg)
+    end
+  end
+
+  #
+  # Log a +DEBUG+ message.
+  #
+  # See #info for more information.
+  #
+  def debug(progname = nil, &block)
+    add(DEBUG, nil, progname, &block)
+  end
+
+  #
+  # Log an +INFO+ message.
+  #
+  # The message can come either from the +progname+ argument or the +block+.  If
+  # both are provided, then the +block+ is used as the message, and +progname+
+  # is used as the program name.
+  #
+  # === Examples
+  #
+  #   logger.info("MainApp") { "Received connection from #{ip}" }
+  #   # ...
+  #   logger.info "Waiting for input from user"
+  #   # ...
+  #   logger.info { "User typed #{input}" }
+  #
+  # You'll probably stick to the second form above, unless you want to provide a
+  # program name (which you can do with <tt>Logger#progname=</tt> as well).
+  #
+  # === Return
+  #
+  # See #add.
+  #
+  def info(progname = nil, &block)
+    add(INFO, nil, progname, &block)
+  end
+
+  #
+  # Log a +WARN+ message.
+  #
+  # See #info for more information.
+  #
+  def warn(progname = nil, &block)
+    add(WARN, nil, progname, &block)
+  end
+
+  #
+  # Log an +ERROR+ message.
+  #
+  # See #info for more information.
+  #
+  def error(progname = nil, &block)
+    add(ERROR, nil, progname, &block)
+  end
+
+  #
+  # Log a +FATAL+ message.
+  #
+  # See #info for more information.
+  #
+  def fatal(progname = nil, &block)
+    add(FATAL, nil, progname, &block)
+  end
+
+  #
+  # Log an +UNKNOWN+ message.  This will be printed no matter what the logger
+  # level.
+  #
+  # See #info for more information.
+  #
+  def unknown(progname = nil, &block)
+    add(UNKNOWN, nil, progname, &block)
+  end
+
+  #
+  # Close the logging device.
+  #
+  def close
+    @logdev.close if @logdev
+  end
+
+private
+
+  # Severity label for logging. (max 5 char)
+  SEV_LABEL = %w(DEBUG INFO WARN ERROR FATAL ANY)
+
+  # Maps a numeric severity to its label; values without an entry in
+  # SEV_LABEL are reported as 'ANY'.
+  def format_severity(severity)
+    SEV_LABEL[severity] || 'ANY'
+  end
+
+  # Renders one log record with the user-supplied formatter, or with the
+  # default formatter when none is set.
+  def format_message(severity, datetime, progname, msg)
+    (@formatter || @default_formatter).call(severity, datetime, progname, msg)
+  end
+
+
+  # Default formatter: renders a record according to Format.
+  class Formatter
+    # Fields: first character of the severity label, timestamp, process id
+    # ($$), severity label, progname, message.
+    Format = "%s, [%s#%d] %5s -- %s: %s\n"
+
+    attr_accessor :datetime_format
+
+    def initialize
+      @datetime_format = nil
+    end
+
+    def call(severity, time, progname, msg)
+      Format % [severity[0..0], format_datetime(time), $$, severity, progname,
+        msg2str(msg)]
+    end
+
+  private
+
+    # Formats +time+ with @datetime_format when one is set; otherwise as
+    # "%Y-%m-%dT%H:%M:%S." followed by the six-digit microseconds and a space.
+    def format_datetime(time)
+      if @datetime_format.nil?
+        time.strftime("%Y-%m-%dT%H:%M:%S.") << "%06d " % time.usec
+      else
+        time.strftime(@datetime_format)
+      end
+    end
+
+    # Converts +msg+ to a String: a String is returned unchanged, an
+    # Exception becomes "message (class)" followed by its backtrace, and any
+    # other object is rendered with inspect.
+    def msg2str(msg)
+      case msg
+      when ::String
+        msg
+      when ::Exception
+        "#{ msg.message } (#{ msg.class })\n" <<
+          (msg.backtrace || []).join("\n")
+      else
+        msg.inspect
+      end
+    end
+  end
+
+
+  # Output side of a Logger: wraps either a caller-supplied object or a log
+  # file that this class opens itself and rotates.
+  class LogDevice
+    attr_reader :dev
+    attr_reader :filename
+
+    # Monitor used to serialize write and close.
+    class LogDeviceMutex
+      include MonitorMixin
+    end
+
+    # +log+ is used as the device itself when it responds to both write and
+    # close; otherwise it is treated as a file name and opened.  Rotation
+    # settings (+opt+[:shift_age], +opt+[:shift_size]) are only recorded in
+    # the file-name case.
+    def initialize(log = nil, opt = {})
+      @dev = @filename = @shift_age = @shift_size = nil
+      @mutex = LogDeviceMutex.new
+      if log.respond_to?(:write) and log.respond_to?(:close)
+        @dev = log
+      else
+        @dev = open_logfile(log)
+        @dev.sync = true
+        @filename = log
+        @shift_age = opt[:shift_age] || 7
+        @shift_size = opt[:shift_size] || 1048576
+      end
+    end
+
+    # Writes +message+ to the device, rotating the log file first when
+    # needed.  A failure during rotation is re-raised as
+    # Logger::ShiftingError.
+    def write(message)
+      @mutex.synchronize do
+        if @shift_age and @dev.respond_to?(:stat)
+          begin
+            check_shift_log
+          rescue
+            raise Logger::ShiftingError.new("Shifting failed. #{$!}")
+          end
+        end
+        @dev.write(message)
+      end
+    end
+
+    def close
+      @mutex.synchronize do
+        @dev.close
+      end
+    end
+
+  private
+
+    # Opens an existing file for appending, or creates it (with a header
+    # line) when it does not exist yet.
+    def open_logfile(filename)
+      if (FileTest.exist?(filename))
+        open(filename, (File::WRONLY | File::APPEND))
+      else
+        create_logfile(filename)
+      end
+    end
+
+    def create_logfile(filename)
+      logdev = open(filename, (File::WRONLY | File::APPEND | File::CREAT))
+      logdev.sync = true
+      add_log_header(logdev)
+      logdev
+    end
+
+    def add_log_header(file)
+      file.write(
+        "# Logfile created on %s by %s\n" % [Time.now.to_s, Logger::ProgName]
+      )
+    end
+
+    # Seconds in a day.
+    SiD = 24 * 60 * 60
+
+    # Rotates the log file if it is due: by size when @shift_age is an
+    # Integer, otherwise by period.
+    def check_shift_log
+      if @shift_age.is_a?(Integer)
+        # Note: always returns false if '0'.
+        if @filename && (@shift_age > 0) && (@dev.stat.size > @shift_size)
+          shift_log_age
+        end
+      else
+        now = Time.now
+        if @dev.stat.mtime <= previous_period_end(now)
+          shift_log_period(now)
+        end
+      end
+    end
+
+    # Size-based rotation: renames "file.i" to "file.(i+1)" from the highest
+    # index down, moves the current file to "file.0" and opens a fresh file.
+    def shift_log_age
+      (@shift_age-3).downto(0) do |i|
+        if FileTest.exist?("#{@filename}.#{i}")
+          File.rename("#{@filename}.#{i}", "#{@filename}.#{i+1}")
+        end
+      end
+      @dev.close
+      File.rename("#{@filename}", "#{@filename}.0")
+      @dev = create_logfile(@filename)
+      return true
+    end
+
+    # Period-based rotation: moves the current file to "file.YYYYMMDD" (the
+    # date of the previous period end) and opens a fresh file.  Raises
+    # RuntimeError when the target name is already taken.
+    def shift_log_period(now)
+      postfix = previous_period_end(now).strftime("%Y%m%d")	# YYYYMMDD
+      age_file = "#{@filename}.#{postfix}"
+      if FileTest.exist?(age_file)
+        raise RuntimeError.new("'#{ age_file }' already exists.")
+      end
+      @dev.close
+      File.rename("#{@filename}", age_file)
+      @dev = create_logfile(@filename)
+      return true
+    end
+
+    # Returns the last second of the period before the one containing +now+
+    # for the 'daily', 'weekly' and 'monthly' settings; +now+ itself for any
+    # other value of @shift_age.
+    def previous_period_end(now)
+      case @shift_age
+      when /^daily$/
+        eod(now - 1 * SiD)
+      when /^weekly$/
+        eod(now - ((now.wday + 1) * SiD))
+      when /^monthly$/
+        eod(now - now.mday * SiD)
+      else
+        now
+      end
+    end
+
+    # 23:59:59 on the day of +t+.
+    def eod(t)
+      Time.mktime(t.year, t.month, t.mday, 23, 59, 59)
+    end
+  end
+
+
+  #
+  # == Description
+  #
+  # Application -- Add logging support to your application.
+  #
+  # == Usage
+  #
+  # 1. Define your application class as a sub-class of this class.
+  # 2. Override 'run' method in your class to do many things.
+  # 3. Instantiate it and invoke 'start'.
+  #
+  # == Example
+  #
+  #   class FooApp < Application
+  #     def initialize(foo_app, application_specific, arguments)
+  #       super('FooApp') # Name of the application.
+  #     end
+  #
+  #     def run
+  #       ...
+  #       log(WARN, 'warning')
+  #       ...
+  #       @log.error('my_method2') { 'Error!' }
+  #       ...
+  #     end
+  #   end
+  #
+  #   status = FooApp.new(....).start
+  #
+  class Application
+    include Logger::Severity
+
+    # Name of the application given at initialize.
+    attr_reader :appname
+
+    #
+    # == Synopsis
+    #
+    #   Application.new(appname = nil)
+    #
+    # == Args
+    #
+    # +appname+:: Name of the application.
+    #
+    # == Description
+    #
+    # Create an instance.  Log device is +STDERR+ by default.  This can be
+    # changed with #set_log.
+    #
+    def initialize(appname = nil)
+      @appname = appname
+      @log = Logger.new(STDERR)
+      @log.progname = @appname
+      @level = @log.level
+    end
+
+    #
+    # Start the application.  Return the status code.
+    #
+    # The status is whatever #run returns; it stays -1 when #run raises an
+    # exception that the bare +rescue+ catches, in which case the exception
+    # is logged at +FATAL+ level and not re-raised.
+    #
+    def start
+      status = -1
+      begin
+        log(INFO, "Start of #{ @appname }.")
+        status = run
+      rescue
+        log(FATAL, "Detected an exception. Stopping ... #{$!} (#{$!.class})\n" << $@.join("\n"))
+      ensure
+        log(INFO, "End of #{ @appname }. (status: #{ status.to_s })")
+      end
+      status
+    end
+
+    # Logger for this application.  See the class Logger for an explanation.
+    def logger
+      @log
+    end
+
+    #
+    # Sets the logger for this application.  See the class Logger for an explanation.
+    #
+    def logger=(logger)
+      @log = logger
+    end
+
+    #
+    # Sets the log device for this application.  See <tt>Logger.new</tt> for an explanation
+    # of the arguments.
+    #
+    def set_log(logdev, shift_age = 0, shift_size = 1024000)
+      @log = Logger.new(logdev, shift_age, shift_size)
+      @log.progname = @appname
+      @log.level = @level
+    end
+
+    # Shorthand for set_log with the default rotation settings.
+    def log=(logdev)
+      set_log(logdev)
+    end
+
+    #
+    # Set the logging threshold, just like <tt>Logger#level=</tt>.
+    #
+    def level=(level)
+      @level = level
+      @log.level = @level
+    end
+
+    #
+    # See Logger#add.  This application's +appname+ is used.
+    #
+    def log(severity, message = nil, &block)
+      @log.add(severity, message, @appname, &block) if @log
+    end
+
+  private
+
+    # Hook to be overridden by subclasses; the default implementation raises.
+    def run
+      raise RuntimeError.new('Method run must be defined in the derived class.')
+    end
+  end
+end
diff --git a/lib/mathn.rb b/lib/mathn.rb
new file mode 100644
index 0000000..780ecd6
--- /dev/null
+++ b/lib/mathn.rb
@@ -0,0 +1,206 @@
+#
+#   mathn.rb -
+#   	$Release Version: 0.5 $
+#   	$Revision: 1.1.1.1.4.1 $
+#   	by Keiju ISHITSUKA(SHL Japan Inc.)
+#
+# --
+#
+#
+#
+
+require "cmath.rb"
+require "matrix.rb"
+require "prime.rb"
+
+require "mathn/rational"
+require "mathn/complex"
+
+# Replace the Math constant with CMath unless Math already provides exp!.
+unless defined?(Math.exp!)
+  Object.instance_eval{remove_const :Math}
+  Math = CMath
+end
+
+class Fixnum
+  # Division is re-pointed at quo.
+  remove_method :/
+  alias / quo
+
+  # Keep the built-in ** reachable as power! (only aliased once).
+  alias power! ** unless method_defined? :power!
+
+  # A negative receiver raised to a non-integral exponent is computed as a
+  # Complex power; every other case uses the built-in power!.
+  def ** (other)
+    if self < 0 && other.round != other
+      Complex(self, 0.0) ** other
+    else
+      power!(other)
+    end
+  end
+
+end
+
+class Bignum
+  # Division is re-pointed at quo.
+  remove_method :/
+  alias / quo
+
+  # Keep the built-in ** reachable as power! (only aliased once).
+  alias power! ** unless method_defined? :power!
+
+  # Same rule as Fixnum#**: negative receiver with a non-integral exponent
+  # goes through Complex, otherwise power! is used.
+  def ** (other)
+    if self < 0 && other.round != other
+      Complex(self, 0.0) ** other
+    else
+      power!(other)
+    end
+  end
+
+end
+
+class Rational
+  # Exponentiation.
+  #
+  # * Rational exponent: a negative receiver is delegated to Complex; the
+  #   trivial cases (exponent 0, receiver 0 or 1) return immediately;
+  #   otherwise the prime exponents of numerator and denominator are
+  #   multiplied by the exponent, and the result stays a Rational only if
+  #   all of them come out integral.  If any does not, Float(self) ** other
+  #   is returned instead.
+  # * Integer exponent: numerator and denominator are raised separately
+  #   (swapped for a negative exponent).
+  # * Float exponent: computed as Float(self) ** other.
+  # * Anything else: other.coerce(self) decides.
+  def ** (other)
+    if other.kind_of?(Rational)
+      # Remember the exponent as given; +other+ may be negated below.
+      other2 = other
+      if self < 0
+        return Complex(self, 0.0) ** other
+      elsif other == 0
+        return Rational(1,1)
+      elsif self == 0
+        return Rational(0,1)
+      elsif self == 1
+        return Rational(1,1)
+      end
+
+      npd = numerator.prime_division
+      dpd = denominator.prime_division
+      if other < 0
+        # Negative exponent: invert by swapping the two factorisations.
+        other = -other
+        npd, dpd = dpd, npd
+      end
+
+      for elm in npd
+        elm[1] = elm[1] * other
+        if !elm[1].kind_of?(Integer) and elm[1].denominator != 1
+          return Float(self) ** other2
+        end
+        elm[1] = elm[1].to_i
+      end
+
+      for elm in dpd
+        elm[1] = elm[1] * other
+        if !elm[1].kind_of?(Integer) and elm[1].denominator != 1
+          return Float(self) ** other2
+        end
+        elm[1] = elm[1].to_i
+      end
+
+      num = Integer.from_prime_division(npd)
+      den = Integer.from_prime_division(dpd)
+
+      Rational(num,den)
+
+    elsif other.kind_of?(Integer)
+      if other > 0
+        num = numerator ** other
+        den = denominator ** other
+      elsif other < 0
+        num = denominator ** -other
+        den = numerator ** -other
+      elsif other == 0
+        num = 1
+        den = 1
+      end
+      Rational(num, den)
+    elsif other.kind_of?(Float)
+      Float(self) ** other
+    else
+      x , y = other.coerce(self)
+      x ** y
+    end
+  end
+end
+
+module Math
+  remove_method(:sqrt)
+  # Square root that accepts Complex arguments and returns a Complex for
+  # negative ones.  NaN-like arguments are returned unchanged; non-negative
+  # arguments are handed to rsqrt.
+  def sqrt(a)
+    if a.kind_of?(Complex)
+      abs = sqrt(a.real*a.real + a.imag*a.imag)
+#      if not abs.kind_of?(Rational)
+#        return a**Rational(1,2)
+#      end
+      x = sqrt((a.real + abs)/Rational(2))
+      y = sqrt((-a.real + abs)/Rational(2))
+#      if !(x.kind_of?(Rational) and y.kind_of?(Rational))
+#        return a**Rational(1,2)
+#      end
+      # The imaginary part of the root takes the sign of a.imag.
+      if a.imag >= 0
+        Complex(x, y)
+      else
+        Complex(x, -y)
+      end
+    elsif a.respond_to?(:nan?) and a.nan?
+      a
+    elsif a >= 0
+      rsqrt(a)
+    else
+      Complex(0,rsqrt(-a))
+    end
+  end
+
+  # Square root of a non-negative real.  Floats use sqrt!; a Rational is the
+  # quotient of the roots of its numerator and denominator.  For any other
+  # argument the value is split into 32-bit chunks (most significant first)
+  # and a root is built up 16 bits per chunk; that integer is returned only
+  # when the remainder is zero, otherwise sqrt!(a) is returned.
+  def rsqrt(a)
+    if a.kind_of?(Float)
+      sqrt!(a)
+    elsif a.kind_of?(Rational)
+      rsqrt(a.numerator)/rsqrt(a.denominator)
+    else
+      src = a
+      max = 2 ** 32
+      byte_a = [src & 0xffffffff]
+      # ruby's bug
+      while (src >= max) and (src >>= 32)
+        byte_a.unshift src & 0xffffffff
+      end
+
+      answer = 0
+      main = 0
+      side = 0
+      for elm in byte_a
+        main = (main << 32) + elm
+        side <<= 16
+        # First guess for the next 16-bit digit; it is corrected downwards
+        # by the while loop below.
+        if answer != 0
+          if main * 4 < side * side
+            applo = main.div(side)
+          else
+            applo = ((sqrt!(side * side + 4 * main) - side)/2.0).to_i + 1
+          end
+        else
+          applo = sqrt!(main).to_i + 1
+        end
+
+        while (x = (side + applo) * applo) > main
+          applo -= 1
+        end
+        main -= x
+        answer = (answer << 16) + applo
+        side += applo * 2
+      end
+      # A zero remainder means +answer+ is the exact root.
+      if main == 0
+        answer
+      else
+        sqrt!(a)
+      end
+    end
+  end
+
+  module_function :sqrt
+  module_function :rsqrt
+end
+
+class Float
+  # Keep the built-in ** reachable as power!.
+  alias power! **
+
+  # Same rule as Fixnum#**: negative receiver with a non-integral exponent
+  # goes through Complex, otherwise power! is used.
+  def ** (other)
+    if self < 0 && other.round != other
+      Complex(self, 0.0) ** other
+    else
+      power!(other)
+    end
+  end
+
+end
diff --git a/lib/matrix.rb b/lib/matrix.rb
new file mode 100644
index 0000000..04616d0
--- /dev/null
+++ b/lib/matrix.rb
@@ -0,0 +1,1381 @@
+#!/usr/local/bin/ruby
+#--
+#   matrix.rb -
+#       $Release Version: 1.0$
+#       $Revision: 1.13 $
+#       Original Version from Smalltalk-80 version
+#          on July 23, 1985 at 8:37:17 am
+#       by Keiju ISHITSUKA
+#++
+#
+# = matrix.rb
+#
+# An implementation of Matrix and Vector classes.
+#
+# Author:: Keiju ISHITSUKA
+# Documentation:: Gavin Sinclair (sourced from Ruby in a Nutshell (Matsumoto, O'Reilly))
+#
+# See classes Matrix and Vector for documentation.
+#
+
+require "e2mmap.rb"
+
+# Message formats and exception classes shared by Matrix, Matrix::Scalar and
+# Vector, each of which includes this module.
+module ExceptionForMatrix # :nodoc:
+  extend Exception2MessageMapper
+  def_e2message(TypeError, "wrong argument type %s (expected %s)")
+  def_e2message(ArgumentError, "Wrong # of arguments(%d for %d)")
+
+  def_exception("ErrDimensionMismatch", "\#{self.name} dimension mismatch")
+  def_exception("ErrNotRegular", "Not Regular Matrix")
+  def_exception("ErrOperationNotDefined", "This operation(%s) can\\'t defined")
+end
+
+#
+# The +Matrix+ class represents a mathematical matrix, and provides methods for creating
+# special-case matrices (zero, identity, diagonal, scalar, vector), operating on them
+# arithmetically and algebraically, and determining their mathematical properties (trace, rank,
+# inverse, determinant).
+#
+# Note that although matrices should theoretically be rectangular, this is not
+# enforced by the class.
+#
+# Also note that the determinant of integer matrices may be incorrectly calculated unless you
+# also require 'mathn'. This may be fixed in the future.
+#
+# == Method Catalogue
+#
+# To create a matrix:
+# * Matrix[*rows]
+# * Matrix.[](*rows)
+# * Matrix.rows(rows, copy = true)
+# * Matrix.columns(columns)
+# * Matrix.diagonal(*values)
+# * Matrix.scalar(n, value)
+# * Matrix.identity(n)
+# * Matrix.unit(n)
+# * Matrix.I(n)
+# * Matrix.zero(n)
+# * Matrix.row_vector(row)
+# * Matrix.column_vector(column)
+#
+# To access Matrix elements/columns/rows/submatrices/properties:
+# * [](i, j)
+# * #row_size
+# * #column_size
+# * #row(i)
+# * #column(j)
+# * #collect
+# * #map
+# * #minor(*param)
+#
+# Properties of a matrix:
+# * #regular?
+# * #singular?
+# * #square?
+# +# Matrix arithmetic: +# * *(m) +# * +(m) +# * -(m) +# * #/(m) +# * #inverse +# * #inv +# * ** +# +# Matrix functions: +# * #determinant +# * #det +# * #rank +# * #trace +# * #tr +# * #transpose +# * #t +# +# Conversion to other data types: +# * #coerce(other) +# * #row_vectors +# * #column_vectors +# * #to_a +# +# String representations: +# * #to_s +# * #inspect +# +class Matrix + @RCS_ID='-$Id: matrix.rb,v 1.13 2001/12/09 14:22:23 keiju Exp keiju $-' + +# extend Exception2MessageMapper + include ExceptionForMatrix + + # instance creations + private_class_method :new + + # + # Creates a matrix where each argument is a row. + # Matrix[ [25, 93], [-1, 66] ] + # => 25 93 + # -1 66 + # + def Matrix.[](*rows) + new(:init_rows, rows, false) + end + + # + # Creates a matrix where +rows+ is an array of arrays, each of which is a row + # to the matrix. If the optional argument +copy+ is false, use the given + # arrays as the internal structure of the matrix without copying. + # Matrix.rows([[25, 93], [-1, 66]]) + # => 25 93 + # -1 66 + def Matrix.rows(rows, copy = true) + new(:init_rows, rows, copy) + end + + # + # Creates a matrix using +columns+ as an array of column vectors. + # Matrix.columns([[25, 93], [-1, 66]]) + # => 25 -1 + # 93 66 + # + # + def Matrix.columns(columns) + rows = (0 .. columns[0].size - 1).collect {|i| + (0 .. columns.size - 1).collect {|j| + columns[j][i] + } + } + Matrix.rows(rows, false) + end + + # + # Creates a matrix where the diagonal elements are composed of +values+. + # Matrix.diagonal(9, 5, -3) + # => 9 0 0 + # 0 5 0 + # 0 0 -3 + # + def Matrix.diagonal(*values) + size = values.size + rows = (0 .. size - 1).collect {|j| + row = Array.new(size).fill(0, 0, size) + row[j] = values[j] + row + } + rows(rows, false) + end + + # + # Creates an +n+ by +n+ diagonal matrix where each diagonal element is + # +value+. 
+ # Matrix.scalar(2, 5) + # => 5 0 + # 0 5 + # + def Matrix.scalar(n, value) + Matrix.diagonal(*Array.new(n).fill(value, 0, n)) + end + + # + # Creates an +n+ by +n+ identity matrix. + # Matrix.identity(2) + # => 1 0 + # 0 1 + # + def Matrix.identity(n) + Matrix.scalar(n, 1) + end + class << Matrix + alias unit identity + alias I identity + end + + # + # Creates an +n+ by +n+ zero matrix. + # Matrix.zero(2) + # => 0 0 + # 0 0 + # + def Matrix.zero(n) + Matrix.scalar(n, 0) + end + + # + # Creates a single-row matrix where the values of that row are as given in + # +row+. + # Matrix.row_vector([4,5,6]) + # => 4 5 6 + # + def Matrix.row_vector(row) + case row + when Vector + Matrix.rows([row.to_a], false) + when Array + Matrix.rows([row.dup], false) + else + Matrix.rows([[row]], false) + end + end + + # + # Creates a single-column matrix where the values of that column are as given + # in +column+. + # Matrix.column_vector([4,5,6]) + # => 4 + # 5 + # 6 + # + def Matrix.column_vector(column) + case column + when Vector + Matrix.columns([column.to_a]) + when Array + Matrix.columns([column]) + else + Matrix.columns([[column]]) + end + end + + # + # This method is used by the other methods that create matrices, and is of no + # use to general users. + # + def initialize(init_method, *argv) + self.send(init_method, *argv) + end + + def init_rows(rows, copy) + if copy + @rows = rows.collect{|row| row.dup} + else + @rows = rows + end + self + end + private :init_rows + + # + # Returns element (+i+,+j+) of the matrix. That is: row +i+, column +j+. + # + def [](i, j) + @rows[i][j] + end + alias element [] + alias component [] + + def []=(i, j, v) + @rows[i][j] = v + end + alias set_element []= + alias set_component []= + private :[]=, :set_element, :set_component + + # + # Returns the number of rows. + # + def row_size + @rows.size + end + + # + # Returns the number of columns. Note that it is possible to construct a + # matrix with uneven columns (e.g. 
Matrix[ [1,2,3], [4,5] ]), but this is + # mathematically unsound. This method uses the first row to determine the + # result. + # + def column_size + @rows[0].size + end + + # + # Returns row vector number +i+ of the matrix as a Vector (starting at 0 like + # an array). When a block is given, the elements of that vector are iterated. + # + def row(i) # :yield: e + if block_given? + for e in @rows[i] + yield e + end + else + Vector.elements(@rows[i]) + end + end + + # + # Returns column vector number +j+ of the matrix as a Vector (starting at 0 + # like an array). When a block is given, the elements of that vector are + # iterated. + # + def column(j) # :yield: e + if block_given? + 0.upto(row_size - 1) do |i| + yield @rows[i][j] + end + else + col = (0 .. row_size - 1).collect {|i| + @rows[i][j] + } + Vector.elements(col, false) + end + end + + # + # Returns a matrix that is the result of iteration of the given block over all + # elements of the matrix. + # Matrix[ [1,2], [3,4] ].collect { |e| e**2 } + # => 1 4 + # 9 16 + # + def collect # :yield: e + rows = @rows.collect{|row| row.collect{|e| yield e}} + Matrix.rows(rows, false) + end + alias map collect + + # + # Returns a section of the matrix. The parameters are either: + # * start_row, nrows, start_col, ncols; OR + # * col_range, row_range + # + # Matrix.diagonal(9, 5, -3).minor(0..1, 0..2) + # => 9 0 0 + # 0 5 0 + # + def minor(*param) + case param.size + when 2 + from_row = param[0].first + size_row = param[0].end - from_row + size_row += 1 unless param[0].exclude_end? + from_col = param[1].first + size_col = param[1].end - from_col + size_col += 1 unless param[1].exclude_end? 
+ when 4 + from_row = param[0] + size_row = param[1] + from_col = param[2] + size_col = param[3] + else + Matrix.Raise ArgumentError, param.inspect + end + + rows = @rows[from_row, size_row].collect{|row| + row[from_col, size_col] + } + Matrix.rows(rows, false) + end + + #-- + # TESTING -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- + #++ + + # + # Returns +true+ if this is a regular matrix. + # + def regular? + square? and rank == column_size + end + + # + # Returns +true+ is this is a singular (i.e. non-regular) matrix. + # + def singular? + not regular? + end + + # + # Returns +true+ is this is a square matrix. See note in column_size about this + # being unreliable, though. + # + def square? + column_size == row_size + end + + #-- + # OBJECT METHODS -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- + #++ + + # + # Returns +true+ if and only if the two matrices contain equal elements. + # + def ==(other) + return false unless Matrix === other + + other.compare_by_row_vectors(@rows) + end + def eql?(other) + return false unless Matrix === other + + other.compare_by_row_vectors(@rows, :eql?) + end + + # + # Not really intended for general consumption. + # + def compare_by_row_vectors(rows, comparison = :==) + return false unless @rows.size == rows.size + + 0.upto(@rows.size - 1) do |i| + return false unless @rows[i].send(comparison, rows[i]) + end + true + end + + # + # Returns a clone of the matrix, so that the contents of each do not reference + # identical objects. + # + def clone + Matrix.rows(@rows) + end + + # + # Returns a hash-code for the matrix. + # + def hash + value = 0 + for row in @rows + for e in row + value ^= e.hash + end + end + return value + end + + #-- + # ARITHMETIC -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- + #++ + + # + # Matrix multiplication. 
+ # Matrix[[2,4], [6,8]] * Matrix.identity(2) + # => 2 4 + # 6 8 + # + def *(m) # m is matrix or vector or number + case(m) + when Numeric + rows = @rows.collect {|row| + row.collect {|e| + e * m + } + } + return Matrix.rows(rows, false) + when Vector + m = Matrix.column_vector(m) + r = self * m + return r.column(0) + when Matrix + Matrix.Raise ErrDimensionMismatch if column_size != m.row_size + + rows = (0 .. row_size - 1).collect {|i| + (0 .. m.column_size - 1).collect {|j| + vij = 0 + 0.upto(column_size - 1) do |k| + vij += self[i, k] * m[k, j] + end + vij + } + } + return Matrix.rows(rows, false) + else + x, y = m.coerce(self) + return x * y + end + end + + # + # Matrix addition. + # Matrix.scalar(2,5) + Matrix[[1,0], [-4,7]] + # => 6 0 + # -4 12 + # + def +(m) + case m + when Numeric + Matrix.Raise ErrOperationNotDefined, "+" + when Vector + m = Matrix.column_vector(m) + when Matrix + else + x, y = m.coerce(self) + return x + y + end + + Matrix.Raise ErrDimensionMismatch unless row_size == m.row_size and column_size == m.column_size + + rows = (0 .. row_size - 1).collect {|i| + (0 .. column_size - 1).collect {|j| + self[i, j] + m[i, j] + } + } + Matrix.rows(rows, false) + end + + # + # Matrix subtraction. + # Matrix[[1,5], [4,2]] - Matrix[[9,3], [-4,1]] + # => -8 2 + # 8 1 + # + def -(m) + case m + when Numeric + Matrix.Raise ErrOperationNotDefined, "-" + when Vector + m = Matrix.column_vector(m) + when Matrix + else + x, y = m.coerce(self) + return x - y + end + + Matrix.Raise ErrDimensionMismatch unless row_size == m.row_size and column_size == m.column_size + + rows = (0 .. row_size - 1).collect {|i| + (0 .. column_size - 1).collect {|j| + self[i, j] - m[i, j] + } + } + Matrix.rows(rows, false) + end + + # + # Matrix division (multiplication by the inverse). 
+ # Matrix[[7,6], [3,9]] / Matrix[[2,9], [3,1]] + # => -7 1 + # -3 -6 + # + def /(other) + case other + when Numeric + rows = @rows.collect {|row| + row.collect {|e| + e / other + } + } + return Matrix.rows(rows, false) + when Matrix + return self * other.inverse + else + x, y = other.coerce(self) + return x / y + end + end + + # + # Returns the inverse of the matrix. + # Matrix[[1, 2], [2, 1]].inverse + # => -1 1 + # 0 -1 + # + def inverse + Matrix.Raise ErrDimensionMismatch unless square? + Matrix.I(row_size).inverse_from(self) + end + alias inv inverse + + # + # Not for public consumption? + # + def inverse_from(src) + size = row_size - 1 + a = src.to_a + + for k in 0..size + i = k + akk = a[k][k].abs + ((k+1)..size).each do |j| + v = a[j][k].abs + if v > akk + i = j + akk = v + end + end + Matrix.Raise ErrNotRegular if akk == 0 + if i != k + a[i], a[k] = a[k], a[i] + @rows[i], @rows[k] = @rows[k], @rows[i] + end + akk = a[k][k] + + for i in 0 .. size + next if i == k + q = a[i][k].quo(akk) + a[i][k] = 0 + + for j in (k + 1).. size + a[i][j] -= a[k][j] * q + end + for j in 0..size + @rows[i][j] -= @rows[k][j] * q + end + end + + for j in (k + 1).. size + a[k][j] = a[k][j].quo(akk) + end + for j in 0..size + @rows[k][j] = @rows[k][j].quo(akk) + end + end + self + end + #alias reciprocal inverse + + # + # Matrix exponentiation. Defined for integer powers only. Equivalent to + # multiplying the matrix by itself N times. 
+ # Matrix[[7,6], [3,9]] ** 2 + # => 67 96 + # 48 99 + # + def ** (other) + if other.kind_of?(Integer) + x = self + if other <= 0 + x = self.inverse + return Matrix.identity(self.column_size) if other == 0 + other = -other + end + z = x + n = other - 1 + while n != 0 + while (div, mod = n.divmod(2) + mod == 0) + x = x * x + n = div + end + z *= x + n -= 1 + end + z + elsif other.kind_of?(Float) || defined?(Rational) && other.kind_of?(Rational) + Matrix.Raise ErrOperationNotDefined, "**" + else + Matrix.Raise ErrOperationNotDefined, "**" + end + end + + #-- + # MATRIX FUNCTIONS -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- + #++ + + # + # Returns the determinant of the matrix. If the matrix is not square, the + # result is 0. This method's algorism is Gaussian elimination method + # and using Numeric#quo(). Beware that using Float values, with their + # usual lack of precision, can affect the value returned by this method. Use + # Rational values or Matrix#det_e instead if this is important to you. + # + # Matrix[[7,6], [3,9]].determinant + # => 63.0 + # + def determinant + return 0 unless square? + + size = row_size - 1 + a = to_a + + det = 1 + k = 0 + loop do + if (akk = a[k][k]) == 0 + i = k + loop do + return 0 if (i += 1) > size + break unless a[i][k] == 0 + end + a[i], a[k] = a[k], a[i] + akk = a[k][k] + det *= -1 + end + + for i in k + 1 .. size + q = a[i][k].quo(akk) + (k + 1).upto(size) do |j| + a[i][j] -= a[k][j] * q + end + end + det *= akk + break unless (k += 1) <= size + end + det + end + alias det determinant + + # + # Returns the determinant of the matrix. If the matrix is not square, the + # result is 0. This method's algorism is Gaussian elimination method. + # This method uses Euclidean algorism. If all elements are integer, + # really exact value. But, if an element is a float, can't return + # exact value. + # + # Matrix[[7,6], [3,9]].determinant + # => 63 + # + def determinant_e + return 0 unless square? 
+ + size = row_size - 1 + a = to_a + + det = 1 + k = 0 + loop do + if a[k][k].zero? + i = k + loop do + return 0 if (i += 1) > size + break unless a[i][k].zero? + end + a[i], a[k] = a[k], a[i] + det *= -1 + end + + for i in (k + 1)..size + q = a[i][k].quo(a[k][k]) + k.upto(size) do |j| + a[i][j] -= a[k][j] * q + end + unless a[i][k].zero? + a[i], a[k] = a[k], a[i] + det *= -1 + redo + end + end + det *= a[k][k] + break unless (k += 1) <= size + end + det + end + alias det_e determinant_e + + # + # Returns the rank of the matrix. Beware that using Float values, + # probably return faild value. Use Rational values or Matrix#rank_e + # for getting exact result. + # + # Matrix[[7,6], [3,9]].rank + # => 2 + # + def rank + if column_size > row_size + a = transpose.to_a + a_column_size = row_size + a_row_size = column_size + else + a = to_a + a_column_size = column_size + a_row_size = row_size + end + rank = 0 + k = 0 + begin + if (akk = a[k][k]) == 0 + i = k + exists = true + loop do + if (i += 1) > a_row_size - 1 + exists = false + break + end + break unless a[i][k] == 0 + end + if exists + a[i], a[k] = a[k], a[i] + akk = a[k][k] + else + i = k + exists = true + loop do + if (i += 1) > a_column_size - 1 + exists = false + break + end + break unless a[k][i] == 0 + end + if exists + k.upto(a_row_size - 1) do |j| + a[j][k], a[j][i] = a[j][i], a[j][k] + end + akk = a[k][k] + else + next + end + end + end + + for i in (k + 1)..(a_row_size - 1) + q = a[i][k].quo(akk) + for j in (k + 1)..(a_column_size - 1) + a[i][j] -= a[k][j] * q + end + end + rank += 1 + end while (k += 1) <= a_column_size - 1 + return rank + end + + # + # Returns the rank of the matrix. This method uses Euclidean + # algorism. If all elements are integer, really exact value. But, if + # an element is a float, can't return exact value. + # + # Matrix[[7,6], [3,9]].rank + # => 2 + # + def rank_e + a = to_a + a_column_size = column_size + a_row_size = row_size + pi = 0 + (0 ... 
a_column_size).each do |j| + if i = (pi ... a_row_size).find{|i0| !a[i0][j].zero?} + if i != pi + a[pi], a[i] = a[i], a[pi] + end + (pi + 1 ... a_row_size).each do |k| + q = a[k][j].quo(a[pi][j]) + (pi ... a_column_size).each do |j0| + a[k][j0] -= q * a[pi][j0] + end + if k > pi && !a[k][j].zero? + a[k], a[pi] = a[pi], a[k] + redo + end + end + pi += 1 + end + end + pi + end + + + # + # Returns the trace (sum of diagonal elements) of the matrix. + # Matrix[[7,6], [3,9]].trace + # => 16 + # + def trace + tr = 0 + 0.upto(column_size - 1) do |i| + tr += @rows[i][i] + end + tr + end + alias tr trace + + # + # Returns the transpose of the matrix. + # Matrix[[1,2], [3,4], [5,6]] + # => 1 2 + # 3 4 + # 5 6 + # Matrix[[1,2], [3,4], [5,6]].transpose + # => 1 3 5 + # 2 4 6 + # + def transpose + Matrix.columns(@rows) + end + alias t transpose + + #-- + # CONVERTING -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- + #++ + + # + # FIXME: describe #coerce. + # + def coerce(other) + case other + when Numeric + return Scalar.new(other), self + else + raise TypeError, "#{self.class} can't be coerced into #{other.class}" + end + end + + # + # Returns an array of the row vectors of the matrix. See Vector. + # + def row_vectors + rows = (0 .. row_size - 1).collect {|i| + row(i) + } + rows + end + + # + # Returns an array of the column vectors of the matrix. See Vector. + # + def column_vectors + columns = (0 .. column_size - 1).collect {|i| + column(i) + } + columns + end + + # + # Returns an array of arrays that describe the rows of the matrix. 
+ # + def to_a + @rows.collect{|row| row.collect{|e| e}} + end + + def elements_to_f + collect{|e| e.to_f} + end + + def elements_to_i + collect{|e| e.to_i} + end + + def elements_to_r + collect{|e| e.to_r} + end + + #-- + # PRINTING -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- + #++ + + # + # Overrides Object#to_s + # + def to_s + "Matrix[" + @rows.collect{|row| + "[" + row.collect{|e| e.to_s}.join(", ") + "]" + }.join(", ")+"]" + end + + # + # Overrides Object#inspect + # + def inspect + "Matrix"+@rows.inspect + end + + # Private CLASS + + class Scalar < Numeric # :nodoc: + include ExceptionForMatrix + + def initialize(value) + @value = value + end + + # ARITHMETIC + def +(other) + case other + when Numeric + Scalar.new(@value + other) + when Vector, Matrix + Scalar.Raise WrongArgType, other.class, "Numeric or Scalar" + when Scalar + Scalar.new(@value + other.value) + else + x, y = other.coerce(self) + x + y + end + end + + def -(other) + case other + when Numeric + Scalar.new(@value - other) + when Vector, Matrix + Scalar.Raise WrongArgType, other.class, "Numeric or Scalar" + when Scalar + Scalar.new(@value - other.value) + else + x, y = other.coerce(self) + x - y + end + end + + def *(other) + case other + when Numeric + Scalar.new(@value * other) + when Vector, Matrix + other.collect{|e| @value * e} + else + x, y = other.coerce(self) + x * y + end + end + + def / (other) + case other + when Numeric + Scalar.new(@value / other) + when Vector + Scalar.Raise WrongArgType, other.class, "Numeric or Scalar or Matrix" + when Matrix + self * other.inverse + else + x, y = other.coerce(self) + x.quo(y) + end + end + + def ** (other) + case other + when Numeric + Scalar.new(@value ** other) + when Vector + Scalar.Raise WrongArgType, other.class, "Numeric or Scalar or Matrix" + when Matrix + other.powered_by(self) + else + x, y = other.coerce(self) + x ** y + end + end + end +end + + +# +# The +Vector+ class represents a mathematical vector, which is useful 
in its own right, and +# also constitutes a row or column of a Matrix. +# +# == Method Catalogue +# +# To create a Vector: +# * Vector.[](*array) +# * Vector.elements(array, copy = true) +# +# To access elements: +# * [](i) +# +# To enumerate the elements: +# * #each2(v) +# * #collect2(v) +# +# Vector arithmetic: +# * *(x) "is matrix or number" +# * +(v) +# * -(v) +# +# Vector functions: +# * #inner_product(v) +# * #collect +# * #map +# * #map2(v) +# * #r +# * #size +# +# Conversion to other data types: +# * #covector +# * #to_a +# * #coerce(other) +# +# String representations: +# * #to_s +# * #inspect +# +class Vector + include ExceptionForMatrix + + #INSTANCE CREATION + + private_class_method :new + + # + # Creates a Vector from a list of elements. + # Vector[7, 4, ...] + # + def Vector.[](*array) + new(:init_elements, array, copy = false) + end + + # + # Creates a vector from an Array. The optional second argument specifies + # whether the array itself or a copy is used internally. + # + def Vector.elements(array, copy = true) + new(:init_elements, array, copy) + end + + # + # For internal use. + # + def initialize(method, array, copy) + self.send(method, array, copy) + end + + # + # For internal use. + # + def init_elements(array, copy) + if copy + @elements = array.dup + else + @elements = array + end + end + + # ACCESSING + + # + # Returns element number +i+ (starting at zero) of the vector. + # + def [](i) + @elements[i] + end + alias element [] + alias component [] + + def []=(i, v) + @elements[i]= v + end + alias set_element []= + alias set_component []= + private :[]=, :set_element, :set_component + + # + # Returns the number of elements in the vector. + # + def size + @elements.size + end + + #-- + # ENUMERATIONS -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- + #++ + + # + # Iterate over the elements of this vector and +v+ in conjunction. 
+ # + def each2(v) # :yield: e1, e2 + Vector.Raise ErrDimensionMismatch if size != v.size + 0.upto(size - 1) do |i| + yield @elements[i], v[i] + end + end + + # + # Collects (as in Enumerable#collect) over the elements of this vector and +v+ + # in conjunction. + # + def collect2(v) # :yield: e1, e2 + Vector.Raise ErrDimensionMismatch if size != v.size + (0 .. size - 1).collect do |i| + yield @elements[i], v[i] + end + end + + #-- + # COMPARING -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- + #++ + + # + # Returns +true+ iff the two vectors have the same elements in the same order. + # + def ==(other) + return false unless Vector === other + + other.compare_by(@elements) + end + def eql?(other) + return false unless Vector === other + + other.compare_by(@elements, :eql?) + end + + # + # For internal use. + # + def compare_by(elements, comparison = :==) + @elements.send(comparison, elements) + end + + # + # Return a copy of the vector. + # + def clone + Vector.elements(@elements) + end + + # + # Return a hash-code for the vector. + # + def hash + @elements.hash + end + + #-- + # ARITHMETIC -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- + #++ + + # + # Multiplies the vector by +x+, where +x+ is a number or another vector. + # + def *(x) + case x + when Numeric + els = @elements.collect{|e| e * x} + Vector.elements(els, false) + when Matrix + Matrix.column_vector(self) * x + else + s, x = x.coerce(self) + s * x + end + end + + # + # Vector addition. + # + def +(v) + case v + when Vector + Vector.Raise ErrDimensionMismatch if size != v.size + els = collect2(v) {|v1, v2| + v1 + v2 + } + Vector.elements(els, false) + when Matrix + Matrix.column_vector(self) + v + else + s, x = v.coerce(self) + s + x + end + end + + # + # Vector subtraction. 
+ # + def -(v) + case v + when Vector + Vector.Raise ErrDimensionMismatch if size != v.size + els = collect2(v) {|v1, v2| + v1 - v2 + } + Vector.elements(els, false) + when Matrix + Matrix.column_vector(self) - v + else + s, x = v.coerce(self) + s - x + end + end + + #-- + # VECTOR FUNCTIONS -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- + #++ + + # + # Returns the inner product of this vector with the other. + # Vector[4,7].inner_product Vector[10,1] => 47 + # + def inner_product(v) + Vector.Raise ErrDimensionMismatch if size != v.size + + p = 0 + each2(v) {|v1, v2| + p += v1 * v2 + } + p + end + + # + # Like Array#collect. + # + def collect # :yield: e + els = @elements.collect {|v| + yield v + } + Vector.elements(els, false) + end + alias map collect + + # + # Like Vector#collect2, but returns a Vector instead of an Array. + # + def map2(v) # :yield: e1, e2 + els = collect2(v) {|v1, v2| + yield v1, v2 + } + Vector.elements(els, false) + end + + # + # Returns the modulus (Pythagorean distance) of the vector. + # Vector[5,8,2].r => 9.643650761 + # + def r + v = 0 + for e in @elements + v += e*e + end + return Math.sqrt(v) + end + + #-- + # CONVERTING + #++ + + # + # Creates a single-row matrix from this vector. + # + def covector + Matrix.row_vector(self) + end + + # + # Returns the elements of the vector in an array. + # + def to_a + @elements.dup + end + + def elements_to_f + collect{|e| e.to_f} + end + + def elements_to_i + collect{|e| e.to_i} + end + + def elements_to_r + collect{|e| e.to_r} + end + + # + # FIXME: describe Vector#coerce. 
+ # + def coerce(other) + case other + when Numeric + return Matrix::Scalar.new(other), self + else + raise TypeError, "#{self.class} can't be coerced into #{other.class}" + end + end + + #-- + # PRINTING -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- + #++ + + # + # Overrides Object#to_s + # + def to_s + "Vector[" + @elements.join(", ") + "]" + end + + # + # Overrides Object#inspect + # + def inspect + str = "Vector"+@elements.inspect + end +end + + +# Documentation comments: +# - Matrix#coerce and Vector#coerce need to be documented diff --git a/lib/minitest/autorun.rb b/lib/minitest/autorun.rb new file mode 100644 index 0000000..a9f9c67 --- /dev/null +++ b/lib/minitest/autorun.rb @@ -0,0 +1,9 @@ +############################################################ +# This file is imported from a different project. +# DO NOT make modifications in this repo. +# File a patch instead and assign it to Ryan Davis +############################################################ + +require 'minitest/unit' + +MiniTest::Unit.autorun diff --git a/lib/minitest/mock.rb b/lib/minitest/mock.rb new file mode 100644 index 0000000..54af28c --- /dev/null +++ b/lib/minitest/mock.rb @@ -0,0 +1,37 @@ +############################################################ +# This file is imported from a different project. +# DO NOT make modifications in this repo. 
+# File a patch instead and assign it to Ryan Davis +############################################################ + +class MockExpectationError < StandardError; end + +module MiniTest + class Mock + def initialize + @expected_calls = {} + @actual_calls = Hash.new {|h,k| h[k] = [] } + end + + def expect(name, retval, args=[]) + n, r, a = name, retval, args # for the closure below + @expected_calls[name] = { :retval => retval, :args => args } + self.class.__send__(:define_method, name) { |*x| + raise ArgumentError unless @expected_calls[n][:args].size == x.size + @actual_calls[n] << { :retval => r, :args => x } + retval + } + self + end + + def verify + @expected_calls.each_key do |name| + expected = @expected_calls[name] + msg = "expected #{name}, #{expected.inspect}" + raise MockExpectationError, msg unless + @actual_calls.has_key? name and @actual_calls[name].include?(expected) + end + true + end + end +end diff --git a/lib/minitest/spec.rb b/lib/minitest/spec.rb new file mode 100644 index 0000000..2158ec0 --- /dev/null +++ b/lib/minitest/spec.rb @@ -0,0 +1,89 @@ +############################################################ +# This file is imported from a different project. +# DO NOT make modifications in this repo. +# File a patch instead and assign it to Ryan Davis +############################################################ + +#!/usr/bin/ruby -w + +require 'minitest/unit' + +class Module + def infect_with_assertions pos_prefix, neg_prefix, skip_re, map = {} + MiniTest::Assertions.public_instance_methods(false).each do |meth| + meth = meth.to_s + + new_name = case meth + when /^assert/ then + meth.sub(/^assert/, pos_prefix.to_s) + when /^refute/ then + meth.sub(/^refute/, neg_prefix.to_s) + end + next unless new_name + next if new_name =~ skip_re + + regexp, replacement = map.find { |re, _| new_name =~ re } + new_name.sub! 
regexp, replacement if replacement + + # warn "%-22p -> %p %p" % [meth, new_name, regexp] + self.class_eval <<-EOM + def #{new_name} *args, &block + return MiniTest::Spec.current.#{meth}(*args, &self) if Proc === self + return MiniTest::Spec.current.#{meth}(args.first, self) if args.size == 1 + return MiniTest::Spec.current.#{meth}(self, *args) + end + EOM + end + end +end + +Object.infect_with_assertions(:must, :wont, + /^(must|wont)$|wont_(throw)| + must_(block|not?_|nothing|raise$)/x, + /(must_throw)s/ => '\1', + /(?!not)_same/ => '_be_same_as', + /_in_/ => '_be_within_', + /_operator/ => '_be', + /_includes/ => '_include', + /(must|wont)_(.*_of|nil|empty)/ => '\1_be_\2', + /must_raises/ => 'must_raise') + +class Object + alias :must_be_close_to :must_be_within_delta + alias :wont_be_close_to :wont_be_within_delta +end + +module Kernel + def describe desc, &block + cls = Class.new(MiniTest::Spec) + Object.const_set desc.to_s.split(/\W+/).map { |s| s.capitalize }.join, cls + + cls.class_eval(&block) + end + private :describe +end + +class MiniTest::Spec < MiniTest::Unit::TestCase + def self.current + @@current_spec + end + + def initialize name + super + @@current_spec = self + end + + def self.before(type = :each, &block) + raise "unsupported before type: #{type}" unless type == :each + define_method :setup, &block + end + + def self.after(type = :each, &block) + raise "unsupported after type: #{type}" unless type == :each + define_method :teardown, &block + end + + def self.it desc, &block + define_method "test_#{desc.gsub(/\W+/, '_').downcase}", &block + end +end diff --git a/lib/minitest/unit.rb b/lib/minitest/unit.rb new file mode 100644 index 0000000..0f71126 --- /dev/null +++ b/lib/minitest/unit.rb @@ -0,0 +1,497 @@ +############################################################ +# This file is imported from a different project. +# DO NOT make modifications in this repo. 
+# File a patch instead and assign it to Ryan Davis +############################################################ + +## +# +# Totally minimal drop-in replacement for test-unit +# +# TODO: refute -> debunk, prove/rebut, show/deny... lots of possibilities + +module MiniTest + class Assertion < Exception; end + class Skip < Assertion; end + + file = if RUBY_VERSION =~ /^1\.9/ then # bt's expanded, but __FILE__ isn't :( + File.expand_path __FILE__ + elsif __FILE__ =~ /^[^\.]/ then # assume both relative + require 'pathname' + pwd = Pathname.new Dir.pwd + pn = Pathname.new File.expand_path(__FILE__) + pn = File.join(".", pn.relative_path_from(pwd)) unless pn.relative? + pn.to_s + else # assume both are expanded + __FILE__ + end + + # './lib' in project dir, or '/usr/local/blahblah' if installed + MINI_DIR = File.dirname(File.dirname(file)) + + def self.filter_backtrace bt + return ["No backtrace"] unless bt + + new_bt = [] + bt.each do |line| + break if line.rindex(MINI_DIR, 0) + new_bt << line + end + + new_bt = bt.reject { |line| line.rindex(MINI_DIR, 0) } if new_bt.empty? + new_bt = bt.dup if new_bt.empty? + new_bt + end + + module Assertions + def mu_pp(obj) + s = obj.inspect + s = s.force_encoding(Encoding.default_external) if defined? Encoding + s + end + + def _assertions= n + @_assertions = n + end + + def _assertions + @_assertions ||= 0 + end + + def assert test, msg = nil + msg ||= "Failed assertion, no message given." + self._assertions += 1 + unless test then + msg = msg.call if Proc === msg + raise MiniTest::Assertion, msg + end + true + end + + def assert_block msg = nil + msg = message(msg) { "Expected block to return true value" } + assert yield, msg + end + + def assert_empty obj, msg = nil + msg = message(msg) { "Expected #{obj.inspect} to be empty" } + assert_respond_to obj, :empty? 
+ assert obj.empty?, msg + end + + def assert_equal exp, act, msg = nil + msg = message(msg) { "Expected #{mu_pp(exp)}, not #{mu_pp(act)}" } + assert(exp == act, msg) + end + + def assert_in_delta exp, act, delta = 0.001, msg = nil + n = (exp - act).abs + msg = message(msg) { "Expected #{exp} - #{act} (#{n}) to be < #{delta}" } + assert delta >= n, msg + end + + def assert_in_epsilon a, b, epsilon = 0.001, msg = nil + assert_in_delta a, b, [a, b].min * epsilon, msg + end + + def assert_includes collection, obj, msg = nil + msg = message(msg) { "Expected #{mu_pp(collection)} to include #{mu_pp(obj)}" } + assert_respond_to collection, :include? + assert collection.include?(obj), msg + end + + def assert_instance_of cls, obj, msg = nil + msg = message(msg) { "Expected #{mu_pp(obj)} to be an instance of #{cls}, not #{obj.class}" } + flip = (Module === obj) && ! (Module === cls) # HACK for specs + obj, cls = cls, obj if flip + assert obj.instance_of?(cls), msg + end + + def assert_kind_of cls, obj, msg = nil # TODO: merge with instance_of + msg = message(msg) { + "Expected #{mu_pp(obj)} to be a kind of #{cls}, not #{obj.class}" } + flip = (Module === obj) && ! (Module === cls) # HACK for specs + obj, cls = cls, obj if flip + assert obj.kind_of?(cls), msg + end + + def assert_match exp, act, msg = nil + msg = message(msg) { "Expected #{mu_pp(exp)} to match #{mu_pp(act)}" } + assert_respond_to act, :"=~" + exp = /#{Regexp.escape(exp)}/ if String === exp && String === act + assert exp =~ act, msg + end + + def assert_nil obj, msg = nil + msg = message(msg) { "Expected #{mu_pp(obj)} to be nil" } + assert obj.nil?, msg + end + + def assert_operator o1, op, o2, msg = nil + msg = message(msg) { "Expected #{mu_pp(o1)} to be #{op} #{mu_pp(o2)}" } + assert o1.__send__(op, o2), msg + end + + def assert_raises *exp + msg = String === exp.last ? exp.pop : nil + should_raise = false + begin + yield + should_raise = true + rescue Exception => e + assert(exp.any? 
{ |ex| + ex.instance_of?(Module) ? e.kind_of?(ex) : ex == e.class + }, exception_details(e, "#{mu_pp(exp)} exception expected, not")) + + return e + end + + exp = exp.first if exp.size == 1 + flunk "#{mu_pp(exp)} expected but nothing was raised." if should_raise + end + + def assert_respond_to obj, meth, msg = nil + msg = message(msg) { + "Expected #{mu_pp(obj)} (#{obj.class}) to respond to ##{meth}" + } + flip = (Symbol === obj) && ! (Symbol === meth) # HACK for specs + obj, meth = meth, obj if flip + assert obj.respond_to?(meth), msg + end + + def assert_same exp, act, msg = nil + msg = message(msg) { + data = [mu_pp(act), act.object_id, mu_pp(exp), exp.object_id] + "Expected %s (0x%x) to be the same as %s (0x%x)" % data + } + assert exp.equal?(act), msg + end + + def assert_send send_ary, m = nil + recv, msg, *args = send_ary + m = message(m) { + "Expected #{mu_pp(recv)}.#{msg}(*#{mu_pp(args)}) to return true" } + assert recv.__send__(msg, *args), m + end + + def assert_throws sym, msg = nil + default = "Expected #{mu_pp(sym)} to have been thrown" + caught = true + catch(sym) do + begin + yield + rescue ArgumentError => e # 1.9 exception + default += ", not #{e.message.split(/ /).last}" + rescue NameError => e # 1.8 exception + default += ", not #{e.name.inspect}" + end + caught = false + end + + assert caught, message(msg) { default } + end + + def capture_io + require 'stringio' + + orig_stdout, orig_stderr = $stdout, $stderr + captured_stdout, captured_stderr = StringIO.new, StringIO.new + $stdout, $stderr = captured_stdout, captured_stderr + + yield + + return captured_stdout.string, captured_stderr.string + ensure + $stdout = orig_stdout + $stderr = orig_stderr + end + + def exception_details e, msg + "#{msg}\nClass: <#{e.class}>\nMessage: <#{e.message.inspect}>\n---Backtrace---\n#{MiniTest::filter_backtrace(e.backtrace).join("\n")}\n---------------" + end + + def flunk msg = nil + msg ||= "Epic Fail!" 
+ assert false, msg + end + + def message msg = nil, &default + proc { + if msg then + msg = msg.to_s unless String === msg + msg += '.' unless msg.empty? + msg += "\n#{default.call}." + msg.strip + else + "#{default.call}." + end + } + end + + # used for counting assertions + def pass msg = nil + assert true + end + + def refute test, msg = nil + msg ||= "Failed refutation, no message given" + not assert(! test, msg) + end + + def refute_empty obj, msg = nil + msg = message(msg) { "Expected #{obj.inspect} to not be empty" } + assert_respond_to obj, :empty? + refute obj.empty?, msg + end + + def refute_equal exp, act, msg = nil + msg = message(msg) { "Expected #{mu_pp(act)} to not be equal to #{mu_pp(exp)}" } + refute exp == act, msg + end + + def refute_in_delta exp, act, delta = 0.001, msg = nil + n = (exp - act).abs + msg = message(msg) { "Expected #{exp} - #{act} (#{n}) to not be < #{delta}" } + refute delta > n, msg + end + + def refute_in_epsilon a, b, epsilon = 0.001, msg = nil + refute_in_delta a, b, a * epsilon, msg + end + + def refute_includes collection, obj, msg = nil + msg = message(msg) { "Expected #{mu_pp(collection)} to not include #{mu_pp(obj)}" } + assert_respond_to collection, :include? + refute collection.include?(obj), msg + end + + def refute_instance_of cls, obj, msg = nil + msg = message(msg) { "Expected #{mu_pp(obj)} to not be an instance of #{cls}" } + flip = (Module === obj) && ! (Module === cls) # HACK for specs + obj, cls = cls, obj if flip + refute obj.instance_of?(cls), msg + end + + def refute_kind_of cls, obj, msg = nil # TODO: merge with instance_of + msg = message(msg) { "Expected #{mu_pp(obj)} to not be a kind of #{cls}" } + flip = (Module === obj) && ! 
(Module === cls) # HACK for specs + obj, cls = cls, obj if flip + refute obj.kind_of?(cls), msg + end + + def refute_match exp, act, msg = nil + msg = message(msg) { "Expected #{mu_pp(exp)} to not match #{mu_pp(act)}" } + assert_respond_to act, :"=~" + exp = /#{Regexp.escape(exp)}/ if String === exp && String === act + refute exp =~ act, msg + end + + def refute_nil obj, msg = nil + msg = message(msg) { "Expected #{mu_pp(obj)} to not be nil" } + refute obj.nil?, msg + end + + def refute_operator o1, op, o2, msg = nil + msg = message(msg) { "Expected #{mu_pp(o1)} to not be #{op} #{mu_pp(o2)}" } + refute o1.__send__(op, o2), msg + end + + def refute_respond_to obj, meth, msg = nil + msg = message(msg) { "Expected #{mu_pp(obj)} to not respond to #{meth}" } + flip = (Symbol === obj) && ! (Symbol === meth) # HACK for specs + obj, meth = meth, obj if flip + refute obj.respond_to?(meth), msg + end + + def refute_same exp, act, msg = nil + msg = message(msg) { "Expected #{mu_pp(act)} to not be the same as #{mu_pp(exp)}" } + refute exp.equal?(act), msg + end + + def skip msg = nil, bt = caller + msg ||= "Skipped, no message given" + raise MiniTest::Skip, msg, bt + end + end + + class Unit + VERSION = "1.3.1" + + attr_accessor :report, :failures, :errors, :skips + attr_accessor :test_count, :assertion_count + + @@installed_at_exit ||= false + @@out = $stdout + + def self.autorun + at_exit { + next if $! 
# don't run if there was an exception + exit_code = MiniTest::Unit.new.run(ARGV) + exit false if exit_code && exit_code != 0 + } unless @@installed_at_exit + @@installed_at_exit = true + end + + def self.output= stream + @@out = stream + end + + def location e + last_before_assertion = "" + e.backtrace.reverse_each do |s| + break if s =~ /in .(assert|refute|flunk|pass|fail|raise)/ + last_before_assertion = s + end + last_before_assertion.sub(/:in .*$/, '') + end + + def puke klass, meth, e + e = case e + when MiniTest::Skip then + @skips += 1 + "Skipped:\n#{meth}(#{klass}) [#{location e}]:\n#{e.message}\n" + when MiniTest::Assertion then + @failures += 1 + "Failure:\n#{meth}(#{klass}) [#{location e}]:\n#{e.message}\n" + else + @errors += 1 + bt = MiniTest::filter_backtrace(e.backtrace).join("\n ") + "Error:\n#{meth}(#{klass}):\n#{e.class}: #{e.message}\n #{bt}\n" + end + @report << e + e[0, 1] + end + + def initialize + @report = [] + @errors = @failures = @skips = 0 + @verbose = false + end + + ## + # Top level driver, controls all output and filtering. + + def run args = [] + @verbose = args.delete('-v') + + filter = if args.first =~ /^(-n|--name)$/ then + args.shift + arg = args.shift + arg =~ /\/(.*)\// ? Regexp.new($1) : arg + else + /./ # anything - ^test_ already filtered by #tests + end + + @@out.puts "Loaded suite #{$0.sub(/\.rb$/, '')}\nStarted" + + start = Time.now + run_test_suites filter + + @@out.puts + @@out.puts "Finished in #{'%.6f' % (Time.now - start)} seconds." + + @report.each_with_index do |msg, i| + @@out.puts "\n%3d) %s" % [i + 1, msg] + end + + @@out.puts + + format = "%d tests, %d assertions, %d failures, %d errors, %d skips" + @@out.puts format % [test_count, assertion_count, failures, errors, skips] + + return failures + errors if @test_count > 0 # or return nil... + end + + def run_test_suites filter = /./ + @test_count, @assertion_count = 0, 0 + old_sync, @@out.sync = @@out.sync, true if @@out.respond_to? 
:sync= + TestCase.test_suites.each do |suite| + suite.test_methods.grep(filter).each do |test| + inst = suite.new test + inst._assertions = 0 + @@out.print "#{suite}##{test}: " if @verbose + + t = Time.now if @verbose + result = inst.run(self) + + @@out.print "%.2f s: " % (Time.now - t) if @verbose + @@out.print result + @@out.puts if @verbose + @test_count += 1 + @assertion_count += inst._assertions + end + end + @@out.sync = old_sync if @@out.respond_to? :sync= + [@test_count, @assertion_count] + end + + class TestCase + attr_reader :name + + def run runner + result = '.' + begin + @passed = nil + self.setup + self.__send__ self.name + @passed = true + rescue Exception => e + @passed = false + result = runner.puke(self.class, self.name, e) + ensure + begin + self.teardown + rescue Exception => e + result = runner.puke(self.class, self.name, e) + end + end + result + end + + def initialize name + @name = name + @passed = nil + end + + def self.reset + @@test_suites = {} + end + + reset + + def self.inherited klass + @@test_suites[klass] = true + end + + def self.test_order + :random + end + + def self.test_suites + @@test_suites.keys.sort_by { |ts| ts.name } + end + + def self.test_methods + methods = public_instance_methods(true).grep(/^test/).map { |m| + m.to_s + }.sort + + if self.test_order == :random then + max = methods.size + methods = methods.sort_by { rand(max) } + end + + methods + end + + def setup; end + def teardown; end + + def passed? 
+ @passed + end + + include MiniTest::Assertions + end # class TestCase + end # class Test +end # module Mini diff --git a/lib/mkmf.rb b/lib/mkmf.rb new file mode 100644 index 0000000..06aeca9 --- /dev/null +++ b/lib/mkmf.rb @@ -0,0 +1,1958 @@ +# module to create Makefile for extension modules +# invoke like: ruby -r mkmf extconf.rb + +require 'rbconfig' +require 'fileutils' +require 'shellwords' + +CONFIG = RbConfig::MAKEFILE_CONFIG +ORIG_LIBPATH = ENV['LIB'] + +CXX_EXT = %w[cc cxx cpp] +if /mswin|bccwin|mingw|os2/ !~ CONFIG['build_os'] + CXX_EXT.concat(%w[C]) +end +SRC_EXT = %w[c m] << CXX_EXT +$static = nil +$config_h = '$(arch_hdrdir)/ruby/config.h' +$default_static = $static + +unless defined? $configure_args + $configure_args = {} + args = CONFIG["configure_args"] + if ENV["CONFIGURE_ARGS"] + args << " " << ENV["CONFIGURE_ARGS"] + end + for arg in Shellwords::shellwords(args) + arg, val = arg.split('=', 2) + next unless arg + arg.tr!('_', '-') + if arg.sub!(/^(?!--)/, '--') + val or next + arg.downcase! + end + next if /^--(?:top|topsrc|src|cur)dir$/ =~ arg + $configure_args[arg] = val || true + end + for arg in ARGV + arg, val = arg.split('=', 2) + next unless arg + arg.tr!('_', '-') + if arg.sub!(/^(?!--)/, '--') + val or next + arg.downcase! 
+ end + $configure_args[arg] = val || true + end +end + +$libdir = CONFIG["libdir"] +$rubylibdir = CONFIG["rubylibdir"] +$archdir = CONFIG["archdir"] +$sitedir = CONFIG["sitedir"] +$sitelibdir = CONFIG["sitelibdir"] +$sitearchdir = CONFIG["sitearchdir"] +$vendordir = CONFIG["vendordir"] +$vendorlibdir = CONFIG["vendorlibdir"] +$vendorarchdir = CONFIG["vendorarchdir"] + +$mswin = /mswin/ =~ RUBY_PLATFORM +$bccwin = /bccwin/ =~ RUBY_PLATFORM +$mingw = /mingw/ =~ RUBY_PLATFORM +$cygwin = /cygwin/ =~ RUBY_PLATFORM +$netbsd = /netbsd/ =~ RUBY_PLATFORM +$os2 = /os2/ =~ RUBY_PLATFORM +$beos = /beos/ =~ RUBY_PLATFORM +$haiku = /haiku/ =~ RUBY_PLATFORM +$solaris = /solaris/ =~ RUBY_PLATFORM +$dest_prefix_pattern = (File::PATH_SEPARATOR == ';' ? /\A([[:alpha:]]:)?/ : /\A/) + +# :stopdoc: + +def config_string(key, config = CONFIG) + s = config[key] and !s.empty? and block_given? ? yield(s) : s +end + +def dir_re(dir) + Regexp.new('\$(?:\('+dir+'\)|\{'+dir+'\})(?:\$(?:\(target_prefix\)|\{target_prefix\}))?') +end + +def relative_from(path, base) + dir = File.join(path, "") + if File.expand_path(dir) == File.expand_path(dir, base) + path + else + File.join(base, path) + end +end + +INSTALL_DIRS = [ + [dir_re('commondir'), "$(RUBYCOMMONDIR)"], + [dir_re('sitedir'), "$(RUBYCOMMONDIR)"], + [dir_re('vendordir'), "$(RUBYCOMMONDIR)"], + [dir_re('rubylibdir'), "$(RUBYLIBDIR)"], + [dir_re('archdir'), "$(RUBYARCHDIR)"], + [dir_re('sitelibdir'), "$(RUBYLIBDIR)"], + [dir_re('vendorlibdir'), "$(RUBYLIBDIR)"], + [dir_re('sitearchdir'), "$(RUBYARCHDIR)"], + [dir_re('vendorarchdir'), "$(RUBYARCHDIR)"], + [dir_re('rubyhdrdir'), "$(RUBYHDRDIR)"], + [dir_re('sitehdrdir'), "$(SITEHDRDIR)"], + [dir_re('vendorhdrdir'), "$(VENDORHDRDIR)"], + [dir_re('bindir'), "$(BINDIR)"], +] + +def install_dirs(target_prefix = nil) + if $extout + dirs = [ + ['BINDIR', '$(extout)/bin'], + ['RUBYCOMMONDIR', '$(extout)/common'], + ['RUBYLIBDIR', '$(RUBYCOMMONDIR)$(target_prefix)'], + ['RUBYARCHDIR', 
'$(extout)/$(arch)$(target_prefix)'], + ['HDRDIR', '$(extout)/include/ruby$(target_prefix)'], + ['ARCHHDRDIR', '$(extout)/include/$(arch)/ruby$(target_prefix)'], + ['extout', "#$extout"], + ['extout_prefix', "#$extout_prefix"], + ] + elsif $extmk + dirs = [ + ['BINDIR', '$(bindir)'], + ['RUBYCOMMONDIR', '$(rubylibdir)'], + ['RUBYLIBDIR', '$(rubylibdir)$(target_prefix)'], + ['RUBYARCHDIR', '$(archdir)$(target_prefix)'], + ['HDRDIR', '$(rubyhdrdir)/ruby$(target_prefix)'], + ['ARCHHDRDIR', '$(rubyhdrdir)/$(arch)/ruby$(target_prefix)'], + ] + elsif $configure_args.has_key?('--vendor') + dirs = [ + ['BINDIR', '$(bindir)'], + ['RUBYCOMMONDIR', '$(vendordir)$(target_prefix)'], + ['RUBYLIBDIR', '$(vendorlibdir)$(target_prefix)'], + ['RUBYARCHDIR', '$(vendorarchdir)$(target_prefix)'], + ['HDRDIR', '$(rubyhdrdir)/ruby$(target_prefix)'], + ['ARCHHDRDIR', '$(rubyhdrdir)/$(arch)/ruby$(target_prefix)'], + ] + else + dirs = [ + ['BINDIR', '$(bindir)'], + ['RUBYCOMMONDIR', '$(sitedir)$(target_prefix)'], + ['RUBYLIBDIR', '$(sitelibdir)$(target_prefix)'], + ['RUBYARCHDIR', '$(sitearchdir)$(target_prefix)'], + ['HDRDIR', '$(rubyhdrdir)/ruby$(target_prefix)'], + ['ARCHHDRDIR', '$(rubyhdrdir)/$(arch)/ruby$(target_prefix)'], + ] + end + dirs << ['target_prefix', (target_prefix ? 
"/#{target_prefix}" : "")] + dirs +end + +def map_dir(dir, map = nil) + map ||= INSTALL_DIRS + map.inject(dir) {|d, (orig, new)| d.gsub(orig, new)} +end + +topdir = File.dirname(libdir = File.dirname(__FILE__)) +extdir = File.expand_path("ext", topdir) +path = File.expand_path($0) +$extmk = path[0, topdir.size+1] == topdir+"/" && %r"\A(ext|enc|tool)\z" =~ File.dirname(path[topdir.size+1..-1]) +if not $extmk and File.exist?(($hdrdir = RbConfig::CONFIG["rubyhdrdir"]) + "/ruby/ruby.h") + $topdir = $hdrdir + $top_srcdir = $hdrdir + $arch_hdrdir = $hdrdir + "/$(arch)" +elsif File.exist?(($hdrdir = ($top_srcdir ||= topdir) + "/include") + "/ruby.h") + $topdir ||= RbConfig::CONFIG["topdir"] + $arch_hdrdir = "$(extout)/include/$(arch)" +else + abort "mkmf.rb can't find header files for ruby at #{$hdrdir}/ruby.h" +end + +OUTFLAG = CONFIG['OUTFLAG'] +COUTFLAG = CONFIG['COUTFLAG'] +CPPOUTFILE = CONFIG['CPPOUTFILE'] + +CONFTEST_C = "conftest.c".freeze + +class String + # Wraps a string in escaped quotes if it contains whitespace. + def quote + /\s/ =~ self ? "\"#{self}\"" : "#{self}" + end + + # Generates a string used as cpp macro name. + def tr_cpp + strip.upcase.tr_s("^A-Z0-9_", "_") + end +end +class Array + # Wraps all strings in escaped quotes if they contain whitespace. + def quote + map {|s| s.quote} + end +end + +def rm_f(*files) + opt = ([files.pop] if Hash === files.last) + FileUtils.rm_f(Dir[*files.flatten(1)], *opt) +end + +def rm_rf(*files) + opt = ([files.pop] if Hash === files.last) + FileUtils.rm_rf(Dir[*files.flatten(1)], *opt) +end + +# Returns time stamp of the +target+ file if it exists and is newer +# than or equal to all of +times+. +def modified?(target, times) + (t = File.mtime(target)) rescue return nil + Array === times or times = [times] + t if times.all? {|n| n <= t} +end + +def merge_libs(*libs) + libs.inject([]) do |x, y| + xy = x & y + xn = yn = 0 + y = y.inject([]) {|ary, e| ary.last == e ? 
ary : ary << e} + y.each_with_index do |v, yi| + if xy.include?(v) + xi = [x.index(v), xn].max() + x[xi, 1] = y[yn..yi] + xn, yn = xi + (yi - yn + 1), yi + 1 + end + end + x.concat(y[yn..-1] || []) + end +end + +# This is a custom logging module. It generates an mkmf.log file when you +# run your extconf.rb script. This can be useful for debugging unexpected +# failures. +# +# This module and its associated methods are meant for internal use only. +# +module Logging + @log = nil + @logfile = 'mkmf.log' + @orgerr = $stderr.dup + @orgout = $stdout.dup + @postpone = 0 + @quiet = $extmk + + def self::log_open + @log ||= File::open(@logfile, 'wb') + @log.sync = true + end + + def self::open + log_open + $stderr.reopen(@log) + $stdout.reopen(@log) + yield + ensure + $stderr.reopen(@orgerr) + $stdout.reopen(@orgout) + end + + def self::message(*s) + log_open + @log.printf(*s) + end + + def self::logfile file + @logfile = file + if @log and not @log.closed? + @log.flush + @log.close + @log = nil + end + end + + def self::postpone + tmplog = "mkmftmp#{@postpone += 1}.log" + open do + log, *save = @log, @logfile, @orgout, @orgerr + @log, @logfile, @orgout, @orgerr = nil, tmplog, log, log + begin + log.print(open {yield}) + @log.close + File::open(tmplog) {|t| FileUtils.copy_stream(t, log)} + ensure + @log, @logfile, @orgout, @orgerr = log, *save + @postpone -= 1 + rm_f tmplog + end + end + end + + class << self + attr_accessor :quiet + end +end + +def xsystem command + varpat = /\$\((\w+)\)|\$\{(\w+)\}/ + if varpat =~ command + vars = Hash.new {|h, k| h[k] = ''; ENV[k]} + command = command.dup + nil while command.gsub!(varpat) {vars[$1||$2]} + end + Logging::open do + puts command.quote + system(command) + end +end + +def xpopen command, *mode, &block + Logging::open do + case mode[0] + when nil, /^r/ + puts "#{command} |" + else + puts "| #{command}" + end + IO.popen(command, *mode, &block) + end +end + +def log_src(src) + src = src.split(/^/) + fmt = 
"%#{src.size.to_s.size}d: %s" + Logging::message <<"EOM" +checked program was: +/* begin */ +EOM + src.each_with_index {|line, no| Logging::message fmt, no+1, line} + Logging::message <<"EOM" +/* end */ + +EOM +end + +def create_tmpsrc(src) + src = "#{COMMON_HEADERS}\n#{src}" + src = yield(src) if block_given? + src.gsub!(/[ \t]+$/, '') + src.gsub!(/\A\n+|^\n+$/, '') + src.sub!(/[^\n]\z/, "\\&\n") + count = 0 + begin + open(CONFTEST_C, "wb") do |cfile| + cfile.print src + end + rescue Errno::EACCES + if (count += 1) < 5 + sleep 0.2 + retry + end + end + src +end + +def have_devel? + unless defined? $have_devel + $have_devel = true + $have_devel = try_link(MAIN_DOES_NOTHING) + end + $have_devel +end + +def try_do(src, command, &b) + unless have_devel? + raise < $hdrdir.quote, + 'src' => CONFTEST_C, + 'arch_hdrdir' => "#$arch_hdrdir", + 'top_srcdir' => $top_srcdir.quote, + 'INCFLAGS' => "#$INCFLAGS", + 'CPPFLAGS' => "#$CPPFLAGS", + 'CFLAGS' => "#$CFLAGS", + 'ARCH_FLAG' => "#$ARCH_FLAG", + 'LDFLAGS' => "#$LDFLAGS #{ldflags}", + 'LIBPATH' => libpathflag(libpath), + 'LOCAL_LIBS' => "#$LOCAL_LIBS #$libs", + 'LIBS' => "#$LIBRUBYARG_STATIC #{opt} #$LIBS") + RbConfig::expand(TRY_LINK.dup, conf) +end + +def cc_command(opt="") + conf = RbConfig::CONFIG.merge('hdrdir' => $hdrdir.quote, 'srcdir' => $srcdir.quote, + 'arch_hdrdir' => "#$arch_hdrdir", + 'top_srcdir' => $top_srcdir.quote) + RbConfig::expand("$(CC) #$INCFLAGS #$CPPFLAGS #$CFLAGS #$ARCH_FLAG #{opt} -c #{CONFTEST_C}", + conf) +end + +def cpp_command(outfile, opt="") + conf = RbConfig::CONFIG.merge('hdrdir' => $hdrdir.quote, 'srcdir' => $srcdir.quote, + 'arch_hdrdir' => "#$arch_hdrdir", + 'top_srcdir' => $top_srcdir.quote) + RbConfig::expand("$(CPP) #$INCFLAGS #$CPPFLAGS #$CFLAGS #{opt} #{CONFTEST_C} #{outfile}", + conf) +end + +def libpathflag(libpath=$DEFLIBPATH|$LIBPATH) + libpath.map{|x| + case x + when "$(topdir)", /\A\./ + LIBPATHFLAG + else + LIBPATHFLAG+RPATHFLAG + end % x.quote + }.join +end + +def 
try_link0(src, opt="", &b) + try_do(src, link_command("", opt), &b) +end + +def try_link(src, opt="", &b) + try_link0(src, opt, &b) +ensure + rm_f "conftest*", "c0x32*" +end + +def try_compile(src, opt="", &b) + try_do(src, cc_command(opt), &b) +ensure + rm_f "conftest*" +end + +def try_cpp(src, opt="", &b) + try_do(src, cpp_command(CPPOUTFILE, opt), &b) +ensure + rm_f "conftest*" +end + +def cpp_include(header) + if header + header = [header] unless header.kind_of? Array + header.map {|h| "#include <#{h}>\n"}.join + else + "" + end +end + +def with_cppflags(flags) + cppflags = $CPPFLAGS + $CPPFLAGS = flags + ret = yield +ensure + $CPPFLAGS = cppflags unless ret +end + +def with_cflags(flags) + cflags = $CFLAGS + $CFLAGS = flags + ret = yield +ensure + $CFLAGS = cflags unless ret +end + +def with_ldflags(flags) + ldflags = $LDFLAGS + $LDFLAGS = flags + ret = yield +ensure + $LDFLAGS = ldflags unless ret +end + +def try_static_assert(expr, headers = nil, opt = "", &b) + headers = cpp_include(headers) + try_compile(< 0", headers, opt) + # positive constant + elsif try_static_assert("#{const} < 0", headers, opt) + neg = true + const = "-(#{const})" + elsif try_static_assert("#{const} == 0", headers, opt) + return 0 + else + # not a constant + return nil + end + upper = 1 + lower = 0 + until try_static_assert("#{const} <= #{upper}", headers, opt) + lower = upper + upper <<= 1 + end + return nil unless lower + while upper > lower + 1 + mid = (upper + lower) / 2 + if try_static_assert("#{const} > #{mid}", headers, opt) + lower = mid + else + upper = mid + end + end + upper = -upper if neg + return upper + else + src = %{#{includes} +#include +/*top*/ +int conftest_const = (int)(#{const}); +int main() {printf("%d\\n", conftest_const); return 0;} +} + if try_link0(src, opt, &b) + xpopen("./conftest") do |f| + return Integer(f.gets) + end + end + end + nil +end + +def try_func(func, libs, headers = nil, &b) + headers = cpp_include(headers) + try_link(<<"SRC", libs, &b) or 
+#{headers} +/*top*/ +#{MAIN_DOES_NOTHING} +int t() { void ((*volatile p)()); p = (void ((*)()))#{func}; return 0; } +SRC + try_link(<<"SRC", libs, &b) +#{headers} +/*top*/ +#{MAIN_DOES_NOTHING} +int t() { #{func}(); return 0; } +SRC +end + +def try_var(var, headers = nil, &b) + headers = cpp_include(headers) + try_compile(<<"SRC", &b) +#{headers} +/*top*/ +#{MAIN_DOES_NOTHING} +int t() { const volatile void *volatile p; p = &(&#{var})[0]; return 0; } +SRC +end + +def egrep_cpp(pat, src, opt = "", &b) + src = create_tmpsrc(src, &b) + xpopen(cpp_command('', opt)) do |f| + if Regexp === pat + puts(" ruby -ne 'print if #{pat.inspect}'") + f.grep(pat) {|l| + puts "#{f.lineno}: #{l}" + return true + } + false + else + puts(" egrep '#{pat}'") + begin + stdin = $stdin.dup + $stdin.reopen(f) + system("egrep", pat) + ensure + $stdin.reopen(stdin) + end + end + end +ensure + rm_f "conftest*" + log_src(src) +end + +# This is used internally by the have_macro? method. +def macro_defined?(macro, src, opt = "", &b) + src = src.sub(/[^\n]\z/, "\\&\n") + try_compile(src + <<"SRC", opt, &b) +/*top*/ +#ifndef #{macro} +# error +>>>>>> #{macro} undefined <<<<<< +#endif +SRC +end + +def try_run(src, opt = "", &b) + if try_link0(src, opt, &b) + xsystem("./conftest") + else + nil + end +ensure + rm_f "conftest*" +end + +def install_files(mfile, ifiles, map = nil, srcprefix = nil) + ifiles or return + ifiles.empty? and return + srcprefix ||= '$(srcdir)' + RbConfig::expand(srcdir = srcprefix.dup) + dirs = [] + path = Hash.new {|h, i| h[i] = dirs.push([i])[-1]} + ifiles.each do |files, dir, prefix| + dir = map_dir(dir, map) + prefix &&= %r|\A#{Regexp.quote(prefix)}/?| + if /\A\.\// =~ files + # install files which are in current working directory. + files = files[2..-1] + len = nil + else + # install files which are under the $(srcdir). 
+ files = File.join(srcdir, files) + len = srcdir.size + end + f = nil + Dir.glob(files) do |fx| + f = fx + f[0..len] = "" if len + case File.basename(f) + when *$NONINSTALLFILES + next + end + d = File.dirname(f) + d.sub!(prefix, "") if prefix + d = (d.empty? || d == ".") ? dir : File.join(dir, d) + f = File.join(srcprefix, f) if len + path[d] << f + end + unless len or f + d = File.dirname(files) + d.sub!(prefix, "") if prefix + d = (d.empty? || d == ".") ? dir : File.join(dir, d) + path[d] << files + end + end + dirs +end + +def install_rb(mfile, dest, srcdir = nil) + install_files(mfile, [["lib/**/*.rb", dest, "lib"]], nil, srcdir) +end + +def append_library(libs, lib) # :no-doc: + format(LIBARG, lib) + " " + libs +end + +def message(*s) + unless Logging.quiet and not $VERBOSE + printf(*s) + $stdout.flush + end +end + +# This emits a string to stdout that allows users to see the results of the +# various have* and find* methods as they are tested. +# +# Internal use only. +# +def checking_for(m, fmt = nil) + f = caller[0][/in `(.*)'$/, 1] and f << ": " #` for vim #' + m = "checking #{/\Acheck/ =~ f ? '' : 'for '}#{m}... " + message "%s", m + a = r = nil + Logging::postpone do + r = yield + a = (fmt ? fmt % r : r ? "yes" : "no") << "\n" + "#{f}#{m}-------------------- #{a}\n" + end + message(a) + Logging::message "--------------------\n\n" + r +end + +def checking_message(target, place = nil, opt = nil) + [["in", place], ["with", opt]].inject("#{target}") do |msg, (pre, noun)| + if noun + [[:to_str], [:join, ","], [:to_s]].each do |meth, *args| + if noun.respond_to?(meth) + break noun = noun.send(meth, *args) + end + end + msg << " #{pre} #{noun}" unless noun.empty? + end + msg + end +end + +# :startdoc: + +# Returns whether or not +macro+ is defined either in the common header +# files or within any +headers+ you provide. +# +# Any options you pass to +opt+ are passed along to the compiler. 
#
def have_macro(macro, headers = nil, opt = "", &b)
  # Build the progress label first, then run the probe inside checking_for so
  # that the outcome is echoed to the user and recorded in the log.
  label = checking_message(macro, headers, opt)
  checking_for(label) do
    macro_defined?(macro, cpp_include(headers), opt, &b)
  end
end

# Checks whether the entry point +func+ can be linked from the library +lib+.
# A nil or empty +func+ means 'main'.  On success the library is prepended
# to the list of libraries used when linking the extension.
#
# When +headers+ are given they are included in the test program that
# references +func+.
#
# The library name actually probed can be overridden with the
# '--with-FOOlib' configuration option.
#
# Libraries listed in COMMON_LIBS are reported as found without a link test
# and are not added to the list.
#
def have_library(lib, func = nil, headers = nil, &b)
  func = "main" unless func && !func.empty?
  lib = with_config(lib + 'lib', lib)
  checking_for checking_message("#{func}()", LIBARG % lib) do
    next true if COMMON_LIBS.include?(lib)

    candidate = append_library($libs, lib)
    next false unless try_func(func, candidate, headers, &b)

    # Commit the extended library list only after the link test passed.
    $libs = candidate
    true
  end
end

# Checks whether the entry point +func+ can be linked from the library +lib+,
# trying each directory in +paths+ in turn as an additional library search
# path.  Every element of +paths+ may itself be a list joined with
# File::PATH_SEPARATOR.  A nil or empty +func+ means 'main'.
#
# On success the library is added to the link list and the library path that
# made the link succeed stays in effect; on failure the library path is
# restored to what it was before the call.
#
def find_library(lib, func, *paths, &b)
  func = "main" unless func && !func.empty?
  lib = with_config(lib + 'lib', lib)
  search_dirs = paths.collect {|entry| entry.split(File::PATH_SEPARATOR)}.flatten
  checking_for "#{func}() in #{LIBARG % lib}" do
    saved_libpath = $LIBPATH
    candidate = append_library($libs, lib)
    found = nil
    begin
      # First attempt uses the current library path; each retry adds exactly
      # one more directory on top of the saved path.
      while true
        found = try_func(func, candidate, &b)
        break if found || search_dirs.empty?
        $LIBPATH = saved_libpath | [search_dirs.shift]
      end
      if found
        $libs = candidate
        saved_libpath = nil   # keep the path that worked
      end
    ensure
      $LIBPATH = saved_libpath if saved_libpath
    end
    found
  end
end

# Checks whether the function +func+ is available, using the common header
# files plus any +headers+ given.  On success a 'HAVE_' macro built from the
# function name (as converted by tr_cpp) is added to the compiler
# definitions.
#
# For example, a successful have_func('foo') adds -DHAVE_FOO.
#
def have_func(func, headers = nil, &b)
  checking_for checking_message("#{func}()", headers) do
    found = try_func(func, $libs, headers, &b) ? true : false
    $defs.push(format("-DHAVE_%s", func.tr_cpp)) if found
    found
  end
end

# Checks whether the variable +var+ is available, using the common header
# files plus any +headers+ given.  On success a 'HAVE_' macro built from the
# variable name (as converted by tr_cpp) is added to the compiler
# definitions.
#
# For example, a successful have_var('foo') adds -DHAVE_FOO.
#
def have_var(var, headers = nil, &b)
  checking_for checking_message(var, headers) do
    found = try_var(var, headers, &b) ? true : false
    $defs.push(format("-DHAVE_%s", var.tr_cpp)) if found
    found
  end
end

# Returns whether or not the given +header+ file can be found on your system.
# If found, a macro is passed as a preprocessor constant to the compiler using
# the header file name, in uppercase, prepended with 'HAVE_'.
#
# For example, if have_header('foo.h') returned true, then the HAVE_FOO_H
# preprocessor macro would be passed to the compiler.
#
def have_header(header, &b)
  checking_for header do
    found = try_cpp(cpp_include(header), &b) ? true : false
    # The macro name is the header name with letters upcased and '.', '/'
    # and '-' turned into underscores.
    $defs.push(format("-DHAVE_%s", header.tr("a-z./\055", "A-Z___"))) if found
    found
  end
end

# Looks for +header+, first with the current preprocessor settings and then
# with each directory in +paths+ added through an -I switch.  Returns whether
# the header could be preprocessed.
#
# When a directory from +paths+ is what makes the header visible, its -I
# switch is appended to the include flags used for later compilation.
#
def find_header(header, *paths)
  label = checking_message(header, paths)
  include_src = cpp_include(header)
  checking_for label do
    next true if try_cpp(include_src)

    # Stop at the first directory that works and remember its switch.
    paths.any? do |dir|
      flag = "-I#{dir}".quote
      hit = try_cpp(include_src, flag)
      $INCFLAGS << " " << flag if hit
      hit
    end
  end
end

# Returns whether or not the struct of type +type+ contains +member+. If
# it does not, or the struct type can't be found, then false is returned. You
# may optionally specify additional +headers+ in which to look for the struct
# (in addition to the common header files).
#
# If found, a macro is passed as a preprocessor constant to the compiler using
# the type name and the member name, in uppercase, prepended with 'HAVE_'.
#
# For example, if have_struct_member('struct foo', 'bar') returned true, then the
# HAVE_STRUCT_FOO_BAR preprocessor macro would be passed to the compiler.
#
# HAVE_ST_BAR is also defined for backward compatibility.
#
def have_struct_member(type, member, headers = nil, &b)
  checking_for checking_message("#{type}.#{member}", headers) do
    # Taking the member's address only compiles when both the struct type
    # and the member exist.
    probe = <<"SRC"
#{cpp_include(headers)}
/*top*/
#{MAIN_DOES_NOTHING}
int s = (char *)&((#{type}*)0)->#{member} - (char *)0;
SRC
    next false unless try_compile(probe, &b)

    $defs.push(format("-DHAVE_%s_%s", type.tr_cpp, member.tr_cpp))
    $defs.push(format("-DHAVE_ST_%s", member.tr_cpp)) # backward compatibility
    true
  end
end

# Compiles a typedef of +type+ and, on success, records -DHAVE_TYPE_<type>.
# Returns true or false.  No progress message is printed; see have_type.
def try_type(type, headers = nil, opt = "", &b)
  probe = <<"SRC"
#{cpp_include(headers)}
/*top*/
typedef #{type} conftest_type;
int conftestval[sizeof(conftest_type)?1:-1];
SRC
  return false unless try_compile(probe, opt, &b)

  $defs.push(format("-DHAVE_TYPE_%s", type.tr_cpp))
  true
end

# Checks whether the static type +type+ is defined, using the common header
# files plus any +headers+ given.  +opt+ holds extra flags that are passed
# along to the compiler.
#
# On success a 'HAVE_TYPE_' macro built from the type name is added to the
# compiler definitions.
#
# For example, a successful have_type('foo') adds -DHAVE_TYPE_FOO.
#
def have_type(type, headers = nil, opt = "", &b)
  label = checking_message(type, headers, opt)
  checking_for(label) do
    try_type(type, headers, opt, &b)
  end
end

# Returns the first entry of +headers+ with which the static type +type+ is
# defined, or nil when none of them works.
#
# +opt+ holds extra flags that are passed along to the compiler; nil is
# treated as an empty string.
#
# See also +have_type+.
#
def find_type(type, opt, *headers, &b)
  opt ||= ""
  # Result formatter: shows the matching header(s), or this very string
  # ("not found") when there is no match.
  fmt = "not found"
  def fmt.%(x)
    return self unless x
    x.respond_to?(:join) ? x.join(",") : x
  end
  checking_for checking_message(type, nil, opt), fmt do
    headers.find {|candidate| try_type(type, candidate, opt, &b)}
  end
end

# Compiles an initialisation from the constant +const+ (or a
# [const, type] pair) and, on success, records -DHAVE_CONST_<const>.
# Without an explicit type the constant is cast to int.  Returns true or
# false.  No progress message is printed; see have_const.
def try_const(const, headers = nil, opt = "", &b)
  name, ctype = *const
  probe = <<"SRC"
#{cpp_include(headers)}
/*top*/
typedef #{ctype || 'int'} conftest_type;
conftest_type conftestval = #{ctype ? '' : '(int)'}#{name};
SRC
  return false unless try_compile(probe, opt, &b)

  $defs.push(format("-DHAVE_CONST_%s", name.tr_cpp))
  true
end

# Checks whether the constant +const+ is defined.  The type of the constant
# may be supplied by passing a [const, type] pair, for example:
#
#   have_const(%w[PTHREAD_MUTEX_INITIALIZER pthread_mutex_t], "pthread.h")
#
# Additional +headers+ are checked besides the common header files, and
# +opt+ holds extra flags that are passed along to the compiler.
#
# On success a 'HAVE_CONST_' macro built from the constant name is added to
# the compiler definitions.
#
# For example, a successful have_const('foo') adds -DHAVE_CONST_FOO.
#
def have_const(const, headers = nil, opt = "", &b)
  label = checking_message([*const].compact.join(' '), headers, opt)
  checking_for(label) do
    try_const(const, headers, opt, &b)
  end
end

# Returns the size of the given +type+, or nil when it cannot be determined.
# Additional +headers+ may be given in which to look for the +type+.
#
# On success a 'SIZEOF_' macro built from the type name, followed by '=X'
# where 'X' is the size, is added to the compiler definitions.
#
# For example, if check_sizeof('mystruct') returned 12, then
# -DSIZEOF_MYSTRUCT=12 would be passed to the compiler.
#
def check_sizeof(type, headers = nil, &b)
  expr = "sizeof(#{type})"
  # Result formatter: the number on success, "failed" otherwise.
  fmt = "%d"
  def fmt.%(x)
    x ? super : "failed"
  end
  checking_for checking_message("size of #{type}", headers), fmt do
    bytes = try_constant(expr, headers, &b)
    next nil unless bytes

    $defs.push(format("-DSIZEOF_%s=%d", type.tr_cpp, bytes))
    bytes
  end
end

# :stopdoc:

# Used internally by what_type?: succeeds when a value of +type+ (or its
# +member+) can be dereferenced and used in integer arithmetic, i.e. when it
# behaves like a pointer to a scalar.
def scalar_ptr_type?(type, member = nil, headers = nil, &b)
  probe = <<"SRC"
#{cpp_include(headers)}
/*top*/
volatile #{type} conftestval;
#{MAIN_DOES_NOTHING}
int t() {return (int)(1-*(conftestval#{member ? ".#{member}" : ""}));}
SRC
  try_compile(probe, &b)
end

# Used internally by what_type?: succeeds when a value of +type+ (or its
# +member+) can be used directly in integer arithmetic, i.e. when it behaves
# like a scalar.
def scalar_type?(type, member = nil, headers = nil, &b)
  probe = <<"SRC"
#{cpp_include(headers)}
/*top*/
volatile #{type} conftestval;
#{MAIN_DOES_NOTHING}
int t() {return (int)(1-(conftestval#{member ? ".#{member}" : ""}));}
SRC
  try_compile(probe, &b)
end

# Classifies +type+ (or the +member+ of it) as "string", "long long",
# "long", "int", "short" or "char" by a series of compile-time size
# comparisons.  Returns nil when the type is neither scalar nor a pointer to
# a one-byte object.
def what_type?(type, member = nil, headers = nil, &b)
  label = "#{type}"
  expr = type
  if member
    label << "." << member
    expr = "(((#{type} *)0)->#{member})"
  end
  # Result formatter: "seems <kind>" on success, "unknown" otherwise.
  fmt = "seems %s"
  def fmt.%(x)
    x ? super : "unknown"
  end
  checking_for checking_message(label, headers), fmt do
    if scalar_ptr_type?(type, member, headers, &b)
      try_static_assert("sizeof(*#{expr}) == 1", headers) ? "string" : nil
    elsif scalar_type?(type, member, headers, &b)
      # Walk down from the widest type; the first size the value exceeds
      # decides the answer, and "char" is what remains.
      ladder = [
        ["sizeof(long)",  "long long"],
        ["sizeof(int)",   "long"],
        ["sizeof(short)", "int"],
        ["1",             "short"],
      ]
      rung = ladder.find do |bound, _kind|
        try_static_assert("sizeof(#{expr}) > #{bound}", headers)
      end
      rung ? rung[1] : "char"
    end
  end
end

# This method is used internally by the find_executable method.
#
# Internal use only.
#
def find_executable0(bin, path = nil)
  ext = config_string('EXEEXT')

  # A fully expanded path is tested as given (optionally with the platform's
  # executable suffix appended); no directory search takes place.
  if File.expand_path(bin) == bin
    return bin if File.executable?(bin)
    return nil unless ext
    suffixed = bin + ext
    return File.executable?(suffixed) ? suffixed : nil
  end

  path ||= ENV['PATH']
  dirs = path ? path.split(File::PATH_SEPARATOR) : %w[/usr/local/bin /usr/ucb /usr/bin /bin]
  dirs.each do |dir|
    candidate = File.join(dir, bin)
    return candidate if File.executable?(candidate)
    next unless ext
    candidate << ext
    return candidate if File.executable?(candidate)
  end
  nil
end

# :startdoc:

# Searches for the executable +bin+ on +path+.  When +path+ is not given the
# PATH environment variable is used, and if that is not set either,
# /usr/local/bin, /usr/ucb, /usr/bin and /bin are searched.
#
# Returns the full path of the executable, including its name, or nil when
# it is not found.
#
# Note that this method does not actually affect the generated Makefile.
#
def find_executable(bin, path = nil)
  label = checking_message(bin, path)
  checking_for(label) do
    find_executable0(bin, path)
  end
end

# :stopdoc:

# Looks up +config+ (underscores turned into dashes) among the configure
# arguments and records the query in $arg_config.  A truthy +default+ is
# used as the fallback value; without a default and without a block the
# fallback is nil; with only a block the block supplies the value for a
# missing option.
def arg_config(config, default=nil, &block)
  $arg_config << [config, default]
  fallback =
    if default
      [default]
    elsif block
      []
    else
      [nil]
    end
  $configure_args.fetch(config.tr('_', '-'), *fallback, &block)
end

# :startdoc:

# Tests for the presence of a --with-<em>config</em> or
# --without-<em>config</em> option.  Returns true if the with option is
# given, false if the without option is given, and the default value
# otherwise.  The literal option values "yes" and "no" are turned into true
# and false; any other value is returned unchanged.  When a block is given
# it is called with the option name and the default to produce the value for
# a missing option.
#
# This can be useful for adding custom definitions, such as debug information.
#
# Example:
#
#    if with_config("debug")
#       $defs.push("-DOSSL_DEBUG") unless $defs.include? "-DOSSL_DEBUG"
#    end
#
def with_config(config, default=nil)
  config = config.sub(/^--with[-_]/, '')
  # The block runs only when no --with option was supplied.
  val = arg_config("--with-"+config) do
    if arg_config("--without-"+config)
      false
    elsif block_given?
      yield(config, default)
    else
      break default
    end
  end
  return true if "yes" == val
  return false if "no" == val
  val
end

# Tests for the presence of an --enable-<em>config</em> or
# --disable-<em>config</em> option.  Returns true if the enable option is
# given, false if the disable option is given, and the default value
# otherwise.  When a block is given it is called with the option name and
# the default to produce the value for a missing option.
#
# This can be useful for adding custom definitions, such as debug information.
#
# Example:
#
#    if enable_config("debug")
#       $defs.push("-DOSSL_DEBUG") unless $defs.include? "-DOSSL_DEBUG"
#    end
#
def enable_config(config, default=nil)
  return true if arg_config("--enable-"+config)
  return false if arg_config("--disable-"+config)
  return yield(config, default) if block_given?
  default
end

# Generates a header file from the macro definitions collected so far by
# methods such as have_func and have_header.  The definitions are wrapped in
# an include guard derived from the +header+ file name, which defaults to
# 'extconf.h'.  The file is rewritten only when its content would change.
#
# For example:
#
#    # extconf.rb
#    require 'mkmf'
#    have_func('realpath')
#    have_header('sys/utime.h')
#    create_header
#    create_makefile('foo')
#
# The above script would generate the following extconf.h file:
#
#    #ifndef EXTCONF_H
#    #define EXTCONF_H
#    #define HAVE_REALPATH 1
#    #define HAVE_SYS_UTIME_H 1
#    #endif
#
# Because the file reflects the definitions made earlier in extconf.rb, this
# is normally one of the last methods called in the script.
#
def create_header(header = "extconf.h")
  message "creating %s\n", header
  guard = header.tr("a-z./\055", "A-Z___")

  # Translate each -D / -U switch into the matching preprocessor line;
  # anything else in $defs is ignored.
  directives = $defs.map do |flag|
    case flag
    when /^-D([^=]+)(?:=(.*))?/
      "#define #$1 #{$2 ? Shellwords.shellwords($2)[0] : 1}\n"
    when /^-U(.*)/
      "#undef #$1\n"
    end
  end
  text = ["#ifndef #{guard}\n#define #{guard}\n", *directives.compact, "#endif\n"].join

  # An unreadable or missing file counts as "changed".
  unchanged = (IO.read(header) == text rescue false)
  unless unchanged
    open(header, "w") {|hfile| hfile.write(text)}
  end
  $extconf_h = header
end

# Sets a +target+ name that the user can then use to configure various 'with'
# options on the command line.  For example, if the target is set to "foo",
# the user can pass --with-foo-dir, --with-foo-include and --with-foo-lib.
#
# Defaults for the include and lib directories may be supplied through
# +idefault+ and +ldefault+ respectively.
#
# Returns the resulting [include, lib] settings.
#
# Note that dir_config only adds to the list of places to search for libraries
# and include files.  It does not link the libraries into your application.
#
def dir_config(target, idefault=nil, ldefault=nil)
  # Accept either a ready-made list or a PATH_SEPARATOR-joined string.
  to_list = lambda do |value|
    Array === value ? value : value.split(File::PATH_SEPARATOR)
  end

  roots = nil
  if dir = with_config(target + "-dir", (idefault unless ldefault))
    roots = to_list[dir]
    idefault = ldefault = nil
  end

  # Each lookup is followed by filling in the recorded default of the most
  # recent arg_config query, so the order of these four lines matters.
  idir = with_config(target + "-include", idefault)
  $arg_config.last[1] ||= "${#{target}-dir}/include"
  ldir = with_config(target + "-lib", ldefault)
  $arg_config.last[1] ||= "${#{target}-dir}/lib"

  idirs = idir ? to_list[idir] : []
  if roots
    idirs.concat(roots.collect {|root| root + "/include"})
    idir = ([idir] + idirs).compact.join(File::PATH_SEPARATOR)
  end
  unless idirs.empty?
    idirs.collect! {|d| "-I" + d}
    # Skip switches that are already part of the preprocessor flags.
    idirs -= Shellwords.shellwords($CPPFLAGS)
    $CPPFLAGS = (idirs.quote << $CPPFLAGS).join(" ") unless idirs.empty?
  end

  ldirs = ldir ? to_list[ldir] : []
  if roots
    ldirs.concat(roots.collect {|root| root + "/lib"})
    ldir = ([ldir] + ldirs).compact.join(File::PATH_SEPARATOR)
  end
  $LIBPATH = ldirs | $LIBPATH

  [idir, ldir]
end

# :stopdoc:

# Handles meta information about installed libraries.
# Uses your platform's
# pkg-config program if it has one.
#
# Queries the compiler and linker flags of the package +pkg+ and appends them
# to $CFLAGS, $LDFLAGS and $libs.  Returns [cflags, ldflags, libs], or nil
# when no configuration command for the package is available.  Three
# sources are tried in order: a command given through --with-<pkg>-config,
# the pkg-config command, and a command named "<pkg>-config".
def pkg_config(pkg)
  if pkgconfig = with_config("#{pkg}-config") and find_executable0(pkgconfig)
    # iff package specific config command is given
    query = proc {|opt| `#{pkgconfig} --#{opt}`.chomp}
  elsif ($PKGCONFIG ||=
         (pkgconfig = with_config("pkg-config", ("pkg-config" unless CROSS_COMPILING))) &&
         find_executable0(pkgconfig) && pkgconfig) and
      system("#{$PKGCONFIG} --exists #{pkg}")
    # default to pkg-config command
    query = proc {|opt| `#{$PKGCONFIG} --#{opt} #{pkg}`.chomp}
  elsif find_executable0(pkgconfig = "#{pkg}-config")
    # default to package specific config command, as a last resort.
    query = proc {|opt| `#{pkgconfig} --#{opt}`.chomp}
  end

  unless query
    Logging::message "package configuration for %s is not found\n", pkg
    return nil
  end

  cflags = query['cflags']
  all_libs = query['libs']
  libs = query['libs-only-l']
  # Whatever --libs reports beyond the -l switches is treated as linker flags.
  ldflags = (Shellwords.shellwords(all_libs) - Shellwords.shellwords(libs)).quote.join(" ")
  $CFLAGS += " " + cflags
  $LDFLAGS += " " + ldflags
  $libs += " " + libs
  Logging::message "package configuration for %s\n", pkg
  Logging::message "cflags: %s\nldflags: %s\nlibs: %s\n\n",
                   cflags, ldflags, libs
  [cflags, ldflags, libs]
end

# Strips the leading part of +dir+ matched by $dest_prefix_pattern and
# prefixes the result with "$(DESTDIR)", unless what remains already starts
# with a make variable reference ("$(" or "${").
def with_destdir(dir)
  stripped = dir.sub($dest_prefix_pattern, '')
  stripped =~ /\A\$[\(\{]/ ? stripped : "$(DESTDIR)" + stripped
end

# Returns a copy of +s+ with every forward slash replaced by a backslash.
# Aimed at MS Windows.
#
# Internal use only.
#
def winsep(s)
  s.tr("/", "\\")
end

# Converts native path to format acceptable in Makefile
#
# Internal use only.
+# +if !CROSS_COMPILING + case CONFIG['build_os'] + when 'mingw32' + def mkintpath(path) + # mingw uses make from msys and it needs special care + # converts from C:\some\path to /C/some/path + path = path.dup + path.tr!('\\', '/') + path.sub!(/\A([A-Za-z]):(?=\/)/, '/\1') + path + end + end +end +unless defined?(mkintpath) + def mkintpath(path) + path + end +end + +def configuration(srcdir) + mk = [] + vpath = $VPATH.dup + if !CROSS_COMPILING + case CONFIG['build_os'] + when 'cygwin' + if CONFIG['target_os'] != 'cygwin' + vpath = vpath.map {|p| p.sub(/.*/, '$(shell cygpath -u \&)')} + end + end + end + CONFIG["hdrdir"] ||= $hdrdir + mk << %{ +SHELL = /bin/sh + +#### Start of system configuration section. #### +#{"top_srcdir = " + $top_srcdir.sub(%r"\A#{Regexp.quote($topdir)}/", "$(topdir)/") if $extmk} +srcdir = #{srcdir.gsub(/\$\((srcdir)\)|\$\{(srcdir)\}/) {mkintpath(CONFIG[$1||$2])}.quote} +topdir = #{mkintpath($extmk ? CONFIG["topdir"] : $topdir).quote} +hdrdir = #{mkintpath(CONFIG["hdrdir"]).quote} +arch_hdrdir = #{$arch_hdrdir} +VPATH = #{vpath.join(CONFIG['PATH_SEPARATOR'])} +} + if $extmk + mk << "RUBYLIB = -\nRUBYOPT = -r$(top_srcdir)/ext/purelib.rb\n" + end + if destdir = CONFIG["prefix"][$dest_prefix_pattern, 1] + mk << "\nDESTDIR = #{destdir}\n" + end + CONFIG.each do |key, var| + next unless /prefix$/ =~ key + mk << "#{key} = #{with_destdir(var)}\n" + end + CONFIG.each do |key, var| + next if /^abs_/ =~ key + next if /^(?:src|top|hdr)dir$/ =~ key + next unless /dir$/ =~ key + mk << "#{key} = #{with_destdir(var)}\n" + end + if !$extmk and !$configure_args.has_key?('--ruby') and + sep = config_string('BUILD_FILE_SEPARATOR') + sep = ":/=#{sep}" + else + sep = "" + end + possible_command = (proc {|s| s if /top_srcdir/ !~ s} unless $extmk) + extconf_h = $extconf_h ? 
"-DRUBY_EXTCONF_H=\\\"$(RUBY_EXTCONF_H)\\\" " : $defs.join(" ") << " " + mk << %{ +CC = #{CONFIG['CC']} +CXX = #{CONFIG['CXX']} +LIBRUBY = #{CONFIG['LIBRUBY']} +LIBRUBY_A = #{CONFIG['LIBRUBY_A']} +LIBRUBYARG_SHARED = #$LIBRUBYARG_SHARED +LIBRUBYARG_STATIC = #$LIBRUBYARG_STATIC +OUTFLAG = #{OUTFLAG} +COUTFLAG = #{COUTFLAG} + +RUBY_EXTCONF_H = #{$extconf_h} +cflags = #{CONFIG['cflags']} +optflags = #{CONFIG['optflags']} +debugflags = #{CONFIG['debugflags']} +warnflags = #{CONFIG['warnflags']} +CFLAGS = #{$static ? '' : CONFIG['CCDLFLAGS']} #$CFLAGS #$ARCH_FLAG +INCFLAGS = -I. #$INCFLAGS +DEFS = #{CONFIG['DEFS']} +CPPFLAGS = #{extconf_h}#{$CPPFLAGS} +CXXFLAGS = $(CFLAGS) #{CONFIG['CXXFLAGS']} +ldflags = #{$LDFLAGS} +dldflags = #{$DLDFLAGS} +archflag = #{$ARCH_FLAG} +DLDFLAGS = $(ldflags) $(dldflags) $(archflag) +LDSHARED = #{CONFIG['LDSHARED']} +LDSHAREDXX = #{config_string('LDSHAREDXX') || '$(LDSHARED)'} +AR = #{CONFIG['AR']} +EXEEXT = #{CONFIG['EXEEXT']} + +RUBY_INSTALL_NAME = #{CONFIG['RUBY_INSTALL_NAME']} +RUBY_SO_NAME = #{CONFIG['RUBY_SO_NAME']} +arch = #{CONFIG['arch']} +sitearch = #{CONFIG['sitearch']} +ruby_version = #{RbConfig::CONFIG['ruby_version']} +ruby = #{$ruby} +RUBY = $(ruby#{sep}) +RM = #{config_string('RM', &possible_command) || '$(RUBY) -run -e rm -- -f'} +RM_RF = #{'$(RUBY) -run -e rm -- -rf'} +RMDIRS = #{config_string('RMDIRS', &possible_command) || '$(RUBY) -run -e rmdir -- -p'} +MAKEDIRS = #{config_string('MAKEDIRS', &possible_command) || '@$(RUBY) -run -e mkdir -- -p'} +INSTALL = #{config_string('INSTALL', &possible_command) || '@$(RUBY) -run -e install -- -vp'} +INSTALL_PROG = #{config_string('INSTALL_PROG') || '$(INSTALL) -m 0755'} +INSTALL_DATA = #{config_string('INSTALL_DATA') || '$(INSTALL) -m 0644'} +COPY = #{config_string('CP', &possible_command) || '@$(RUBY) -run -e cp -- -v'} + +#### End of system configuration section. #### + +preload = #{defined?($preload) && $preload ? 
$preload.join(' ') : ''} +} + if $nmake == ?b + mk.each do |x| + x.gsub!(/^(MAKEDIRS|INSTALL_(?:PROG|DATA))+\s*=.*\n/) do + "!ifndef " + $1 + "\n" + + $& + + "!endif\n" + end + end + end + mk +end +# :startdoc: + +def dummy_makefile(srcdir) + configuration(srcdir) << <>$(INSTALLED_LIST)\n" + end + end + else + mfile.puts "Makefile" + end + mfile.print("install-rb: pre-install-rb install-rb-default\n") + mfile.print("install-rb-default: pre-install-rb-default\n") + mfile.print("pre-install-rb: Makefile\n") + mfile.print("pre-install-rb-default: Makefile\n") + for sfx, i in [["-default", [["lib/**/*.rb", "$(RUBYLIBDIR)", "lib"]]], ["", $INSTALLFILES]] + files = install_files(mfile, i, nil, srcprefix) or next + for dir, *files in files + unless dirs.include?(dir) + dirs << dir + mfile.print "pre-install-rb#{sfx}: #{dir}\n" + end + for f in files + dest = "#{dir}/#{File.basename(f)}" + mfile.print("install-rb#{sfx}: #{dest}\n") + mfile.print("#{dest}: #{f}\n") + mfile.print("\t$(#{$extout ? 'COPY' : 'INSTALL_DATA'}) ") + mfile.print("#{fseprepl[f]} $(@D#{sep})\n") + if defined?($installed_list) and !$extout + mfile.print("\t@echo #{dest}>>$(INSTALLED_LIST)\n") + end + if $extout + mfile.print("clean-rb#{sfx}::\n") + mfile.print("\t@-$(RM) #{fseprepl[dest]}\n") + end + end + end + if $extout + dirs.uniq! + dirs.reverse! + unless dirs.empty? 
+ mfile.print("clean-rb#{sfx}::\n") + for dir in dirs + mfile.print("\t@-$(RMDIRS) #{fseprepl[dir]}\n") + end + end + end + end + dirs.unshift(sodir) if target and !dirs.include?(sodir) + dirs.each {|d| mfile.print "#{d}:\n\t$(MAKEDIRS) $@\n"} + + mfile.print <<-SITEINSTALL + +site-install: site-install-so site-install-rb +site-install-so: install-so +site-install-rb: install-rb + + SITEINSTALL + + return unless target + + mfile.puts SRC_EXT.collect {|ext| ".path.#{ext} = $(VPATH)"} if $nmake == ?b + mfile.print ".SUFFIXES: .#{SRC_EXT.join(' .')} .#{$OBJEXT}\n" + mfile.print "\n" + + CXX_EXT.each do |ext| + COMPILE_RULES.each do |rule| + mfile.printf(rule, ext, $OBJEXT) + mfile.printf("\n\t%s\n\n", COMPILE_CXX) + end + end + %w[c].each do |ext| + COMPILE_RULES.each do |rule| + mfile.printf(rule, ext, $OBJEXT) + mfile.printf("\n\t%s\n\n", COMPILE_C) + end + end + + mfile.print "$(RUBYARCHDIR)/" if $extout + mfile.print "$(DLLIB): " + mfile.print "$(DEFFILE) " if makedef + mfile.print "$(OBJS) Makefile\n" + mfile.print "\t@-$(RM) $(@#{sep})\n" + mfile.print "\t@-$(MAKEDIRS) $(@D)\n" if $extout + link_so = LINK_SO.gsub(/^/, "\t") + if srcs.any?(&%r"\.(?:#{CXX_EXT.join('|')})\z".method(:===)) + link_so = link_so.sub(/\bLDSHARED\b/, '\&XX') + end + mfile.print link_so, "\n\n" + unless $static.nil? 
+ mfile.print "$(STATIC_LIB): $(OBJS)\n\t@-$(RM) $(@#{sep})\n\t" + mfile.print "$(AR) #{config_string('ARFLAGS') || 'cru '}$@ $(OBJS)" + config_string('RANLIB') do |ranlib| + mfile.print "\n\t@-#{ranlib} $(DLLIB) 2> /dev/null || true" + end + end + mfile.print "\n\n" + if makedef + mfile.print "$(DEFFILE): #{origdef}\n" + mfile.print "\t$(RUBY) #{makedef} #{origdef} > $@\n\n" + end + + depend = File.join(srcdir, "depend") + if File.exist?(depend) + mfile.print("###\n", *depend_rules(File.read(depend))) + else + headers = %w[$(hdrdir)/ruby.h $(hdrdir)/ruby/defines.h] + if RULE_SUBST + headers.each {|h| h.sub!(/.*/, &RULE_SUBST.method(:%))} + end + headers << $config_h + headers << '$(RUBY_EXTCONF_H)' if $extconf_h + mfile.print "$(OBJS): ", headers.join(' '), "\n" + end + + $makefile_created = true +ensure + mfile.close if mfile +end + +# :stopdoc: + +def init_mkmf(config = CONFIG) + $makefile_created = false + $arg_config = [] + $enable_shared = config['ENABLE_SHARED'] == 'yes' + $defs = [] + $extconf_h = nil + $CFLAGS = with_config("cflags", arg_config("CFLAGS", config["CFLAGS"])).dup + $ARCH_FLAG = with_config("arch_flag", arg_config("ARCH_FLAG", config["ARCH_FLAG"])).dup + $CPPFLAGS = with_config("cppflags", arg_config("CPPFLAGS", config["CPPFLAGS"])).dup + $LDFLAGS = with_config("ldflags", arg_config("LDFLAGS", config["LDFLAGS"])).dup + $INCFLAGS = "-I$(arch_hdrdir)" + $INCFLAGS << " -I$(hdrdir)/ruby/backward" unless $extmk + $INCFLAGS << " -I$(hdrdir) -I$(srcdir)" + $DLDFLAGS = with_config("dldflags", arg_config("DLDFLAGS", config["DLDFLAGS"])).dup + $LIBEXT = config['LIBEXT'].dup + $OBJEXT = config["OBJEXT"].dup + $LIBS = "#{config['LIBS']} #{config['DLDLIBS']}" + $LIBRUBYARG = "" + $LIBRUBYARG_STATIC = config['LIBRUBYARG_STATIC'] + $LIBRUBYARG_SHARED = config['LIBRUBYARG_SHARED'] + $DEFLIBPATH = $extmk ? ["$(topdir)"] : CROSS_COMPILING ? 
[] : ["$(libdir)"] + $DEFLIBPATH.unshift(".") + $LIBPATH = [] + $INSTALLFILES = [] + $NONINSTALLFILES = [/~\z/, /\A#.*#\z/, /\A\.#/, /\.bak\z/i, /\.orig\z/, /\.rej\z/, /\.l[ao]\z/, /\.o\z/] + $VPATH = %w[$(srcdir) $(arch_hdrdir)/ruby $(hdrdir)/ruby] + + $objs = nil + $srcs = nil + $libs = "" + if $enable_shared or RbConfig.expand(config["LIBRUBY"].dup) != RbConfig.expand(config["LIBRUBY_A"].dup) + $LIBRUBYARG = config['LIBRUBYARG'] + end + + $LOCAL_LIBS = "" + + $cleanfiles = config_string('CLEANFILES') {|s| Shellwords.shellwords(s)} || [] + $cleanfiles << "mkmf.log" + $distcleanfiles = config_string('DISTCLEANFILES') {|s| Shellwords.shellwords(s)} || [] + $distcleandirs = config_string('DISTCLEANDIRS') {|s| Shellwords.shellwords(s)} || [] + + $extout ||= nil + $extout_prefix ||= nil + + $arg_config.clear + dir_config("opt") +end + +FailedMessage = <"} +end +COMMON_HEADERS = hdr.join("\n") +COMMON_LIBS = config_string('COMMON_LIBS', &split) || [] + +COMPILE_RULES = config_string('COMPILE_RULES', &split) || %w[.%s.%s:] +RULE_SUBST = config_string('RULE_SUBST') +COMPILE_C = config_string('COMPILE_C') || '$(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $<' +COMPILE_CXX = config_string('COMPILE_CXX') || '$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<' +TRY_LINK = config_string('TRY_LINK') || + "$(CC) #{OUTFLAG}conftest $(INCFLAGS) $(CPPFLAGS) " \ + "$(CFLAGS) $(src) $(LIBPATH) $(LDFLAGS) $(ARCH_FLAG) $(LOCAL_LIBS) $(LIBS)" +LINK_SO = config_string('LINK_SO') || + if CONFIG["DLEXT"] == $OBJEXT + "ld $(DLDFLAGS) -r -o $@ $(OBJS)\n" + else + "$(LDSHARED) #{OUTFLAG}$@ $(OBJS) " \ + "$(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)" + end +LIBPATHFLAG = config_string('LIBPATHFLAG') || ' -L"%s"' +RPATHFLAG = config_string('RPATHFLAG') || '' +LIBARG = config_string('LIBARG') || '-l%s' +MAIN_DOES_NOTHING = config_string('MAIN_DOES_NOTHING') || 'int main() {return 0;}' + +sep = config_string('BUILD_FILE_SEPARATOR') {|s| ":/=#{s}" if sep != "/"} || "" 
+CLEANINGS = " +clean-rb-default:: +clean-rb:: +clean-so:: +clean: clean-so clean-rb-default clean-rb +\t\t@-$(RM) $(CLEANLIBS#{sep}) $(CLEANOBJS#{sep}) $(CLEANFILES#{sep}) + +distclean-rb-default:: +distclean-rb:: +distclean-so:: +distclean: clean distclean-so distclean-rb-default distclean-rb +\t\t@-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log +\t\t@-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES#{sep}) +\t\t@-$(RMDIRS) $(DISTCLEANDIRS#{sep}) + +realclean: distclean +" + +if not $extmk and /\A(extconf|makefile).rb\z/ =~ File.basename($0) + END {mkmf_failed($0)} +end diff --git a/lib/monitor.rb b/lib/monitor.rb new file mode 100644 index 0000000..2f2bb16 --- /dev/null +++ b/lib/monitor.rb @@ -0,0 +1,265 @@ +=begin + += monitor.rb + +Copyright (C) 2001 Shugo Maeda + +This library is distributed under the terms of the Ruby license. +You can freely distribute/modify this library. + +== example + +This is a simple example. + + require 'monitor.rb' + + buf = [] + buf.extend(MonitorMixin) + empty_cond = buf.new_cond + + # consumer + Thread.start do + loop do + buf.synchronize do + empty_cond.wait_while { buf.empty? } + print buf.shift + end + end + end + + # producer + while line = ARGF.gets + buf.synchronize do + buf.push(line) + empty_cond.signal + end + end + +The consumer thread waits for the producer thread to push a line +to buf while buf.empty?, and the producer thread (main thread) +reads a line from ARGF and push it to buf, then call +empty_cond.signal. + +=end + +require 'thread' + +# +# Adds monitor functionality to an arbitrary object by mixing the module with +# +include+. For example: +# +# require 'monitor' +# +# buf = [] +# buf.extend(MonitorMixin) +# empty_cond = buf.new_cond +# +# # consumer +# Thread.start do +# loop do +# buf.synchronize do +# empty_cond.wait_while { buf.empty? 
#         }
#         print buf.shift
#       end
#     end
#   end
#
#   # producer
#   while line = ARGF.gets
#     buf.synchronize do
#       buf.push(line)
#       empty_cond.signal
#     end
#   end
#
# The consumer thread waits for the producer thread to push a line
# to buf while buf.empty?, and the producer thread (main thread)
# reads a line from ARGF and push it to buf, then call
# empty_cond.signal.
#
module MonitorMixin
  #
  # Condition variable tied to one monitor.  Instances are created through
  # MonitorMixin#new_cond; the example above uses wait_while and signal.
  #
  # FIXME: This isn't documented in Nutshell.
  #
  class ConditionVariable
    class Timeout < Exception; end

    # Gives up the monitor, blocks until signalled, then takes the monitor
    # back with the nesting depth it had before.  Returns true.  Raises
    # NotImplementedError when a +timeout+ is given and ThreadError when the
    # calling thread does not own the monitor.
    def wait(timeout = nil)
      raise NotImplementedError, "timeout is not implemented yet" if timeout
      @monitor.__send__(:mon_check_owner)
      depth = @monitor.__send__(:mon_exit_for_cond)
      begin
        @cond.wait(@monitor.instance_variable_get("@mon_mutex"))
      ensure
        # Ownership is restored even when the wait is interrupted.
        @monitor.__send__(:mon_enter_for_cond, depth)
      end
      true
    end

    # Waits for as long as the block evaluates to true.
    def wait_while
      wait while yield
    end

    # Waits until the block evaluates to true.
    def wait_until
      wait until yield
    end

    # Wakes one waiting thread.  The caller must own the monitor.
    def signal
      @monitor.__send__(:mon_check_owner)
      @cond.signal
    end

    # Wakes every waiting thread.  The caller must own the monitor.
    def broadcast
      @monitor.__send__(:mon_check_owner)
      @cond.broadcast
    end

    # Not supported by this implementation.
    def count_waiters
      raise NotImplementedError
    end

    private

    def initialize(monitor)
      @monitor = monitor
      @cond = ::ConditionVariable.new
    end
  end

  # Sets up the monitor state on objects that receive the module through
  # Object#extend.
  def self.extend_object(obj)
    super(obj)
    obj.__send__(:mon_initialize)
  end

  #
  # Attempts to enter exclusive section.  Returns +false+ if lock fails.
  #
  def mon_try_enter
    unless @mon_owner == Thread.current
      return false unless @mon_mutex.try_lock
      @mon_owner = Thread.current
    end
    @mon_count += 1
    true
  end
  # For backward compatibility
  alias try_mon_enter mon_try_enter

  #
  # Enters exclusive section.  A thread that already owns the monitor only
  # increases the nesting depth.
  #
  def mon_enter
    unless @mon_owner == Thread.current
      @mon_mutex.lock
      @mon_owner = Thread.current
    end
    @mon_count += 1
  end

  #
  # Leaves exclusive section.  The lock is released when the outermost
  # level is left.  Raises ThreadError when the calling thread is not the
  # owner.
  #
  def mon_exit
    mon_check_owner
    @mon_count -= 1
    return unless @mon_count == 0
    @mon_owner = nil
    @mon_mutex.unlock
  end

  #
  # Enters exclusive section and executes the block.  Leaves the exclusive
  # section automatically when the block exits.  See example under
  # +MonitorMixin+.
  #
  def mon_synchronize
    mon_enter
    begin
      yield
    ensure
      mon_exit
    end
  end
  alias synchronize mon_synchronize

  #
  # Creates a new condition variable bound to this monitor.
  #
  # FIXME: This isn't documented in Nutshell.
  #
  def new_cond
    ConditionVariable.new(self)
  end

  private

  # Lets classes that include the module get their monitor state set up as
  # part of normal object construction.
  def initialize(*args)
    super
    mon_initialize
  end

  # Resets the monitor to the unowned, unlocked state.
  def mon_initialize
    @mon_owner = nil
    @mon_count = 0
    @mon_mutex = Mutex.new
  end

  # Raises ThreadError unless the calling thread owns the monitor.
  def mon_check_owner
    unless @mon_owner == Thread.current
      raise ThreadError, "current thread not owner"
    end
  end

  # Restores ownership and nesting depth after a condition wait.
  def mon_enter_for_cond(count)
    @mon_owner = Thread.current
    @mon_count = count
  end

  # Clears ownership before a condition wait and returns the nesting depth
  # so that it can be restored afterwards.
  def mon_exit_for_cond
    depth = @mon_count
    @mon_owner = nil
    @mon_count = 0
    depth
  end
end

class Monitor
  include MonitorMixin
  alias try_enter try_mon_enter
  alias enter mon_enter
  alias exit mon_exit
end


# Documentation comments:
#  - All documentation comes from Nutshell.
#  - MonitorMixin.new_cond appears in the example, but is not documented in
#    Nutshell.
#  - All the internals (internal modules Accessible and Initializable, class
#    ConditionVariable) appear in RDoc.  It might be good to hide them, by
#    making them private, or marking them :nodoc:, etc.
#  - The entire example from the RD section at the top is replicated in the RDoc
#    comment for MonitorMixin.  Does the RD section need to remain?
#  - RDoc doesn't recognise aliases, so we have mon_synchronize documented, but
#    not synchronize.
+# - mon_owner is in Nutshell, but appears as an accessor in a separate module +# here, so is hard/impossible to RDoc. Some other useful accessors +# (mon_count and some queue stuff) are also in this module, and don't appear +# directly in the RDoc output. +# - in short, it may be worth changing the code layout in this file to make the +# documentation easier + +# Local variables: +# mode: Ruby +# tab-width: 8 +# End: diff --git a/lib/mutex_m.rb b/lib/mutex_m.rb new file mode 100644 index 0000000..f46f866 --- /dev/null +++ b/lib/mutex_m.rb @@ -0,0 +1,91 @@ +# +# mutex_m.rb - +# $Release Version: 3.0$ +# $Revision: 1.7 $ +# Original from mutex.rb +# by Keiju ISHITSUKA(keiju@ishitsuka.com) +# modified by matz +# patched by akira yamada +# +# -- +# Usage: +# require "mutex_m.rb" +# obj = Object.new +# obj.extend Mutex_m +# ... +# extended object can be handled like Mutex +# or +# class Foo +# include Mutex_m +# ... +# end +# obj = Foo.new +# this obj can be handled like Mutex +# + +require 'thread' + +module Mutex_m + def Mutex_m.define_aliases(cl) + cl.module_eval %q{ + alias locked? mu_locked? + alias lock mu_lock + alias unlock mu_unlock + alias try_lock mu_try_lock + alias synchronize mu_synchronize + } + end + + def Mutex_m.append_features(cl) + super + define_aliases(cl) unless cl.instance_of?(Module) + end + + def Mutex_m.extend_object(obj) + super + obj.mu_extended + end + + def mu_extended + unless (defined? locked? and + defined? lock and + defined? unlock and + defined? try_lock and + defined? synchronize) + Mutex_m.define_aliases(class<. +# +# Documentation by Gavin Sinclair, sourced from "Programming Ruby" (Hunt/Thomas) +# and "Ruby In a Nutshell" (Matsumoto), used with permission. +# +# This library is distributed under the terms of the Ruby license. +# You can freely distribute/modify this library. +# +# It is included in the Ruby standard library. +# +# See the Net::FTP class for an overview. 
+# + +require "socket" +require "monitor" + +module Net + + # :stopdoc: + class FTPError < StandardError; end + class FTPReplyError < FTPError; end + class FTPTempError < FTPError; end + class FTPPermError < FTPError; end + class FTPProtoError < FTPError; end + # :startdoc: + + # + # This class implements the File Transfer Protocol. If you have used a + # command-line FTP program, and are familiar with the commands, you will be + # able to use this class easily. Some extra features are included to take + # advantage of Ruby's style and strengths. + # + # == Example + # + # require 'net/ftp' + # + # === Example 1 + # + # ftp = Net::FTP.new('ftp.netlab.co.jp') + # ftp.login + # files = ftp.chdir('pub/lang/ruby/contrib') + # files = ftp.list('n*') + # ftp.getbinaryfile('nif.rb-0.91.gz', 'nif.gz', 1024) + # ftp.close + # + # === Example 2 + # + # Net::FTP.open('ftp.netlab.co.jp') do |ftp| + # ftp.login + # files = ftp.chdir('pub/lang/ruby/contrib') + # files = ftp.list('n*') + # ftp.getbinaryfile('nif.rb-0.91.gz', 'nif.gz', 1024) + # end + # + # == Major Methods + # + # The following are the methods most likely to be useful to users: + # - FTP.open + # - #getbinaryfile + # - #gettextfile + # - #putbinaryfile + # - #puttextfile + # - #chdir + # - #nlst + # - #size + # - #rename + # - #delete + # + class FTP + include MonitorMixin + + # :stopdoc: + FTP_PORT = 21 + CRLF = "\r\n" + DEFAULT_BLOCKSIZE = 4096 + # :startdoc: + + # When +true+, transfers are performed in binary mode. Default: +true+. + attr_reader :binary + + # When +true+, the connection is in passive mode. Default: +false+. + attr_accessor :passive + + # When +true+, all traffic to and from the server is written + # to +$stdout+. Default: +false+. + attr_accessor :debug_mode + + # Sets or retrieves the +resume+ status, which decides whether incomplete + # transfers are resumed or restarted. Default: +false+. + attr_accessor :resume + + # The server's welcome message. 
+ attr_reader :welcome + + # The server's last response code. + attr_reader :last_response_code + alias lastresp last_response_code + + # The server's last response. + attr_reader :last_response + + # + # A synonym for FTP.new, but with a mandatory host parameter. + # + # If a block is given, it is passed the +FTP+ object, which will be closed + # when the block finishes, or when an exception is raised. + # + def FTP.open(host, user = nil, passwd = nil, acct = nil) + if block_given? + ftp = new(host, user, passwd, acct) + begin + yield ftp + ensure + ftp.close + end + else + new(host, user, passwd, acct) + end + end + + # + # Creates and returns a new +FTP+ object. If a +host+ is given, a connection + # is made. Additionally, if the +user+ is given, the given user name, + # password, and (optionally) account are used to log in. See #login. + # + def initialize(host = nil, user = nil, passwd = nil, acct = nil) + super() + @binary = false + @passive = false + @debug_mode = false + @resume = false + if host + connect(host) + if user + login(user, passwd, acct) + end + end + end + + def binary=(newmode) + if newmode != @binary + @binary = newmode + @binary ? voidcmd("TYPE I") : voidcmd("TYPE A") + end + end + + def with_binary(newmode) + oldmode = binary + self.binary = newmode + begin + yield + ensure + self.binary = oldmode + end + end + private :with_binary + + # Obsolete + def return_code + $stderr.puts("warning: Net::FTP#return_code is obsolete and do nothing") + return "\n" + end + + # Obsolete + def return_code=(s) + $stderr.puts("warning: Net::FTP#return_code= is obsolete and do nothing") + end + + def open_socket(host, port) + if defined? SOCKSSocket and ENV["SOCKS_SERVER"] + @passive = true + return SOCKSSocket.open(host, port) + else + return TCPSocket.open(host, port) + end + end + private :open_socket + + # + # Establishes an FTP connection to host, optionally overriding the default + # port. 
If the environment variable +SOCKS_SERVER+ is set, sets up the + # connection through a SOCKS proxy. Raises an exception (typically + # Errno::ECONNREFUSED) if the connection cannot be established. + # + def connect(host, port = FTP_PORT) + if @debug_mode + print "connect: ", host, ", ", port, "\n" + end + synchronize do + @sock = open_socket(host, port) + voidresp + end + end + + # + # WRITEME or make private + # + def set_socket(sock, get_greeting = true) + synchronize do + @sock = sock + if get_greeting + voidresp + end + end + end + + def sanitize(s) + if s =~ /^PASS /i + return s[0, 5] + "*" * (s.length - 5) + else + return s + end + end + private :sanitize + + def putline(line) + if @debug_mode + print "put: ", sanitize(line), "\n" + end + line = line + CRLF + @sock.write(line) + end + private :putline + + def getline + line = @sock.readline # if get EOF, raise EOFError + line.sub!(/(\r\n|\n|\r)\z/n, "") + if @debug_mode + print "get: ", sanitize(line), "\n" + end + return line + end + private :getline + + def getmultiline + line = getline + buff = line + if line[3] == ?- + code = line[0, 3] + begin + line = getline + buff << "\n" << line + end until line[0, 3] == code and line[3] != ?- + end + return buff << "\n" + end + private :getmultiline + + def getresp + @last_response = getmultiline + @last_response_code = @last_response[0, 3] + case @last_response_code + when /\A[123]/ + return @last_response + when /\A4/ + raise FTPTempError, @last_response + when /\A5/ + raise FTPPermError, @last_response + else + raise FTPProtoError, @last_response + end + end + private :getresp + + def voidresp + resp = getresp + if resp[0] != ?2 + raise FTPReplyError, resp + end + end + private :voidresp + + # + # Sends a command and returns the response. + # + def sendcmd(cmd) + synchronize do + putline(cmd) + return getresp + end + end + + # + # Sends a command and expect a response beginning with '2'. 
+ # + def voidcmd(cmd) + synchronize do + putline(cmd) + voidresp + end + end + + def sendport(host, port) + af = (@sock.peeraddr)[0] + if af == "AF_INET" + cmd = "PORT " + (host.split(".") + port.divmod(256)).join(",") + elsif af == "AF_INET6" + cmd = sprintf("EPRT |2|%s|%d|", host, port) + else + raise FTPProtoError, host + end + voidcmd(cmd) + end + private :sendport + + def makeport + sock = TCPServer.open(@sock.addr[3], 0) + port = sock.addr[1] + host = sock.addr[3] + resp = sendport(host, port) + return sock + end + private :makeport + + def makepasv + if @sock.peeraddr[0] == "AF_INET" + host, port = parse227(sendcmd("PASV")) + else + host, port = parse229(sendcmd("EPSV")) + # host, port = parse228(sendcmd("LPSV")) + end + return host, port + end + private :makepasv + + def transfercmd(cmd, rest_offset = nil) + if @passive + host, port = makepasv + conn = open_socket(host, port) + if @resume and rest_offset + resp = sendcmd("REST " + rest_offset.to_s) + if resp[0] != ?3 + raise FTPReplyError, resp + end + end + resp = sendcmd(cmd) + # skip 2XX for some ftp servers + resp = getresp if resp[0] == ?2 + if resp[0] != ?1 + raise FTPReplyError, resp + end + else + sock = makeport + if @resume and rest_offset + resp = sendcmd("REST " + rest_offset.to_s) + if resp[0] != ?3 + raise FTPReplyError, resp + end + end + resp = sendcmd(cmd) + # skip 2XX for some ftp servers + resp = getresp if resp[0] == ?2 + if resp[0] != ?1 + raise FTPReplyError, resp + end + conn = sock.accept + sock.close + end + return conn + end + private :transfercmd + + def getaddress + thishost = Socket.gethostname + if not thishost.index(".") + thishost = Socket.gethostbyname(thishost)[0] + end + if ENV.has_key?("LOGNAME") + realuser = ENV["LOGNAME"] + elsif ENV.has_key?("USER") + realuser = ENV["USER"] + else + realuser = "anonymous" + end + return realuser + "@" + thishost + end + private :getaddress + + # + # Logs in to the remote host. The session must have been previously + # connected. 
If +user+ is the string "anonymous" and the +password+ is + # +nil+, a password of user@host is synthesized. If the +acct+ + # parameter is not +nil+, an FTP ACCT command is sent following the + # successful login. Raises an exception on error (typically + # Net::FTPPermError). + # + def login(user = "anonymous", passwd = nil, acct = nil) + if user == "anonymous" and passwd == nil + passwd = getaddress + end + + resp = "" + synchronize do + resp = sendcmd('USER ' + user) + if resp[0] == ?3 + raise FTPReplyError, resp if passwd.nil? + resp = sendcmd('PASS ' + passwd) + end + if resp[0] == ?3 + raise FTPReplyError, resp if acct.nil? + resp = sendcmd('ACCT ' + acct) + end + end + if resp[0] != ?2 + raise FTPReplyError, resp + end + @welcome = resp + self.binary = true + end + + # + # Puts the connection into binary (image) mode, issues the given command, + # and fetches the data returned, passing it to the associated block in + # chunks of +blocksize+ characters. Note that +cmd+ is a server command + # (such as "RETR myfile"). + # + def retrbinary(cmd, blocksize, rest_offset = nil) # :yield: data + synchronize do + with_binary(true) do + conn = transfercmd(cmd, rest_offset) + loop do + data = conn.read(blocksize) + break if data == nil + yield(data) + end + conn.close + voidresp + end + end + end + + # + # Puts the connection into ASCII (text) mode, issues the given command, and + # passes the resulting data, one line at a time, to the associated block. If + # no block is given, prints the lines. Note that +cmd+ is a server command + # (such as "RETR myfile"). + # + def retrlines(cmd) # :yield: line + synchronize do + with_binary(false) do + conn = transfercmd(cmd) + loop do + line = conn.gets + break if line == nil + if line[-2, 2] == CRLF + line = line[0 .. -3] + elsif line[-1] == ?\n + line = line[0 .. 
-2] + end + yield(line) + end + conn.close + voidresp + end + end + end + + # + # Puts the connection into binary (image) mode, issues the given server-side + # command (such as "STOR myfile"), and sends the contents of the file named + # +file+ to the server. If the optional block is given, it also passes it + # the data, in chunks of +blocksize+ characters. + # + def storbinary(cmd, file, blocksize, rest_offset = nil, &block) # :yield: data + if rest_offset + file.seek(rest_offset, IO::SEEK_SET) + end + synchronize do + with_binary(true) do + conn = transfercmd(cmd, rest_offset) + loop do + buf = file.read(blocksize) + break if buf == nil + conn.write(buf) + yield(buf) if block + end + conn.close + voidresp + end + end + rescue Errno::EPIPE + # EPIPE, in this case, means that the data connection was unexpectedly + # terminated. Rather than just raising EPIPE to the caller, check the + # response on the control connection. If getresp doesn't raise a more + # appropriate exception, re-raise the original exception. + getresp + raise + end + + # + # Puts the connection into ASCII (text) mode, issues the given server-side + # command (such as "STOR myfile"), and sends the contents of the file + # named +file+ to the server, one line at a time. If the optional block is + # given, it also passes it the lines. + # + def storlines(cmd, file, &block) # :yield: line + synchronize do + with_binary(false) do + conn = transfercmd(cmd) + loop do + buf = file.gets + break if buf == nil + if buf[-2, 2] != CRLF + buf = buf.chomp + CRLF + end + conn.write(buf) + yield(buf) if block + end + conn.close + voidresp + end + end + rescue Errno::EPIPE + # EPIPE, in this case, means that the data connection was unexpectedly + # terminated. Rather than just raising EPIPE to the caller, check the + # response on the control connection. If getresp doesn't raise a more + # appropriate exception, re-raise the original exception. 
+ getresp + raise + end + + # + # Retrieves +remotefile+ in binary mode, storing the result in +localfile+. + # If +localfile+ is nil, returns retrieved data. + # If a block is supplied, it is passed the retrieved data in +blocksize+ + # chunks. + # + def getbinaryfile(remotefile, localfile = File.basename(remotefile), + blocksize = DEFAULT_BLOCKSIZE) # :yield: data + result = nil + if localfile + if @resume + rest_offset = File.size?(localfile) + f = open(localfile, "a") + else + rest_offset = nil + f = open(localfile, "w") + end + elsif !block_given? + result = "" + end + begin + f.binmode if localfile + retrbinary("RETR " + remotefile, blocksize, rest_offset) do |data| + f.write(data) if localfile + yield(data) if block_given? + result.concat(data) if result + end + return result + ensure + f.close if localfile + end + end + + # + # Retrieves +remotefile+ in ASCII (text) mode, storing the result in + # +localfile+. + # If +localfile+ is nil, returns retrieved data. + # If a block is supplied, it is passed the retrieved data one + # line at a time. + # + def gettextfile(remotefile, localfile = File.basename(remotefile)) # :yield: line + result = nil + if localfile + f = open(localfile, "w") + elsif !block_given? + result = "" + end + begin + retrlines("RETR " + remotefile) do |line| + f.puts(line) if localfile + yield(line) if block_given? + result.concat(line + "\n") if result + end + return result + ensure + f.close if localfile + end + end + + # + # Retrieves +remotefile+ in whatever mode the session is set (text or + # binary). See #gettextfile and #getbinaryfile. + # + def get(remotefile, localfile = File.basename(remotefile), + blocksize = DEFAULT_BLOCKSIZE, &block) # :yield: data + if @binary + getbinaryfile(remotefile, localfile, blocksize, &block) + else + gettextfile(remotefile, localfile, &block) + end + end + + # + # Transfers +localfile+ to the server in binary mode, storing the result in + # +remotefile+. 
If a block is supplied, calls it, passing in the transmitted + # data in +blocksize+ chunks. + # + def putbinaryfile(localfile, remotefile = File.basename(localfile), + blocksize = DEFAULT_BLOCKSIZE, &block) # :yield: data + if @resume + begin + rest_offset = size(remotefile) + rescue Net::FTPPermError + rest_offset = nil + end + else + rest_offset = nil + end + f = open(localfile) + begin + f.binmode + storbinary("STOR " + remotefile, f, blocksize, rest_offset, &block) + ensure + f.close + end + end + + # + # Transfers +localfile+ to the server in ASCII (text) mode, storing the result + # in +remotefile+. If callback or an associated block is supplied, calls it, + # passing in the transmitted data one line at a time. + # + def puttextfile(localfile, remotefile = File.basename(localfile), &block) # :yield: line + f = open(localfile) + begin + storlines("STOR " + remotefile, f, &block) + ensure + f.close + end + end + + # + # Transfers +localfile+ to the server in whatever mode the session is set + # (text or binary). See #puttextfile and #putbinaryfile. + # + def put(localfile, remotefile = File.basename(localfile), + blocksize = DEFAULT_BLOCKSIZE, &block) + if @binary + putbinaryfile(localfile, remotefile, blocksize, &block) + else + puttextfile(localfile, remotefile, &block) + end + end + + # + # Sends the ACCT command. TODO: more info. + # + def acct(account) + cmd = "ACCT " + account + voidcmd(cmd) + end + + # + # Returns an array of filenames in the remote directory. + # + def nlst(dir = nil) + cmd = "NLST" + if dir + cmd = cmd + " " + dir + end + files = [] + retrlines(cmd) do |line| + files.push(line) + end + return files + end + + # + # Returns an array of file information in the directory (the output is like + # `ls -l`). If a block is given, it iterates through the listing. 
+ # + def list(*args, &block) # :yield: line + cmd = "LIST" + args.each do |arg| + cmd = cmd + " " + arg + end + if block + retrlines(cmd, &block) + else + lines = [] + retrlines(cmd) do |line| + lines << line + end + return lines + end + end + alias ls list + alias dir list + + # + # Renames a file on the server. + # + def rename(fromname, toname) + resp = sendcmd("RNFR " + fromname) + if resp[0] != ?3 + raise FTPReplyError, resp + end + voidcmd("RNTO " + toname) + end + + # + # Deletes a file on the server. + # + def delete(filename) + resp = sendcmd("DELE " + filename) + if resp[0, 3] == "250" + return + elsif resp[0] == ?5 + raise FTPPermError, resp + else + raise FTPReplyError, resp + end + end + + # + # Changes the (remote) directory. + # + def chdir(dirname) + if dirname == ".." + begin + voidcmd("CDUP") + return + rescue FTPPermError => e + if e.message[0, 3] != "500" + raise e + end + end + end + cmd = "CWD " + dirname + voidcmd(cmd) + end + + # + # Returns the size of the given (remote) filename. + # + def size(filename) + with_binary(true) do + resp = sendcmd("SIZE " + filename) + if resp[0, 3] != "213" + raise FTPReplyError, resp + end + return resp[3..-1].strip.to_i + end + end + + MDTM_REGEXP = /^(\d\d\d\d)(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)/ # :nodoc: + + # + # Returns the last modification time of the (remote) file. If +local+ is + # +true+, it is returned as a local time, otherwise it's a UTC time. + # + def mtime(filename, local = false) + str = mdtm(filename) + ary = str.scan(MDTM_REGEXP)[0].collect {|i| i.to_i} + return local ? Time.local(*ary) : Time.gm(*ary) + end + + # + # Creates a remote directory. + # + def mkdir(dirname) + resp = sendcmd("MKD " + dirname) + return parse257(resp) + end + + # + # Removes a remote directory. + # + def rmdir(dirname) + voidcmd("RMD " + dirname) + end + + # + # Returns the current remote directory. 
+ # + def pwd + resp = sendcmd("PWD") + return parse257(resp) + end + alias getdir pwd + + # + # Returns system information. + # + def system + resp = sendcmd("SYST") + if resp[0, 3] != "215" + raise FTPReplyError, resp + end + return resp[4 .. -1] + end + + # + # Aborts the previous command (ABOR command). + # + def abort + line = "ABOR" + CRLF + print "put: ABOR\n" if @debug_mode + @sock.send(line, Socket::MSG_OOB) + resp = getmultiline + unless ["426", "226", "225"].include?(resp[0, 3]) + raise FTPProtoError, resp + end + return resp + end + + # + # Returns the status (STAT command). + # + def status + line = "STAT" + CRLF + print "put: STAT\n" if @debug_mode + @sock.send(line, Socket::MSG_OOB) + return getresp + end + + # + # Issues the MDTM command. TODO: more info. + # + def mdtm(filename) + resp = sendcmd("MDTM " + filename) + if resp[0, 3] == "213" + return resp[3 .. -1].strip + end + end + + # + # Issues the HELP command. + # + def help(arg = nil) + cmd = "HELP" + if arg + cmd = cmd + " " + arg + end + sendcmd(cmd) + end + + # + # Exits the FTP session. + # + def quit + voidcmd("QUIT") + end + + # + # Issues a NOOP command. + # + def noop + voidcmd("NOOP") + end + + # + # Issues a SITE command. + # + def site(arg) + cmd = "SITE " + arg + voidcmd(cmd) + end + + # + # Closes the connection. Further operations are impossible until you open + # a new connection with #connect. + # + def close + @sock.close if @sock and not @sock.closed? + end + + # + # Returns +true+ iff the connection is closed. + # + def closed? + @sock == nil or @sock.closed? + end + + def parse227(resp) + if resp[0, 3] != "227" + raise FTPReplyError, resp + end + left = resp.index("(") + right = resp.index(")") + if left == nil or right == nil + raise FTPProtoError, resp + end + numbers = resp[left + 1 .. 
right - 1].split(",") + if numbers.length != 6 + raise FTPProtoError, resp + end + host = numbers[0, 4].join(".") + port = (numbers[4].to_i << 8) + numbers[5].to_i + return host, port + end + private :parse227 + + def parse228(resp) + if resp[0, 3] != "228" + raise FTPReplyError, resp + end + left = resp.index("(") + right = resp.index(")") + if left == nil or right == nil + raise FTPProtoError, resp + end + numbers = resp[left + 1 .. right - 1].split(",") + if numbers[0] == "4" + if numbers.length != 9 || numbers[1] != "4" || numbers[2 + 4] != "2" + raise FTPProtoError, resp + end + host = numbers[2, 4].join(".") + port = (numbers[7].to_i << 8) + numbers[8].to_i + elsif numbers[0] == "6" + if numbers.length != 21 || numbers[1] != "16" || numbers[2 + 16] != "2" + raise FTPProtoError, resp + end + v6 = ["", "", "", "", "", "", "", ""] + for i in 0 .. 7 + v6[i] = sprintf("%02x%02x", numbers[(i * 2) + 2].to_i, + numbers[(i * 2) + 3].to_i) + end + host = v6[0, 8].join(":") + port = (numbers[19].to_i << 8) + numbers[20].to_i + end + return host, port + end + private :parse228 + + def parse229(resp) + if resp[0, 3] != "229" + raise FTPReplyError, resp + end + left = resp.index("(") + right = resp.index(")") + if left == nil or right == nil + raise FTPProtoError, resp + end + numbers = resp[left + 1 .. 
right - 1].split(resp[left + 1, 1]) + if numbers.length != 4 + raise FTPProtoError, resp + end + port = numbers[3].to_i + host = (@sock.peeraddr())[3] + return host, port + end + private :parse229 + + def parse257(resp) + if resp[0, 3] != "257" + raise FTPReplyError, resp + end + if resp[3, 2] != ' "' + return "" + end + dirname = "" + i = 5 + n = resp.length + while i < n + c = resp[i, 1] + i = i + 1 + if c == '"' + if i > n or resp[i, 1] != '"' + break + end + i = i + 1 + end + dirname = dirname + c + end + return dirname + end + private :parse257 + end + +end + + +# Documentation comments: +# - sourced from pickaxe and nutshell, with improvements (hopefully) +# - three methods should be private (search WRITEME) +# - two methods need more information (search TODO) diff --git a/lib/net/http.rb b/lib/net/http.rb new file mode 100644 index 0000000..7e35290 --- /dev/null +++ b/lib/net/http.rb @@ -0,0 +1,2399 @@ +# +# = net/http.rb +# +# Copyright (c) 1999-2007 Yukihiro Matsumoto +# Copyright (c) 1999-2007 Minero Aoki +# Copyright (c) 2001 GOTOU Yuuzou +# +# Written and maintained by Minero Aoki . +# HTTPS support added by GOTOU Yuuzou . +# +# This file is derived from "http-access.rb". +# +# Documented by Minero Aoki; converted to RDoc by William Webber. +# +# This program is free software. You can re-distribute and/or +# modify this program under the same terms of ruby itself --- +# Ruby Distribution License or GNU General Public License. +# +# See Net::HTTP for an overview and examples. +# +# NOTE: You can find Japanese version of this document here: +# http://www.ruby-lang.org/ja/man/html/net_http.html +# +#-- +# $Id: http.rb 25620 2009-11-01 15:48:31Z yugui $ +#++ + +require 'net/protocol' +require 'uri' + +module Net #:nodoc: + + # :stopdoc: + class HTTPBadResponse < StandardError; end + class HTTPHeaderSyntaxError < StandardError; end + # :startdoc: + + # == What Is This Library? 
+ # + # This library provides functions that let your program access WWW + # documents via HTTP, the Hyper Text Transfer Protocol version 1.1. + # For details of HTTP, refer to [RFC2616] + # (http://www.ietf.org/rfc/rfc2616.txt). + # + # == Examples + # + # === Getting Document From WWW Server + # + # Example #1: Simple GET+print + # + # require 'net/http' + # Net::HTTP.get_print 'www.example.com', '/index.html' + # + # Example #2: Simple GET+print by URL + # + # require 'net/http' + # require 'uri' + # Net::HTTP.get_print URI.parse('http://www.example.com/index.html') + # + # Example #3: More generic GET+print + # + # require 'net/http' + # require 'uri' + # + # url = URI.parse('http://www.example.com/index.html') + # res = Net::HTTP.start(url.host, url.port) {|http| + # http.get('/index.html') + # } + # puts res.body + # + # Example #4: More generic GET+print using a request object + # + # require 'net/http' + # + # url = URI.parse('http://www.example.com/index.html') + # req = Net::HTTP::Get.new(url.path) + # res = Net::HTTP.start(url.host, url.port) {|http| + # http.request(req) + # } + # puts res.body + # + # === Posting Form Data + # + # require 'net/http' + # require 'uri' + # + # #1: Simple POST + # res = Net::HTTP.post_form(URI.parse('http://www.example.com/search.cgi'), + # {'q' => 'ruby', 'max' => '50'}) + # puts res.body + # + # #2: POST with basic authentication + # res = Net::HTTP.post_form(URI.parse('http://jack:pass@www.example.com/todo.cgi'), + # {'from' => '2005-01-01', + # 'to' => '2005-03-31'}) + # puts res.body + # + # #3: Detailed control + # url = URI.parse('http://www.example.com/todo.cgi') + # req = Net::HTTP::Post.new(url.path) + # req.basic_auth 'jack', 'pass' + # req.set_form_data({'from' => '2005-01-01', 'to' => '2005-03-31'}, ';') + # res = Net::HTTP.new(url.host, url.port).start {|http| http.request(req) } + # case res + # when Net::HTTPSuccess, Net::HTTPRedirection + # # OK + # else + # res.error!
+ # end + # + # #4: Multiple values + # res = Net::HTTP.post_form(URI.parse('http://www.example.com/search.cgi'), + # {'q' => ['ruby', 'perl'], 'max' => '50'}) + # puts res.body + # + # === Accessing via Proxy + # + # Net::HTTP.Proxy creates an HTTP proxy class. It has the same + # methods as Net::HTTP, but its instances always connect to the + # proxy instead of the given host. + # + # require 'net/http' + # + # proxy_addr = 'your.proxy.host' + # proxy_port = 8080 + # : + # Net::HTTP::Proxy(proxy_addr, proxy_port).start('www.example.com') {|http| + # # always connect to your.proxy.host:8080 + # : + # } + # + # Since Net::HTTP.Proxy returns Net::HTTP itself when proxy_addr is nil, + # there's no need to change the code whether or not a proxy is used. + # + # There are two additional parameters in Net::HTTP.Proxy which allow you to + # specify the proxy user name and password: + # + # Net::HTTP::Proxy(proxy_addr, proxy_port, proxy_user = nil, proxy_pass = nil) + # + # You may use them to work with authorization-enabled proxies: + # + # require 'net/http' + # require 'uri' + # + # proxy_host = 'your.proxy.host' + # proxy_port = 8080 + # uri = URI.parse(ENV['http_proxy']) + # proxy_user, proxy_pass = uri.userinfo.split(/:/) if uri.userinfo + # Net::HTTP::Proxy(proxy_host, proxy_port, + # proxy_user, proxy_pass).start('www.example.com') {|http| + # # always connect to your.proxy.host:8080 using specified username and password + # : + # } + # + # Note that net/http never relies on the HTTP_PROXY environment variable. + # If you want to use a proxy, set it explicitly. + # + # === Following Redirection + # + # require 'net/http' + # require 'uri' + # + # def fetch(uri_str, limit = 10) + # # You should choose a better exception. + # raise ArgumentError, 'HTTP redirect too deep' if limit == 0 + # + # response = Net::HTTP.get_response(URI.parse(uri_str)) + # case response + # when Net::HTTPSuccess then response + # when Net::HTTPRedirection then fetch(response['location'], limit - 1) + # else + # response.error!
+ # end + # end + # + # print fetch('http://www.ruby-lang.org') + # + # Net::HTTPSuccess and Net::HTTPRedirection are HTTPResponse classes. + # Every HTTPResponse object belongs to its own response class, which + # indicates the HTTP result status. For details of the response classes, + # see the section "HTTP Response Classes". + # + # === Basic Authentication + # + # require 'net/http' + # + # Net::HTTP.start('www.example.com') {|http| + # req = Net::HTTP::Get.new('/secret-page.html') + # req.basic_auth 'account', 'password' + # response = http.request(req) + # print response.body + # } + # + # === HTTP Request Classes + # + # Here is the HTTP request class hierarchy. + # + # Net::HTTPRequest + # Net::HTTP::Get + # Net::HTTP::Head + # Net::HTTP::Post + # Net::HTTP::Put + # Net::HTTP::Proppatch + # Net::HTTP::Lock + # Net::HTTP::Unlock + # Net::HTTP::Options + # Net::HTTP::Propfind + # Net::HTTP::Delete + # Net::HTTP::Move + # Net::HTTP::Copy + # Net::HTTP::Mkcol + # Net::HTTP::Trace + # + # === HTTP Response Classes + # + # Here is the HTTP response class hierarchy. + # All classes are defined in the Net module.
+ # + # HTTPResponse + # HTTPUnknownResponse + # HTTPInformation # 1xx + # HTTPContinue # 100 + # HTTPSwitchProtocol # 101 + # HTTPSuccess # 2xx + # HTTPOK # 200 + # HTTPCreated # 201 + # HTTPAccepted # 202 + # HTTPNonAuthoritativeInformation # 203 + # HTTPNoContent # 204 + # HTTPResetContent # 205 + # HTTPPartialContent # 206 + # HTTPRedirection # 3xx + # HTTPMultipleChoice # 300 + # HTTPMovedPermanently # 301 + # HTTPFound # 302 + # HTTPSeeOther # 303 + # HTTPNotModified # 304 + # HTTPUseProxy # 305 + # HTTPTemporaryRedirect # 307 + # HTTPClientError # 4xx + # HTTPBadRequest # 400 + # HTTPUnauthorized # 401 + # HTTPPaymentRequired # 402 + # HTTPForbidden # 403 + # HTTPNotFound # 404 + # HTTPMethodNotAllowed # 405 + # HTTPNotAcceptable # 406 + # HTTPProxyAuthenticationRequired # 407 + # HTTPRequestTimeOut # 408 + # HTTPConflict # 409 + # HTTPGone # 410 + # HTTPLengthRequired # 411 + # HTTPPreconditionFailed # 412 + # HTTPRequestEntityTooLarge # 413 + # HTTPRequestURITooLong # 414 + # HTTPUnsupportedMediaType # 415 + # HTTPRequestedRangeNotSatisfiable # 416 + # HTTPExpectationFailed # 417 + # HTTPServerError # 5xx + # HTTPInternalServerError # 500 + # HTTPNotImplemented # 501 + # HTTPBadGateway # 502 + # HTTPServiceUnavailable # 503 + # HTTPGatewayTimeOut # 504 + # HTTPVersionNotSupported # 505 + # + # == Switching Net::HTTP versions + # + # You can use net/http.rb 1.1 features (bundled with Ruby 1.6) + # by calling Net::HTTP.version_1_1. Calling Net::HTTP.version_1_2 + # allows you to use 1.2 features again. + # + # # example + # Net::HTTP.start {|http1| ...(http1 has 1.2 features)... } + # + # Net::HTTP.version_1_1 + # Net::HTTP.start {|http2| ...(http2 has 1.1 features)... } + # + # Net::HTTP.version_1_2 + # Net::HTTP.start {|http3| ...(http3 has 1.2 features)... } + # + # This function is NOT thread-safe.
+ # + class HTTP < Protocol + + # :stopdoc: + Revision = %q$Revision: 25620 $.split[1] + HTTPVersion = '1.1' + @newimpl = true + begin + require 'zlib' + require 'stringio' #for our purposes (unpacking gzip) lump these together + HAVE_ZLIB=true + rescue LoadError + HAVE_ZLIB=false + end + # :startdoc: + + # Turns on net/http 1.2 (ruby 1.8) features. + # Defaults to ON in ruby 1.8. + # + # I strongly recommend always calling this method. + # + # require 'net/http' + # Net::HTTP.version_1_2 + # + def HTTP.version_1_2 + @newimpl = true + end + + # Turns on net/http 1.1 (ruby 1.6) features. + # Defaults to OFF in ruby 1.8. + def HTTP.version_1_1 + @newimpl = false + end + + # true if net/http is in version 1.2 mode. + # Defaults to true. + def HTTP.version_1_2? + @newimpl + end + + # true if net/http is in version 1.1 compatible mode. + # Defaults to false. + def HTTP.version_1_1? + not @newimpl + end + + class << HTTP + alias is_version_1_1? version_1_1? #:nodoc: + alias is_version_1_2? version_1_2? #:nodoc: + end + + # + # short cut methods + # + + # + # Get body from target and output it to +$stdout+. The + # target can either be specified as (+uri+), or as + # (+host+, +path+, +port+ = 80); so: + # + # Net::HTTP.get_print URI.parse('http://www.example.com/index.html') + # + # or: + # + # Net::HTTP.get_print 'www.example.com', '/index.html' + # + def HTTP.get_print(uri_or_host, path = nil, port = nil) + get_response(uri_or_host, path, port) {|res| + res.read_body do |chunk| + $stdout.print chunk + end + } + nil + end + + # Send a GET request to the target and return the response + # as a string.
The target can either be specified as + # (+uri+), or as (+host+, +path+, +port+ = 80); so: + # + # print Net::HTTP.get(URI.parse('http://www.example.com/index.html')) + # + # or: + # + # print Net::HTTP.get('www.example.com', '/index.html') + # + def HTTP.get(uri_or_host, path = nil, port = nil) + get_response(uri_or_host, path, port).body + end + + # Send a GET request to the target and return the response + # as a Net::HTTPResponse object. The target can either be specified as + # (+uri+), or as (+host+, +path+, +port+ = 80); so: + # + # res = Net::HTTP.get_response(URI.parse('http://www.example.com/index.html')) + # print res.body + # + # or: + # + # res = Net::HTTP.get_response('www.example.com', '/index.html') + # print res.body + # + def HTTP.get_response(uri_or_host, path = nil, port = nil, &block) + if path + host = uri_or_host + new(host, port || HTTP.default_port).start {|http| + return http.request_get(path, &block) + } + else + uri = uri_or_host + new(uri.host, uri.port).start {|http| + return http.request_get(uri.request_uri, &block) + } + end + end + + # Posts HTML form data to the +URL+. + # Form data must be represented as a Hash of String to String, e.g: + # + # { "cmd" => "search", "q" => "ruby", "max" => "50" } + # + # This method also does Basic Authentication iff +URL+.user exists. + # + # Example: + # + # require 'net/http' + # require 'uri' + # + # HTTP.post_form URI.parse('http://www.example.com/search.cgi'), + # { "q" => "ruby", "max" => "50" } + # + def HTTP.post_form(url, params) + req = Post.new(url.path) + req.form_data = params + req.basic_auth url.user, url.password if url.user + new(url.host, url.port).start {|http| + http.request(req) + } + end + + # + # HTTP session management + # + + # The default port to use for HTTP requests; defaults to 80. + def HTTP.default_port + http_default_port() + end + + # The default port to use for HTTP requests; defaults to 80. 
+ def HTTP.http_default_port + 80 + end + + # The default port to use for HTTPS requests; defaults to 443. + def HTTP.https_default_port + 443 + end + + def HTTP.socket_type #:nodoc: obsolete + BufferedIO + end + + # creates a new Net::HTTP object and opens its TCP connection and + # HTTP session. If the optional block is given, the newly + # created Net::HTTP object is passed to it and closed when the + # block finishes. In this case, the return value of this method + # is the return value of the block. If no block is given, the + # return value of this method is the newly created Net::HTTP object + # itself, and the caller is responsible for closing it upon completion. + def HTTP.start(address, port = nil, p_addr = nil, p_port = nil, p_user = nil, p_pass = nil, &block) # :yield: +http+ + new(address, port, p_addr, p_port, p_user, p_pass).start(&block) + end + + class << HTTP + alias newobj new + end + + # Creates a new Net::HTTP object. + # If +proxy_addr+ is given, creates an Net::HTTP object with proxy support. + # This method does not open the TCP connection. + def HTTP.new(address, port = nil, p_addr = nil, p_port = nil, p_user = nil, p_pass = nil) + h = Proxy(p_addr, p_port, p_user, p_pass).newobj(address, port) + h.instance_eval { + @newimpl = ::Net::HTTP.version_1_2? + } + h + end + + # Creates a new Net::HTTP object for the specified +address+. + # This method does not open the TCP connection. 
+ def initialize(address, port = nil) + @address = address + @port = (port || HTTP.default_port) + @curr_http_version = HTTPVersion + @no_keepalive_server = false + @close_on_empty_response = false + @socket = nil + @started = false + @open_timeout = nil + @read_timeout = 60 + @debug_output = nil + @use_ssl = false + @ssl_context = nil + @enable_post_connection_check = true + @compression = nil + @sspi_enabled = false + if defined?(SSL_ATTRIBUTES) + SSL_ATTRIBUTES.each do |name| + instance_variable_set "@#{name}", nil + end + end + end + + # Returns a short description of this object: its class, the target + # address:port, and whether the session has been started. + def inspect + "#<#{self.class} #{@address}:#{@port} open=#{started?}>" + end + + # *WARNING* This method opens a serious security hole. + # Never use this method in production code. + # + # Set an output stream for debugging. + # + # http = Net::HTTP.new('www.example.com') + # http.set_debug_output $stderr + # http.start { .... } + # + def set_debug_output(output) + warn 'Net::HTTP#set_debug_output called after HTTP started' if started? + @debug_output = output + end + + # The host name to connect to. + attr_reader :address + + # The port number to connect to. + attr_reader :port + + # Seconds to wait until connection is opened. + # If the HTTP object cannot open a connection in this many seconds, + # it raises a TimeoutError exception. + attr_accessor :open_timeout + + # Seconds to wait until reading one block (by one read(2) call). + # If the HTTP object cannot read data in this many seconds, + # it raises a TimeoutError exception. + attr_reader :read_timeout + + # Setter for the read_timeout attribute. If a socket is already + # present, its read timeout is updated as well. + def read_timeout=(sec) + @socket.read_timeout = sec if @socket + @read_timeout = sec + end + + # Returns true if the HTTP session has been started. + def started? + @started + end + + alias active? started? #:nodoc: obsolete + + # If true, the connection is closed after a response that has no body + # has been received. Defaults to false. + attr_accessor :close_on_empty_response + + # Returns true if SSL/TLS is used with HTTP. Always false here; + # net/https redefines this method. + def use_ssl? + false # redefined in net/https + end + + # Opens TCP connection and HTTP session.
+ # + # When this method is called with block, gives a HTTP object + # to the block and closes the TCP connection / HTTP session + # after the block executed. + # + # When called with a block, returns the return value of the + # block; otherwise, returns self. + # + def start # :yield: http + raise IOError, 'HTTP session already opened' if @started + if block_given? + begin + do_start + return yield(self) + ensure + do_finish + end + end + do_start + self + end + + def do_start + connect + @started = true + end + private :do_start + + def connect + D "opening connection to #{conn_address()}..." + s = timeout(@open_timeout) { TCPSocket.open(conn_address(), conn_port()) } + D "opened" + if use_ssl? + ssl_parameters = Hash.new + SSL_ATTRIBUTES.each do |name| + if value = instance_variable_get("@#{name}") + ssl_parameters[name] = value + end + end + @ssl_context = OpenSSL::SSL::SSLContext.new + @ssl_context.set_params(ssl_parameters) + s = OpenSSL::SSL::SSLSocket.new(s, @ssl_context) + s.sync_close = true + end + @socket = BufferedIO.new(s) + @socket.read_timeout = @read_timeout + @socket.debug_output = @debug_output + if use_ssl? + if proxy? + @socket.writeline sprintf('CONNECT %s:%s HTTP/%s', + @address, @port, HTTPVersion) + @socket.writeline "Host: #{@address}:#{@port}" + if proxy_user + credential = ["#{proxy_user}:#{proxy_pass}"].pack('m') + credential.delete!("\r\n") + @socket.writeline "Proxy-Authorization: Basic #{credential}" + end + @socket.writeline '' + HTTPResponse.read_new(@socket).value + end + s.connect + if @ssl_context.verify_mode != OpenSSL::SSL::VERIFY_NONE + s.post_connection_check(@address) + end + end + on_connect + end + private :connect + + def on_connect + end + private :on_connect + + # Finishes HTTP session and closes TCP connection. + # Raises IOError if not started. + def finish + raise IOError, 'HTTP session not yet started' unless started? 
+ do_finish + end + + def do_finish + @started = false + @socket.close if @socket and not @socket.closed? + @socket = nil + end + private :do_finish + + # + # proxy + # + + public + + # no proxy + @is_proxy_class = false + @proxy_addr = nil + @proxy_port = nil + @proxy_user = nil + @proxy_pass = nil + + # Creates an HTTP proxy class. + # Arguments are address/port of proxy host and username/password + # if authorization on proxy server is required. + # You can replace the HTTP class with created proxy class. + # + # If ADDRESS is nil, this method returns self (Net::HTTP). + # + # # Example + # proxy_class = Net::HTTP::Proxy('proxy.example.com', 8080) + # : + # proxy_class.start('www.ruby-lang.org') {|http| + # # connecting proxy.foo.org:8080 + # : + # } + # + def HTTP.Proxy(p_addr, p_port = nil, p_user = nil, p_pass = nil) + return self unless p_addr + delta = ProxyDelta + proxyclass = Class.new(self) + proxyclass.module_eval { + include delta + # with proxy + @is_proxy_class = true + @proxy_address = p_addr + @proxy_port = p_port || default_port() + @proxy_user = p_user + @proxy_pass = p_pass + } + proxyclass + end + + class << HTTP + # returns true if self is a class which was created by HTTP::Proxy. + def proxy_class? + @is_proxy_class + end + + attr_reader :proxy_address + attr_reader :proxy_port + attr_reader :proxy_user + attr_reader :proxy_pass + end + + # True if self is a HTTP proxy class. + def proxy? + self.class.proxy_class? + end + + # Address of proxy host. If self does not use a proxy, nil. + def proxy_address + self.class.proxy_address + end + + # Port number of proxy host. If self does not use a proxy, nil. + def proxy_port + self.class.proxy_port + end + + # User name for accessing proxy. If self does not use a proxy, nil. + def proxy_user + self.class.proxy_user + end + + # User password for accessing proxy. If self does not use a proxy, nil. 
+ def proxy_pass + self.class.proxy_pass + end + + alias proxyaddr proxy_address #:nodoc: obsolete + alias proxyport proxy_port #:nodoc: obsolete + + private + + # without proxy + + def conn_address + address() + end + + def conn_port + port() + end + + def edit_path(path) + path + end + + module ProxyDelta #:nodoc: internal use only + private + + def conn_address + proxy_address() + end + + def conn_port + proxy_port() + end + + def edit_path(path) + use_ssl? ? path : "http://#{addr_port()}#{path}" + end + end + + # + # HTTP operations + # + + public + + # Gets data from +path+ on the connected-to host. + # +initheader+ must be a Hash like { 'Accept' => '*/*', ... }, + # and it defaults to an empty hash. + # If +initheader+ doesn't have the key 'accept-encoding', then + # a value of "gzip;q=1.0,deflate;q=0.6,identity;q=0.3" is used, + # so that gzip compression is used in preference to deflate + # compression, which is used in preference to no compression. + # Ruby doesn't have libraries to support the compress (Lempel-Ziv) + # compression, so that is not supported. The intent of this is + # to reduce bandwidth by default. If this routine sets up + # compression, then it does the decompression also, removing + # the header as well to prevent confusion. Otherwise + # it leaves the body as it found it. + # + # In version 1.1 (ruby 1.6), this method returns a pair of objects, + # a Net::HTTPResponse object and the entity body string. + # In version 1.2 (ruby 1.8), this method returns a Net::HTTPResponse + # object. + # + # If called with a block, yields each fragment of the + # entity body in turn as a string as it is read from + # the socket. Note that in this case, the returned response + # object will *not* contain a (meaningful) body. + # + # +dest+ argument is obsolete. + # It still works but you must not use it. + # + # In version 1.1, this method might raise an exception for + # 3xx (redirect). 
# In this case you can get a HTTPResponse object
# by "anException.response".
#
# In version 1.2, this method never raises exception.
#
#   # version 1.1 (bundled with Ruby 1.6)
#   response, body = http.get('/index.html')
#
#   # version 1.2 (bundled with Ruby 1.8 or later)
#   response = http.get('/index.html')
#
#   # using block
#   File.open('result.txt', 'w') {|f|
#     http.get('/~foo/') do |str|
#       f.write str
#     end
#   }
#
# Notes on compression:
# * The caller's +initheader+ hash is never modified; the default
#   Accept-Encoding is added to a private copy. +nil+ is accepted and
#   treated as an empty hash.
# * Compression is only negotiated when the whole body is buffered by
#   this method, i.e. when neither +dest+ nor a block is given. With
#   +dest+ or a block the body fragments are handed over as they are
#   read, before they could be decompressed.
# * Whether compression was requested is tracked per call, so it cannot
#   leak into a later request on the same connection.
#
def get(path, initheader = {}, dest = nil, &block) # :yield: +body_segment+
  header = initheader ? initheader.dup : {}

  negotiated = false
  caller_set_encoding = header.keys.any? {|k| k.downcase == 'accept-encoding' }
  if HAVE_ZLIB && dest.nil? && block.nil? && !caller_set_encoding
    header['accept-encoding'] = 'gzip;q=1.0,deflate;q=0.6,identity;q=0.3'
    negotiated = true
  end

  res = nil
  request(Get.new(path, header)) {|r|
    raw = r.read_body(dest, &block)
    if negotiated and raw and r.key?('content-encoding')
      case r['content-encoding']
      when 'gzip'
        r.body = Zlib::GzipReader.new(StringIO.new(raw)).read
        # The body is plain now, so the header would be misleading.
        r.delete('content-encoding')
      when 'deflate'
        r.body = Zlib::Inflate.inflate(raw)
        r.delete('content-encoding')
      end
      # "identity" and unknown encodings: leave the body as received.
    end
    res = r
  }

  # Version 1.1 compatibility: raise on non-2xx and return a pair.
  unless @newimpl
    res.value
    return res, res.body
  end

  res
end

# Gets only the header from +path+ on the connected-to host.
# +initheader+ is a Hash like { 'Accept' => '*/*', ... }.
#
# This method returns a Net::HTTPResponse object.
#
# In version 1.1, this method might raise an exception for
# 3xx (redirect). In this case you can get a HTTPResponse object
# by "anException.response".
# In version 1.2, this method never raises an exception.
+ # + # response = nil + # Net::HTTP.start('some.www.server', 80) {|http| + # response = http.head('/index.html') + # } + # p response['content-type'] + # + def head(path, initheader = nil) + res = request(Head.new(path, initheader)) + res.value unless @newimpl + res + end + + # Posts +data+ (must be a String) to +path+. +header+ must be a Hash + # like { 'Accept' => '*/*', ... }. + # + # In version 1.1 (ruby 1.6), this method returns a pair of objects, a + # Net::HTTPResponse object and an entity body string. + # In version 1.2 (ruby 1.8), this method returns a Net::HTTPResponse object. + # + # If called with a block, yields each fragment of the + # entity body in turn as a string as it are read from + # the socket. Note that in this case, the returned response + # object will *not* contain a (meaningful) body. + # + # +dest+ argument is obsolete. + # It still works but you must not use it. + # + # In version 1.1, this method might raise an exception for + # 3xx (redirect). In this case you can get an HTTPResponse object + # by "anException.response". + # In version 1.2, this method never raises exception. + # + # # version 1.1 + # response, body = http.post('/cgi-bin/search.rb', 'query=foo') + # + # # version 1.2 + # response = http.post('/cgi-bin/search.rb', 'query=foo') + # + # # using block + # File.open('result.txt', 'w') {|f| + # http.post('/cgi-bin/search.rb', 'query=foo') do |str| + # f.write str + # end + # } + # + # You should set Content-Type: header field for POST. + # If no Content-Type: field given, this method uses + # "application/x-www-form-urlencoded" by default. 
+ # + def post(path, data, initheader = nil, dest = nil, &block) # :yield: +body_segment+ + res = nil + request(Post.new(path, initheader), data) {|r| + r.read_body dest, &block + res = r + } + unless @newimpl + res.value + return res, res.body + end + res + end + + def put(path, data, initheader = nil) #:nodoc: + res = request(Put.new(path, initheader), data) + res.value unless @newimpl + res + end + + # Sends a PROPPATCH request to the +path+ and gets a response, + # as an HTTPResponse object. + def proppatch(path, body, initheader = nil) + request(Proppatch.new(path, initheader), body) + end + + # Sends a LOCK request to the +path+ and gets a response, + # as an HTTPResponse object. + def lock(path, body, initheader = nil) + request(Lock.new(path, initheader), body) + end + + # Sends a UNLOCK request to the +path+ and gets a response, + # as an HTTPResponse object. + def unlock(path, body, initheader = nil) + request(Unlock.new(path, initheader), body) + end + + # Sends a OPTIONS request to the +path+ and gets a response, + # as an HTTPResponse object. + def options(path, initheader = nil) + request(Options.new(path, initheader)) + end + + # Sends a PROPFIND request to the +path+ and gets a response, + # as an HTTPResponse object. + def propfind(path, body = nil, initheader = {'Depth' => '0'}) + request(Propfind.new(path, initheader), body) + end + + # Sends a DELETE request to the +path+ and gets a response, + # as an HTTPResponse object. + def delete(path, initheader = {'Depth' => 'Infinity'}) + request(Delete.new(path, initheader)) + end + + # Sends a MOVE request to the +path+ and gets a response, + # as an HTTPResponse object. + def move(path, initheader = nil) + request(Move.new(path, initheader)) + end + + # Sends a COPY request to the +path+ and gets a response, + # as an HTTPResponse object. 
+ def copy(path, initheader = nil) + request(Copy.new(path, initheader)) + end + + # Sends a MKCOL request to the +path+ and gets a response, + # as an HTTPResponse object. + def mkcol(path, body = nil, initheader = nil) + request(Mkcol.new(path, initheader), body) + end + + # Sends a TRACE request to the +path+ and gets a response, + # as an HTTPResponse object. + def trace(path, initheader = nil) + request(Trace.new(path, initheader)) + end + + # Sends a GET request to the +path+ and gets a response, + # as an HTTPResponse object. + # + # When called with a block, yields an HTTPResponse object. + # The body of this response will not have been read yet; + # the caller can process it using HTTPResponse#read_body, + # if desired. + # + # Returns the response. + # + # This method never raises Net::* exceptions. + # + # response = http.request_get('/index.html') + # # The entity body is already read here. + # p response['content-type'] + # puts response.body + # + # # using block + # http.request_get('/index.html') {|response| + # p response['content-type'] + # response.read_body do |str| # read body now + # print str + # end + # } + # + def request_get(path, initheader = nil, &block) # :yield: +response+ + request(Get.new(path, initheader), &block) + end + + # Sends a HEAD request to the +path+ and gets a response, + # as an HTTPResponse object. + # + # Returns the response. + # + # This method never raises Net::* exceptions. + # + # response = http.request_head('/index.html') + # p response['content-type'] + # + def request_head(path, initheader = nil, &block) + request(Head.new(path, initheader), &block) + end + + # Sends a POST request to the +path+ and gets a response, + # as an HTTPResponse object. + # + # When called with a block, yields an HTTPResponse object. + # The body of this response will not have been read yet; + # the caller can process it using HTTPResponse#read_body, + # if desired. + # + # Returns the response. 
+ # + # This method never raises Net::* exceptions. + # + # # example + # response = http.request_post('/cgi-bin/nice.rb', 'datadatadata...') + # p response.status + # puts response.body # body is already read + # + # # using block + # http.request_post('/cgi-bin/nice.rb', 'datadatadata...') {|response| + # p response.status + # p response['content-type'] + # response.read_body do |str| # read body now + # print str + # end + # } + # + def request_post(path, data, initheader = nil, &block) # :yield: +response+ + request Post.new(path, initheader), data, &block + end + + def request_put(path, data, initheader = nil, &block) #:nodoc: + request Put.new(path, initheader), data, &block + end + + alias get2 request_get #:nodoc: obsolete + alias head2 request_head #:nodoc: obsolete + alias post2 request_post #:nodoc: obsolete + alias put2 request_put #:nodoc: obsolete + + + # Sends an HTTP request to the HTTP server. + # This method also sends DATA string if DATA is given. + # + # Returns a HTTPResponse object. + # + # This method never raises Net::* exceptions. + # + # response = http.send_request('GET', '/index.html') + # puts response.body + # + def send_request(name, path, data = nil, header = nil) + r = HTTPGenericRequest.new(name,(data ? true : false),true,path,header) + request r, data + end + + # Sends an HTTPRequest object REQUEST to the HTTP server. + # This method also sends DATA string if REQUEST is a post/put request. + # Giving DATA for get/head request causes ArgumentError. + # + # When called with a block, yields an HTTPResponse object. + # The body of this response will not have been read yet; + # the caller can process it using HTTPResponse#read_body, + # if desired. + # + # Returns a HTTPResponse object. + # + # This method never raises Net::* exceptions. + # + def request(req, body = nil, &block) # :yield: +response+ + unless started? 
+ start { + req['connection'] ||= 'close' + return request(req, body, &block) + } + end + if proxy_user() + req.proxy_basic_auth proxy_user(), proxy_pass() unless use_ssl? + end + req.set_body_internal body + res = transport_request(req, &block) + if sspi_auth?(res) + sspi_auth(req) + res = transport_request(req, &block) + end + res + end + + private + + def transport_request(req) + begin_transport req + req.exec @socket, @curr_http_version, edit_path(req.path) + begin + res = HTTPResponse.read_new(@socket) + end while res.kind_of?(HTTPContinue) + res.reading_body(@socket, req.response_body_permitted?) { + yield res if block_given? + } + end_transport req, res + res + end + + def begin_transport(req) + connect if @socket.closed? + if not req.response_body_permitted? and @close_on_empty_response + req['connection'] ||= 'close' + end + req['host'] ||= addr_port() + end + + def end_transport(req, res) + @curr_http_version = res.http_version + if @socket.closed? + D 'Conn socket closed' + elsif not res.body and @close_on_empty_response + D 'Conn close' + @socket.close + elsif keep_alive?(req, res) + D 'Conn keep-alive' + else + D 'Conn close' + @socket.close + end + end + + def keep_alive?(req, res) + return false if req.connection_close? + if @curr_http_version <= '1.0' + res.connection_keep_alive? + else # HTTP/1.1 or later + not res.connection_close? + end + end + + def sspi_auth?(res) + return false unless @sspi_enabled + if res.kind_of?(HTTPProxyAuthenticationRequired) and + proxy? and res["Proxy-Authenticate"].include?("Negotiate") + begin + require 'win32/sspi' + true + rescue LoadError + false + end + else + false + end + end + + def sspi_auth(req) + n = Win32::SSPI::NegotiateAuth.new + req["Proxy-Authorization"] = "Negotiate #{n.get_initial_token}" + # Some versions of ISA will close the connection if this isn't present. 
+ req["Connection"] = "Keep-Alive" + req["Proxy-Connection"] = "Keep-Alive" + res = transport_request(req) + authphrase = res["Proxy-Authenticate"] or return res + req["Proxy-Authorization"] = "Negotiate #{n.complete_authentication(authphrase)}" + rescue => err + raise HTTPAuthenticationError.new('HTTP authentication failed', err) + end + + # + # utils + # + + private + + def addr_port + if use_ssl? + address() + (port == HTTP.https_default_port ? '' : ":#{port()}") + else + address() + (port == HTTP.http_default_port ? '' : ":#{port()}") + end + end + + def D(msg) + return unless @debug_output + @debug_output << msg + @debug_output << "\n" + end + + end + + HTTPSession = HTTP + + + # + # Header module. + # + # Provides access to @header in the mixed-into class as a hash-like + # object, except with case-insensitive keys. Also provides + # methods for accessing commonly-used header values in a more + # convenient format. + # + module HTTPHeader + + def initialize_http_header(initheader) + @header = {} + return unless initheader + initheader.each do |key, value| + warn "net/http: warning: duplicated HTTP header: #{key}" if key?(key) and $VERBOSE + @header[key.downcase] = [value.strip] + end + end + + def size #:nodoc: obsolete + @header.size + end + + alias length size #:nodoc: obsolete + + # Returns the header field corresponding to the case-insensitive key. + # For example, a key of "Content-Type" might return "text/html" + def [](key) + a = @header[key.downcase] or return nil + a.join(', ') + end + + # Sets the header field corresponding to the case-insensitive key. + def []=(key, val) + unless val + @header.delete key.downcase + return val + end + @header[key.downcase] = [val] + end + + # [Ruby 1.8.3] + # Adds header field instead of replace. + # Second argument +val+ must be a String. + # See also #[]=, #[] and #get_fields. 
+ # + # request.add_field 'X-My-Header', 'a' + # p request['X-My-Header'] #=> "a" + # p request.get_fields('X-My-Header') #=> ["a"] + # request.add_field 'X-My-Header', 'b' + # p request['X-My-Header'] #=> "a, b" + # p request.get_fields('X-My-Header') #=> ["a", "b"] + # request.add_field 'X-My-Header', 'c' + # p request['X-My-Header'] #=> "a, b, c" + # p request.get_fields('X-My-Header') #=> ["a", "b", "c"] + # + def add_field(key, val) + if @header.key?(key.downcase) + @header[key.downcase].push val + else + @header[key.downcase] = [val] + end + end + + # [Ruby 1.8.3] + # Returns an array of header field strings corresponding to the + # case-insensitive +key+. This method allows you to get duplicated + # header fields without any processing. See also #[]. + # + # p response.get_fields('Set-Cookie') + # #=> ["session=al98axx; expires=Fri, 31-Dec-1999 23:58:23", + # "query=rubyscript; expires=Fri, 31-Dec-1999 23:58:23"] + # p response['Set-Cookie'] + # #=> "session=al98axx; expires=Fri, 31-Dec-1999 23:58:23, query=rubyscript; expires=Fri, 31-Dec-1999 23:58:23" + # + def get_fields(key) + return nil unless @header[key.downcase] + @header[key.downcase].dup + end + + # Returns the header field corresponding to the case-insensitive key. + # Returns the default value +args+, or the result of the block, or + # raises an IndexErrror if there's no header field named +key+ + # See Hash#fetch + def fetch(key, *args, &block) #:yield: +key+ + a = @header.fetch(key.downcase, *args, &block) + a.kind_of?(Array) ? a.join(', ') : a + end + + # Iterates for each header names and values. + def each_header #:yield: +key+, +value+ + @header.each do |k,va| + yield k, va.join(', ') + end + end + + alias each each_header + + # Iterates for each header names. + def each_name(&block) #:yield: +key+ + @header.each_key(&block) + end + + alias each_key each_name + + # Iterates for each capitalized header names. 
def each_capitalized_name(&block)  #:yield: +key+
  @header.each_key do |k|
    yield capitalize(k)
  end
end

# Iterates over each header value. Multiple values of one field are
# joined with ", ".
def each_value   #:yield: +value+
  @header.each_value do |va|
    yield va.join(', ')
  end
end

# Removes a header field. +key+ is case-insensitive.
def delete(key)
  @header.delete(key.downcase)
end

# true if +key+ header exists. +key+ is case-insensitive.
def key?(key)
  @header.key?(key.downcase)
end

# Returns a Hash consisting of header names and values.
# The Hash is a shallow copy of the internal table.
def to_hash
  @header.dup
end

# As for #each_header, except the keys are provided in capitalized form.
def each_capitalized
  @header.each do |k,v|
    yield capitalize(k), v.join(', ')
  end
end

alias canonical_each each_capitalized

# Capitalizes every dash-separated part of +name+
# ("content-type" becomes "Content-Type").
def capitalize(name)
  name.split(/-/).map {|s| s.capitalize }.join('-')
end
private :capitalize

# Returns an Array of Range objects which represents Range: header field,
# or +nil+ if there is no such header.
# Raises HTTPHeaderSyntaxError if the field cannot be parsed.
def range
  return nil unless @header['range']
  self['Range'].split(/,/).map {|spec|
    m = /bytes\s*=\s*(\d+)?\s*-\s*(\d+)?/i.match(spec) or
            raise HTTPHeaderSyntaxError, "wrong Range: #{spec}"
    d1 = m[1].to_i
    d2 = m[2].to_i
    if    m[1] and m[2] then  d1..d2      # "first-last"
    elsif m[1]          then  d1..-1      # "first-"  (to the end)
    elsif          m[2] then -d2..-1      # "-suffix" (last N bytes)
    else
      raise HTTPHeaderSyntaxError, 'range is not specified'
    end
  }
end

# Set Range: header from Range (arg r) or beginning index and
# length from it (arg idx&len).
# Passing +nil+ as +r+ removes the header.
#
#   req.range = (0..1023)
#   req.set_range 0, 1023
#
def set_range(r, e = nil)
  unless r
    @header.delete 'range'
    return r
  end
  r = (r...r+e) if e
  case r
  when Numeric
    n = r.to_i
    # Positive: the first n bytes; otherwise: the last -n bytes.
    rangestr = (n > 0 ? "0-#{n-1}" : "-#{-n}")
  when Range
    first = r.first
    last = r.last
    last -= 1 if r.exclude_end?
    if last == -1
      # Open-ended range: "first-" or a suffix range "-N".
      rangestr = (first > 0 ? "#{first}-" : "-#{-first}")
    else
      raise HTTPHeaderSyntaxError, 'range.first is negative' if first < 0
      raise HTTPHeaderSyntaxError, 'range.last is negative' if last < 0
      raise HTTPHeaderSyntaxError, 'must be .first < .last' if first > last
      rangestr = "#{first}-#{last}"
    end
  else
    raise TypeError, 'Range/Integer is required'
  end
  @header['range'] = ["bytes=#{rangestr}"]
  r
end

alias range= set_range

# Returns an Integer object which represents the Content-Length: header field
# or +nil+ if that field is not provided.
# Raises HTTPHeaderSyntaxError if the field contains no digits.
def content_length
  return nil unless key?('Content-Length')
  len = self['Content-Length'].slice(/\d+/) or
      raise HTTPHeaderSyntaxError, 'wrong Content-Length format'
  len.to_i
end

# Sets the Content-Length: header field; +nil+ removes it.
def content_length=(len)
  unless len
    @header.delete 'content-length'
    return nil
  end
  @header['content-length'] = [len.to_i.to_s]
end

# Returns "true" if the "transfer-encoding" header is present and
# set to "chunked".  This is an HTTP/1.1 feature, allowing
# the content to be sent in "chunks" without at the outset
# stating the entire content length.
def chunked?
  return false unless @header['transfer-encoding']
  field = self['Transfer-Encoding']
  (/(?:\A|[^\-\w])chunked(?![\-\w])/i =~ field) ? true : false
end

# Returns a Range object which represents Content-Range: header field.
# This indicates, for a partial entity body, where this fragment
# fits inside the full entity body, as range of byte offsets.
# Returns +nil+ if the header is absent and raises
# HTTPHeaderSyntaxError if it does not look like
# "bytes first-last/total" (total may be "*").
#
# NOTE: the end of the returned Range is last + 1, i.e. one past the
# last byte offset given in the header; #range_length relies on this.
def content_range
  return nil unless @header['content-range']
  m = %r{bytes\s+(\d+)-(\d+)/(\d+|\*)}i.match(self['Content-Range']) or
      raise HTTPHeaderSyntaxError, 'wrong Content-Range format'
  m[1].to_i .. m[2].to_i + 1
end

# The length of the range represented in Content-Range: header,
# or +nil+ if that header is absent.
def range_length
  r = content_range() or return nil
  r.end - r.begin
end

# Returns a content type string such as "text/html".
# This method returns nil if Content-Type: header field does not exist.
def content_type
  return nil unless main_type()
  if sub_type()
  then "#{main_type()}/#{sub_type()}"
  else main_type()
  end
end

# Returns a content type string such as "text".
# This method returns nil if Content-Type: header field does not exist.
def main_type
  return nil unless @header['content-type']
  self['Content-Type'].split(';').first.to_s.split('/')[0].to_s.strip
end

# Returns a content type string such as "html".
# This method returns nil if Content-Type: header field does not exist
# or sub-type is not given (e.g. "Content-Type: text").
def sub_type
  return nil unless @header['content-type']
  sub = self['Content-Type'].split(';').first.to_s.split('/')[1]
  return nil unless sub
  sub.strip
end

# Returns content type parameters as a Hash such as
# {"charset" => "iso-2022-jp"}.
# Returns an empty Hash if there is no Content-Type: header or it has
# no parameters. Parameters without a "=value" part (for example the
# blank left by a trailing "; ") are skipped instead of raising.
def type_params
  result = {}
  list = self['Content-Type'].to_s.split(';')
  list.shift   # drop the leading "type/subtype" part
  list.each do |param|
    k, v = *param.split('=', 2)
    next unless k and v
    result[k.strip] = v.strip
  end
  result
end

# Set Content-Type: header field by +type+ and +params+.
# +type+ must be a String, +params+ must be a Hash.
# Each parameter is appended as "; key=value".
def set_content_type(type, params = {})
  @header['content-type'] = [type + params.map{|k,v|"; #{k}=#{v}"}.join('')]
end

alias content_type= set_content_type

# Set header fields and a body from HTML form data.
# +params+ should be a Hash containing HTML form data.
# Optional argument +sep+ means data record separator.
#
# This method also sets the Content-Type: header field to
# application/x-www-form-urlencoded.
+ # + # Example: + # http.form_data = {"q" => "ruby", "lang" => "en"} + # http.form_data = {"q" => ["ruby", "perl"], "lang" => "en"} + # http.set_form_data({"q" => "ruby", "lang" => "en"}, ';') + # + def set_form_data(params, sep = '&') + self.body = params.map {|k, v| encode_kvpair(k, v) }.flatten.join(sep) + self.content_type = 'application/x-www-form-urlencoded' + end + + alias form_data= set_form_data + + def encode_kvpair(k, vs) + Array(vs).map {|v| "#{urlencode(k.to_s)}=#{urlencode(v.to_s)}" } + end + private :encode_kvpair + + def urlencode(str) + str.dup.force_encoding('ASCII-8BIT').gsub(/[^a-zA-Z0-9_\.\-]/){'%%%02x' % $&.ord} + end + private :urlencode + + # Set the Authorization: header for "Basic" authorization. + def basic_auth(account, password) + @header['authorization'] = [basic_encode(account, password)] + end + + # Set Proxy-Authorization: header for "Basic" authorization. + def proxy_basic_auth(account, password) + @header['proxy-authorization'] = [basic_encode(account, password)] + end + + def basic_encode(account, password) + 'Basic ' + ["#{account}:#{password}"].pack('m').delete("\r\n") + end + private :basic_encode + + def connection_close? + tokens(@header['connection']).include?('close') or + tokens(@header['proxy-connection']).include?('close') + end + + def connection_keep_alive? + tokens(@header['connection']).include?('keep-alive') or + tokens(@header['proxy-connection']).include?('keep-alive') + end + + def tokens(vals) + return [] unless vals + vals.map {|v| v.split(',') }.flatten\ + .reject {|str| str.strip.empty? }\ + .map {|tok| tok.strip.downcase } + end + private :tokens + + end + + + # + # Parent of HTTPRequest class. Do not use this directly; use + # a subclass of HTTPRequest. + # + # Mixes in the HTTPHeader module. 
+ # + class HTTPGenericRequest + + include HTTPHeader + + def initialize(m, reqbody, resbody, path, initheader = nil) + @method = m + @request_has_body = reqbody + @response_has_body = resbody + raise ArgumentError, "no HTTP request path given" unless path + raise ArgumentError, "HTTP request path is empty" if path.empty? + @path = path + initialize_http_header initheader + self['Accept'] ||= '*/*' + self['User-Agent'] ||= 'Ruby' + @body = nil + @body_stream = nil + end + + attr_reader :method + attr_reader :path + + def inspect + "\#<#{self.class} #{@method}>" + end + + def request_body_permitted? + @request_has_body + end + + def response_body_permitted? + @response_has_body + end + + def body_exist? + warn "Net::HTTPRequest#body_exist? is obsolete; use response_body_permitted?" if $VERBOSE + response_body_permitted? + end + + attr_reader :body + + def body=(str) + @body = str + @body_stream = nil + str + end + + attr_reader :body_stream + + def body_stream=(input) + @body = nil + @body_stream = input + input + end + + def set_body_internal(str) #:nodoc: internal use only + raise ArgumentError, "both of body argument and HTTPRequest#body set" if str and (@body or @body_stream) + self.body = str if str + end + + # + # write + # + + def exec(sock, ver, path) #:nodoc: internal use only + if @body + send_request_with_body sock, ver, path, @body + elsif @body_stream + send_request_with_body_stream sock, ver, path, @body_stream + else + write_header sock, ver, path + end + end + + private + + def send_request_with_body(sock, ver, path, body) + self.content_length = body.bytesize + delete 'Transfer-Encoding' + supply_default_content_type + write_header sock, ver, path + sock.write body + end + + def send_request_with_body_stream(sock, ver, path, f) + unless content_length() or chunked? + raise ArgumentError, + "Content-Length not given and Transfer-Encoding is not `chunked'" + end + supply_default_content_type + write_header sock, ver, path + if chunked? 
+ while s = f.read(1024) + sock.write(sprintf("%x\r\n", s.length) << s << "\r\n") + end + sock.write "0\r\n\r\n" + else + while s = f.read(1024) + sock.write s + end + end + end + + def supply_default_content_type + return if content_type() + warn 'net/http: warning: Content-Type did not set; using application/x-www-form-urlencoded' if $VERBOSE + set_content_type 'application/x-www-form-urlencoded' + end + + def write_header(sock, ver, path) + buf = "#{@method} #{path} HTTP/#{ver}\r\n" + each_capitalized do |k,v| + buf << "#{k}: #{v}\r\n" + end + buf << "\r\n" + sock.write buf + end + + end + + + # + # HTTP request class. This class wraps request header and entity path. + # You *must* use its subclass, Net::HTTP::Get, Post, Head. + # + class HTTPRequest < HTTPGenericRequest + + # Creates HTTP request object. + def initialize(path, initheader = nil) + super self.class::METHOD, + self.class::REQUEST_HAS_BODY, + self.class::RESPONSE_HAS_BODY, + path, initheader + end + end + + + class HTTP # reopen + # + # HTTP 1.1 methods --- RFC2616 + # + + class Get < HTTPRequest + METHOD = 'GET' + REQUEST_HAS_BODY = false + RESPONSE_HAS_BODY = true + end + + class Head < HTTPRequest + METHOD = 'HEAD' + REQUEST_HAS_BODY = false + RESPONSE_HAS_BODY = false + end + + class Post < HTTPRequest + METHOD = 'POST' + REQUEST_HAS_BODY = true + RESPONSE_HAS_BODY = true + end + + class Put < HTTPRequest + METHOD = 'PUT' + REQUEST_HAS_BODY = true + RESPONSE_HAS_BODY = true + end + + class Delete < HTTPRequest + METHOD = 'DELETE' + REQUEST_HAS_BODY = false + RESPONSE_HAS_BODY = true + end + + class Options < HTTPRequest + METHOD = 'OPTIONS' + REQUEST_HAS_BODY = false + RESPONSE_HAS_BODY = false + end + + class Trace < HTTPRequest + METHOD = 'TRACE' + REQUEST_HAS_BODY = false + RESPONSE_HAS_BODY = true + end + + # + # WebDAV methods --- RFC2518 + # + + class Propfind < HTTPRequest + METHOD = 'PROPFIND' + REQUEST_HAS_BODY = true + RESPONSE_HAS_BODY = true + end + + class Proppatch < HTTPRequest + 
METHOD = 'PROPPATCH' + REQUEST_HAS_BODY = true + RESPONSE_HAS_BODY = true + end + + class Mkcol < HTTPRequest + METHOD = 'MKCOL' + REQUEST_HAS_BODY = true + RESPONSE_HAS_BODY = true + end + + class Copy < HTTPRequest + METHOD = 'COPY' + REQUEST_HAS_BODY = false + RESPONSE_HAS_BODY = true + end + + class Move < HTTPRequest + METHOD = 'MOVE' + REQUEST_HAS_BODY = false + RESPONSE_HAS_BODY = true + end + + class Lock < HTTPRequest + METHOD = 'LOCK' + REQUEST_HAS_BODY = true + RESPONSE_HAS_BODY = true + end + + class Unlock < HTTPRequest + METHOD = 'UNLOCK' + REQUEST_HAS_BODY = true + RESPONSE_HAS_BODY = true + end + end + + + ### + ### Response + ### + + # HTTP exception class. + # You must use its subclasses. + module HTTPExceptions + def initialize(msg, res) #:nodoc: + super msg + @response = res + end + attr_reader :response + alias data response #:nodoc: obsolete + end + class HTTPError < ProtocolError + include HTTPExceptions + end + class HTTPRetriableError < ProtoRetriableError + include HTTPExceptions + end + class HTTPServerException < ProtoServerError + # We cannot use the name "HTTPServerError", it is the name of the response. + include HTTPExceptions + end + class HTTPFatalError < ProtoFatalError + include HTTPExceptions + end + + + # HTTP response class. This class wraps response header and entity. + # Mixes in the HTTPHeader module, which provides access to response + # header values both via hash-like methods and individual readers. + # Note that each possible HTTP response code defines its own + # HTTPResponse subclass. These are listed below. + # All classes are + # defined under the Net module. Indentation indicates inheritance. 
+ # + # xxx HTTPResponse + # + # 1xx HTTPInformation + # 100 HTTPContinue + # 101 HTTPSwitchProtocol + # + # 2xx HTTPSuccess + # 200 HTTPOK + # 201 HTTPCreated + # 202 HTTPAccepted + # 203 HTTPNonAuthoritativeInformation + # 204 HTTPNoContent + # 205 HTTPResetContent + # 206 HTTPPartialContent + # + # 3xx HTTPRedirection + # 300 HTTPMultipleChoice + # 301 HTTPMovedPermanently + # 302 HTTPFound + # 303 HTTPSeeOther + # 304 HTTPNotModified + # 305 HTTPUseProxy + # 307 HTTPTemporaryRedirect + # + # 4xx HTTPClientError + # 400 HTTPBadRequest + # 401 HTTPUnauthorized + # 402 HTTPPaymentRequired + # 403 HTTPForbidden + # 404 HTTPNotFound + # 405 HTTPMethodNotAllowed + # 406 HTTPNotAcceptable + # 407 HTTPProxyAuthenticationRequired + # 408 HTTPRequestTimeOut + # 409 HTTPConflict + # 410 HTTPGone + # 411 HTTPLengthRequired + # 412 HTTPPreconditionFailed + # 413 HTTPRequestEntityTooLarge + # 414 HTTPRequestURITooLong + # 415 HTTPUnsupportedMediaType + # 416 HTTPRequestedRangeNotSatisfiable + # 417 HTTPExpectationFailed + # + # 5xx HTTPServerError + # 500 HTTPInternalServerError + # 501 HTTPNotImplemented + # 502 HTTPBadGateway + # 503 HTTPServiceUnavailable + # 504 HTTPGatewayTimeOut + # 505 HTTPVersionNotSupported + # + # xxx HTTPUnknownResponse + # + class HTTPResponse + # true if the response has body. + def HTTPResponse.body_permitted? 
+ self::HAS_BODY + end + + def HTTPResponse.exception_type # :nodoc: internal use only + self::EXCEPTION_TYPE + end + end # reopened after + + # :stopdoc: + + class HTTPUnknownResponse < HTTPResponse + HAS_BODY = true + EXCEPTION_TYPE = HTTPError + end + class HTTPInformation < HTTPResponse # 1xx + HAS_BODY = false + EXCEPTION_TYPE = HTTPError + end + class HTTPSuccess < HTTPResponse # 2xx + HAS_BODY = true + EXCEPTION_TYPE = HTTPError + end + class HTTPRedirection < HTTPResponse # 3xx + HAS_BODY = true + EXCEPTION_TYPE = HTTPRetriableError + end + class HTTPClientError < HTTPResponse # 4xx + HAS_BODY = true + EXCEPTION_TYPE = HTTPServerException # for backward compatibility + end + class HTTPServerError < HTTPResponse # 5xx + HAS_BODY = true + EXCEPTION_TYPE = HTTPFatalError # for backward compatibility + end + + class HTTPContinue < HTTPInformation # 100 + HAS_BODY = false + end + class HTTPSwitchProtocol < HTTPInformation # 101 + HAS_BODY = false + end + + class HTTPOK < HTTPSuccess # 200 + HAS_BODY = true + end + class HTTPCreated < HTTPSuccess # 201 + HAS_BODY = true + end + class HTTPAccepted < HTTPSuccess # 202 + HAS_BODY = true + end + class HTTPNonAuthoritativeInformation < HTTPSuccess # 203 + HAS_BODY = true + end + class HTTPNoContent < HTTPSuccess # 204 + HAS_BODY = false + end + class HTTPResetContent < HTTPSuccess # 205 + HAS_BODY = false + end + class HTTPPartialContent < HTTPSuccess # 206 + HAS_BODY = true + end + + class HTTPMultipleChoice < HTTPRedirection # 300 + HAS_BODY = true + end + class HTTPMovedPermanently < HTTPRedirection # 301 + HAS_BODY = true + end + class HTTPFound < HTTPRedirection # 302 + HAS_BODY = true + end + HTTPMovedTemporarily = HTTPFound + class HTTPSeeOther < HTTPRedirection # 303 + HAS_BODY = true + end + class HTTPNotModified < HTTPRedirection # 304 + HAS_BODY = false + end + class HTTPUseProxy < HTTPRedirection # 305 + HAS_BODY = false + end + # 306 unused + class HTTPTemporaryRedirect < HTTPRedirection # 307 + HAS_BODY 
= true + end + + class HTTPBadRequest < HTTPClientError # 400 + HAS_BODY = true + end + class HTTPUnauthorized < HTTPClientError # 401 + HAS_BODY = true + end + class HTTPPaymentRequired < HTTPClientError # 402 + HAS_BODY = true + end + class HTTPForbidden < HTTPClientError # 403 + HAS_BODY = true + end + class HTTPNotFound < HTTPClientError # 404 + HAS_BODY = true + end + class HTTPMethodNotAllowed < HTTPClientError # 405 + HAS_BODY = true + end + class HTTPNotAcceptable < HTTPClientError # 406 + HAS_BODY = true + end + class HTTPProxyAuthenticationRequired < HTTPClientError # 407 + HAS_BODY = true + end + class HTTPRequestTimeOut < HTTPClientError # 408 + HAS_BODY = true + end + class HTTPConflict < HTTPClientError # 409 + HAS_BODY = true + end + class HTTPGone < HTTPClientError # 410 + HAS_BODY = true + end + class HTTPLengthRequired < HTTPClientError # 411 + HAS_BODY = true + end + class HTTPPreconditionFailed < HTTPClientError # 412 + HAS_BODY = true + end + class HTTPRequestEntityTooLarge < HTTPClientError # 413 + HAS_BODY = true + end + class HTTPRequestURITooLong < HTTPClientError # 414 + HAS_BODY = true + end + HTTPRequestURITooLarge = HTTPRequestURITooLong + class HTTPUnsupportedMediaType < HTTPClientError # 415 + HAS_BODY = true + end + class HTTPRequestedRangeNotSatisfiable < HTTPClientError # 416 + HAS_BODY = true + end + class HTTPExpectationFailed < HTTPClientError # 417 + HAS_BODY = true + end + + class HTTPInternalServerError < HTTPServerError # 500 + HAS_BODY = true + end + class HTTPNotImplemented < HTTPServerError # 501 + HAS_BODY = true + end + class HTTPBadGateway < HTTPServerError # 502 + HAS_BODY = true + end + class HTTPServiceUnavailable < HTTPServerError # 503 + HAS_BODY = true + end + class HTTPGatewayTimeOut < HTTPServerError # 504 + HAS_BODY = true + end + class HTTPVersionNotSupported < HTTPServerError # 505 + HAS_BODY = true + end + + # :startdoc: + + + class HTTPResponse # reopen + + CODE_CLASS_TO_OBJ = { + '1' => HTTPInformation, + 
'2' => HTTPSuccess, + '3' => HTTPRedirection, + '4' => HTTPClientError, + '5' => HTTPServerError + } + CODE_TO_OBJ = { + '100' => HTTPContinue, + '101' => HTTPSwitchProtocol, + + '200' => HTTPOK, + '201' => HTTPCreated, + '202' => HTTPAccepted, + '203' => HTTPNonAuthoritativeInformation, + '204' => HTTPNoContent, + '205' => HTTPResetContent, + '206' => HTTPPartialContent, + + '300' => HTTPMultipleChoice, + '301' => HTTPMovedPermanently, + '302' => HTTPFound, + '303' => HTTPSeeOther, + '304' => HTTPNotModified, + '305' => HTTPUseProxy, + '307' => HTTPTemporaryRedirect, + + '400' => HTTPBadRequest, + '401' => HTTPUnauthorized, + '402' => HTTPPaymentRequired, + '403' => HTTPForbidden, + '404' => HTTPNotFound, + '405' => HTTPMethodNotAllowed, + '406' => HTTPNotAcceptable, + '407' => HTTPProxyAuthenticationRequired, + '408' => HTTPRequestTimeOut, + '409' => HTTPConflict, + '410' => HTTPGone, + '411' => HTTPLengthRequired, + '412' => HTTPPreconditionFailed, + '413' => HTTPRequestEntityTooLarge, + '414' => HTTPRequestURITooLong, + '415' => HTTPUnsupportedMediaType, + '416' => HTTPRequestedRangeNotSatisfiable, + '417' => HTTPExpectationFailed, + + '500' => HTTPInternalServerError, + '501' => HTTPNotImplemented, + '502' => HTTPBadGateway, + '503' => HTTPServiceUnavailable, + '504' => HTTPGatewayTimeOut, + '505' => HTTPVersionNotSupported + } + + class << HTTPResponse + def read_new(sock) #:nodoc: internal use only + httpv, code, msg = read_status_line(sock) + res = response_class(code).new(httpv, code, msg) + each_response_header(sock) do |k,v| + res.add_field k, v + end + res + end + + private + + def read_status_line(sock) + str = sock.readline + m = /\AHTTP(?:\/(\d+\.\d+))?\s+(\d\d\d)\s*(.*)\z/in.match(str) or + raise HTTPBadResponse, "wrong status line: #{str.dump}" + m.captures + end + + def response_class(code) + CODE_TO_OBJ[code] or + CODE_CLASS_TO_OBJ[code[0,1]] or + HTTPUnknownResponse + end + + def each_response_header(sock) + while true + line = 
sock.readuntil("\n", true).sub(/\s+\z/, '') + break if line.empty? + m = /\A([^:]+):\s*/.match(line) or + raise HTTPBadResponse, 'wrong header line format' + yield m[1], m.post_match + end + end + end + + # next is to fix bug in RDoc, where the private inside class << self + # spills out. + public + + include HTTPHeader + + def initialize(httpv, code, msg) #:nodoc: internal use only + @http_version = httpv + @code = code + @message = msg + initialize_http_header nil + @body = nil + @read = false + end + + # The HTTP version supported by the server. + attr_reader :http_version + + # HTTP result code string. For example, '302'. You can also + # determine the response type by which response subclass the + # response object is an instance of. + attr_reader :code + + # HTTP result message. For example, 'Not Found'. + attr_reader :message + alias msg message # :nodoc: obsolete + + def inspect + "#<#{self.class} #{@code} #{@message} readbody=#{@read}>" + end + + # For backward compatibility. + # To allow Net::HTTP 1.1 style assignment + # e.g. + # response, body = Net::HTTP.get(....) + # + def to_ary + warn "net/http.rb: warning: Net::HTTP v1.1 style assignment found at #{caller(1)[0]}; use `response = http.get(...)' instead." if $VERBOSE + res = self.dup + class << res + undef to_ary + end + [res, res.body] + end + + # + # response <-> exception relationship + # + + def code_type #:nodoc: + self.class + end + + def error! #:nodoc: + raise error_type().new(@code + ' ' + @message.dump, self) + end + + def error_type #:nodoc: + self.class::EXCEPTION_TYPE + end + + # Raises HTTP error if the response is not 2xx. + def value + error! 
unless self.kind_of?(HTTPSuccess) + end + + # + # header (for backward compatibility only; DO NOT USE) + # + + def response #:nodoc: + warn "#{caller(1)[0]}: warning: HTTPResponse#response is obsolete" if $VERBOSE + self + end + + def header #:nodoc: + warn "#{caller(1)[0]}: warning: HTTPResponse#header is obsolete" if $VERBOSE + self + end + + def read_header #:nodoc: + warn "#{caller(1)[0]}: warning: HTTPResponse#read_header is obsolete" if $VERBOSE + self + end + + # + # body + # + + def reading_body(sock, reqmethodallowbody) #:nodoc: internal use only + @socket = sock + @body_exist = reqmethodallowbody && self.class.body_permitted? + begin + yield + self.body # ensure to read body + ensure + @socket = nil + end + end + + # Gets entity body. If the block given, yields it to +block+. + # The body is provided in fragments, as it is read in from the socket. + # + # Calling this method a second or subsequent time will return the + # already read string. + # + # http.request_get('/index.html') {|res| + # puts res.read_body + # } + # + # http.request_get('/index.html') {|res| + # p res.read_body.object_id # 538149362 + # p res.read_body.object_id # 538149362 + # } + # + # # using iterator + # http.request_get('/index.html') {|res| + # res.read_body do |segment| + # print segment + # end + # } + # + def read_body(dest = nil, &block) + if @read + raise IOError, "#{self.class}\#read_body called twice" if dest or block + return @body + end + to = procdest(dest, block) + stream_check + if @body_exist + read_body_0 to + @body = to + else + @body = nil + end + @read = true + + @body + end + + # Returns the entity body. + # + # Calling this method a second or subsequent time will return the + # already read string. 
+ # + # http.request_get('/index.html') {|res| + # puts res.body + # } + # + # http.request_get('/index.html') {|res| + # p res.body.object_id # 538149362 + # p res.body.object_id # 538149362 + # } + # + def body + read_body() + end + + # Because it may be necessary to modify the body, Eg, decompression + # this method facilitates that. + def body=(value) + @body = value + end + + alias entity body #:nodoc: obsolete + + private + + def read_body_0(dest) + if chunked? + read_chunked dest + return + end + clen = content_length() + if clen + @socket.read clen, dest, true # ignore EOF + return + end + clen = range_length() + if clen + @socket.read clen, dest + return + end + @socket.read_all dest + end + + def read_chunked(dest) + len = nil + total = 0 + while true + line = @socket.readline + hexlen = line.slice(/[0-9a-fA-F]+/) or + raise HTTPBadResponse, "wrong chunk size line: #{line}" + len = hexlen.hex + break if len == 0 + @socket.read len, dest; total += len + @socket.read 2 # \r\n + end + until @socket.readline.empty? + # none + end + end + + def stream_check + raise IOError, 'attempt to read body out of block' if @socket.closed? 
+ end + + def procdest(dest, block) + raise ArgumentError, 'both arg and block given for HTTP method' \ + if dest and block + if block + ReadAdapter.new(block) + else + dest || '' + end + end + + end + + + # :enddoc: + + #-- + # for backward compatibility + class HTTP + ProxyMod = ProxyDelta + end + module NetPrivate + HTTPRequest = ::Net::HTTPRequest + end + + HTTPInformationCode = HTTPInformation + HTTPSuccessCode = HTTPSuccess + HTTPRedirectionCode = HTTPRedirection + HTTPRetriableCode = HTTPRedirection + HTTPClientErrorCode = HTTPClientError + HTTPFatalErrorCode = HTTPClientError + HTTPServerErrorCode = HTTPServerError + HTTPResponceReceiver = HTTPResponse + +end # module Net diff --git a/lib/net/https.rb b/lib/net/https.rb new file mode 100644 index 0000000..0b29315 --- /dev/null +++ b/lib/net/https.rb @@ -0,0 +1,136 @@ +=begin + += $RCSfile$ -- SSL/TLS enhancement for Net::HTTP. + +== Info + 'OpenSSL for Ruby 2' project + Copyright (C) 2001 GOTOU Yuuzou + All rights reserved. + +== Licence + This program is licenced under the same licence as Ruby. + (See the file 'LICENCE'.) + +== Requirements + This program requires Net 1.2.0 or higher version. + You can get it from RAA or Ruby's CVS repository. + +== Version + $Id: https.rb 18512 2008-08-12 05:20:09Z aamine $ + + 2001-11-06: Contiributed to Ruby/OpenSSL project. + 2004-03-06: Some code is merged in to net/http. 
+ +== Example + +Here is a simple HTTP client: + + require 'net/http' + require 'uri' + + uri = URI.parse(ARGV[0] || 'http://localhost/') + http = Net::HTTP.new(uri.host, uri.port) + http.start { + http.request_get(uri.path) {|res| + print res.body + } + } + +It can be replaced by the following code: + + require 'net/https' + require 'uri' + + uri = URI.parse(ARGV[0] || 'https://localhost/') + http = Net::HTTP.new(uri.host, uri.port) + http.use_ssl = true if uri.scheme == "https" # enable SSL/TLS + http.start { + http.request_get(uri.path) {|res| + print res.body + } + } + +== class Net::HTTP + +=== Instance Methods + +: use_ssl? + returns true if SSL/TLS is used with HTTP. + +: use_ssl=((|true_or_false|)) + sets use_ssl. + +: peer_cert + returns the X.509 certificates the server presented. + +: key, key=((|key|)) + Sets an OpenSSL::PKey::RSA or OpenSSL::PKey::DSA object. + (This method appeared in Michal Rokos's OpenSSL extension.) + +: cert, cert=((|cert|)) + Sets an OpenSSL::X509::Certificate object as client certificate + (This method appeared in Michal Rokos's OpenSSL extension). + +: ca_file, ca_file=((|path|)) + Sets path of a CA certification file in PEM format. + The file can contain several CA certificates. + +: ca_path, ca_path=((|path|)) + Sets path of a CA certification directory containing certifications + in PEM format. + +: verify_mode, verify_mode=((|mode|)) + Sets the flags for the server certification verification at the + beginning of SSL/TLS session. + OpenSSL::SSL::VERIFY_NONE or OpenSSL::SSL::VERIFY_PEER is acceptable. + +: verify_callback, verify_callback=((|proc|)) + Sets the verify callback for the server certification verification. + +: verify_depth, verify_depth=((|num|)) + Sets the maximum depth for the certificate chain verification. + +: cert_store, cert_store=((|store|)) + Sets the X509::Store to verify peer certificate. + +: ssl_timeout, ssl_timeout=((|sec|)) + Sets the SSL timeout seconds. 
+ +=end + +require 'net/http' +require 'openssl' + +module Net + class HTTP + remove_method :use_ssl? + def use_ssl? + @use_ssl + end + + # Turn on/off SSL. + # This flag must be set before starting session. + # If you change use_ssl value after session started, + # a Net::HTTP object raises IOError. + def use_ssl=(flag) + flag = (flag ? true : false) + if started? and @use_ssl != flag + raise IOError, "use_ssl value changed, but session already started" + end + @use_ssl = flag + end + + SSL_ATTRIBUTES = %w( + ssl_version key cert ca_file ca_path cert_store ciphers + verify_mode verify_callback verify_depth ssl_timeout + ) + attr_accessor(*SSL_ATTRIBUTES) + + def peer_cert + if not use_ssl? or not @socket + return nil + end + @socket.io.peer_cert + end + end +end diff --git a/lib/net/imap.rb b/lib/net/imap.rb new file mode 100644 index 0000000..ea2b598 --- /dev/null +++ b/lib/net/imap.rb @@ -0,0 +1,3500 @@ +# +# = net/imap.rb +# +# Copyright (C) 2000 Shugo Maeda +# +# This library is distributed under the terms of the Ruby license. +# You can freely distribute/modify this library. +# +# Documentation: Shugo Maeda, with RDoc conversion and overview by William +# Webber. +# +# See Net::IMAP for documentation. +# + + +require "socket" +require "monitor" +require "digest/md5" +require "strscan" +begin + require "openssl/ssl" +rescue LoadError +end + +module Net + + # + # Net::IMAP implements Internet Message Access Protocol (IMAP) client + # functionality. The protocol is described in [IMAP]. + # + # == IMAP Overview + # + # An IMAP client connects to a server, and then authenticates + # itself using either #authenticate() or #login(). Having + # authenticated itself, there is a range of commands + # available to it. Most work with mailboxes, which may be + # arranged in an hierarchical namespace, and each of which + # contains zero or more messages. 
How this is implemented on + # the server is implementation-dependent; on a UNIX server, it + # will frequently be implemented as files in mailbox format + # within a hierarchy of directories. + # + # To work on the messages within a mailbox, the client must + # first select that mailbox, using either #select() or (for + # read-only access) #examine(). Once the client has successfully + # selected a mailbox, they enter _selected_ state, and that + # mailbox becomes the _current_ mailbox, on which mail-item + # related commands implicitly operate. + # + # Messages have two sorts of identifiers: message sequence + # numbers, and UIDs. + # + # Message sequence numbers number messages within a mail box + # from 1 up to the number of items in the mail box. If a new + # message arrives during a session, it receives a sequence + # number equal to the new size of the mail box. If messages + # are expunged from the mailbox, remaining messages have their + # sequence numbers "shuffled down" to fill the gaps. + # + # UIDs, on the other hand, are permanently guaranteed not to + # identify another message within the same mailbox, even if + # the existing message is deleted. UIDs are required to + # be assigned in ascending (but not necessarily sequential) + # order within a mailbox; this means that if a non-IMAP client + # rearranges the order of mailitems within a mailbox, the + # UIDs have to be reassigned. An IMAP client thus cannot + # rearrange message order. 
+ # + # == Examples of Usage + # + # === List sender and subject of all recent messages in the default mailbox + # + # imap = Net::IMAP.new('mail.example.com') + # imap.authenticate('LOGIN', 'joe_user', 'joes_password') + # imap.examine('INBOX') + # imap.search(["RECENT"]).each do |message_id| + # envelope = imap.fetch(message_id, "ENVELOPE")[0].attr["ENVELOPE"] + # puts "#{envelope.from[0].name}: \t#{envelope.subject}" + # end + # + # === Move all messages from April 2003 from "Mail/sent-mail" to "Mail/sent-apr03" + # + # imap = Net::IMAP.new('mail.example.com') + # imap.authenticate('LOGIN', 'joe_user', 'joes_password') + # imap.select('Mail/sent-mail') + # if not imap.list('Mail/', 'sent-apr03') + # imap.create('Mail/sent-apr03') + # end + # imap.search(["BEFORE", "30-Apr-2003", "SINCE", "1-Apr-2003"]).each do |message_id| + # imap.copy(message_id, "Mail/sent-apr03") + # imap.store(message_id, "+FLAGS", [:Deleted]) + # end + # imap.expunge + # + # == Thread Safety + # + # Net::IMAP supports concurrent threads. For example, + # + # imap = Net::IMAP.new("imap.foo.net", "imap2") + # imap.authenticate("cram-md5", "bar", "password") + # imap.select("inbox") + # fetch_thread = Thread.start { imap.fetch(1..-1, "UID") } + # search_result = imap.search(["BODY", "hello"]) + # fetch_result = fetch_thread.value + # imap.disconnect + # + # This script invokes the FETCH command and the SEARCH command concurrently. + # + # == Errors + # + # An IMAP server can send three different types of responses to indicate + # failure: + # + # NO:: the attempted command could not be successfully completed. For + # instance, the username/password used for logging in are incorrect; + # the selected mailbox does not exist; etc. + # + # BAD:: the request from the client does not follow the server's + # understanding of the IMAP protocol. 
This includes attempting + # commands from the wrong client state; for instance, attempting + # to perform a SEARCH command without having SELECTed a current + # mailbox. It can also signal that an internal server + # failure (such as a disk crash) has occurred. + # + # BYE:: the server is saying goodbye. This can be part of a normal + # logout sequence, and can be used as part of a login sequence + # to indicate that the server is (for some reason) unwilling + # to accept our connection. As a response to any other command, + # it indicates either that the server is shutting down, or that + # the server is timing out the client connection due to inactivity. + # + # These three error responses are represented by the errors + # Net::IMAP::NoResponseError, Net::IMAP::BadResponseError, and + # Net::IMAP::ByeResponseError, all of which are subclasses of + # Net::IMAP::ResponseError. Essentially, all methods that involve + # sending a request to the server can generate one of these errors. + # Only the most pertinent instances have been documented below. + # + # Because the IMAP class uses Sockets for communication, its methods + # are also susceptible to the various errors that can occur when + # working with sockets. These are generally represented as + # Errno errors. For instance, any method that involves sending a + # request to the server and/or receiving a response from it could + # raise an Errno::EPIPE error if the network connection unexpectedly + # goes down. See the socket(7), ip(7), tcp(7), socket(2), connect(2), + # and associated man pages. + # + # Finally, a Net::IMAP::DataFormatError is thrown if low-level data + # is found to be in an incorrect format (for instance, when converting + # between UTF-8 and UTF-16), and Net::IMAP::ResponseParseError is + # thrown if a server response is non-parseable. + # + # + # == References + # + # [[IMAP]] + # M. Crispin, "INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1", + # RFC 2060, December 1996. 
(Note: since obsoleted by RFC 3501) + # + # [[LANGUAGE-TAGS]] + # Alvestrand, H., "Tags for the Identification of + # Languages", RFC 1766, March 1995. + # + # [[MD5]] + # Myers, J., and M. Rose, "The Content-MD5 Header Field", RFC + # 1864, October 1995. + # + # [[MIME-IMB]] + # Freed, N., and N. Borenstein, "MIME (Multipurpose Internet + # Mail Extensions) Part One: Format of Internet Message Bodies", RFC + # 2045, November 1996. + # + # [[RFC-822]] + # Crocker, D., "Standard for the Format of ARPA Internet Text + # Messages", STD 11, RFC 822, University of Delaware, August 1982. + # + # [[RFC-2087]] + # Myers, J., "IMAP4 QUOTA extension", RFC 2087, January 1997. + # + # [[RFC-2086]] + # Myers, J., "IMAP4 ACL extension", RFC 2086, January 1997. + # + # [[RFC-2195]] + # Klensin, J., Catoe, R., and Krumviede, P., "IMAP/POP AUTHorize Extension + # for Simple Challenge/Response", RFC 2195, September 1997. + # + # [[SORT-THREAD-EXT]] + # Crispin, M., "INTERNET MESSAGE ACCESS PROTOCOL - SORT and THREAD + # Extensions", draft-ietf-imapext-sort, May 2003. + # + # [[OSSL]] + # http://www.openssl.org + # + # [[RSSL]] + # http://savannah.gnu.org/projects/rubypki + # + # [[UTF7]] + # Goldsmith, D. and Davis, M., "UTF-7: A Mail-Safe Transformation Format of + # Unicode", RFC 2152, May 1997. + # + class IMAP + include MonitorMixin + if defined?(OpenSSL) + include OpenSSL + include SSL + end + + # Returns an initial greeting response from the server. + attr_reader :greeting + + # Returns recorded untagged responses. For example: + # + # imap.select("inbox") + # p imap.responses["EXISTS"][-1] + # #=> 2 + # p imap.responses["UIDVALIDITY"][-1] + # #=> 968263756 + attr_reader :responses + + # Returns all response handlers. + attr_reader :response_handlers + + # The thread to receive exceptions. 
+    attr_accessor :client_thread
+
+    # Flag indicating a message has been seen
+    SEEN = :Seen
+
+    # Flag indicating a message has been answered
+    ANSWERED = :Answered
+
+    # Flag indicating a message has been flagged for special or urgent
+    # attention
+    FLAGGED = :Flagged
+
+    # Flag indicating a message has been marked for deletion.  This
+    # will occur when the mailbox is closed or expunged.
+    DELETED = :Deleted
+
+    # Flag indicating a message is only a draft or work-in-progress version.
+    DRAFT = :Draft
+
+    # Flag indicating that the message is "recent", meaning that this
+    # session is the first session in which the client has been notified
+    # of this message.
+    RECENT = :Recent
+
+    # Flag indicating that a mailbox context name cannot contain
+    # children.
+    NOINFERIORS = :Noinferiors
+
+    # Flag indicating that a mailbox is not selected.
+    NOSELECT = :Noselect
+
+    # Flag indicating that a mailbox has been marked "interesting" by
+    # the server; this commonly indicates that the mailbox contains
+    # new messages.
+    MARKED = :Marked
+
+    # Flag indicating that the mailbox does not contain new messages.
+    UNMARKED = :Unmarked
+
+    # Returns the debug mode (the class variable @@debug, which is
+    # shared by every Net::IMAP object).
+    def self.debug
+      return @@debug
+    end
+
+    # Sets the debug mode and returns the value that was assigned.
+    def self.debug=(val)
+      return @@debug = val
+    end
+
+    # Adds an authenticator for Net::IMAP#authenticate.  +auth_type+
+    # is the type of authentication this authenticator supports
+    # (for instance, "LOGIN").  Net::IMAP#authenticate upcases the
+    # requested type before looking it up, so register the
+    # authenticator under an upper-case +auth_type+.
+    #
+    # The +authenticator+ is instantiated by Net::IMAP#authenticate
+    # with new(*args), and the resulting object must define a
+    # process() method to handle authentication with the server.
+    # See Net::IMAP::LoginAuthenticator,
+    # Net::IMAP::CramMD5Authenticator, and Net::IMAP::DigestMD5Authenticator
+    # for examples.
+    #
+    # If +auth_type+ refers to an existing authenticator, it will be
+    # replaced by the new one.
+    def self.add_authenticator(auth_type, authenticator)
+      @@authenticators[auth_type] = authenticator
+    end
+
+    # Disconnects from the server.
+ def disconnect + begin + begin + # try to call SSL::SSLSocket#io. + @sock.io.shutdown + rescue NoMethodError + # @sock is not an SSL::SSLSocket. + @sock.shutdown + end + rescue Errno::ENOTCONN + # ignore `Errno::ENOTCONN: Socket is not connected' on some platforms. + end + @receiver_thread.join + @sock.close + end + + # Returns true if disconnected from the server. + def disconnected? + return @sock.closed? + end + + # Sends a CAPABILITY command, and returns an array of + # capabilities that the server supports. Each capability + # is a string. See [IMAP] for a list of possible + # capabilities. + # + # Note that the Net::IMAP class does not modify its + # behaviour according to the capabilities of the server; + # it is up to the user of the class to ensure that + # a certain capability is supported by a server before + # using it. + def capability + synchronize do + send_command("CAPABILITY") + return @responses.delete("CAPABILITY")[-1] + end + end + + # Sends a NOOP command to the server. It does nothing. + def noop + send_command("NOOP") + end + + # Sends a LOGOUT command to inform the server that the client is + # done with the connection. + def logout + send_command("LOGOUT") + end + + # Sends a STARTTLS command to start TLS session. + def starttls(options = {}, verify = true) + send_command("STARTTLS") do |resp| + if resp.kind_of?(TaggedResponse) && resp.name == "OK" + begin + # for backward compatibility + certs = options.to_str + options = create_ssl_params(certs, verify) + rescue NoMethodError + end + start_tls_session(options) + end + end + end + + # Sends an AUTHENTICATE command to authenticate the client. + # The +auth_type+ parameter is a string that represents + # the authentication mechanism to be used. Currently Net::IMAP + # supports authentication mechanisms: + # + # LOGIN:: login using cleartext user and password. + # CRAM-MD5:: login with cleartext user and encrypted password + # (see [RFC-2195] for a full description). 
#              This mechanism requires that the server have the user's
#              password stored in clear-text.
#
# For both these mechanisms, there should be two +args+: username
# and (cleartext) password.  A server may not support one or other
# of these mechanisms; check #capability() for a capability of
# the form "AUTH=LOGIN" or "AUTH=CRAM-MD5".
#
# Authentication is done using the appropriate authenticator object:
# see @@authenticators for more information on plugging in your own
# authenticator.
#
# For example:
#
#    imap.authenticate('LOGIN', user, password)
#
# A Net::IMAP::NoResponseError is raised if authentication fails.
def authenticate(auth_type, *args)
  mechanism = auth_type.upcase
  authenticator_class = @@authenticators.fetch(mechanism) do
    raise ArgumentError,
      format('unknown auth type - "%s"', mechanism)
  end
  authenticator = authenticator_class.new(*args)
  send_command("AUTHENTICATE", mechanism) do |resp|
    next unless resp.instance_of?(ContinuationRequest)
    # The challenge arrives base64-encoded; the reply is sent back
    # base64-encoded on a single line.
    challenge = resp.data.text.unpack("m")[0]
    reply = [authenticator.process(challenge)].pack("m").gsub(/\n/, "")
    send_string_data(reply)
    put_string(CRLF)
  end
end

# Sends a LOGIN command to identify the client and carries
# the plaintext +password+ authenticating this +user+.  Note
# that, unlike calling #authenticate() with an +auth_type+
# of "LOGIN", #login() does *not* use the login authenticator.
#
# A Net::IMAP::NoResponseError is raised if authentication fails.
def login(user, password)
  send_command("LOGIN", user, password)
end

# Sends a SELECT command to select a +mailbox+ so that messages
# in the +mailbox+ can be accessed.
#
# After you have selected a mailbox, you may retrieve the
# number of items in that mailbox from @responses["EXISTS"][-1],
# and the number of recent messages from @responses["RECENT"][-1].
# Note that these values can change if new messages arrive
# during a session; see #add_response_handler() for a way of
# detecting this event.
#
# A Net::IMAP::NoResponseError is raised if the mailbox does not
# exist or is for some reason non-selectable.
def select(mailbox)
  synchronize {
    # Drop responses recorded for the previously selected mailbox.
    @responses.clear
    send_command("SELECT", mailbox)
  }
end

# Sends an EXAMINE command to select a +mailbox+ so that messages
# in the +mailbox+ can be accessed.  Behaves the same as #select(),
# except that the selected +mailbox+ is identified as read-only.
#
# A Net::IMAP::NoResponseError is raised if the mailbox does not
# exist or is for some reason non-examinable.
def examine(mailbox)
  synchronize {
    @responses.clear
    send_command("EXAMINE", mailbox)
  }
end

# Sends a CREATE command to create a new +mailbox+.
#
# A Net::IMAP::NoResponseError is raised if a mailbox with that name
# cannot be created.
def create(mailbox)
  send_command("CREATE", mailbox)
end

# Sends a DELETE command to remove the +mailbox+.
#
# A Net::IMAP::NoResponseError is raised if a mailbox with that name
# cannot be deleted, either because it does not exist or because the
# client does not have permission to delete it.
def delete(mailbox)
  send_command("DELETE", mailbox)
end

# Sends a RENAME command to change the name of the +mailbox+ to
# +newname+.
#
# A Net::IMAP::NoResponseError is raised if a mailbox with the
# name +mailbox+ cannot be renamed to +newname+ for whatever
# reason; for instance, because +mailbox+ does not exist, or
# because there is already a mailbox with the name +newname+.
def rename(mailbox, newname)
  send_command("RENAME", mailbox, newname)
end

# Sends a SUBSCRIBE command to add the specified +mailbox+ name to
# the server's set of "active" or "subscribed" mailboxes as returned
# by #lsub().
#
# A Net::IMAP::NoResponseError is raised if +mailbox+ cannot be
# subscribed to, for instance because it does not exist.
def subscribe(mailbox)
  send_command("SUBSCRIBE", mailbox)
end

# Sends an UNSUBSCRIBE command to remove the specified +mailbox+ name
# from the server's set of "active" or "subscribed" mailboxes.
#
# A Net::IMAP::NoResponseError is raised if +mailbox+ cannot be
# unsubscribed from, for instance because the client is not currently
# subscribed to it.
def unsubscribe(mailbox)
  send_command("UNSUBSCRIBE", mailbox)
end

# Sends a LIST command, and returns a subset of names from
# the complete set of all names available to the client.
# +refname+ provides a context (for instance, a base directory
# in a directory-based mailbox hierarchy).  +mailbox+ specifies
# a mailbox or (via wildcards) mailboxes under that context.
# Two wildcards may be used in +mailbox+: '*', which matches
# all characters *including* the hierarchy delimiter (for instance,
# '/' on a UNIX-hosted directory-based mailbox hierarchy); and '%',
# which matches all characters *except* the hierarchy delimiter.
#
# If +refname+ is empty, +mailbox+ is used directly to determine
# which mailboxes to match.  If +mailbox+ is empty, the root
# name of +refname+ and the hierarchy delimiter are returned.
#
# The return value is an array of +Net::IMAP::MailboxList+. For example:
#
#   imap.create("foo/bar")
#   imap.create("foo/baz")
#   p imap.list("", "foo/%")
#   #=> [#<Net::IMAP::MailboxList attr=[:Noselect], delim="/", name="foo/">, \\
#        #<Net::IMAP::MailboxList attr=[:Noinferiors, :Marked], delim="/", name="foo/bar">, \\
#        #<Net::IMAP::MailboxList attr=[:Noinferiors], delim="/", name="foo/baz">]
def list(refname, mailbox)
  synchronize {
    send_command("LIST", refname, mailbox)
    @responses.delete("LIST")
  }
end

# Sends the GETQUOTAROOT command along with specified +mailbox+.
# This command is generally available to both admin and user.
# If mailbox exists, returns an array containing objects of
# Net::IMAP::MailboxQuotaRoot and Net::IMAP::MailboxQuota.
def getquotaroot(mailbox)
  synchronize {
    send_command("GETQUOTAROOT", mailbox)
    # QUOTAROOT entries first, then the QUOTA entries.
    [].concat(@responses.delete("QUOTAROOT")).
       concat(@responses.delete("QUOTA"))
  }
end

# Sends the GETQUOTA command along with specified +mailbox+.
# If this mailbox exists, then an array containing a
# Net::IMAP::MailboxQuota object is returned.  This
# command generally is only available to server admin.
def getquota(mailbox)
  synchronize {
    send_command("GETQUOTA", mailbox)
    @responses.delete("QUOTA")
  }
end

# Sends a SETQUOTA command along with the specified +mailbox+ and
# +quota+.  If +quota+ is nil, then quota will be unset for that
# mailbox.  Typically one needs to be logged in as server admin
# for this to work.  The IMAP quota commands are described in
# [RFC-2087].
def setquota(mailbox, quota)
  # An empty list clears the quota; otherwise only STORAGE is set.
  limits = quota.nil? ? '()' : '(STORAGE ' + quota.to_s + ')'
  send_command("SETQUOTA", mailbox, RawData.new(limits))
end

# Sends the SETACL command along with +mailbox+, +user+ and the
# +rights+ that user is to have on that mailbox.  If +rights+ is nil,
# then that user will be stripped of any rights to that mailbox.
# The IMAP ACL commands are described in [RFC-2086].
def setacl(mailbox, user, rights)
  send_command("SETACL", mailbox, user, rights.nil? ? "" : rights)
end

# Send the GETACL command along with specified +mailbox+.
# If this mailbox exists, an array containing objects of
# Net::IMAP::MailboxACLItem will be returned.
def getacl(mailbox)
  synchronize {
    send_command("GETACL", mailbox)
    @responses.delete("ACL")[-1]
  }
end

# Sends a LSUB command, and returns a subset of names from the set
# of names that the user has declared as being "active" or
# "subscribed".  +refname+ and +mailbox+ are interpreted as
# for #list().
# The return value is an array of +Net::IMAP::MailboxList+.
def lsub(refname, mailbox)
  synchronize {
    send_command("LSUB", refname, mailbox)
    @responses.delete("LSUB")
  }
end

# Sends a STATUS command, and returns the status of the indicated
# +mailbox+. +attr+ is a list of one or more attributes that
# we are requesting the status of.  Supported attributes include:
#
#   MESSAGES:: the number of messages in the mailbox.
#   RECENT:: the number of recent messages in the mailbox.
#   UNSEEN:: the number of unseen messages in the mailbox.
#
# The return value is a hash of attributes. For example:
#
#   p imap.status("inbox", ["MESSAGES", "RECENT"])
#   #=> {"RECENT"=>0, "MESSAGES"=>44}
#
# A Net::IMAP::NoResponseError is raised if status values
# for +mailbox+ cannot be returned, for instance because it
# does not exist.
def status(mailbox, attr)
  synchronize {
    send_command("STATUS", mailbox, attr)
    @responses.delete("STATUS")[-1].attr
  }
end

# Sends an APPEND command to append the +message+ to the end of
# the +mailbox+. The optional +flags+ argument is an array of
# flags to initially pass to the new message.  The optional
# +date_time+ argument specifies the creation time to assign to the
# new message; it defaults to the current time.
# For example:
#
#   imap.append("inbox", <
#
# NOTE(review): this copy of the file is truncated at this point.  The
# rest of the APPEND example, whatever definitions followed it, and the
# opening of the SEARCH documentation are missing; only the tail of the
# list of search keys survives below.  Restore from the upstream file.
#
# <message set>:: a set of message sequence numbers.  ',' indicates
#                 an interval, ':' indicates a range.  For instance,
#                 '2,10:12,15' means "2,10,11,12,15".
#
# BEFORE <date>:: messages with an internal date strictly before
#                 <date>.  The date argument has a format similar
#                 to 8-Aug-2002.
#
# BODY <string>:: messages that contain <string> within their body.
#
# CC <string>:: messages containing <string> in their CC field.
#
# FROM <string>:: messages that contain <string> in their FROM field.
#
# NEW:: messages with the \Recent, but not the \Seen, flag set.
#
# NOT <search-key>:: negate the following search key.
#
# OR <search-key> <search-key>:: "or" two search keys together.
#
# ON <date>:: messages with an internal date exactly equal to <date>,
#             which has a format similar to 8-Aug-2002.
#
# SINCE <date>:: messages with an internal date on or after <date>.
#
# SUBJECT <string>:: messages with <string> in their subject.
#
# TO <string>:: messages with <string> in their TO field.
#
# For example:
#
#   p imap.search(["SUBJECT", "hello", "NOT", "NEW"])
#   #=> [1, 6, 7, 8]
def search(keys, charset = nil)
  search_internal("SEARCH", keys, charset)
end

# As for #search(), but returns unique identifiers.
def uid_search(keys, charset = nil)
  search_internal("UID SEARCH", keys, charset)
end

# Sends a FETCH command to retrieve data associated with a message
# in the mailbox. The +set+ parameter is a number or an array of
# numbers or a Range object. The number is a message sequence
# number.  +attr+ is a list of attributes to fetch; see the
# documentation for Net::IMAP::FetchData for a list of valid
# attributes.
# The return value is an array of Net::IMAP::FetchData. For example:
#
#   p imap.fetch(6..8, "UID")
#   #=> [#<Net::IMAP::FetchData seqno=6, attr={"UID"=>98}>, \\
#        #<Net::IMAP::FetchData seqno=7, attr={"UID"=>99}>, \\
#        #<Net::IMAP::FetchData seqno=8, attr={"UID"=>100}>]
#   p imap.fetch(6, "BODY[HEADER.FIELDS (SUBJECT)]")
#   #=> [#<Net::IMAP::FetchData seqno=6, attr={"BODY[HEADER.FIELDS (SUBJECT)]"=>"Subject: test\r\n\r\n"}>]
#   data = imap.uid_fetch(98, ["RFC822.SIZE", "INTERNALDATE"])[0]
#   p data.seqno
#   #=> 6
#   p data.attr["RFC822.SIZE"]
#   #=> 611
#   p data.attr["INTERNALDATE"]
#   #=> "12-Oct-2000 22:40:59 +0900"
#   p data.attr["UID"]
#   #=> 98
def fetch(set, attr)
  fetch_internal("FETCH", set, attr)
end

# As for #fetch(), but +set+ contains unique identifiers.
def uid_fetch(set, attr)
  fetch_internal("UID FETCH", set, attr)
end

# Sends a STORE command to alter data associated with messages
# in the mailbox, in particular their flags. The +set+ parameter
# is a number or an array of numbers or a Range object. Each number
# is a message sequence number.
# +attr+ is the name of a data item
# to store: 'FLAGS' means to replace the message's flag list
# with the provided one; '+FLAGS' means to add the provided flags;
# and '-FLAGS' means to remove them.  +flags+ is a list of flags.
#
# The return value is an array of Net::IMAP::FetchData. For example:
#
#   p imap.store(6..8, "+FLAGS", [:Deleted])
#   #=> [#<Net::IMAP::FetchData seqno=6, attr={"FLAGS"=>[:Seen, :Deleted]}>, \\
#        #<Net::IMAP::FetchData seqno=7, attr={"FLAGS"=>[:Seen, :Deleted]}>, \\
#        #<Net::IMAP::FetchData seqno=8, attr={"FLAGS"=>[:Seen, :Deleted]}>]
def store(set, attr, flags)
  store_internal("STORE", set, attr, flags)
end

# As for #store(), but +set+ contains unique identifiers.
def uid_store(set, attr, flags)
  store_internal("UID STORE", set, attr, flags)
end

# Sends a COPY command to copy the specified message(s) to the end
# of the specified destination +mailbox+. The +set+ parameter is
# a number or an array of numbers or a Range object. The number is
# a message sequence number.
def copy(set, mailbox)
  copy_internal("COPY", set, mailbox)
end

# As for #copy(), but +set+ contains unique identifiers.
def uid_copy(set, mailbox)
  copy_internal("UID COPY", set, mailbox)
end

# Sends a SORT command to sort messages in the mailbox.
# Returns an array of message sequence numbers. For example:
#
#   p imap.sort(["FROM"], ["ALL"], "US-ASCII")
#   #=> [1, 2, 3, 5, 6, 7, 8, 4, 9]
#   p imap.sort(["DATE"], ["SUBJECT", "hello"], "US-ASCII")
#   #=> [6, 7, 8, 1]
#
# See [SORT-THREAD-EXT] for more details.
def sort(sort_keys, search_keys, charset)
  sort_internal("SORT", sort_keys, search_keys, charset)
end

# As for #sort(), but returns an array of unique identifiers.
def uid_sort(sort_keys, search_keys, charset)
  sort_internal("UID SORT", sort_keys, search_keys, charset)
end

# Adds a response handler.
# For example, to detect when
# the server sends us a new EXISTS response (which normally
# indicates new messages being added to the mail box),
# you could add the following handler after selecting the
# mailbox.
#
#   imap.add_response_handler { |resp|
#     if resp.kind_of?(Net::IMAP::UntaggedResponse) and resp.name == "EXISTS"
#       puts "Mailbox now has #{resp.data} messages"
#     end
#   }
#
def add_response_handler(handler = Proc.new)
  @response_handlers.push(handler)
end

# Removes the response handler.
def remove_response_handler(handler)
  @response_handlers.delete(handler)
end

# As for #search(), but returns message sequence numbers in threaded
# format, as a Net::IMAP::ThreadMember tree.  The supported algorithms
# are:
#
# ORDEREDSUBJECT:: split into single-level threads according to subject,
#                  ordered by date.
# REFERENCES:: split into threads by parent/child relationships determined
#              by which message is a reply to which.
#
# Unlike #search(), +charset+ is a required argument.  US-ASCII
# and UTF-8 are sample values.
#
# See [SORT-THREAD-EXT] for more details.
def thread(algorithm, search_keys, charset)
  thread_internal("THREAD", algorithm, search_keys, charset)
end

# As for #thread(), but returns unique identifiers instead of
# message sequence numbers.
def uid_thread(algorithm, search_keys, charset)
  thread_internal("UID THREAD", algorithm, search_keys, charset)
end

# Decode a string from modified UTF-7 format to UTF-8.
#
# UTF-7 is a 7-bit encoding of Unicode [UTF7].  IMAP uses a
# slightly modified version of this to encode mailbox names
# containing non-ASCII characters; see [IMAP] section 5.1.3.
#
# Net::IMAP does _not_ automatically encode and decode
# mailbox names to and from utf7.
def self.decode_utf7(s)
  decoded = s.gsub(/&(.*?)-/n) do
    if $1.empty?
      # "&-" is the escaped form of a literal ampersand.
      "&"
    else
      # Modified base64 uses "," for "/" and omits the "=" padding.
      padded = $1.tr(",", "/")
      remainder = padded.length % 4
      padded.concat("=" * (4 - remainder)) if remainder > 0
      # The payload is a sequence of big-endian 16-bit code units.
      padded.unpack("m")[0].unpack("n*").pack("U*")
    end
  end
  decoded.force_encoding("UTF-8")
end

# Encode a string from UTF-8 format to modified UTF-7.
def self.encode_utf7(s)
  encoded = s.gsub(/(&)|([^\x20-\x25\x27-\x7e]+)/u) do
    if $1
      "&-"
    else
      units = [$&.unpack("U*").pack("n*")].pack("m")
      "&" + units.delete("=\n").tr("/", ",") + "-"
    end
  end
  encoded.force_encoding("ASCII-8BIT")
end

private

CRLF = "\r\n"      # :nodoc:
PORT = 143         # :nodoc:
SSL_PORT = 993   # :nodoc:

@@debug = false
@@authenticators = {}

# call-seq:
#    Net::IMAP.new(host, options = {})
#
# Creates a new Net::IMAP object and connects it to the specified
# +host+.
#
# +options+ is an option hash, each key of which is a symbol.
#
# The available options are:
#
# port:: port number (default value is 143 for imap, or 993 for imaps)
# ssl:: if options[:ssl] is true, then an attempt will be made
#       to use SSL (now TLS) to connect to the server.  For this to work
#       OpenSSL [OSSL] and the Ruby OpenSSL [RSSL] extensions need to
#       be installed.
#       if options[:ssl] is a hash, it's passed to
#       OpenSSL::SSL::SSLContext#set_params as parameters.
#
# The most common errors are:
#
# Errno::ECONNREFUSED:: connection refused by +host+ or an intervening
#                       firewall.
# Errno::ETIMEDOUT:: connection timed out (possibly due to packets
#                    being dropped by an intervening firewall).
# Errno::ENETUNREACH:: there is no route to that network.
# SocketError:: hostname not known or other socket error.
# Net::IMAP::ByeResponseError:: we connected to the host, but they
#                               immediately said goodbye to us.
# +port_or_options+ is either the option hash described above or, for
# backward compatibility, a bare port number; in the latter case +usessl+,
# +certs+ and +verify+ are turned into the :ssl option.
def initialize(host, port_or_options = {},
               usessl = false, certs = nil, verify = true)
  super()
  @host = host
  begin
    options = port_or_options.to_hash
  rescue NoMethodError
    # for backward compatibility
    options = {}
    options[:port] = port_or_options
    if usessl
      options[:ssl] = create_ssl_params(certs, verify)
    end
  end
  @port = options[:port] || (options[:ssl] ? SSL_PORT : PORT)
  @tag_prefix = "RUBY"
  @tagno = 0
  @parser = ResponseParser.new
  @sock = TCPSocket.open(@host, @port)
  if options[:ssl]
    start_tls_session(options[:ssl])
    @usessl = true
  else
    @usessl = false
  end
  @responses = Hash.new([].freeze)
  @tagged_responses = {}
  @response_handlers = []
  @tagged_response_arrival = new_cond
  @continuation_request_arrival = new_cond
  @logout_command_tag = nil
  @debug_output_bol = true
  @exception = nil

  # The greeting is read synchronously, before the receiver thread exists.
  @greeting = get_response
  if @greeting.name == "BYE"
    @sock.close
    raise ByeResponseError, @greeting.raw_data
  end

  @client_thread = Thread.current
  @receiver_thread = Thread.start {
    receive_responses
  }
end

# Body of the receiver thread started by #initialize.  Reads responses
# one at a time, records them, wakes up threads waiting on the
# condition variables and calls the registered response handlers.
#
# The loop ends when reading fails or hits end of file, when the tagged
# response to LOGOUT arrives, or when the server sends an unsolicited BYE.
def receive_responses
  connection_closed = false
  until connection_closed
    synchronize do
      @exception = nil
    end
    begin
      resp = get_response
    rescue Exception => e
      synchronize do
        @sock.close
        @exception = e
      end
      break
    end
    unless resp
      synchronize do
        @exception = EOFError.new("end of file reached")
      end
      break
    end
    begin
      synchronize do
        case resp
        when TaggedResponse
          @tagged_responses[resp.tag] = resp
          @tagged_response_arrival.broadcast
          if resp.tag == @logout_command_tag
            return
          end
        when UntaggedResponse
          record_response(resp.name, resp.data)
          if resp.data.instance_of?(ResponseText) &&
              (code = resp.data.code)
            record_response(code.name, code.data)
          end
          if resp.name == "BYE" && @logout_command_tag.nil?
            @sock.close
            @exception = ByeResponseError.new(resp.raw_data)
            # A `break' here would only leave the synchronize block, and
            # the loop would go on to clear @exception and read from the
            # closed socket.  Use a flag to stop the loop instead.
            connection_closed = true
          end
        when ContinuationRequest
          @continuation_request_arrival.signal
        end
        @response_handlers.each do |handler|
          handler.call(resp)
        end
      end
    rescue Exception => e
      @exception = e
      synchronize do
        @tagged_response_arrival.broadcast
        @continuation_request_arrival.broadcast
      end
    end
  end
  # Wake up every waiter so it can notice @exception.
  synchronize do
    @tagged_response_arrival.broadcast
    @continuation_request_arrival.broadcast
  end
end

# Waits until the tagged response for +tag+ has been recorded and
# returns it.  Raises NoResponseError or BadResponseError when the
# response name is NO or BAD, and re-raises @exception if one is set
# while waiting.  +cmd+ is not used.
def get_tagged_response(tag, cmd)
  until @tagged_responses.key?(tag)
    raise @exception if @exception
    @tagged_response_arrival.wait
  end
  resp = @tagged_responses.delete(tag)
  case resp.name
  when /\A(?:NO)\z/ni
    raise NoResponseError, resp.data.text
  when /\A(?:BAD)\z/ni
    raise BadResponseError, resp.data.text
  else
    return resp
  end
end

# Reads one complete response from the socket and returns the parsed
# result, or nil if nothing could be read.
def get_response
  buff = ""
  while true
    s = @sock.gets(CRLF)
    break unless s
    buff.concat(s)
    # A line ending in "{n}" announces an n-byte literal; read it and
    # keep going, since the response continues after the literal.
    if /\{(\d+)\}\r\n/n =~ s
      s = @sock.read($1.to_i)
      buff.concat(s)
    else
      break
    end
  end
  return nil if buff.length == 0
  if @@debug
    $stderr.print(buff.gsub(/^/n, "S: "))
  end
  return @parser.parse(buff)
end

# Appends +data+ to the list of responses recorded under +name+.
def record_response(name, data)
  # @responses has a frozen default array, so create a real one first.
  unless @responses.has_key?(name)
    @responses[name] = []
  end
  @responses[name].push(data)
end

# Sends +cmd+ with +args+ under a fresh tag and returns the tagged
# response.  All arguments are validated before anything is written.
# If a block is given it is installed as a response handler for the
# duration of the command.
def send_command(cmd, *args, &block)
  synchronize do
    args.each do |i|
      validate_data(i)
    end
    tag = generate_tag
    put_string(tag + " " + cmd)
    args.each do |i|
      put_string(" ")
      send_data(i)
    end
    put_string(CRLF)
    if cmd == "LOGOUT"
      @logout_command_tag = tag
    end
    if block
      add_response_handler(block)
    end
    begin
      return get_tagged_response(tag, cmd)
    ensure
      if block
        remove_response_handler(block)
      end
    end
  end
end

# Returns the next command tag: the prefix plus a zero-padded counter.
def generate_tag
  @tagno += 1
  return format("%s%04d", @tag_prefix, @tagno)
end

# Writes +str+ to the socket and, in debug mode, echoes it to $stderr
# with a "C: " prefix at the beginning of each line.
def put_string(str)
  @sock.print(str)
  if @@debug
    if @debug_output_bol
      $stderr.print("C: ")
    end
    $stderr.print(str.gsub(/\n(?!\z)/n, "\nC: "))
    # Remember whether the next chunk starts a new line.
    if /\r\n\z/n.match(str)
      @debug_output_bol = true
    else
      @debug_output_bol = false
    end
  end
end

# Checks that +data+ can be sent as a command argument.  Integers must
# fit in an unsigned 32-bit number, arrays are checked element by
# element, and any other object is asked to validate itself.
# Raises DataFormatError for an out-of-range integer.
def validate_data(data)
  case data
  when nil
  when String
  when Integer
    if data < 0 || data >= 4294967296
      raise DataFormatError, data.to_s
    end
  when Array
    data.each do |i|
      validate_data(i)
    end
  when Time
  when Symbol
  else
    data.validate
  end
end

# Writes +data+ in the form matching its class; objects of any other
# class write themselves through their own send_data method.
def send_data(data)
  case data
  when nil
    put_string("NIL")
  when String
    send_string_data(data)
  when Integer
    send_number_data(data)
  when Array
    send_list_data(data)
  when Time
    send_time_data(data)
  when Symbol
    send_symbol_data(data)
  else
    data.send_data(self)
  end
end

# Writes a string as "", a literal, a quoted string or a bare atom,
# depending on the characters it contains.
def send_string_data(str)
  case str
  when ""
    put_string('""')
  when /[\x80-\xff\r\n]/n
    # literal
    send_literal(str)
  when /[(){ \x00-\x1f\x7f%*"\\]/n
    # quoted string
    send_quoted_string(str)
  else
    put_string(str)
  end
end

# Writes +str+ in double quotes, backslash-escaping '"' and '\'.
def send_quoted_string(str)
  put_string('"' + str.gsub(/["\\]/n, "\\\\\\&") + '"')
end

# Writes +str+ as a literal: announces its size, waits for the
# continuation request, then sends the data.
def send_literal(str)
  # The announced size is an octet count, so use bytesize; length would
  # count characters and under-report multibyte strings.
  put_string("{" + str.bytesize.to_s + "}" + CRLF)
  @continuation_request_arrival.wait
  raise @exception if @exception
  put_string(str)
end

# Writes +num+ in decimal.
def send_number_data(num)
  put_string(num.to_s)
end

# Writes +list+ as a parenthesized, space-separated list.
def send_list_data(list)
  put_string("(")
  first = true
  list.each do |i|
    if first
      first = false
    else
      put_string(" ")
    end
    send_data(i)
  end
  put_string(")")
end

DATE_MONTH = %w(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)

# Writes +time+ as a quoted date-time string, converted to UTC.
def send_time_data(time)
  t = time.dup.gmtime
  s = format('"%2d-%3s-%4d %02d:%02d:%02d +0000"',
             t.day, DATE_MONTH[t.month - 1], t.year,
             t.hour, t.min, t.sec)
  put_string(s)
end

# Writes +symbol+ as a flag, i.e. prefixed with a backslash.
def send_symbol_data(symbol)
  put_string("\\" + symbol.to_s)
end

# Shared implementation of #search and #uid_search.  A String +keys+ is
# sent verbatim; an array is normalized in place first.
def search_internal(cmd, keys, charset)
  if keys.instance_of?(String)
    keys = [RawData.new(keys)]
  else
    normalize_searching_criteria(keys)
  end
  synchronize do
    if charset
      send_command(cmd, "CHARSET", charset, *keys)
    else
      send_command(cmd, *keys)
    end
    return @responses.delete("SEARCH")[-1]
  end
end

# Shared implementation of #fetch and #uid_fetch.
def fetch_internal(cmd, set, attr)
  if attr.instance_of?(String)
    attr = RawData.new(attr)
  end
  synchronize do
    # Discard FETCH responses left over from earlier commands.
    @responses.delete("FETCH")
    send_command(cmd, MessageSet.new(set), attr)
    return @responses.delete("FETCH")
  end
end

# Shared implementation of #store and #uid_store.
def store_internal(cmd, set, attr, flags)
  if attr.instance_of?(String)
    attr = RawData.new(attr)
  end
  synchronize do
    @responses.delete("FETCH")
    send_command(cmd, MessageSet.new(set), attr, flags)
    return @responses.delete("FETCH")
  end
end

# Shared implementation of #copy and #uid_copy.
def copy_internal(cmd, set, mailbox)
  send_command(cmd, MessageSet.new(set), mailbox)
end

# Shared implementation of #sort and #uid_sort.
def sort_internal(cmd, sort_keys, search_keys, charset)
  if search_keys.instance_of?(String)
    search_keys = [RawData.new(search_keys)]
  else
    normalize_searching_criteria(search_keys)
  end
  synchronize do
    send_command(cmd, sort_keys, charset, *search_keys)
    return @responses.delete("SORT")[-1]
  end
end

# Shared implementation of #thread and #uid_thread.
def thread_internal(cmd, algorithm, search_keys, charset)
  if search_keys.instance_of?(String)
    search_keys = [RawData.new(search_keys)]
  else
    normalize_searching_criteria(search_keys)
  end
  # Hold the monitor across the command and the read of @responses,
  # as sort_internal does.
  synchronize do
    send_command(cmd, algorithm, charset, *search_keys)
    return @responses.delete("THREAD")[-1]
  end
end

# Replaces, in place, every -1, Range or Array element of +keys+ with a
# MessageSet wrapping it.
def normalize_searching_criteria(keys)
  keys.collect! do |i|
    case i
    when -1, Range, Array
      MessageSet.new(i)
    else
      i
    end
  end
end

# Builds an SSL parameter hash from the legacy +certs+ / +verify+
# arguments.  +certs+ may name a CA file or a CA directory.
def create_ssl_params(certs = nil, verify = true)
  params = {}
  if certs
    if File.file?(certs)
      params[:ca_file] = certs
    elsif File.directory?(certs)
      params[:ca_path] = certs
    end
  end
  if verify
    params[:verify_mode] = VERIFY_PEER
  else
    params[:verify_mode] = VERIFY_NONE
  end
  return params
end

# Wraps @sock in an SSL socket configured from +params+ and performs
# the handshake.  Raises if OpenSSL is unavailable or the socket is
# already an SSL socket.
def start_tls_session(params = {})
  unless defined?(OpenSSL)
    raise "SSL extension not installed"
  end
  if @sock.kind_of?(OpenSSL::SSL::SSLSocket)
    raise RuntimeError, "already using SSL"
  end
  begin
    params = params.to_hash
  rescue NoMethodError
    # e.g. a bare `true'; fall back to an empty parameter hash.
    params = {}
  end
  context = SSLContext.new
  context.set_params(params)
  if defined?(VerifyCallbackProc)
    context.verify_callback = VerifyCallbackProc
  end
  @sock = SSLSocket.new(@sock, context)
  @sock.sync_close = true
  @sock.connect
  if context.verify_mode != VERIFY_NONE
    @sock.post_connection_check(@host)
  end
end

# Command argument that is written to the connection verbatim.
class RawData # :nodoc:
  def send_data(imap)
    imap.send(:put_string, @data)
  end

  def validate
  end

  private

  def initialize(data)
    @data = data
  end
end

# Command argument that is written to the connection verbatim.
class Atom # :nodoc:
  def send_data(imap)
    imap.send(:put_string, @data)
  end

  def validate
  end

  private

  def initialize(data)
    @data = data
  end
end

# Command argument that is always written as a quoted string.
class QuotedString # :nodoc:
  def send_data(imap)
    imap.send(:send_quoted_string, @data)
  end

  def validate
  end

  private

  def initialize(data)
    @data = data
  end
end

# Command argument that is always written as a literal.
class Literal # :nodoc:
  def send_data(imap)
    imap.send(:send_literal, @data)
  end

  def validate
  end

  private

  def initialize(data)
    @data = data
  end
end

# Command argument describing a set of messages.  The wrapped data may
# be "*", an Integer (-1 stands for "*"), a Range, an Array of these,
# or a ThreadMember.
class MessageSet # :nodoc:
  def send_data(imap)
    imap.send(:put_string, format_internal(@data))
  end

  def validate
    validate_internal(@data)
  end

  private

  def initialize(data)
    @data = data
  end

  # Returns the textual form of +data+.
  def format_internal(data)
    case data
    when "*"
      return data
    when Integer
      if data == -1
        return "*"
      else
        return data.to_s
      end
    when Range
      return format_internal(data.first) +
        ":" + format_internal(data.last)
    when Array
      return data.collect {|i| format_internal(i)}.join(",")
    when ThreadMember
      # Join the formatted children; the join used to be applied to each
      # child's String inside the block, which raised NoMethodError.
      return data.seqno.to_s +
        ":" + data.children.collect {|i| format_internal(i)}.join(",")
    end
  end

  # Raises DataFormatError unless +data+ is one of the supported shapes.
  # Range endpoints are not checked.
  def validate_internal(data)
    case data
    when "*"
    when Integer
      ensure_nz_number(data)
    when Range
    when Array
      data.each do |i|
        validate_internal(i)
      end
    when ThreadMember
      data.children.each do |i|
        validate_internal(i)
      end
    else
      raise DataFormatError, data.inspect
    end
  end

  # Accepts -1 (meaning "*") and 1..4294967295; anything else raises.
  def ensure_nz_number(num)
    if num < -1 || num == 0 || num >= 4294967296
      msg = "nz_number must be non-zero unsigned 32-bit integer: " +
            num.inspect
      raise DataFormatError, msg
    end
  end
end

# Net::IMAP::ContinuationRequest represents command continuation requests.
#
# The command continuation request response is indicated by a "+" token
# instead of a tag.  This form of response indicates that the server is
# ready to accept the continuation of a command from the client.  The
# remainder of this response is a line of text.
#
#   continue_req    ::= "+" SPACE (resp_text / base64)
#
# ==== Fields:
#
# data:: Returns the data (Net::IMAP::ResponseText).
#
# raw_data:: Returns the raw data string.
ContinuationRequest = Struct.new(:data, :raw_data)

# Net::IMAP::UntaggedResponse represents untagged responses.
#
# Data transmitted by the server to the client and status responses
# that do not indicate command completion are prefixed with the token
# "*", and are called untagged responses.
#
#   response_data   ::= "*" SPACE (resp_cond_state / resp_cond_bye /
#                       mailbox_data / message_data / capability_data)
#
# ==== Fields:
#
# name:: Returns the name such as "FLAGS", "LIST", "FETCH"....
#
# data:: Returns the data such as an array of flag symbols,
#        a Net::IMAP::MailboxList object....
#
# raw_data:: Returns the raw data string.
UntaggedResponse = Struct.new(:name, :data, :raw_data)

# Net::IMAP::TaggedResponse represents tagged responses.
#
# The server completion result response indicates the success or
# failure of the operation.  It is tagged with the same tag as the
# client command which began the operation.
#
#   response_tagged ::= tag SPACE resp_cond_state CRLF
#
#   tag             ::= 1*<any ATOM_CHAR except "+">
#
#   resp_cond_state ::= ("OK" / "NO" / "BAD") SPACE resp_text
#
# ==== Fields:
#
# tag:: Returns the tag.
#
# name:: Returns the name. The name is one of "OK", "NO", "BAD".
#
# data:: Returns the data. See Net::IMAP::ResponseText.
#
# raw_data:: Returns the raw data string.
#
TaggedResponse = Struct.new(:tag, :name, :data, :raw_data)

# Net::IMAP::ResponseText represents texts of responses.
# The text may be prefixed by the response code.
#
#   resp_text       ::= ["[" resp_text_code "]" SPACE] (text_mime2 / text)
#                       ;; text SHOULD NOT begin with "[" or "="
#
# ==== Fields:
#
# code:: Returns the response code. See Net::IMAP::ResponseCode.
#
# text:: Returns the text.
#
ResponseText = Struct.new(:code, :text)

#
# Net::IMAP::ResponseCode represents response codes.
#
#   resp_text_code  ::= "ALERT" / "PARSE" /
#                       "PERMANENTFLAGS" SPACE "(" #(flag / "\*") ")" /
#                       "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
#                       "UIDVALIDITY" SPACE nz_number /
#                       "UNSEEN" SPACE nz_number /
#                       atom [SPACE 1*<any TEXT_CHAR except "]">]
#
# ==== Fields:
#
# name:: Returns the name such as "ALERT", "PERMANENTFLAGS", "UIDVALIDITY"....
#
# data:: Returns the data if it exists.
#
ResponseCode = Struct.new(:name, :data)

# Net::IMAP::MailboxList represents contents of the LIST response.
#
#   mailbox_list    ::= "(" #("\Marked" / "\Noinferiors" /
#                       "\Noselect" / "\Unmarked" / flag_extension) ")"
#                       SPACE (<"> QUOTED_CHAR <"> / nil) SPACE mailbox
#
# ==== Fields:
#
# attr:: Returns the name attributes. Each name attribute is a symbol
#        capitalized by String#capitalize, such as :Noselect (not :NoSelect).
#
# delim:: Returns the hierarchy delimiter
#
# name:: Returns the mailbox name.
#
MailboxList = Struct.new(:attr, :delim, :name)

# Net::IMAP::MailboxQuota represents contents of GETQUOTA response.
# This object can also be a response to GETQUOTAROOT.  In the syntax
# specification below, the delimiter used with the "#" construct is a
# single space (SPACE).
#
#    quota_list      ::= "(" #quota_resource ")"
#
#    quota_resource  ::= atom SPACE number SPACE number
#
#    quota_response  ::= "QUOTA" SPACE astring SPACE quota_list
#
# ==== Fields:
#
# mailbox:: The mailbox with the associated quota.
#
# usage:: Current storage usage of mailbox.
#
# quota:: Quota limit imposed on mailbox.
#
MailboxQuota = Struct.new(:mailbox, :usage, :quota)

# Net::IMAP::MailboxQuotaRoot represents part of the GETQUOTAROOT
# response. (GETQUOTAROOT can also return Net::IMAP::MailboxQuota.)
#
#    quotaroot_response ::= "QUOTAROOT" SPACE astring *(SPACE astring)
#
# ==== Fields:
#
# mailbox:: The mailbox with the associated quota.
#
# quotaroots:: Zero or more quotaroots that affect the quota on the
#              specified mailbox.
#
MailboxQuotaRoot = Struct.new(:mailbox, :quotaroots)

# Net::IMAP::MailboxACLItem represents response from GETACL.
#
#    acl_data        ::= "ACL" SPACE mailbox *(SPACE identifier SPACE rights)
#
#    identifier      ::= astring
#
#    rights          ::= astring
#
# ==== Fields:
#
# user:: Login name that has certain rights to the mailbox
#        that was specified with the getacl command.
#
# rights:: The access rights the indicated user has to the
#          mailbox.
#
MailboxACLItem = Struct.new(:user, :rights)

# Net::IMAP::StatusData represents contents of the STATUS response.
#
# ==== Fields:
#
# mailbox:: Returns the mailbox name.
#
# attr:: Returns a hash. Each key is one of "MESSAGES", "RECENT", "UIDNEXT",
#        "UIDVALIDITY", "UNSEEN". Each value is a number.
#
StatusData = Struct.new(:mailbox, :attr)

# Net::IMAP::FetchData represents contents of the FETCH response.
#
# ==== Fields:
#
# seqno:: Returns the message sequence number.
#         (Note: not the unique identifier, even for the UID command response.)
#
# attr:: Returns a hash. Each key is a data item name, and each value is
#        its value.
#
#        The current data items are:
#
#        [BODY]
#           A form of BODYSTRUCTURE without extension data.
#        [BODY[<section>]<<origin_octet>>]
#           A string expressing the body contents of the specified section.
#        [BODYSTRUCTURE]
#           An object that describes the [MIME-IMB] body structure of a message.
#           See Net::IMAP::BodyTypeBasic, Net::IMAP::BodyTypeText,
#           Net::IMAP::BodyTypeMessage, Net::IMAP::BodyTypeMultipart.
#        [ENVELOPE]
#           A Net::IMAP::Envelope object that describes the envelope
#           structure of a message.
#        [FLAGS]
#           An array of flag symbols that are set for this message. Flag symbols
#           are capitalized by String#capitalize.
#        [INTERNALDATE]
#           A string representing the internal date of the message.
#        [RFC822]
#           Equivalent to BODY[].
#        [RFC822.HEADER]
#           Equivalent to BODY.PEEK[HEADER].
#        [RFC822.SIZE]
#           A number expressing the [RFC-822] size of the message.
#        [RFC822.TEXT]
#           Equivalent to BODY[TEXT].
#        [UID]
#           A number expressing the unique identifier of the message.
#
FetchData = Struct.new(:seqno, :attr)

# Net::IMAP::Envelope represents envelope structures of messages.
#
# ==== Fields:
#
# date:: Returns a string that represents the date.
#
# subject:: Returns a string that represents the subject.
#
# from:: Returns an array of Net::IMAP::Address that represents the from.
#
# sender:: Returns an array of Net::IMAP::Address that represents the sender.
#
# reply_to:: Returns an array of Net::IMAP::Address that represents the reply-to.
#
# to:: Returns an array of Net::IMAP::Address that represents the to.
#
# cc:: Returns an array of Net::IMAP::Address that represents the cc.
#
# bcc:: Returns an array of Net::IMAP::Address that represents the bcc.
#
# in_reply_to:: Returns a string that represents the in-reply-to.
#
# message_id:: Returns a string that represents the message-id.
#
Envelope = Struct.new(:date, :subject, :from, :sender, :reply_to,
                      :to, :cc, :bcc, :in_reply_to, :message_id)

#
# Net::IMAP::Address represents electronic mail addresses.
#
# ==== Fields:
#
# name:: Returns the phrase from [RFC-822] mailbox.
#
# route:: Returns the route from [RFC-822] route-addr.
#
# mailbox:: nil indicates end of [RFC-822] group.
#           If non-nil and host is nil, returns [RFC-822] group name.
#           Otherwise, returns [RFC-822] local-part
#
# host:: nil indicates [RFC-822] group syntax.
#        Otherwise, returns [RFC-822] domain name.
#
Address = Struct.new(:name, :route, :mailbox, :host)

#
# Net::IMAP::ContentDisposition represents Content-Disposition fields.
#
# ==== Fields:
#
# dsp_type:: Returns the disposition type.
#
# param:: Returns a hash that represents parameters of the Content-Disposition
#         field.
#
ContentDisposition = Struct.new(:dsp_type, :param)

# Net::IMAP::ThreadMember represents a thread-node returned
# by Net::IMAP#thread
#
# ==== Fields:
#
# seqno:: The sequence number of this message.
#
# children:: an array of Net::IMAP::ThreadMember objects for mail
#            items that are children of this in the thread.
#
ThreadMember = Struct.new(:seqno, :children)

# Net::IMAP::BodyTypeBasic represents basic body structures of messages.
#
# ==== Fields:
#
# media_type:: Returns the content media type name as defined in [MIME-IMB].
#
# subtype:: Returns the content subtype name as defined in [MIME-IMB].
#
# param:: Returns a hash that represents parameters as defined in [MIME-IMB].
#
# content_id:: Returns a string giving the content id as defined in [MIME-IMB].
#
# description:: Returns a string giving the content description as defined in
#               [MIME-IMB].
#
# encoding:: Returns a string giving the content transfer encoding as defined in
#            [MIME-IMB].
#
# size:: Returns a number giving the size of the body in octets.
#
# md5:: Returns a string giving the body MD5 value as defined in [MD5].
#
# disposition:: Returns a Net::IMAP::ContentDisposition object giving
#               the content disposition.
+ # + # ==== Fields: + # + # name:: Returns the phrase from [RFC-822] mailbox. + # + # route:: Returns the route from [RFC-822] route-addr. + # + # mailbox:: nil indicates end of [RFC-822] group. + # If non-nil and host is nil, returns [RFC-822] group name. + # Otherwise, returns [RFC-822] local-part + # + # host:: nil indicates [RFC-822] group syntax. + # Otherwise, returns [RFC-822] domain name. + # + Address = Struct.new(:name, :route, :mailbox, :host) + + # + # Net::IMAP::ContentDisposition represents Content-Disposition fields. + # + # ==== Fields: + # + # dsp_type:: Returns the disposition type. + # + # param:: Returns a hash that represents parameters of the Content-Disposition + # field. + # + ContentDisposition = Struct.new(:dsp_type, :param) + + # Net::IMAP::ThreadMember represents a thread-node returned + # by Net::IMAP#thread + # + # ==== Fields: + # + # seqno:: The sequence number of this message. + # + # children:: an array of Net::IMAP::ThreadMember objects for mail + # items that are children of this in the thread. + # + ThreadMember = Struct.new(:seqno, :children) + + # Net::IMAP::BodyTypeBasic represents basic body structures of messages. + # + # ==== Fields: + # + # media_type:: Returns the content media type name as defined in [MIME-IMB]. + # + # subtype:: Returns the content subtype name as defined in [MIME-IMB]. + # + # param:: Returns a hash that represents parameters as defined in [MIME-IMB]. + # + # content_id:: Returns a string giving the content id as defined in [MIME-IMB]. + # + # description:: Returns a string giving the content description as defined in + # [MIME-IMB]. + # + # encoding:: Returns a string giving the content transfer encoding as defined in + # [MIME-IMB]. + # + # size:: Returns a number giving the size of the body in octets. + # + # md5:: Returns a string giving the body MD5 value as defined in [MD5]. + # + # disposition:: Returns a Net::IMAP::ContentDisposition object giving + # the content disposition. 
+ # + # language:: Returns a string or an array of strings giving the body + # language value as defined in [LANGUAGE-TAGS]. + # + # extension:: Returns extension data. + # + # multipart?:: Returns false. + # + class BodyTypeBasic < Struct.new(:media_type, :subtype, + :param, :content_id, + :description, :encoding, :size, + :md5, :disposition, :language, + :extension) + def multipart? + return false + end + + # Obsolete: use +subtype+ instead. Calling this will + # generate a warning message to +stderr+, then return + # the value of +subtype+. + def media_subtype + $stderr.printf("warning: media_subtype is obsolete.\n") + $stderr.printf(" use subtype instead.\n") + return subtype + end + end + + # Net::IMAP::BodyTypeText represents TEXT body structures of messages. + # + # ==== Fields: + # + # lines:: Returns the size of the body in text lines. + # + # And Net::IMAP::BodyTypeText has all fields of Net::IMAP::BodyTypeBasic. + # + class BodyTypeText < Struct.new(:media_type, :subtype, + :param, :content_id, + :description, :encoding, :size, + :lines, + :md5, :disposition, :language, + :extension) + def multipart? + return false + end + + # Obsolete: use +subtype+ instead. Calling this will + # generate a warning message to +stderr+, then return + # the value of +subtype+. + def media_subtype + $stderr.printf("warning: media_subtype is obsolete.\n") + $stderr.printf(" use subtype instead.\n") + return subtype + end + end + + # Net::IMAP::BodyTypeMessage represents MESSAGE/RFC822 body structures of messages. + # + # ==== Fields: + # + # envelope:: Returns a Net::IMAP::Envelope giving the envelope structure. + # + # body:: Returns an object giving the body structure. + # + # And Net::IMAP::BodyTypeMessage has all methods of Net::IMAP::BodyTypeText. + # + class BodyTypeMessage < Struct.new(:media_type, :subtype, + :param, :content_id, + :description, :encoding, :size, + :envelope, :body, :lines, + :md5, :disposition, :language, + :extension) + def multipart? 
+ return false + end + + # Obsolete: use +subtype+ instead. Calling this will + # generate a warning message to +stderr+, then return + # the value of +subtype+. + def media_subtype + $stderr.printf("warning: media_subtype is obsolete.\n") + $stderr.printf(" use subtype instead.\n") + return subtype + end + end + + # Net::IMAP::BodyTypeMultipart represents multipart body structures + # of messages. + # + # ==== Fields: + # + # media_type:: Returns the content media type name as defined in [MIME-IMB]. + # + # subtype:: Returns the content subtype name as defined in [MIME-IMB]. + # + # parts:: Returns multiple parts. + # + # param:: Returns a hash that represents parameters as defined in [MIME-IMB]. + # + # disposition:: Returns a Net::IMAP::ContentDisposition object giving + # the content disposition. + # + # language:: Returns a string or an array of strings giving the body + # language value as defined in [LANGUAGE-TAGS]. + # + # extension:: Returns extension data. + # + # multipart?:: Returns true. + # + class BodyTypeMultipart < Struct.new(:media_type, :subtype, + :parts, + :param, :disposition, :language, + :extension) + def multipart? + return true + end + + # Obsolete: use +subtype+ instead. Calling this will + # generate a warning message to +stderr+, then return + # the value of +subtype+. 
+ def media_subtype + $stderr.printf("warning: media_subtype is obsolete.\n") + $stderr.printf(" use subtype instead.\n") + return subtype + end + end + + class ResponseParser # :nodoc: + def parse(str) + @str = str + @pos = 0 + @lex_state = EXPR_BEG + @token = nil + return response + end + + private + + EXPR_BEG = :EXPR_BEG + EXPR_DATA = :EXPR_DATA + EXPR_TEXT = :EXPR_TEXT + EXPR_RTEXT = :EXPR_RTEXT + EXPR_CTEXT = :EXPR_CTEXT + + T_SPACE = :SPACE + T_NIL = :NIL + T_NUMBER = :NUMBER + T_ATOM = :ATOM + T_QUOTED = :QUOTED + T_LPAR = :LPAR + T_RPAR = :RPAR + T_BSLASH = :BSLASH + T_STAR = :STAR + T_LBRA = :LBRA + T_RBRA = :RBRA + T_LITERAL = :LITERAL + T_PLUS = :PLUS + T_PERCENT = :PERCENT + T_CRLF = :CRLF + T_EOF = :EOF + T_TEXT = :TEXT + + BEG_REGEXP = /\G(?:\ +(?# 1: SPACE )( +)|\ +(?# 2: NIL )(NIL)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\ +(?# 3: NUMBER )(\d+)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\ +(?# 4: ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+]+)|\ +(?# 5: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\ +(?# 6: LPAR )(\()|\ +(?# 7: RPAR )(\))|\ +(?# 8: BSLASH )(\\)|\ +(?# 9: STAR )(\*)|\ +(?# 10: LBRA )(\[)|\ +(?# 11: RBRA )(\])|\ +(?# 12: LITERAL )\{(\d+)\}\r\n|\ +(?# 13: PLUS )(\+)|\ +(?# 14: PERCENT )(%)|\ +(?# 15: CRLF )(\r\n)|\ +(?# 16: EOF )(\z))/ni + + DATA_REGEXP = /\G(?:\ +(?# 1: SPACE )( )|\ +(?# 2: NIL )(NIL)|\ +(?# 3: NUMBER )(\d+)|\ +(?# 4: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\ +(?# 5: LITERAL )\{(\d+)\}\r\n|\ +(?# 6: LPAR )(\()|\ +(?# 7: RPAR )(\)))/ni + + TEXT_REGEXP = /\G(?:\ +(?# 1: TEXT )([^\x00\r\n]*))/ni + + RTEXT_REGEXP = /\G(?:\ +(?# 1: LBRA )(\[)|\ +(?# 2: TEXT )([^\x00\r\n]*))/ni + + CTEXT_REGEXP = /\G(?:\ +(?# 1: TEXT )([^\x00\r\n\]]*))/ni + + Token = Struct.new(:symbol, :value) + + def response + token = lookahead + case token.symbol + when T_PLUS + result = continue_req + when T_STAR + result = response_untagged + else + result = response_tagged + end + match(T_CRLF) + match(T_EOF) + return result + end + + def continue_req 
+ match(T_PLUS) + match(T_SPACE) + return ContinuationRequest.new(resp_text, @str) + end + + def response_untagged + match(T_STAR) + match(T_SPACE) + token = lookahead + if token.symbol == T_NUMBER + return numeric_response + elsif token.symbol == T_ATOM + case token.value + when /\A(?:OK|NO|BAD|BYE|PREAUTH)\z/ni + return response_cond + when /\A(?:FLAGS)\z/ni + return flags_response + when /\A(?:LIST|LSUB)\z/ni + return list_response + when /\A(?:QUOTA)\z/ni + return getquota_response + when /\A(?:QUOTAROOT)\z/ni + return getquotaroot_response + when /\A(?:ACL)\z/ni + return getacl_response + when /\A(?:SEARCH|SORT)\z/ni + return search_response + when /\A(?:THREAD)\z/ni + return thread_response + when /\A(?:STATUS)\z/ni + return status_response + when /\A(?:CAPABILITY)\z/ni + return capability_response + else + return text_response + end + else + parse_error("unexpected token %s", token.symbol) + end + end + + def response_tagged + tag = atom + match(T_SPACE) + token = match(T_ATOM) + name = token.value.upcase + match(T_SPACE) + return TaggedResponse.new(tag, name, resp_text, @str) + end + + def response_cond + token = match(T_ATOM) + name = token.value.upcase + match(T_SPACE) + return UntaggedResponse.new(name, resp_text, @str) + end + + def numeric_response + n = number + match(T_SPACE) + token = match(T_ATOM) + name = token.value.upcase + case name + when "EXISTS", "RECENT", "EXPUNGE" + return UntaggedResponse.new(name, n, @str) + when "FETCH" + shift_token + match(T_SPACE) + data = FetchData.new(n, msg_att) + return UntaggedResponse.new(name, data, @str) + end + end + + def msg_att + match(T_LPAR) + attr = {} + while true + token = lookahead + case token.symbol + when T_RPAR + shift_token + break + when T_SPACE + shift_token + token = lookahead + end + case token.value + when /\A(?:ENVELOPE)\z/ni + name, val = envelope_data + when /\A(?:FLAGS)\z/ni + name, val = flags_data + when /\A(?:INTERNALDATE)\z/ni + name, val = internaldate_data + when 
/\A(?:RFC822(?:\.HEADER|\.TEXT)?)\z/ni + name, val = rfc822_text + when /\A(?:RFC822\.SIZE)\z/ni + name, val = rfc822_size + when /\A(?:BODY(?:STRUCTURE)?)\z/ni + name, val = body_data + when /\A(?:UID)\z/ni + name, val = uid_data + else + parse_error("unknown attribute `%s'", token.value) + end + attr[name] = val + end + return attr + end + + def envelope_data + token = match(T_ATOM) + name = token.value.upcase + match(T_SPACE) + return name, envelope + end + + def envelope + @lex_state = EXPR_DATA + token = lookahead + if token.symbol == T_NIL + shift_token + result = nil + else + match(T_LPAR) + date = nstring + match(T_SPACE) + subject = nstring + match(T_SPACE) + from = address_list + match(T_SPACE) + sender = address_list + match(T_SPACE) + reply_to = address_list + match(T_SPACE) + to = address_list + match(T_SPACE) + cc = address_list + match(T_SPACE) + bcc = address_list + match(T_SPACE) + in_reply_to = nstring + match(T_SPACE) + message_id = nstring + match(T_RPAR) + result = Envelope.new(date, subject, from, sender, reply_to, + to, cc, bcc, in_reply_to, message_id) + end + @lex_state = EXPR_BEG + return result + end + + def flags_data + token = match(T_ATOM) + name = token.value.upcase + match(T_SPACE) + return name, flag_list + end + + def internaldate_data + token = match(T_ATOM) + name = token.value.upcase + match(T_SPACE) + token = match(T_QUOTED) + return name, token.value + end + + def rfc822_text + token = match(T_ATOM) + name = token.value.upcase + match(T_SPACE) + return name, nstring + end + + def rfc822_size + token = match(T_ATOM) + name = token.value.upcase + match(T_SPACE) + return name, number + end + + def body_data + token = match(T_ATOM) + name = token.value.upcase + token = lookahead + if token.symbol == T_SPACE + shift_token + return name, body + end + name.concat(section) + token = lookahead + if token.symbol == T_ATOM + name.concat(token.value) + shift_token + end + match(T_SPACE) + data = nstring + return name, data + end + + def 
body + @lex_state = EXPR_DATA + token = lookahead + if token.symbol == T_NIL + shift_token + result = nil + else + match(T_LPAR) + token = lookahead + if token.symbol == T_LPAR + result = body_type_mpart + else + result = body_type_1part + end + match(T_RPAR) + end + @lex_state = EXPR_BEG + return result + end + + def body_type_1part + token = lookahead + case token.value + when /\A(?:TEXT)\z/ni + return body_type_text + when /\A(?:MESSAGE)\z/ni + return body_type_msg + else + return body_type_basic + end + end + + def body_type_basic + mtype, msubtype = media_type + token = lookahead + if token.symbol == T_RPAR + return BodyTypeBasic.new(mtype, msubtype) + end + match(T_SPACE) + param, content_id, desc, enc, size = body_fields + md5, disposition, language, extension = body_ext_1part + return BodyTypeBasic.new(mtype, msubtype, + param, content_id, + desc, enc, size, + md5, disposition, language, extension) + end + + def body_type_text + mtype, msubtype = media_type + match(T_SPACE) + param, content_id, desc, enc, size = body_fields + match(T_SPACE) + lines = number + md5, disposition, language, extension = body_ext_1part + return BodyTypeText.new(mtype, msubtype, + param, content_id, + desc, enc, size, + lines, + md5, disposition, language, extension) + end + + def body_type_msg + mtype, msubtype = media_type + match(T_SPACE) + param, content_id, desc, enc, size = body_fields + match(T_SPACE) + env = envelope + match(T_SPACE) + b = body + match(T_SPACE) + lines = number + md5, disposition, language, extension = body_ext_1part + return BodyTypeMessage.new(mtype, msubtype, + param, content_id, + desc, enc, size, + env, b, lines, + md5, disposition, language, extension) + end + + def body_type_mpart + parts = [] + while true + token = lookahead + if token.symbol == T_SPACE + shift_token + break + end + parts.push(body) + end + mtype = "MULTIPART" + msubtype = case_insensitive_string + param, disposition, language, extension = body_ext_mpart + return 
BodyTypeMultipart.new(mtype, msubtype, parts, + param, disposition, language, + extension) + end + + def media_type + mtype = case_insensitive_string + match(T_SPACE) + msubtype = case_insensitive_string + return mtype, msubtype + end + + def body_fields + param = body_fld_param + match(T_SPACE) + content_id = nstring + match(T_SPACE) + desc = nstring + match(T_SPACE) + enc = case_insensitive_string + match(T_SPACE) + size = number + return param, content_id, desc, enc, size + end + + def body_fld_param + token = lookahead + if token.symbol == T_NIL + shift_token + return nil + end + match(T_LPAR) + param = {} + while true + token = lookahead + case token.symbol + when T_RPAR + shift_token + break + when T_SPACE + shift_token + end + name = case_insensitive_string + match(T_SPACE) + val = string + param[name] = val + end + return param + end + + def body_ext_1part + token = lookahead + if token.symbol == T_SPACE + shift_token + else + return nil + end + md5 = nstring + + token = lookahead + if token.symbol == T_SPACE + shift_token + else + return md5 + end + disposition = body_fld_dsp + + token = lookahead + if token.symbol == T_SPACE + shift_token + else + return md5, disposition + end + language = body_fld_lang + + token = lookahead + if token.symbol == T_SPACE + shift_token + else + return md5, disposition, language + end + + extension = body_extensions + return md5, disposition, language, extension + end + + def body_ext_mpart + token = lookahead + if token.symbol == T_SPACE + shift_token + else + return nil + end + param = body_fld_param + + token = lookahead + if token.symbol == T_SPACE + shift_token + else + return param + end + disposition = body_fld_dsp + match(T_SPACE) + language = body_fld_lang + + token = lookahead + if token.symbol == T_SPACE + shift_token + else + return param, disposition, language + end + + extension = body_extensions + return param, disposition, language, extension + end + + def body_fld_dsp + token = lookahead + if token.symbol == 
T_NIL + shift_token + return nil + end + match(T_LPAR) + dsp_type = case_insensitive_string + match(T_SPACE) + param = body_fld_param + match(T_RPAR) + return ContentDisposition.new(dsp_type, param) + end + + def body_fld_lang + token = lookahead + if token.symbol == T_LPAR + shift_token + result = [] + while true + token = lookahead + case token.symbol + when T_RPAR + shift_token + return result + when T_SPACE + shift_token + end + result.push(case_insensitive_string) + end + else + lang = nstring + if lang + return lang.upcase + else + return lang + end + end + end + + def body_extensions + result = [] + while true + token = lookahead + case token.symbol + when T_RPAR + return result + when T_SPACE + shift_token + end + result.push(body_extension) + end + end + + def body_extension + token = lookahead + case token.symbol + when T_LPAR + shift_token + result = body_extensions + match(T_RPAR) + return result + when T_NUMBER + return number + else + return nstring + end + end + + def section + str = "" + token = match(T_LBRA) + str.concat(token.value) + token = match(T_ATOM, T_NUMBER, T_RBRA) + if token.symbol == T_RBRA + str.concat(token.value) + return str + end + str.concat(token.value) + token = lookahead + if token.symbol == T_SPACE + shift_token + str.concat(token.value) + token = match(T_LPAR) + str.concat(token.value) + while true + token = lookahead + case token.symbol + when T_RPAR + str.concat(token.value) + shift_token + break + when T_SPACE + shift_token + str.concat(token.value) + end + str.concat(format_string(astring)) + end + end + token = match(T_RBRA) + str.concat(token.value) + return str + end + + def format_string(str) + case str + when "" + return '""' + when /[\x80-\xff\r\n]/n + # literal + return "{" + str.length.to_s + "}" + CRLF + str + when /[(){ \x00-\x1f\x7f%*"\\]/n + # quoted string + return '"' + str.gsub(/["\\]/n, "\\\\\\&") + '"' + else + # atom + return str + end + end + + def uid_data + token = match(T_ATOM) + name = 
token.value.upcase + match(T_SPACE) + return name, number + end + + def text_response + token = match(T_ATOM) + name = token.value.upcase + match(T_SPACE) + @lex_state = EXPR_TEXT + token = match(T_TEXT) + @lex_state = EXPR_BEG + return UntaggedResponse.new(name, token.value) + end + + def flags_response + token = match(T_ATOM) + name = token.value.upcase + match(T_SPACE) + return UntaggedResponse.new(name, flag_list, @str) + end + + def list_response + token = match(T_ATOM) + name = token.value.upcase + match(T_SPACE) + return UntaggedResponse.new(name, mailbox_list, @str) + end + + def mailbox_list + attr = flag_list + match(T_SPACE) + token = match(T_QUOTED, T_NIL) + if token.symbol == T_NIL + delim = nil + else + delim = token.value + end + match(T_SPACE) + name = astring + return MailboxList.new(attr, delim, name) + end + + def getquota_response + # If quota never established, get back + # `NO Quota root does not exist'. + # If quota removed, get `()' after the + # folder spec with no mention of `STORAGE'. + token = match(T_ATOM) + name = token.value.upcase + match(T_SPACE) + mailbox = astring + match(T_SPACE) + match(T_LPAR) + token = lookahead + case token.symbol + when T_RPAR + shift_token + data = MailboxQuota.new(mailbox, nil, nil) + return UntaggedResponse.new(name, data, @str) + when T_ATOM + shift_token + match(T_SPACE) + token = match(T_NUMBER) + usage = token.value + match(T_SPACE) + token = match(T_NUMBER) + quota = token.value + match(T_RPAR) + data = MailboxQuota.new(mailbox, usage, quota) + return UntaggedResponse.new(name, data, @str) + else + parse_error("unexpected token %s", token.symbol) + end + end + + def getquotaroot_response + # Similar to getquota, but only admin can use getquota. 
+ token = match(T_ATOM) + name = token.value.upcase + match(T_SPACE) + mailbox = astring + quotaroots = [] + while true + token = lookahead + break unless token.symbol == T_SPACE + shift_token + quotaroots.push(astring) + end + data = MailboxQuotaRoot.new(mailbox, quotaroots) + return UntaggedResponse.new(name, data, @str) + end + + def getacl_response + token = match(T_ATOM) + name = token.value.upcase + match(T_SPACE) + mailbox = astring + data = [] + token = lookahead + if token.symbol == T_SPACE + shift_token + while true + token = lookahead + case token.symbol + when T_CRLF + break + when T_SPACE + shift_token + end + user = astring + match(T_SPACE) + rights = astring + ##XXX data.push([user, rights]) + data.push(MailboxACLItem.new(user, rights)) + end + end + return UntaggedResponse.new(name, data, @str) + end + + def search_response + token = match(T_ATOM) + name = token.value.upcase + token = lookahead + if token.symbol == T_SPACE + shift_token + data = [] + while true + token = lookahead + case token.symbol + when T_CRLF + break + when T_SPACE + shift_token + end + data.push(number) + end + else + data = [] + end + return UntaggedResponse.new(name, data, @str) + end + + def thread_response + token = match(T_ATOM) + name = token.value.upcase + token = lookahead + + if token.symbol == T_SPACE + threads = [] + + while true + shift_token + token = lookahead + + case token.symbol + when T_LPAR + threads << thread_branch(token) + when T_CRLF + break + end + end + else + # no member + threads = [] + end + + return UntaggedResponse.new(name, threads, @str) + end + + def thread_branch(token) + rootmember = nil + lastmember = nil + + while true + shift_token # ignore first T_LPAR + token = lookahead + + case token.symbol + when T_NUMBER + # new member + newmember = ThreadMember.new(number, []) + if rootmember.nil? 
+ rootmember = newmember + else + lastmember.children << newmember + end + lastmember = newmember + when T_SPACE + # do nothing + when T_LPAR + if rootmember.nil? + # dummy member + lastmember = rootmember = ThreadMember.new(nil, []) + end + + lastmember.children << thread_branch(token) + when T_RPAR + break + end + end + + return rootmember + end + + def status_response + token = match(T_ATOM) + name = token.value.upcase + match(T_SPACE) + mailbox = astring + match(T_SPACE) + match(T_LPAR) + attr = {} + while true + token = lookahead + case token.symbol + when T_RPAR + shift_token + break + when T_SPACE + shift_token + end + token = match(T_ATOM) + key = token.value.upcase + match(T_SPACE) + val = number + attr[key] = val + end + data = StatusData.new(mailbox, attr) + return UntaggedResponse.new(name, data, @str) + end + + def capability_response + token = match(T_ATOM) + name = token.value.upcase + match(T_SPACE) + data = [] + while true + token = lookahead + case token.symbol + when T_CRLF + break + when T_SPACE + shift_token + end + data.push(atom.upcase) + end + return UntaggedResponse.new(name, data, @str) + end + + def resp_text + @lex_state = EXPR_RTEXT + token = lookahead + if token.symbol == T_LBRA + code = resp_text_code + else + code = nil + end + token = match(T_TEXT) + @lex_state = EXPR_BEG + return ResponseText.new(code, token.value) + end + + def resp_text_code + @lex_state = EXPR_BEG + match(T_LBRA) + token = match(T_ATOM) + name = token.value.upcase + case name + when /\A(?:ALERT|PARSE|READ-ONLY|READ-WRITE|TRYCREATE|NOMODSEQ)\z/n + result = ResponseCode.new(name, nil) + when /\A(?:PERMANENTFLAGS)\z/n + match(T_SPACE) + result = ResponseCode.new(name, flag_list) + when /\A(?:UIDVALIDITY|UIDNEXT|UNSEEN)\z/n + match(T_SPACE) + result = ResponseCode.new(name, number) + else + match(T_SPACE) + @lex_state = EXPR_CTEXT + token = match(T_TEXT) + @lex_state = EXPR_BEG + result = ResponseCode.new(name, token.value) + end + match(T_RBRA) + @lex_state = 
EXPR_RTEXT + return result + end + + def address_list + token = lookahead + if token.symbol == T_NIL + shift_token + return nil + else + result = [] + match(T_LPAR) + while true + token = lookahead + case token.symbol + when T_RPAR + shift_token + break + when T_SPACE + shift_token + end + result.push(address) + end + return result + end + end + + ADDRESS_REGEXP = /\G\ +(?# 1: NAME )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \ +(?# 2: ROUTE )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \ +(?# 3: MAILBOX )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \ +(?# 4: HOST )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)")\ +\)/ni + + def address + match(T_LPAR) + if @str.index(ADDRESS_REGEXP, @pos) + # address does not include literal. + @pos = $~.end(0) + name = $1 + route = $2 + mailbox = $3 + host = $4 + for s in [name, route, mailbox, host] + if s + s.gsub!(/\\(["\\])/n, "\\1") + end + end + else + name = nstring + match(T_SPACE) + route = nstring + match(T_SPACE) + mailbox = nstring + match(T_SPACE) + host = nstring + match(T_RPAR) + end + return Address.new(name, route, mailbox, host) + end + +# def flag_list +# result = [] +# match(T_LPAR) +# while true +# token = lookahead +# case token.symbol +# when T_RPAR +# shift_token +# break +# when T_SPACE +# shift_token +# end +# result.push(flag) +# end +# return result +# end + +# def flag +# token = lookahead +# if token.symbol == T_BSLASH +# shift_token +# token = lookahead +# if token.symbol == T_STAR +# shift_token +# return token.value.intern +# else +# return atom.intern +# end +# else +# return atom +# end +# end + + FLAG_REGEXP = /\ +(?# FLAG )\\([^\x80-\xff(){ \x00-\x1f\x7f%"\\]+)|\ +(?# ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\]+)/n + + def flag_list + if @str.index(/\(([^)]*)\)/ni, @pos) + @pos = $~.end(0) + return $1.scan(FLAG_REGEXP).collect { |flag, atom| + atom || flag.capitalize.intern + } + else + parse_error("invalid flag list") + end + end + + def nstring + token = lookahead + if 
token.symbol == T_NIL + shift_token + return nil + else + return string + end + end + + def astring + token = lookahead + if string_token?(token) + return string + else + return atom + end + end + + def string + token = lookahead + if token.symbol == T_NIL + shift_token + return nil + end + token = match(T_QUOTED, T_LITERAL) + return token.value + end + + STRING_TOKENS = [T_QUOTED, T_LITERAL, T_NIL] + + def string_token?(token) + return STRING_TOKENS.include?(token.symbol) + end + + def case_insensitive_string + token = lookahead + if token.symbol == T_NIL + shift_token + return nil + end + token = match(T_QUOTED, T_LITERAL) + return token.value.upcase + end + + def atom + result = "" + while true + token = lookahead + if atom_token?(token) + result.concat(token.value) + shift_token + else + if result.empty? + parse_error("unexpected token %s", token.symbol) + else + return result + end + end + end + end + + ATOM_TOKENS = [ + T_ATOM, + T_NUMBER, + T_NIL, + T_LBRA, + T_RBRA, + T_PLUS + ] + + def atom_token?(token) + return ATOM_TOKENS.include?(token.symbol) + end + + def number + token = lookahead + if token.symbol == T_NIL + shift_token + return nil + end + token = match(T_NUMBER) + return token.value.to_i + end + + def nil_atom + match(T_NIL) + return nil + end + + def match(*args) + token = lookahead + unless args.include?(token.symbol) + parse_error('unexpected token %s (expected %s)', + token.symbol.id2name, + args.collect {|i| i.id2name}.join(" or ")) + end + shift_token + return token + end + + def lookahead + unless @token + @token = next_token + end + return @token + end + + def shift_token + @token = nil + end + + def next_token + case @lex_state + when EXPR_BEG + if @str.index(BEG_REGEXP, @pos) + @pos = $~.end(0) + if $1 + return Token.new(T_SPACE, $+) + elsif $2 + return Token.new(T_NIL, $+) + elsif $3 + return Token.new(T_NUMBER, $+) + elsif $4 + return Token.new(T_ATOM, $+) + elsif $5 + return Token.new(T_QUOTED, + $+.gsub(/\\(["\\])/n, "\\1")) + elsif 
$6 + return Token.new(T_LPAR, $+) + elsif $7 + return Token.new(T_RPAR, $+) + elsif $8 + return Token.new(T_BSLASH, $+) + elsif $9 + return Token.new(T_STAR, $+) + elsif $10 + return Token.new(T_LBRA, $+) + elsif $11 + return Token.new(T_RBRA, $+) + elsif $12 + len = $+.to_i + val = @str[@pos, len] + @pos += len + return Token.new(T_LITERAL, val) + elsif $13 + return Token.new(T_PLUS, $+) + elsif $14 + return Token.new(T_PERCENT, $+) + elsif $15 + return Token.new(T_CRLF, $+) + elsif $16 + return Token.new(T_EOF, $+) + else + parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid") + end + else + @str.index(/\S*/n, @pos) + parse_error("unknown token - %s", $&.dump) + end + when EXPR_DATA + if @str.index(DATA_REGEXP, @pos) + @pos = $~.end(0) + if $1 + return Token.new(T_SPACE, $+) + elsif $2 + return Token.new(T_NIL, $+) + elsif $3 + return Token.new(T_NUMBER, $+) + elsif $4 + return Token.new(T_QUOTED, + $+.gsub(/\\(["\\])/n, "\\1")) + elsif $5 + len = $+.to_i + val = @str[@pos, len] + @pos += len + return Token.new(T_LITERAL, val) + elsif $6 + return Token.new(T_LPAR, $+) + elsif $7 + return Token.new(T_RPAR, $+) + else + parse_error("[Net::IMAP BUG] DATA_REGEXP is invalid") + end + else + @str.index(/\S*/n, @pos) + parse_error("unknown token - %s", $&.dump) + end + when EXPR_TEXT + if @str.index(TEXT_REGEXP, @pos) + @pos = $~.end(0) + if $1 + return Token.new(T_TEXT, $+) + else + parse_error("[Net::IMAP BUG] TEXT_REGEXP is invalid") + end + else + @str.index(/\S*/n, @pos) + parse_error("unknown token - %s", $&.dump) + end + when EXPR_RTEXT + if @str.index(RTEXT_REGEXP, @pos) + @pos = $~.end(0) + if $1 + return Token.new(T_LBRA, $+) + elsif $2 + return Token.new(T_TEXT, $+) + else + parse_error("[Net::IMAP BUG] RTEXT_REGEXP is invalid") + end + else + @str.index(/\S*/n, @pos) + parse_error("unknown token - %s", $&.dump) + end + when EXPR_CTEXT + if @str.index(CTEXT_REGEXP, @pos) + @pos = $~.end(0) + if $1 + return Token.new(T_TEXT, $+) + else + parse_error("[Net::IMAP 
BUG] CTEXT_REGEXP is invalid") + end + else + @str.index(/\S*/n, @pos) #/ + parse_error("unknown token - %s", $&.dump) + end + else + parse_error("invalid @lex_state - %s", @lex_state.inspect) + end + end + + def parse_error(fmt, *args) + if IMAP.debug + $stderr.printf("@str: %s\n", @str.dump) + $stderr.printf("@pos: %d\n", @pos) + $stderr.printf("@lex_state: %s\n", @lex_state) + if @token + $stderr.printf("@token.symbol: %s\n", @token.symbol) + $stderr.printf("@token.value: %s\n", @token.value.inspect) + end + end + raise ResponseParseError, format(fmt, *args) + end + end + + # Authenticator for the "LOGIN" authentication type. See + # #authenticate(). + class LoginAuthenticator + def process(data) + case @state + when STATE_USER + @state = STATE_PASSWORD + return @user + when STATE_PASSWORD + return @password + end + end + + private + + STATE_USER = :USER + STATE_PASSWORD = :PASSWORD + + def initialize(user, password) + @user = user + @password = password + @state = STATE_USER + end + end + add_authenticator "LOGIN", LoginAuthenticator + + # Authenticator for the "PLAIN" authentication type. See + # #authenticate(). + class PlainAuthenticator + def process(data) + return "\0#{@user}\0#{@password}" + end + + private + + def initialize(user, password) + @user = user + @password = password + end + end + add_authenticator "PLAIN", PlainAuthenticator + + # Authenticator for the "CRAM-MD5" authentication type. See + # #authenticate(). 
+ class CramMD5Authenticator + def process(challenge) + digest = hmac_md5(challenge, @password) + return @user + " " + digest + end + + private + + def initialize(user, password) + @user = user + @password = password + end + + def hmac_md5(text, key) + if key.length > 64 + key = Digest::MD5.digest(key) + end + + k_ipad = key + "\0" * (64 - key.length) + k_opad = key + "\0" * (64 - key.length) + for i in 0..63 + k_ipad[i] = (k_ipad[i].ord ^ 0x36).chr + k_opad[i] = (k_opad[i].ord ^ 0x5c).chr + end + + digest = Digest::MD5.digest(k_ipad + text) + + return Digest::MD5.hexdigest(k_opad + digest) + end + end + add_authenticator "CRAM-MD5", CramMD5Authenticator + + # Authenticator for the "DIGEST-MD5" authentication type. See + # #authenticate(). + class DigestMD5Authenticator + def process(challenge) + case @stage + when STAGE_ONE + @stage = STAGE_TWO + sparams = {} + c = StringScanner.new(challenge) + while c.scan(/(?:\s*,)?\s*(\w+)=("(?:[^\\"]+|\\.)*"|[^,]+)\s*/) + k, v = c[1], c[2] + if v =~ /^"(.*)"$/ + v = $1 + if v =~ /,/ + v = v.split(',') + end + end + sparams[k] = v + end + + raise DataFormatError, "Bad Challenge: '#{challenge}'" unless c.rest.size == 0 + raise Error, "Server does not support auth (qop = #{sparams['qop'].join(',')})" unless sparams['qop'].include?("auth") + + response = { + :nonce => sparams['nonce'], + :username => @user, + :realm => sparams['realm'], + :cnonce => Digest::MD5.hexdigest("%.15f:%.15f:%d" % [Time.now.to_f, rand, Process.pid.to_s]), + :'digest-uri' => 'imap/' + sparams['realm'], + :qop => 'auth', + :maxbuf => 65535, + :nc => "%08d" % nc(sparams['nonce']), + :charset => sparams['charset'], + } + + response[:authzid] = @authname unless @authname.nil? + + # now, the real thing + a0 = Digest::MD5.digest( [ response.values_at(:username, :realm), @password ].join(':') ) + + a1 = [ a0, response.values_at(:nonce,:cnonce) ].join(':') + a1 << ':' + response[:authzid] unless response[:authzid].nil? 
+ + a2 = "AUTHENTICATE:" + response[:'digest-uri'] + a2 << ":00000000000000000000000000000000" if response[:qop] and response[:qop] =~ /^auth-(?:conf|int)$/ + + response[:response] = Digest::MD5.hexdigest( + [ + Digest::MD5.hexdigest(a1), + response.values_at(:nonce, :nc, :cnonce, :qop), + Digest::MD5.hexdigest(a2) + ].join(':') + ) + + return response.keys.map {|key| qdval(key.to_s, response[key]) }.join(',') + when STAGE_TWO + @stage = nil + # if at the second stage, return an empty string + if challenge =~ /rspauth=/ + return '' + else + raise ResponseParseError, challenge + end + else + raise ResponseParseError, challenge + end + end + + def initialize(user, password, authname = nil) + @user, @password, @authname = user, password, authname + @nc, @stage = {}, STAGE_ONE + end + + private + + STAGE_ONE = :stage_one + STAGE_TWO = :stage_two + + def nc(nonce) + if @nc.has_key? nonce + @nc[nonce] = @nc[nonce] + 1 + else + @nc[nonce] = 1 + end + return @nc[nonce] + end + + # some responses need quoting + def qdval(k, v) + return if k.nil? or v.nil? + if %w"username authzid realm nonce cnonce digest-uri qop".include? k + v.gsub!(/([\\"])/, "\\\1") + return '%s="%s"' % [k, v] + else + return '%s=%s' % [k, v] + end + end + end + add_authenticator "DIGEST-MD5", DigestMD5Authenticator + + # Superclass of IMAP errors. + class Error < StandardError + end + + # Error raised when data is in the incorrect format. + class DataFormatError < Error + end + + # Error raised when a response from the server is non-parseable. + class ResponseParseError < Error + end + + # Superclass of all errors used to encapsulate "fail" responses + # from the server. + class ResponseError < Error + end + + # Error raised upon a "NO" response from the server, indicating + # that the client command could not be completed successfully. 
+ class NoResponseError < ResponseError + end + + # Error raised upon a "BAD" response from the server, indicating + # that the client command violated the IMAP protocol, or an internal + # server failure has occurred. + class BadResponseError < ResponseError + end + + # Error raised upon a "BYE" response from the server, indicating + # that the client is not being allowed to login, or has been timed + # out due to inactivity. + class ByeResponseError < ResponseError + end + end +end + +if __FILE__ == $0 + # :enddoc: + require "getoptlong" + + $stdout.sync = true + $port = nil + $user = ENV["USER"] || ENV["LOGNAME"] + $auth = "login" + $ssl = false + + def usage + $stderr.print < + + --help print this message + --port=PORT specifies port + --user=USER specifies user + --auth=AUTH specifies auth type + --ssl use ssl +EOF + end + + def get_password + print "password: " + system("stty", "-echo") + begin + return gets.chop + ensure + system("stty", "echo") + print "\n" + end + end + + def get_command + printf("%s@%s> ", $user, $host) + if line = gets + return line.strip.split(/\s+/) + else + return nil + end + end + + parser = GetoptLong.new + parser.set_options(['--debug', GetoptLong::NO_ARGUMENT], + ['--help', GetoptLong::NO_ARGUMENT], + ['--port', GetoptLong::REQUIRED_ARGUMENT], + ['--user', GetoptLong::REQUIRED_ARGUMENT], + ['--auth', GetoptLong::REQUIRED_ARGUMENT], + ['--ssl', GetoptLong::NO_ARGUMENT]) + begin + parser.each_option do |name, arg| + case name + when "--port" + $port = arg + when "--user" + $user = arg + when "--auth" + $auth = arg + when "--ssl" + $ssl = true + when "--debug" + Net::IMAP.debug = true + when "--help" + usage + exit(1) + end + end + rescue + usage + exit(1) + end + + $host = ARGV.shift + unless $host + usage + exit(1) + end + + imap = Net::IMAP.new($host, :port => $port, :ssl => $ssl) + begin + password = get_password + imap.authenticate($auth, $user, password) + while true + cmd, *args = get_command + break unless cmd + begin + case 
cmd + when "list" + for mbox in imap.list("", args[0] || "*") + if mbox.attr.include?(Net::IMAP::NOSELECT) + prefix = "!" + elsif mbox.attr.include?(Net::IMAP::MARKED) + prefix = "*" + else + prefix = " " + end + print prefix, mbox.name, "\n" + end + when "select" + imap.select(args[0] || "inbox") + print "ok\n" + when "close" + imap.close + print "ok\n" + when "summary" + unless messages = imap.responses["EXISTS"][-1] + puts "not selected" + next + end + if messages > 0 + for data in imap.fetch(1..-1, ["ENVELOPE"]) + print data.seqno, ": ", data.attr["ENVELOPE"].subject, "\n" + end + else + puts "no message" + end + when "fetch" + if args[0] + data = imap.fetch(args[0].to_i, ["RFC822.HEADER", "RFC822.TEXT"])[0] + puts data.attr["RFC822.HEADER"] + puts data.attr["RFC822.TEXT"] + else + puts "missing argument" + end + when "logout", "exit", "quit" + break + when "help", "?" + print <. +# +# Documented by William Webber and Minero Aoki. +# +# This program is free software. You can re-distribute and/or +# modify this program under the same terms as Ruby itself, +# Ruby Distribute License. +# +# NOTE: You can find Japanese version of this document at: +# http://www.ruby-lang.org/ja/man/html/net_pop.html +# +# $Id: pop.rb 19776 2008-10-14 02:22:46Z kazu $ +# +# See Net::POP3 for documentation. +# + +require 'net/protocol' +require 'digest/md5' +require 'timeout' + +begin + require "openssl/ssl" +rescue LoadError +end + +module Net + + # Non-authentication POP3 protocol error + # (reply code "-ERR", except authentication). + class POPError < ProtocolError; end + + # POP3 authentication error. + class POPAuthenticationError < ProtoAuthError; end + + # Unexpected response from the server. + class POPBadResponse < POPError; end + + # + # = Net::POP3 + # + # == What is This Library? + # + # This library provides functionality for retrieving + # email via POP3, the Post Office Protocol version 3. For details + # of POP3, see [RFC1939] (http://www.ietf.org/rfc/rfc1939.txt). 
+ # + # == Examples + # + # === Retrieving Messages + # + # This example retrieves messages from the server and deletes them + # on the server. + # + # Messages are written to files named 'inbox/1', 'inbox/2', .... + # Replace 'pop.example.com' with your POP3 server address, and + # 'YourAccount' and 'YourPassword' with the appropriate account + # details. + # + # require 'net/pop' + # + # pop = Net::POP3.new('pop.example.com') + # pop.start('YourAccount', 'YourPassword') # (1) + # if pop.mails.empty? + # puts 'No mail.' + # else + # i = 0 + # pop.each_mail do |m| # or "pop.mails.each ..." # (2) + # File.open("inbox/#{i}", 'w') do |f| + # f.write m.pop + # end + # m.delete + # i += 1 + # end + # puts "#{pop.mails.size} mails popped." + # end + # pop.finish # (3) + # + # 1. Call Net::POP3#start and start POP session. + # 2. Access messages by using POP3#each_mail and/or POP3#mails. + # 3. Close POP session by calling POP3#finish or use the block form of #start. + # + # === Shortened Code + # + # The example above is very verbose. You can shorten the code by using + # some utility methods. First, the block form of Net::POP3.start can + # be used instead of POP3.new, POP3#start and POP3#finish. + # + # require 'net/pop' + # + # Net::POP3.start('pop.example.com', 110, + # 'YourAccount', 'YourPassword') do |pop| + # if pop.mails.empty? + # puts 'No mail.' + # else + # i = 0 + # pop.each_mail do |m| # or "pop.mails.each ..." + # File.open("inbox/#{i}", 'w') do |f| + # f.write m.pop + # end + # m.delete + # i += 1 + # end + # puts "#{pop.mails.size} mails popped." + # end + # end + # + # POP3#delete_all is an alternative for #each_mail and #delete. + # + # require 'net/pop' + # + # Net::POP3.start('pop.example.com', 110, + # 'YourAccount', 'YourPassword') do |pop| + # if pop.mails.empty? + # puts 'No mail.' 
+ # else + # i = 1 + # pop.delete_all do |m| + # File.open("inbox/#{i}", 'w') do |f| + # f.write m.pop + # end + # i += 1 + # end + # end + # end + # + # And here is an even shorter example. + # + # require 'net/pop' + # + # i = 0 + # Net::POP3.delete_all('pop.example.com', 110, + # 'YourAccount', 'YourPassword') do |m| + # File.open("inbox/#{i}", 'w') do |f| + # f.write m.pop + # end + # i += 1 + # end + # + # === Memory Space Issues + # + # All the examples above get each message as one big string. + # This example avoids this. + # + # require 'net/pop' + # + # i = 1 + # Net::POP3.delete_all('pop.example.com', 110, + # 'YourAccount', 'YourPassword') do |m| + # File.open("inbox/#{i}", 'w') do |f| + # m.pop do |chunk| # get a message little by little. + # f.write chunk + # end + # i += 1 + # end + # end + # + # === Using APOP + # + # The net/pop library supports APOP authentication. + # To use APOP, use the Net::APOP class instead of the Net::POP3 class. + # You can use the utility method, Net::POP3.APOP(). For example: + # + # require 'net/pop' + # + # # Use APOP authentication if $is_apop == true + # pop = Net::POP3.APOP($is_apop).new('apop.example.com', 110) + # pop.start('YourAccount', 'YourPassword') do |pop| + # # Rest of the code is the same. + # end + # + # === Fetch Only Selected Mail Using 'UIDL' POP Command + # + # If your POP server provides UIDL functionality, + # you can grab only selected mails from the POP server. + # e.g. + # + # def need_pop?( id ) + # # determine if we need to pop this mail... + # end + # + # Net::POP3.start('pop.example.com', 110, + # 'Your account', 'Your password') do |pop| + # pop.mails.select { |m| need_pop?(m.unique_id) }.each do |m| + # do_something(m.pop) + # end + # end + # + # The POPMail#unique_id() method returns the unique-id of the message as a + # String. Normally the unique-id is a hash of the message. 
+ # + class POP3 < Protocol + + Revision = %q$Revision: 19776 $.split[1] + + # + # Class Parameters + # + + def POP3.default_port + default_pop3_port() + end + + # The default port for POP3 connections, port 110 + def POP3.default_pop3_port + 110 + end + + # The default port for POP3S connections, port 995 + def POP3.default_pop3s_port + 995 + end + + def POP3.socket_type #:nodoc: obsolete + Net::InternetMessageIO + end + + # + # Utilities + # + + # Returns the APOP class if +isapop+ is true; otherwise, returns + # the POP class. For example: + # + # # Example 1 + # pop = Net::POP3::APOP($is_apop).new(addr, port) + # + # # Example 2 + # Net::POP3::APOP($is_apop).start(addr, port) do |pop| + # .... + # end + # + def POP3.APOP(isapop) + isapop ? APOP : POP3 + end + + # Starts a POP3 session and iterates over each POPMail object, + # yielding it to the +block+. + # This method is equivalent to: + # + # Net::POP3.start(address, port, account, password) do |pop| + # pop.each_mail do |m| + # yield m + # end + # end + # + # This method raises a POPAuthenticationError if authentication fails. + # + # === Example + # + # Net::POP3.foreach('pop.example.com', 110, + # 'YourAccount', 'YourPassword') do |m| + # file.write m.pop + # m.delete if $DELETE + # end + # + def POP3.foreach(address, port = nil, + account = nil, password = nil, + isapop = false, &block) # :yields: message + start(address, port, account, password, isapop) {|pop| + pop.each_mail(&block) + } + end + + # Starts a POP3 session and deletes all messages on the server. + # If a block is given, each POPMail object is yielded to it before + # being deleted. + # + # This method raises a POPAuthenticationError if authentication fails. 
+ # + # === Example + # + # Net::POP3.delete_all('pop.example.com', 110, + # 'YourAccount', 'YourPassword') do |m| + # file.write m.pop + # end + # + def POP3.delete_all(address, port = nil, + account = nil, password = nil, + isapop = false, &block) + start(address, port, account, password, isapop) {|pop| + pop.delete_all(&block) + } + end + + # Opens a POP3 session, attempts authentication, and quits. + # + # This method raises POPAuthenticationError if authentication fails. + # + # === Example: normal POP3 + # + # Net::POP3.auth_only('pop.example.com', 110, + # 'YourAccount', 'YourPassword') + # + # === Example: APOP + # + # Net::POP3.auth_only('pop.example.com', 110, + # 'YourAccount', 'YourPassword', true) + # + def POP3.auth_only(address, port = nil, + account = nil, password = nil, + isapop = false) + new(address, port, isapop).auth_only account, password + end + + # Starts a pop3 session, attempts authentication, and quits. + # This method must not be called while POP3 session is opened. + # This method raises POPAuthenticationError if authentication fails. + def auth_only(account, password) + raise IOError, 'opening previously opened POP session' if started? + start(account, password) { + ; + } + end + + # + # SSL + # + + @ssl_params = nil + + # call-seq: + # Net::POP.enable_ssl(params = {}) + # + # Enable SSL for all new instances. + # +params+ is passed to OpenSSL::SSLContext#set_params. + def POP3.enable_ssl(*args) + @ssl_params = create_ssl_params(*args) + end + + def POP3.create_ssl_params(verify_or_params = {}, certs = nil) + begin + params = verify_or_params.to_hash + rescue NoMethodError + params = {} + params[:verify_mode] = verify_or_params + if certs + if File.file?(certs) + params[:ca_file] = certs + elsif File.directory?(certs) + params[:ca_path] = certs + end + end + end + return params + end + + # Disable SSL for all new instances. 
+ def POP3.disable_ssl + @ssl_params = nil + end + + def POP3.ssl_params + return @ssl_params + end + + def POP3.use_ssl? + return !@ssl_params.nil? + end + + def POP3.verify + return @ssl_params[:verify_mode] + end + + def POP3.certs + return @ssl_params[:ca_file] || @ssl_params[:ca_path] + end + + # + # Session management + # + + # Creates a new POP3 object and open the connection. Equivalent to + # + # Net::POP3.new(address, port, isapop).start(account, password) + # + # If +block+ is provided, yields the newly-opened POP3 object to it, + # and automatically closes it at the end of the session. + # + # === Example + # + # Net::POP3.start(addr, port, account, password) do |pop| + # pop.each_mail do |m| + # file.write m.pop + # m.delete + # end + # end + # + def POP3.start(address, port = nil, + account = nil, password = nil, + isapop = false, &block) # :yield: pop + new(address, port, isapop).start(account, password, &block) + end + + # Creates a new POP3 object. + # + # +address+ is the hostname or ip address of your POP3 server. + # + # The optional +port+ is the port to connect to. + # + # The optional +isapop+ specifies whether this connection is going + # to use APOP authentication; it defaults to +false+. + # + # This method does *not* open the TCP connection. + def initialize(addr, port = nil, isapop = false) + @address = addr + @ssl_params = POP3.ssl_params + @port = port + @apop = isapop + + @command = nil + @socket = nil + @started = false + @open_timeout = 30 + @read_timeout = 60 + @debug_output = nil + + @mails = nil + @n_mails = nil + @n_bytes = nil + end + + # Does this instance use APOP authentication? + def apop? + @apop + end + + # does this instance use SSL? + def use_ssl? + return !@ssl_params.nil? + end + + # call-seq: + # Net::POP#enable_ssl(params = {}) + # + # Enables SSL for this instance. Must be called before the connection is + # established to have any effect. 
+ # +params[:port]+ is port to establish the SSL connection on; Defaults to 995. + # +params+ (except :port) is passed to OpenSSL::SSLContext#set_params. + def enable_ssl(verify_or_params = {}, certs = nil, port = nil) + begin + @ssl_params = verify_or_params.to_hash.dup + @port = @ssl_params.delete(:port) || @port + rescue NoMethodError + @ssl_params = POP3.create_ssl_params(verify_or_params, certs) + @port = port || @port + end + end + + def disable_ssl + @ssl_params = nil + end + + # Provide human-readable stringification of class state. + def inspect + "#<#{self.class} #{@address}:#{@port} open=#{@started}>" + end + + # *WARNING*: This method causes a serious security hole. + # Use this method only for debugging. + # + # Set an output stream for debugging. + # + # === Example + # + # pop = Net::POP.new(addr, port) + # pop.set_debug_output $stderr + # pop.start(account, passwd) do |pop| + # .... + # end + # + def set_debug_output(arg) + @debug_output = arg + end + + # The address to connect to. + attr_reader :address + + # The port number to connect to. + def port + return @port || (use_ssl? ? POP3.default_pop3s_port : POP3.default_pop3_port) + end + + # Seconds to wait until a connection is opened. + # If the POP3 object cannot open a connection within this time, + # it raises a TimeoutError exception. + attr_accessor :open_timeout + + # Seconds to wait until reading one block (by one read(1) call). + # If the POP3 object cannot complete a read() within this time, + # it raises a TimeoutError exception. + attr_reader :read_timeout + + # Set the read timeout. + def read_timeout=(sec) + @command.socket.read_timeout = sec if @command + @read_timeout = sec + end + + # +true+ if the POP3 session has started. + def started? + @started + end + + alias active? started? #:nodoc: obsolete + + # Starts a POP3 session. + # + # When called with block, gives a POP3 object to the block and + # closes the session after block call finishes. 
+ # + # This method raises a POPAuthenticationError if authentication fails. + def start(account, password) # :yield: pop + raise IOError, 'POP session already started' if @started + if block_given? + begin + do_start account, password + return yield(self) + ensure + do_finish + end + else + do_start account, password + return self + end + end + + def do_start(account, password) + s = timeout(@open_timeout) { TCPSocket.open(@address, port) } + if use_ssl? + raise 'openssl library not installed' unless defined?(OpenSSL) + context = OpenSSL::SSL::SSLContext.new + context.set_params(@ssl_params) + s = OpenSSL::SSL::SSLSocket.new(s, context) + s.sync_close = true + s.connect + if context.verify_mode != OpenSSL::SSL::VERIFY_NONE + s.post_connection_check(@address) + end + end + @socket = InternetMessageIO.new(s) + logging "POP session started: #{@address}:#{@port} (#{@apop ? 'APOP' : 'POP'})" + @socket.read_timeout = @read_timeout + @socket.debug_output = @debug_output + on_connect + @command = POP3Command.new(@socket) + if apop? + @command.apop account, password + else + @command.auth account, password + end + @started = true + ensure + # Authentication failed, clean up connection. + unless @started + s.close if s and not s.closed? + @socket = nil + @command = nil + end + end + private :do_start + + def on_connect + end + private :on_connect + + # Finishes a POP3 session and closes TCP connection. + def finish + raise IOError, 'POP session not yet started' unless started? + do_finish + end + + def do_finish + @mails = nil + @n_mails = nil + @n_bytes = nil + @command.quit if @command + ensure + @started = false + @command = nil + @socket.close if @socket and not @socket.closed? + @socket = nil + end + private :do_finish + + def command + raise IOError, 'POP session not opened yet' \ + if not @socket or @socket.closed? + @command + end + private :command + + # + # POP protocol wrapper + # + + # Returns the number of messages on the POP server. 
+ def n_mails + return @n_mails if @n_mails + @n_mails, @n_bytes = command().stat + @n_mails + end + + # Returns the total size in bytes of all the messages on the POP server. + def n_bytes + return @n_bytes if @n_bytes + @n_mails, @n_bytes = command().stat + @n_bytes + end + + # Returns an array of Net::POPMail objects, representing all the + # messages on the server. This array is renewed when the session + # restarts; otherwise, it is fetched from the server the first time + # this method is called (directly or indirectly) and cached. + # + # This method raises a POPError if an error occurs. + def mails + return @mails.dup if @mails + if n_mails() == 0 + # some popd raises error for LIST on the empty mailbox. + @mails = [] + return [] + end + + @mails = command().list.map {|num, size| + POPMail.new(num, size, self, command()) + } + @mails.dup + end + + # Yields each message to the passed-in block in turn. + # Equivalent to: + # + # pop3.mails.each do |popmail| + # .... + # end + # + # This method raises a POPError if an error occurs. + def each_mail(&block) # :yield: message + mails().each(&block) + end + + alias each each_mail + + # Deletes all messages on the server. + # + # If called with a block, yields each message in turn before deleting it. + # + # === Example + # + # n = 1 + # pop.delete_all do |m| + # File.open("inbox/#{n}") do |f| + # f.write m.pop + # end + # n += 1 + # end + # + # This method raises a POPError if an error occurs. + # + def delete_all # :yield: message + mails().each do |m| + yield m if block_given? + m.delete unless m.deleted? + end + end + + # Resets the session. This clears all "deleted" marks from messages. + # + # This method raises a POPError if an error occurs. 
+ def reset + command().rset + mails().each do |m| + m.instance_eval { + @deleted = false + } + end + end + + def set_all_uids #:nodoc: internal use only (called from POPMail#uidl) + uidl = command().uidl + @mails.each {|m| m.uid = uidl[m.number] } + end + + def logging(msg) + @debug_output << msg + "\n" if @debug_output + end + + end # class POP3 + + # class aliases + POP = POP3 + POPSession = POP3 + POP3Session = POP3 + + # + # This class is equivalent to POP3, except that it uses APOP authentication. + # + class APOP < POP3 + # Always returns true. + def apop? + true + end + end + + # class aliases + APOPSession = APOP + + # + # This class represents a message which exists on the POP server. + # Instances of this class are created by the POP3 class; they should + # not be directly created by the user. + # + class POPMail + + def initialize(num, len, pop, cmd) #:nodoc: + @number = num + @length = len + @pop = pop + @command = cmd + @deleted = false + @uid = nil + end + + # The sequence number of the message on the server. + attr_reader :number + + # The length of the message in octets. + attr_reader :length + alias size length + + # Provide human-readable stringification of class state. + def inspect + "#<#{self.class} #{@number}#{@deleted ? ' deleted' : ''}>" + end + + # + # This method fetches the message. If called with a block, the + # message is yielded to the block one chunk at a time. If called + # without a block, the message is returned as a String. The optional + # +dest+ argument will be prepended to the returned String; this + # argument is essentially obsolete. 
+ # + # === Example without block + # + # POP3.start('pop.example.com', 110, + # 'YourAccount, 'YourPassword') do |pop| + # n = 1 + # pop.mails.each do |popmail| + # File.open("inbox/#{n}", 'w') do |f| + # f.write popmail.pop + # end + # popmail.delete + # n += 1 + # end + # end + # + # === Example with block + # + # POP3.start('pop.example.com', 110, + # 'YourAccount, 'YourPassword') do |pop| + # n = 1 + # pop.mails.each do |popmail| + # File.open("inbox/#{n}", 'w') do |f| + # popmail.pop do |chunk| #### + # f.write chunk + # end + # end + # n += 1 + # end + # end + # + # This method raises a POPError if an error occurs. + # + def pop( dest = '', &block ) # :yield: message_chunk + if block_given? + @command.retr(@number, &block) + nil + else + @command.retr(@number) do |chunk| + dest << chunk + end + dest + end + end + + alias all pop #:nodoc: obsolete + alias mail pop #:nodoc: obsolete + + # Fetches the message header and +lines+ lines of body. + # + # The optional +dest+ argument is obsolete. + # + # This method raises a POPError if an error occurs. + def top(lines, dest = '') + @command.top(@number, lines) do |chunk| + dest << chunk + end + dest + end + + # Fetches the message header. + # + # The optional +dest+ argument is obsolete. + # + # This method raises a POPError if an error occurs. + def header(dest = '') + top(0, dest) + end + + # Marks a message for deletion on the server. Deletion does not + # actually occur until the end of the session; deletion may be + # cancelled for _all_ marked messages by calling POP3#reset(). + # + # This method raises a POPError if an error occurs. + # + # === Example + # + # POP3.start('pop.example.com', 110, + # 'YourAccount, 'YourPassword') do |pop| + # n = 1 + # pop.mails.each do |popmail| + # File.open("inbox/#{n}", 'w') do |f| + # f.write popmail.pop + # end + # popmail.delete #### + # n += 1 + # end + # end + # + def delete + @command.dele @number + @deleted = true + end + + alias delete! 
delete #:nodoc: obsolete + + # True if the mail has been deleted. + def deleted? + @deleted + end + + # Returns the unique-id of the message. + # Normally the unique-id is a hash string of the message. + # + # This method raises a POPError if an error occurs. + def unique_id + return @uid if @uid + @pop.set_all_uids + @uid + end + + alias uidl unique_id + + def uid=(uid) #:nodoc: internal use only + @uid = uid + end + + end # class POPMail + + + class POP3Command #:nodoc: internal use only + + def initialize(sock) + @socket = sock + @error_occured = false + res = check_response(critical { recv_response() }) + @apop_stamp = res.slice(/<[!-~]+@[!-~]+>/) + end + + attr_reader :socket + + def inspect + "#<#{self.class} socket=#{@socket}>" + end + + def auth(account, password) + check_response_auth(critical { + check_response_auth(get_response('USER %s', account)) + get_response('PASS %s', password) + }) + end + + def apop(account, password) + raise POPAuthenticationError, 'not APOP server; cannot login' \ + unless @apop_stamp + check_response_auth(critical { + get_response('APOP %s %s', + account, + Digest::MD5.hexdigest(@apop_stamp + password)) + }) + end + + def list + critical { + getok 'LIST' + list = [] + @socket.each_list_item do |line| + m = /\A(\d+)[ \t]+(\d+)/.match(line) or + raise POPBadResponse, "bad response: #{line}" + list.push [m[1].to_i, m[2].to_i] + end + return list + } + end + + def stat + res = check_response(critical { get_response('STAT') }) + m = /\A\+OK\s+(\d+)\s+(\d+)/.match(res) or + raise POPBadResponse, "wrong response format: #{res}" + [m[1].to_i, m[2].to_i] + end + + def rset + check_response(critical { get_response('RSET') }) + end + + def top(num, lines = 0, &block) + critical { + getok('TOP %d %d', num, lines) + @socket.each_message_chunk(&block) + } + end + + def retr(num, &block) + critical { + getok('RETR %d', num) + @socket.each_message_chunk(&block) + } + end + + def dele(num) + check_response(critical { get_response('DELE %d', 
num) }) + end + + def uidl(num = nil) + if num + res = check_response(critical { get_response('UIDL %d', num) }) + return res.split(/ /)[1] + else + critical { + getok('UIDL') + table = {} + @socket.each_list_item do |line| + num, uid = line.split + table[num.to_i] = uid + end + return table + } + end + end + + def quit + check_response(critical { get_response('QUIT') }) + end + + private + + def getok(fmt, *fargs) + @socket.writeline sprintf(fmt, *fargs) + check_response(recv_response()) + end + + def get_response(fmt, *fargs) + @socket.writeline sprintf(fmt, *fargs) + recv_response() + end + + def recv_response + @socket.readline + end + + def check_response(res) + raise POPError, res unless /\A\+OK/i =~ res + res + end + + def check_response_auth(res) + raise POPAuthenticationError, res unless /\A\+OK/i =~ res + res + end + + def critical + return '+OK dummy ok response' if @error_occured + begin + return yield() + rescue Exception + @error_occured = true + raise + end + end + + end # class POP3Command + +end # module Net diff --git a/lib/net/protocol.rb b/lib/net/protocol.rb new file mode 100644 index 0000000..c1fd94b --- /dev/null +++ b/lib/net/protocol.rb @@ -0,0 +1,382 @@ +# +# = net/protocol.rb +# +#-- +# Copyright (c) 1999-2004 Yukihiro Matsumoto +# Copyright (c) 1999-2004 Minero Aoki +# +# written and maintained by Minero Aoki +# +# This program is free software. You can re-distribute and/or +# modify this program under the same terms as Ruby itself, +# Ruby Distribute License or GNU General Public License. +# +# $Id: protocol.rb 12091 2007-03-19 02:27:08Z aamine $ +#++ +# +# WARNING: This file is going to remove. +# Do not rely on the implementation written in this file. 
+# + +require 'socket' +require 'timeout' + +module Net # :nodoc: + + class Protocol #:nodoc: internal use only + private + def Protocol.protocol_param(name, val) + module_eval(<<-End, __FILE__, __LINE__ + 1) + def #{name} + #{val} + end + End + end + end + + + class ProtocolError < StandardError; end + class ProtoSyntaxError < ProtocolError; end + class ProtoFatalError < ProtocolError; end + class ProtoUnknownError < ProtocolError; end + class ProtoServerError < ProtocolError; end + class ProtoAuthError < ProtocolError; end + class ProtoCommandError < ProtocolError; end + class ProtoRetriableError < ProtocolError; end + ProtocRetryError = ProtoRetriableError + + + class BufferedIO #:nodoc: internal use only + def initialize(io) + @io = io + @read_timeout = 60 + @debug_output = nil + @rbuf = '' + end + + attr_reader :io + attr_accessor :read_timeout + attr_accessor :debug_output + + def inspect + "#<#{self.class} io=#{@io}>" + end + + def closed? + @io.closed? + end + + def close + @io.close + end + + # + # Read + # + + public + + def read(len, dest = '', ignore_eof = false) + LOG "reading #{len} bytes..." + read_bytes = 0 + begin + while read_bytes + @rbuf.size < len + dest << (s = rbuf_consume(@rbuf.size)) + read_bytes += s.size + rbuf_fill + end + dest << (s = rbuf_consume(len - read_bytes)) + read_bytes += s.size + rescue EOFError + raise unless ignore_eof + end + LOG "read #{read_bytes} bytes" + dest + end + + def read_all(dest = '') + LOG 'reading all...' 
+ read_bytes = 0 + begin + while true + dest << (s = rbuf_consume(@rbuf.size)) + read_bytes += s.size + rbuf_fill + end + rescue EOFError + ; + end + LOG "read #{read_bytes} bytes" + dest + end + + def readuntil(terminator, ignore_eof = false) + begin + until idx = @rbuf.index(terminator) + rbuf_fill + end + return rbuf_consume(idx + terminator.size) + rescue EOFError + raise unless ignore_eof + return rbuf_consume(@rbuf.size) + end + end + + def readline + readuntil("\n").chop + end + + private + + BUFSIZE = 1024 * 16 + + def rbuf_fill + timeout(@read_timeout) { + @rbuf << @io.sysread(BUFSIZE) + } + end + + def rbuf_consume(len) + s = @rbuf.slice!(0, len) + @debug_output << %Q[-> #{s.dump}\n] if @debug_output + s + end + + # + # Write + # + + public + + def write(str) + writing { + write0 str + } + end + + def writeline(str) + writing { + write0 str + "\r\n" + } + end + + private + + def writing + @written_bytes = 0 + @debug_output << '<- ' if @debug_output + yield + @debug_output << "\n" if @debug_output + bytes = @written_bytes + @written_bytes = nil + bytes + end + + def write0(str) + @debug_output << str.dump if @debug_output + len = @io.write(str) + @written_bytes += len + len + end + + # + # Logging + # + + private + + def LOG_off + @save_debug_out = @debug_output + @debug_output = nil + end + + def LOG_on + @debug_output = @save_debug_out + end + + def LOG(msg) + return unless @debug_output + @debug_output << msg + "\n" + end + end + + + class InternetMessageIO < BufferedIO #:nodoc: internal use only + def initialize(io) + super + @wbuf = nil + end + + # + # Read + # + + def each_message_chunk + LOG 'reading message...' 
+ LOG_off() + read_bytes = 0 + while (line = readuntil("\r\n")) != ".\r\n" + read_bytes += line.size + yield line.sub(/\A\./, '') + end + LOG_on() + LOG "read message (#{read_bytes} bytes)" + end + + # *library private* (cannot handle 'break') + def each_list_item + while (str = readuntil("\r\n")) != ".\r\n" + yield str.chop + end + end + + def write_message_0(src) + prev = @written_bytes + each_crlf_line(src) do |line| + write0 line.sub(/\A\./, '..') + end + @written_bytes - prev + end + + # + # Write + # + + def write_message(src) + LOG "writing message from #{src.class}" + LOG_off() + len = writing { + using_each_crlf_line { + write_message_0 src + } + } + LOG_on() + LOG "wrote #{len} bytes" + len + end + + def write_message_by_block(&block) + LOG 'writing message from block' + LOG_off() + len = writing { + using_each_crlf_line { + begin + block.call(WriteAdapter.new(self, :write_message_0)) + rescue LocalJumpError + # allow `break' from writer block + end + } + } + LOG_on() + LOG "wrote #{len} bytes" + len + end + + private + + def using_each_crlf_line + @wbuf = '' + yield + if not @wbuf.empty? # unterminated last line + write0 @wbuf.chomp + "\r\n" + elsif @written_bytes == 0 # empty src + write0 "\r\n" + end + write0 ".\r\n" + @wbuf = nil + end + + def each_crlf_line(src) + buffer_filling(@wbuf, src) do + while line = @wbuf.slice!(/\A.*(?:\n|\r\n|\r(?!\z))/n) + yield line.chomp("\n") + "\r\n" + end + end + end + + def buffer_filling(buf, src) + case src + when String # for speeding up. + 0.step(src.size - 1, 1024) do |i| + buf << src[i, 1024] + yield + end + when File # for speeding up. + while s = src.read(1024) + buf << s + yield + end + else # generic reader + src.each do |str| + buf << str + yield if buf.size > 1024 + end + yield unless buf.empty? 
+ end + end + end + + + # + # The writer adapter class + # + class WriteAdapter + def initialize(socket, method) + @socket = socket + @method_id = method + end + + def inspect + "#<#{self.class} socket=#{@socket.inspect}>" + end + + def write(str) + @socket.__send__(@method_id, str) + end + + alias print write + + def <<(str) + write str + self + end + + def puts(str = '') + write str.chomp("\n") + "\n" + end + + def printf(*args) + write sprintf(*args) + end + end + + + class ReadAdapter #:nodoc: internal use only + def initialize(block) + @block = block + end + + def inspect + "#<#{self.class}>" + end + + def <<(str) + call_block(str, &@block) if @block + end + + private + + # This method is needed because @block must be called by yield, + # not Proc#call. You can see difference when using `break' in + # the block. + def call_block(str) + yield str + end + end + + + module NetPrivate #:nodoc: obsolete + Socket = ::Net::InternetMessageIO + end + +end # module Net diff --git a/lib/net/smtp.rb b/lib/net/smtp.rb new file mode 100644 index 0000000..4aef8d7 --- /dev/null +++ b/lib/net/smtp.rb @@ -0,0 +1,1014 @@ +# = net/smtp.rb +# +# Copyright (c) 1999-2007 Yukihiro Matsumoto. +# +# Copyright (c) 1999-2007 Minero Aoki. +# +# Written & maintained by Minero Aoki . +# +# Documented by William Webber and Minero Aoki. +# +# This program is free software. You can re-distribute and/or +# modify this program under the same terms as Ruby itself. +# +# NOTE: You can find Japanese version of this document at: +# http://www.ruby-lang.org/ja/man/html/net_smtp.html +# +# $Id: smtp.rb 18351 2008-08-04 05:46:53Z shyouhei $ +# +# See Net::SMTP for documentation. +# + +require 'net/protocol' +require 'digest/md5' +require 'timeout' +begin + require 'openssl' +rescue LoadError +end + +module Net + + # Module mixed in to all SMTP error classes + module SMTPError + # This *class* is a module for backward compatibility. + # In later release, this module becomes a class. 
+ end + + # Represents an SMTP authentication error. + class SMTPAuthenticationError < ProtoAuthError + include SMTPError + end + + # Represents SMTP error code 420 or 450, a temporary error. + class SMTPServerBusy < ProtoServerError + include SMTPError + end + + # Represents an SMTP command syntax error (error code 500) + class SMTPSyntaxError < ProtoSyntaxError + include SMTPError + end + + # Represents a fatal SMTP error (error code 5xx, except for 500) + class SMTPFatalError < ProtoFatalError + include SMTPError + end + + # Unexpected reply code returned from server. + class SMTPUnknownError < ProtoUnknownError + include SMTPError + end + + # Command is not supported on server. + class SMTPUnsupportedCommand < ProtocolError + include SMTPError + end + + # + # = Net::SMTP + # + # == What is This Library? + # + # This library provides functionality to send internet + # mail via SMTP, the Simple Mail Transfer Protocol. For details of + # SMTP itself, see [RFC2821] (http://www.ietf.org/rfc/rfc2821.txt). + # + # == What is This Library NOT? + # + # This library does NOT provide functions to compose internet mails. + # You must create them by yourself. If you want better mail support, + # try RubyMail or TMail. You can get both libraries from RAA. + # (http://www.ruby-lang.org/en/raa.html) + # + # FYI: the official documentation on internet mail is: [RFC2822] (http://www.ietf.org/rfc/rfc2822.txt). + # + # == Examples + # + # === Sending Messages + # + # You must open a connection to an SMTP server before sending messages. + # The first argument is the address of your SMTP server, and the second + # argument is the port number. Using SMTP.start with a block is the simplest + # way to do this. This way, the SMTP connection is closed automatically + # after the block is executed. + # + # require 'net/smtp' + # Net::SMTP.start('your.smtp.server', 25) do |smtp| + # # Use the SMTP object smtp only in this block. 
+ # end + # + # Replace 'your.smtp.server' with your SMTP server. Normally + # your system manager or internet provider supplies a server + # for you. + # + # Then you can send messages. + # + # msgstr = <<END_OF_MESSAGE + # From: Your Name <your@mail.address> + # To: Destination Address <someone@example.com> + # Subject: test message + # Date: Sat, 23 Jun 2001 16:26:43 +0900 + # Message-Id: <unique.message.id.string@example.com> + # + # This is a test message. + # END_OF_MESSAGE + # + # require 'net/smtp' + # Net::SMTP.start('your.smtp.server', 25) do |smtp| + # smtp.send_message msgstr, + # 'your@mail.address', + # 'his_address@example.com' + # end + # + # === Closing the Session + # + # You MUST close the SMTP session after sending messages, by calling + # the #finish method: + # + # # using SMTP#finish + # smtp = Net::SMTP.start('your.smtp.server', 25) + # smtp.send_message msgstr, 'from@address', 'to@address' + # smtp.finish + # + # You can also use the block form of SMTP.start/SMTP#start. This closes + # the SMTP session automatically: + # + # # using block form of SMTP.start + # Net::SMTP.start('your.smtp.server', 25) do |smtp| + # smtp.send_message msgstr, 'from@address', 'to@address' + # end + # + # I strongly recommend this scheme. This form is simpler and more robust. + # + # === HELO domain + # + # In almost all situations, you must provide a third argument + # to SMTP.start/SMTP#start. This is the domain name which you are on + # (the host to send mail from). It is called the "HELO domain". + # The SMTP server will judge whether it should send or reject + # the SMTP session by inspecting the HELO domain. + # + # Net::SMTP.start('your.smtp.server', 25, + # 'mail.from.domain') { |smtp| ... } + # + # === SMTP Authentication + # + # The Net::SMTP class supports three authentication schemes; + # PLAIN, LOGIN and CRAM MD5. (SMTP Authentication: [RFC2554]) + # To use SMTP authentication, pass extra arguments to + # SMTP.start/SMTP#start.
+ # + # # PLAIN + # Net::SMTP.start('your.smtp.server', 25, 'mail.from.domain', + # 'Your Account', 'Your Password', :plain) + # # LOGIN + # Net::SMTP.start('your.smtp.server', 25, 'mail.from.domain', + # 'Your Account', 'Your Password', :login) + # + # # CRAM MD5 + # Net::SMTP.start('your.smtp.server', 25, 'mail.from.domain', + # 'Your Account', 'Your Password', :cram_md5) + # + class SMTP + + Revision = %q$Revision: 18351 $.split[1] + + # The default SMTP port number, 25. + def SMTP.default_port + 25 + end + + # The default mail submission port number, 587. + def SMTP.default_submission_port + 587 + end + + # The default SMTPS port number, 465. + def SMTP.default_tls_port + 465 + end + + class << self + alias default_ssl_port default_tls_port + end + + def SMTP.default_ssl_context + OpenSSL::SSL::SSLContext.new + end + + # + # Creates a new Net::SMTP object. + # + # +address+ is the hostname or ip address of your SMTP + # server. +port+ is the port to connect to; it defaults to + # port 25. + # + # This method does not open the TCP connection. You can use + # SMTP.start instead of SMTP.new if you want to do everything + # at once. Otherwise, follow SMTP.new with SMTP#start. + # + def initialize(address, port = nil) + @address = address + @port = (port || SMTP.default_port) + @esmtp = true + @capabilities = nil + @socket = nil + @started = false + @open_timeout = 30 + @read_timeout = 60 + @error_occured = false + @debug_output = nil + @tls = false + @starttls = false + @ssl_context = nil + end + + # Provide human-readable stringification of class state. + def inspect + "#<#{self.class} #{@address}:#{@port} started=#{@started}>" + end + + # +true+ if the SMTP object uses ESMTP (which it does by default). + def esmtp? + @esmtp + end + + # + # Set whether to use ESMTP or not. This should be done before + # calling #start. 
Note that if #start is called in ESMTP mode, + # and the connection fails due to a ProtocolError, the SMTP + # object will automatically switch to plain SMTP mode and + # retry (but not vice versa). + # + def esmtp=(bool) + @esmtp = bool + end + + alias esmtp esmtp? + + # true if server advertises STARTTLS. + # You cannot get valid value before opening SMTP session. + def capable_starttls? + capable?('STARTTLS') + end + + def capable?(key) + return nil unless @capabilities + @capabilities[key] ? true : false + end + private :capable? + + # true if server advertises AUTH PLAIN. + # You cannot get valid value before opening SMTP session. + def capable_plain_auth? + auth_capable?('PLAIN') + end + + # true if server advertises AUTH LOGIN. + # You cannot get valid value before opening SMTP session. + def capable_login_auth? + auth_capable?('LOGIN') + end + + # true if server advertises AUTH CRAM-MD5. + # You cannot get valid value before opening SMTP session. + def capable_cram_md5_auth? + auth_capable?('CRAM-MD5') + end + + def auth_capable?(type) + return nil unless @capabilities + return false unless @capabilities['AUTH'] + @capabilities['AUTH'].include?(type) + end + private :auth_capable? + + # Returns supported authentication methods on this server. + # You cannot get valid value before opening SMTP session. + def capable_auth_types + return [] unless @capabilities + return [] unless @capabilities['AUTH'] + @capabilities['AUTH'] + end + + # true if this object uses SMTP/TLS (SMTPS). + def tls? + @tls + end + + alias ssl? tls? + + # Enables SMTP/TLS (SMTPS: SMTP over direct TLS connection) for + # this object. Must be called before the connection is established + # to have any effect. +context+ is a OpenSSL::SSL::SSLContext object. 
+ def enable_tls(context = SMTP.default_ssl_context) + raise 'openssl library not installed' unless defined?(OpenSSL) + raise ArgumentError, "SMTPS and STARTTLS is exclusive" if @starttls + @tls = true + @ssl_context = context + end + + alias enable_ssl enable_tls + + # Disables SMTP/TLS for this object. Must be called before the + # connection is established to have any effect. + def disable_tls + @tls = false + @ssl_context = nil + end + + alias disable_ssl disable_tls + + # Returns truth value if this object uses STARTTLS. + # If this object always uses STARTTLS, returns :always. + # If this object uses STARTTLS when the server support TLS, returns :auto. + def starttls? + @starttls + end + + # true if this object uses STARTTLS. + def starttls_always? + @starttls == :always + end + + # true if this object uses STARTTLS when server advertises STARTTLS. + def starttls_auto? + @starttls == :auto + end + + # Enables SMTP/TLS (STARTTLS) for this object. + # +context+ is a OpenSSL::SSL::SSLContext object. + def enable_starttls(context = SMTP.default_ssl_context) + raise 'openssl library not installed' unless defined?(OpenSSL) + raise ArgumentError, "SMTPS and STARTTLS is exclusive" if @tls + @starttls = :always + @ssl_context = context + end + + # Enables SMTP/TLS (STARTTLS) for this object if server accepts. + # +context+ is a OpenSSL::SSL::SSLContext object. + def enable_starttls_auto(context = SMTP.default_ssl_context) + raise 'openssl library not installed' unless defined?(OpenSSL) + raise ArgumentError, "SMTPS and STARTTLS is exclusive" if @tls + @starttls = :auto + @ssl_context = context + end + + # Disables SMTP/TLS (STARTTLS) for this object. Must be called + # before the connection is established to have any effect. + def disable_starttls + @starttls = false + @ssl_context = nil + end + + # The address of the SMTP server to connect to. + attr_reader :address + + # The port number of the SMTP server to connect to. 
+ attr_reader :port + + # Seconds to wait while attempting to open a connection. + # If the connection cannot be opened within this time, a + # TimeoutError is raised. + attr_accessor :open_timeout + + # Seconds to wait while reading one block (by one read(2) call). + # If the read(2) call does not complete within this time, a + # TimeoutError is raised. + attr_reader :read_timeout + + # Set the number of seconds to wait until timing-out a read(2) + # call. + def read_timeout=(sec) + @socket.read_timeout = sec if @socket + @read_timeout = sec + end + + # + # WARNING: This method causes serious security holes. + # Use this method for only debugging. + # + # Set an output stream for debug logging. + # You must call this before #start. + # + # # example + # smtp = Net::SMTP.new(addr, port) + # smtp.set_debug_output $stderr + # smtp.start do |smtp| + # .... + # end + # + def debug_output=(arg) + @debug_output = arg + end + + alias set_debug_output debug_output= + + # + # SMTP session control + # + + # + # Creates a new Net::SMTP object and connects to the server. + # + # This method is equivalent to: + # + # Net::SMTP.new(address, port).start(helo_domain, account, password, authtype) + # + # === Example + # + # Net::SMTP.start('your.smtp.server') do |smtp| + # smtp.send_message msgstr, 'from@example.com', ['dest@example.com'] + # end + # + # === Block Usage + # + # If called with a block, the newly-opened Net::SMTP object is yielded + # to the block, and automatically closed when the block finishes. If called + # without a block, the newly-opened Net::SMTP object is returned to + # the caller, and it is the caller's responsibility to close it when + # finished. + # + # === Parameters + # + # +address+ is the hostname or ip address of your smtp server. + # + # +port+ is the port to connect to; it defaults to port 25. + # + # +helo+ is the _HELO_ _domain_ provided by the client to the + # server (see overview comments); it defaults to 'localhost'. 
+ # + # The remaining arguments are used for SMTP authentication, if required + # or desired. +user+ is the account name; +secret+ is your password + # or other authentication token; and +authtype+ is the authentication + # type, one of :plain, :login, or :cram_md5. See the discussion of + # SMTP Authentication in the overview notes. + # + # === Errors + # + # This method may raise: + # + # * Net::SMTPAuthenticationError + # * Net::SMTPServerBusy + # * Net::SMTPSyntaxError + # * Net::SMTPFatalError + # * Net::SMTPUnknownError + # * IOError + # * TimeoutError + # + def SMTP.start(address, port = nil, helo = 'localhost', + user = nil, secret = nil, authtype = nil, + &block) # :yield: smtp + new(address, port).start(helo, user, secret, authtype, &block) + end + + # +true+ if the SMTP session has been started. + def started? + @started + end + + # + # Opens a TCP connection and starts the SMTP session. + # + # === Parameters + # + # +helo+ is the _HELO_ _domain_ that you'll dispatch mails from; see + # the discussion in the overview notes. + # + # If both of +user+ and +secret+ are given, SMTP authentication + # will be attempted using the AUTH command. +authtype+ specifies + # the type of authentication to attempt; it must be one of + # :login, :plain, and :cram_md5. See the notes on SMTP Authentication + # in the overview. + # + # === Block Usage + # + # When this methods is called with a block, the newly-started SMTP + # object is yielded to the block, and automatically closed after + # the block call finishes. Otherwise, it is the caller's + # responsibility to close the session when finished. + # + # === Example + # + # This is very similar to the class method SMTP.start. 
+ # + # require 'net/smtp' + # smtp = Net::SMTP.new('smtp.mail.server', 25) + # smtp.start(helo_domain, account, password, authtype) do |smtp| + # smtp.send_message msgstr, 'from@example.com', ['dest@example.com'] + # end + # + # The primary use of this method (as opposed to SMTP.start) + # is probably to set debugging (#set_debug_output) or ESMTP + # (#esmtp=), which must be done before the session is + # started. + # + # === Errors + # + # If session has already been started, an IOError will be raised. + # + # This method may raise: + # + # * Net::SMTPAuthenticationError + # * Net::SMTPServerBusy + # * Net::SMTPSyntaxError + # * Net::SMTPFatalError + # * Net::SMTPUnknownError + # * IOError + # * TimeoutError + # + def start(helo = 'localhost', + user = nil, secret = nil, authtype = nil) # :yield: smtp + if block_given? + begin + do_start helo, user, secret, authtype + return yield(self) + ensure + do_finish + end + else + do_start helo, user, secret, authtype + return self + end + end + + # Finishes the SMTP session and closes TCP connection. + # Raises IOError if not started. + def finish + raise IOError, 'not yet started' unless started? + do_finish + end + + private + + def do_start(helo_domain, user, secret, authtype) + raise IOError, 'SMTP session already started' if @started + if user or secret + check_auth_method(authtype || DEFAULT_AUTH_TYPE) + check_auth_args user, secret + end + s = timeout(@open_timeout) { TCPSocket.open(@address, @port) } + logging "Connection opened: #{@address}:#{@port}" + @socket = new_internet_message_io(tls? ? tlsconnect(s) : s) + check_response critical { recv_response() } + do_helo helo_domain + if starttls_always? or (capable_starttls? and starttls_auto?) + unless capable_starttls? 
+ raise SMTPUnsupportedCommand, + "STARTTLS is not supported on this server" + end + starttls + @socket = new_internet_message_io(tlsconnect(s)) + # helo response may be different after STARTTLS + do_helo helo_domain + end + authenticate user, secret, (authtype || DEFAULT_AUTH_TYPE) if user + @started = true + ensure + unless @started + # authentication failed, cancel connection. + s.close if s and not s.closed? + @socket = nil + end + end + + def tlsconnect(s) + s = OpenSSL::SSL::SSLSocket.new(s, @ssl_context) + logging "TLS connection started" + s.sync_close = true + s.connect + if @ssl_context.verify_mode != OpenSSL::SSL::VERIFY_NONE + s.post_connection_check(@address) + end + s + end + + def new_internet_message_io(s) + io = InternetMessageIO.new(s) + io.read_timeout = @read_timeout + io.debug_output = @debug_output + io + end + + def do_helo(helo_domain) + res = @esmtp ? ehlo(helo_domain) : helo(helo_domain) + @capabilities = res.capabilities + rescue SMTPError + if @esmtp + @esmtp = false + @error_occured = false + retry + end + raise + end + + def do_finish + quit if @socket and not @socket.closed? and not @error_occured + ensure + @started = false + @error_occured = false + @socket.close if @socket and not @socket.closed? + @socket = nil + end + + # + # Message Sending + # + + public + + # + # Sends +msgstr+ as a message. Single CR ("\r") and LF ("\n") found + # in the +msgstr+, are converted into the CR LF pair. You cannot send a + # binary message with this method. +msgstr+ should include both + # the message headers and body. + # + # +from_addr+ is a String representing the source mail address. + # + # +to_addr+ is a String or Strings or Array of Strings, representing + # the destination mail address or addresses. 
+ # + # === Example + # + # Net::SMTP.start('smtp.example.com') do |smtp| + # smtp.send_message msgstr, + # 'from@example.com', + # ['dest@example.com', 'dest2@example.com'] + # end + # + # === Errors + # + # This method may raise: + # + # * Net::SMTPServerBusy + # * Net::SMTPSyntaxError + # * Net::SMTPFatalError + # * Net::SMTPUnknownError + # * IOError + # * TimeoutError + # + def send_message(msgstr, from_addr, *to_addrs) + raise IOError, 'closed session' unless @socket + mailfrom from_addr + rcptto_list to_addrs + data msgstr + end + + alias send_mail send_message + alias sendmail send_message # obsolete + + # + # Opens a message writer stream and gives it to the block. + # The stream is valid only in the block, and has these methods: + # + # puts(str = ''):: outputs STR and CR LF. + # print(str):: outputs STR. + # printf(fmt, *args):: outputs sprintf(fmt,*args). + # write(str):: outputs STR and returns the length of written bytes. + # <<(str):: outputs STR and returns self. + # + # If a single CR ("\r") or LF ("\n") is found in the message, + # it is converted to the CR LF pair. You cannot send a binary + # message with this method. + # + # === Parameters + # + # +from_addr+ is a String representing the source mail address. + # + # +to_addr+ is a String or Strings or Array of Strings, representing + # the destination mail address or addresses. + # + # === Example + # + # Net::SMTP.start('smtp.example.com', 25) do |smtp| + # smtp.open_message_stream('from@example.com', ['dest@example.com']) do |f| + # f.puts 'From: from@example.com' + # f.puts 'To: dest@example.com' + # f.puts 'Subject: test message' + # f.puts + # f.puts 'This is a test message.' 
+ # end + # end + # + # === Errors + # + # This method may raise: + # + # * Net::SMTPServerBusy + # * Net::SMTPSyntaxError + # * Net::SMTPFatalError + # * Net::SMTPUnknownError + # * IOError + # * TimeoutError + # + def open_message_stream(from_addr, *to_addrs, &block) # :yield: stream + raise IOError, 'closed session' unless @socket + mailfrom from_addr + rcptto_list to_addrs + data(&block) + end + + alias ready open_message_stream # obsolete + + # + # Authentication + # + + public + + DEFAULT_AUTH_TYPE = :plain + + def authenticate(user, secret, authtype = DEFAULT_AUTH_TYPE) + check_auth_method authtype + check_auth_args user, secret + send auth_method(authtype), user, secret + end + + def auth_plain(user, secret) + check_auth_args user, secret + res = critical { + get_response('AUTH PLAIN ' + base64_encode("\0#{user}\0#{secret}")) + } + check_auth_response res + res + end + + def auth_login(user, secret) + check_auth_args user, secret + res = critical { + check_auth_continue get_response('AUTH LOGIN') + check_auth_continue get_response(base64_encode(user)) + get_response(base64_encode(secret)) + } + check_auth_response res + res + end + + def auth_cram_md5(user, secret) + check_auth_args user, secret + res = critical { + res0 = get_response('AUTH CRAM-MD5') + check_auth_continue res0 + crammed = cram_md5_response(secret, res0.cram_md5_challenge) + get_response(base64_encode("#{user} #{crammed}")) + } + check_auth_response res + res + end + + private + + def check_auth_method(type) + unless respond_to?(auth_method(type), true) + raise ArgumentError, "wrong authentication type #{type}" + end + end + + def auth_method(type) + "auth_#{type.to_s.downcase}".intern + end + + def check_auth_args(user, secret) + unless user + raise ArgumentError, 'SMTP-AUTH requested but missing user name' + end + unless secret + raise ArgumentError, 'SMTP-AUTH requested but missing secret phrase' + end + end + + def base64_encode(str) + # expects "str" may not become too long + 
[str].pack('m').gsub(/\s+/, '') + end + + IMASK = 0x36 + OMASK = 0x5c + + # CRAM-MD5: [RFC2195] + def cram_md5_response(secret, challenge) + tmp = Digest::MD5.digest(cram_secret(secret, IMASK) + challenge) + Digest::MD5.hexdigest(cram_secret(secret, OMASK) + tmp) + end + + CRAM_BUFSIZE = 64 + + def cram_secret(secret, mask) + secret = Digest::MD5.digest(secret) if secret.size > CRAM_BUFSIZE + buf = secret.ljust(CRAM_BUFSIZE, "\0") + 0.upto(buf.size - 1) do |i| + buf[i] = (buf[i].ord ^ mask).chr + end + buf + end + + # + # SMTP command dispatcher + # + + public + + def starttls + getok('STARTTLS') + end + + def helo(domain) + getok("HELO #{domain}") + end + + def ehlo(domain) + getok("EHLO #{domain}") + end + + def mailfrom(from_addr) + if $SAFE > 0 + raise SecurityError, 'tainted from_addr' if from_addr.tainted? + end + getok("MAIL FROM:<#{from_addr}>") + end + + def rcptto_list(to_addrs) + raise ArgumentError, 'mail destination not given' if to_addrs.empty? + to_addrs.flatten.each do |addr| + rcptto addr + end + end + + def rcptto(to_addr) + if $SAFE > 0 + raise SecurityError, 'tainted to_addr' if to_addr.tainted? + end + getok("RCPT TO:<#{to_addr}>") + end + + # This method sends a message. + # If +msgstr+ is given, sends it as a message. + # If block is given, yield a message writer stream. + # You must write message before the block is closed. + # + # # Example 1 (by string) + # smtp.data(< +# Documentation:: William Webber and Wakou Aoyama +# +# This file holds the class Net::Telnet, which provides client-side +# telnet functionality. +# +# For documentation, see Net::Telnet. +# + +require "socket" +require "delegate" +require "timeout" +require "English" + +module Net + + # + # == Net::Telnet + # + # Provides telnet client functionality. + # + # This class also has, through delegation, all the methods of a + # socket object (by default, a +TCPSocket+, but can be set by the + # +Proxy+ option to new()). 
This provides methods such as + # close() to end the session and sysread() to read + # data directly from the host, instead of via the waitfor() + # mechanism. Note that if you do use sysread() directly + # when in telnet mode, you should probably pass the output through + # preprocess() to extract telnet command sequences. + # + # == Overview + # + # The telnet protocol allows a client to login remotely to a user + # account on a server and execute commands via a shell. The equivalent + # is done by creating a Net::Telnet class with the +Host+ option + # set to your host, calling #login() with your user and password, + # issuing one or more #cmd() calls, and then calling #close() + # to end the session. The #waitfor(), #print(), #puts(), and + # #write() methods, which #cmd() is implemented on top of, are + # only needed if you are doing something more complicated. + # + # A Net::Telnet object can also be used to connect to non-telnet + # services, such as SMTP or HTTP. In this case, you normally + # want to provide the +Port+ option to specify the port to + # connect to, and set the +Telnetmode+ option to false to prevent + # the client from attempting to interpret telnet command sequences. + # Generally, #login() will not work with other protocols, and you + # have to handle authentication yourself. + # + # For some protocols, it will be possible to specify the +Prompt+ + # option once when you create the Telnet object and use #cmd() calls; + # for others, you will have to specify the response sequence to + # look for as the Match option to every #cmd() call, or call + # #puts() and #waitfor() directly; for yet others, you will have + # to use #sysread() instead of #waitfor() and parse server + # responses yourself. + # + # It is worth noting that when you create a new Net::Telnet object, + # you can supply a proxy IO channel via the Proxy option. 
This + # can be used to attach the Telnet object to other Telnet objects, + # to already open sockets, or to any read-write IO object. This + # can be useful, for instance, for setting up a test fixture for + # unit testing. + # + # == Examples + # + # === Log in and send a command, echoing all output to stdout + # + # localhost = Net::Telnet::new("Host" => "localhost", + # "Timeout" => 10, + # "Prompt" => /[$%#>] \z/n) + # localhost.login("username", "password") { |c| print c } + # localhost.cmd("command") { |c| print c } + # localhost.close + # + # + # === Check a POP server to see if you have mail + # + # pop = Net::Telnet::new("Host" => "your_destination_host_here", + # "Port" => 110, + # "Telnetmode" => false, + # "Prompt" => /^\+OK/n) + # pop.cmd("user " + "your_username_here") { |c| print c } + # pop.cmd("pass " + "your_password_here") { |c| print c } + # pop.cmd("list") { |c| print c } + # + # == References + # + # There are a large number of RFCs relevant to the Telnet protocol. + # RFCs 854-861 define the base protocol. 
For a complete listing + # of relevant RFCs, see + # http://www.omnifarious.org/~hopper/technical/telnet-rfc.html + # + class Telnet < SimpleDelegator + + # :stopdoc: + IAC = 255.chr # "\377" # "\xff" # interpret as command + DONT = 254.chr # "\376" # "\xfe" # you are not to use option + DO = 253.chr # "\375" # "\xfd" # please, you use option + WONT = 252.chr # "\374" # "\xfc" # I won't use option + WILL = 251.chr # "\373" # "\xfb" # I will use option + SB = 250.chr # "\372" # "\xfa" # interpret as subnegotiation + GA = 249.chr # "\371" # "\xf9" # you may reverse the line + EL = 248.chr # "\370" # "\xf8" # erase the current line + EC = 247.chr # "\367" # "\xf7" # erase the current character + AYT = 246.chr # "\366" # "\xf6" # are you there + AO = 245.chr # "\365" # "\xf5" # abort output--but let prog finish + IP = 244.chr # "\364" # "\xf4" # interrupt process--permanently + BREAK = 243.chr # "\363" # "\xf3" # break + DM = 242.chr # "\362" # "\xf2" # data mark--for connect. cleaning + NOP = 241.chr # "\361" # "\xf1" # nop + SE = 240.chr # "\360" # "\xf0" # end sub negotiation + EOR = 239.chr # "\357" # "\xef" # end of record (transparent mode) + ABORT = 238.chr # "\356" # "\xee" # Abort process + SUSP = 237.chr # "\355" # "\xed" # Suspend process + EOF = 236.chr # "\354" # "\xec" # End of file + SYNCH = 242.chr # "\362" # "\xf2" # for telfunc calls + + OPT_BINARY = 0.chr # "\000" # "\x00" # Binary Transmission + OPT_ECHO = 1.chr # "\001" # "\x01" # Echo + OPT_RCP = 2.chr # "\002" # "\x02" # Reconnection + OPT_SGA = 3.chr # "\003" # "\x03" # Suppress Go Ahead + OPT_NAMS = 4.chr # "\004" # "\x04" # Approx Message Size Negotiation + OPT_STATUS = 5.chr # "\005" # "\x05" # Status + OPT_TM = 6.chr # "\006" # "\x06" # Timing Mark + OPT_RCTE = 7.chr # "\a" # "\x07" # Remote Controlled Trans and Echo + OPT_NAOL = 8.chr # "\010" # "\x08" # Output Line Width + OPT_NAOP = 9.chr # "\t" # "\x09" # Output Page Size + OPT_NAOCRD = 10.chr # "\n" # "\x0a" # Output Carriage-Return 
Disposition + OPT_NAOHTS = 11.chr # "\v" # "\x0b" # Output Horizontal Tab Stops + OPT_NAOHTD = 12.chr # "\f" # "\x0c" # Output Horizontal Tab Disposition + OPT_NAOFFD = 13.chr # "\r" # "\x0d" # Output Formfeed Disposition + OPT_NAOVTS = 14.chr # "\016" # "\x0e" # Output Vertical Tabstops + OPT_NAOVTD = 15.chr # "\017" # "\x0f" # Output Vertical Tab Disposition + OPT_NAOLFD = 16.chr # "\020" # "\x10" # Output Linefeed Disposition + OPT_XASCII = 17.chr # "\021" # "\x11" # Extended ASCII + OPT_LOGOUT = 18.chr # "\022" # "\x12" # Logout + OPT_BM = 19.chr # "\023" # "\x13" # Byte Macro + OPT_DET = 20.chr # "\024" # "\x14" # Data Entry Terminal + OPT_SUPDUP = 21.chr # "\025" # "\x15" # SUPDUP + OPT_SUPDUPOUTPUT = 22.chr # "\026" # "\x16" # SUPDUP Output + OPT_SNDLOC = 23.chr # "\027" # "\x17" # Send Location + OPT_TTYPE = 24.chr # "\030" # "\x18" # Terminal Type + OPT_EOR = 25.chr # "\031" # "\x19" # End of Record + OPT_TUID = 26.chr # "\032" # "\x1a" # TACACS User Identification + OPT_OUTMRK = 27.chr # "\e" # "\x1b" # Output Marking + OPT_TTYLOC = 28.chr # "\034" # "\x1c" # Terminal Location Number + OPT_3270REGIME = 29.chr # "\035" # "\x1d" # Telnet 3270 Regime + OPT_X3PAD = 30.chr # "\036" # "\x1e" # X.3 PAD + OPT_NAWS = 31.chr # "\037" # "\x1f" # Negotiate About Window Size + OPT_TSPEED = 32.chr # " " # "\x20" # Terminal Speed + OPT_LFLOW = 33.chr # "!" 
# "\x21" # Remote Flow Control + OPT_LINEMODE = 34.chr # "\"" # "\x22" # Linemode + OPT_XDISPLOC = 35.chr # "#" # "\x23" # X Display Location + OPT_OLD_ENVIRON = 36.chr # "$" # "\x24" # Environment Option + OPT_AUTHENTICATION = 37.chr # "%" # "\x25" # Authentication Option + OPT_ENCRYPT = 38.chr # "&" # "\x26" # Encryption Option + OPT_NEW_ENVIRON = 39.chr # "'" # "\x27" # New Environment Option + OPT_EXOPL = 255.chr # "\377" # "\xff" # Extended-Options-List + + NULL = "\000" + CR = "\015" + LF = "\012" + EOL = CR + LF + REVISION = '$Id: telnet.rb 17387 2008-06-17 14:04:48Z jeg2 $' + # :startdoc: + + # + # Creates a new Net::Telnet object. + # + # Attempts to connect to the host (unless the Proxy option is + # provided: see below). If a block is provided, it is yielded + # status messages on the attempt to connect to the server, of + # the form: + # + # Trying localhost... + # Connected to localhost. + # + # +options+ is a hash of options. The following example lists + # all options and their default values. + # + # host = Net::Telnet::new( + # "Host" => "localhost", # default: "localhost" + # "Port" => 23, # default: 23 + # "Binmode" => false, # default: false + # "Output_log" => "output_log", # default: nil (no output) + # "Dump_log" => "dump_log", # default: nil (no output) + # "Prompt" => /[$%#>] \z/n, # default: /[$%#>] \z/n + # "Telnetmode" => true, # default: true + # "Timeout" => 10, # default: 10 + # # if ignore timeout then set "Timeout" to false. + # "Waittime" => 0, # default: 0 + # "Proxy" => proxy # default: nil + # # proxy is Net::Telnet or IO object + # ) + # + # The options have the following meanings: + # + # Host:: the hostname or IP address of the host to connect to, as a String. + # Defaults to "localhost". + # + # Port:: the port to connect to. Defaults to 23. + # + # Binmode:: if false (the default), newline substitution is performed. + # Outgoing LF is + # converted to CRLF, and incoming CRLF is converted to LF. 
If + # true, this substitution is not performed. This value can + # also be set with the #binmode() method. The + # outgoing conversion only applies to the #puts() and #print() + # methods, not the #write() method. The precise nature of + # the newline conversion is also affected by the telnet options + # SGA and BIN. + # + # Output_log:: the name of the file to write connection status messages + # and all received traffic to. In the case of a proper + # Telnet session, this will include the client input as + # echoed by the host; otherwise, it only includes server + # responses. Output is appended verbatim to this file. + # By default, no output log is kept. + # + # Dump_log:: as for Output_log, except that output is written in hexdump + # format (16 bytes per line as hex pairs, followed by their + # printable equivalent), with connection status messages + # preceded by '#', sent traffic preceded by '>', and + # received traffic preceded by '<'. By default, no dump log + # is kept. + # + # Prompt:: a regular expression matching the host's command-line prompt + # sequence. This is needed by the Telnet class to determine + # when the output from a command has finished and the host is + # ready to receive a new command. By default, this regular + # expression is /[$%#>] \z/n. + # + # Telnetmode:: a boolean value, true by default. In telnet mode, + # traffic received from the host is parsed for special + # command sequences, and these sequences are escaped + # in outgoing traffic sent using #puts() or #print() + # (but not #write()). If you are using the Net::Telnet + # object to connect to a non-telnet service (such as + # SMTP or POP), this should be set to "false" to prevent + # undesired data corruption. This value can also be set + # by the #telnetmode() method.
+ # + # Timeout:: the number of seconds to wait before timing out both the + # initial attempt to connect to host (in this constructor), + # and all attempts to read data from the host (in #waitfor(), + # #cmd(), and #login()). Exceeding this timeout causes a + # TimeoutError to be raised. The default value is 10 seconds. + # You can disable the timeout by setting this value to false. + # In this case, the connect attempt will eventually time out + # on the underlying connect(2) socket call with an + # Errno::ETIMEDOUT error (but generally only after a few + # minutes), but other attempts to read data from the host + # will hang indefinitely if no data is forthcoming. + # + # Waittime:: the amount of time to wait after seeing what looks like a + # prompt (that is, received data that matches the Prompt + # option regular expression) to see if more data arrives. + # If more data does arrive in this time, Net::Telnet assumes + # that what it saw was not really a prompt. This is to try to + # avoid false matches, but it can also lead to missing real + # prompts (if, for instance, a background process writes to + # the terminal soon after the prompt is displayed). By + # default, set to 0, meaning not to wait for more data. + # + # Proxy:: a proxy object to be used instead of opening a direct connection + # to the host. Must be either another Net::Telnet object or + # an IO object. If it is another Net::Telnet object, this + # instance will use that one's socket for communication. If an + # IO object, it is used directly for communication. Any other + # kind of object will cause an error to be raised.
+    #
+    def initialize(options) # :yield: mesg
+      # The caller's hash is kept by reference, so the defaults filled in
+      # below are written into the hash that was passed in.
+      @options = options
+      @options["Host"] = "localhost" unless @options.has_key?("Host")
+      @options["Port"] = 23 unless @options.has_key?("Port")
+      @options["Prompt"] = /[$%#>] \z/n unless @options.has_key?("Prompt")
+      @options["Timeout"] = 10 unless @options.has_key?("Timeout")
+      @options["Waittime"] = 0 unless @options.has_key?("Waittime")
+      # Binmode and Telnetmode must be the literals true or false when given.
+      unless @options.has_key?("Binmode")
+        @options["Binmode"] = false
+      else
+        unless (true == @options["Binmode"] or false == @options["Binmode"])
+          raise ArgumentError, "Binmode option must be true or false"
+        end
+      end
+
+      unless @options.has_key?("Telnetmode")
+        @options["Telnetmode"] = true
+      else
+        unless (true == @options["Telnetmode"] or false == @options["Telnetmode"])
+          raise ArgumentError, "Telnetmode option must be true or false"
+        end
+      end
+
+      # Negotiated telnet option state; both start out off.
+      @telnet_option = { "SGA" => false, "BINARY" => false }
+
+      # Both log files are opened in append mode, unbuffered and binary.
+      if @options.has_key?("Output_log")
+        @log = File.open(@options["Output_log"], 'a+')
+        @log.sync = true
+        @log.binmode
+      end
+
+      if @options.has_key?("Dump_log")
+        @dumplog = File.open(@options["Dump_log"], 'a+')
+        @dumplog.sync = true
+        @dumplog.binmode
+        # Singleton method on the dump log file: writes +x+ as a hexdump,
+        # 16 bytes per row, each row prefixed with +dir+ and a hex offset.
+        # The bare print/printf calls run with the file as self.
+        def @dumplog.log_dump(dir, x) # :nodoc:
+          len = x.length
+          addr = 0
+          offset = 0
+          while 0 < len
+            if len < 16
+              line = x[offset, len]
+            else
+              line = x[offset, 16]
+            end
+            hexvals = line.unpack('H*')[0]
+            # Pad a short final row to 32 hex digits so the unpack below
+            # always yields 16 two-character fields.
+            hexvals += ' ' * (32 - hexvals.length)
+            hexvals = format("%s %s %s %s " * 4, *hexvals.unpack('a2' * 16))
+            # Control and high-bit bytes are shown as '.' in the text column.
+            line = line.gsub(/[\000-\037\177-\377]/n, '.')
+            printf "%s 0x%5.5x: %s%s\n", dir, addr, hexvals, line
+            addr += 16
+            offset += 16
+            len -= 16
+          end
+          print "\n"
+        end
+      end
+
+      if @options.has_key?("Proxy")
+        # Reuse an existing connection; nothing is opened and no status
+        # messages are yielded or logged on this path.
+        if @options["Proxy"].kind_of?(Net::Telnet)
+          @sock = @options["Proxy"].sock
+        elsif @options["Proxy"].kind_of?(IO)
+          @sock = @options["Proxy"]
+        else
+          raise "Error: Proxy must be an instance of Net::Telnet or IO."
+        end
+      else
+        message = "Trying " + @options["Host"] + "...\n"
+        yield(message) if block_given?
+        @log.write(message) if @options.has_key?("Output_log")
+        @dumplog.log_dump('#', message) if @options.has_key?("Dump_log")
+
+        begin
+          # Timeout => false means connect without a timeout wrapper.
+          if @options["Timeout"] == false
+            @sock = TCPSocket.open(@options["Host"], @options["Port"])
+          else
+            timeout(@options["Timeout"]) do
+              @sock = TCPSocket.open(@options["Host"], @options["Port"])
+            end
+          end
+        rescue TimeoutError
+          # Re-raise with a message that says what timed out.
+          raise TimeoutError, "timed out while opening a connection to the host"
+        rescue
+          # Any other failure is recorded in the logs, then re-raised as is.
+          @log.write($ERROR_INFO.to_s + "\n") if @options.has_key?("Output_log")
+          @dumplog.log_dump('#', $ERROR_INFO.to_s + "\n") if @options.has_key?("Dump_log")
+          raise
+        end
+        @sock.sync = true
+        @sock.binmode
+
+        message = "Connected to " + @options["Host"] + ".\n"
+        yield(message) if block_given?
+        @log.write(message) if @options.has_key?("Output_log")
+        @dumplog.log_dump('#', message) if @options.has_key?("Dump_log")
+      end
+
+      # Hand the socket to the delegator superclass.
+      super(@sock)
+    end # initialize
+
+    # The socket the Telnet object is using. Note that this object becomes
+    # a delegate of the Telnet object, so normally you invoke its methods
+    # directly on the Telnet object.
+    attr :sock
+
+    # Set telnet command interpretation on (+mode+ == true) or off
+    # (+mode+ == false), or return the current value (+mode+ not
+    # provided). It should be on for true telnet sessions, off if
+    # using Net::Telnet to connect to a non-telnet service such
+    # as SMTP.
+    def telnetmode(mode = nil)
+      case mode
+      when nil
+        # No argument (or an explicit nil): act as a reader.
+        @options["Telnetmode"]
+      when true, false
+        @options["Telnetmode"] = mode
+      else
+        raise ArgumentError, "argument must be true or false, or missing"
+      end
+    end
+
+    # Turn telnet command interpretation on (true) or off (false). It
+    # should be on for true telnet sessions, off if using Net::Telnet
+    # to connect to a non-telnet service such as SMTP.
+ def telnetmode=(mode) + if (true == mode or false == mode) + @options["Telnetmode"] = mode + else + raise ArgumentError, "argument must be true or false" + end + end + + # Turn newline conversion on (+mode+ == false) or off (+mode+ == true), + # or return the current value (+mode+ is not specified). + def binmode(mode = nil) + case mode + when nil + @options["Binmode"] + when true, false + @options["Binmode"] = mode + else + raise ArgumentError, "argument must be true or false" + end + end + + # Turn newline conversion on (false) or off (true). + def binmode=(mode) + if (true == mode or false == mode) + @options["Binmode"] = mode + else + raise ArgumentError, "argument must be true or false" + end + end + + # Preprocess received data from the host. + # + # Performs newline conversion and detects telnet command sequences. + # Called automatically by #waitfor(). You should only use this + # method yourself if you have read input directly using sysread() + # or similar, and even then only if in telnet mode. 
+    def preprocess(string)
+      # Every step uses gsub (not gsub!), so the caller's string is left
+      # untouched and a new string is returned. Replies to the host are
+      # sent through #write while the data is being scanned.
+
+      # combine CR+NULL into CR
+      string = string.gsub(/#{CR}#{NULL}/no, CR) if @options["Telnetmode"]
+
+      # combine EOL into "\n"
+      string = string.gsub(/#{EOL}/no, "\n") unless @options["Binmode"]
+
+      # remove NULL
+      string = string.gsub(/#{NULL}/no, '') unless @options["Binmode"]
+
+      # Match IAC followed by: a single command byte; DO/DONT/WILL/WONT
+      # plus an option byte; or an SB ... IAC SE subnegotiation. $1 is
+      # everything after the leading IAC. The block's value replaces the
+      # whole sequence: IAC for an escaped IAC, '' for all other cases.
+      # This pass is not conditional on Telnetmode.
+      string.gsub(/#{IAC}(
+                   [#{IAC}#{AO}#{AYT}#{DM}#{IP}#{NOP}]|
+                   [#{DO}#{DONT}#{WILL}#{WONT}]
+                     [#{OPT_BINARY}-#{OPT_NEW_ENVIRON}#{OPT_EXOPL}]|
+                   #{SB}[^#{IAC}]*#{IAC}#{SE}
+                 )/xno) do
+        if IAC == $1 # handle escaped IAC characters
+          IAC
+        elsif AYT == $1 # respond to "IAC AYT" (are you there)
+          self.write("nobody here but us pigeons" + EOL)
+          ''
+        elsif DO[0] == $1[0] # respond to "IAC DO x"
+          # Only BINARY is agreed to; every other request gets WONT.
+          if OPT_BINARY[0] == $1[1]
+            @telnet_option["BINARY"] = true
+            self.write(IAC + WILL + OPT_BINARY)
+          else
+            self.write(IAC + WONT + $1[1..1])
+          end
+          ''
+        elsif DONT[0] == $1[0] # respond to "IAC DON'T x" with "IAC WON'T x"
+          self.write(IAC + WONT + $1[1..1])
+          ''
+        elsif WILL[0] == $1[0] # respond to "IAC WILL x"
+          # BINARY, ECHO and SGA are accepted with DO; anything else
+          # is refused with DONT. Only SGA is recorded as state here.
+          if OPT_BINARY[0] == $1[1]
+            self.write(IAC + DO + OPT_BINARY)
+          elsif OPT_ECHO[0] == $1[1]
+            self.write(IAC + DO + OPT_ECHO)
+          elsif OPT_SGA[0] == $1[1]
+            @telnet_option["SGA"] = true
+            self.write(IAC + DO + OPT_SGA)
+          else
+            self.write(IAC + DONT + $1[1..1])
+          end
+          ''
+        elsif WONT[0] == $1[0] # respond to "IAC WON'T x"
+          # Always acknowledged with DONT; SGA state is cleared.
+          if OPT_ECHO[0] == $1[1]
+            self.write(IAC + DONT + OPT_ECHO)
+          elsif OPT_SGA[0] == $1[1]
+            @telnet_option["SGA"] = false
+            self.write(IAC + DONT + OPT_SGA)
+          else
+            self.write(IAC + DONT + $1[1..1])
+          end
+          ''
+        else
+          # AO, DM, IP, NOP and SB ... SE subnegotiations are dropped
+          # from the data without any reply.
+          ''
+        end
+      end
+    end # preprocess
+
+    # Read data from the host until a certain sequence is matched.
+    #
+    # If a block is given, the received data will be yielded as it
+    # is read in (not necessarily all in one go), or nil if EOF
+    # occurs before any data is received. Whether a block is given
+    # or not, all data read will be returned in a single string, or again
+    # nil if EOF occurs before any data is received.
Note that + # received data includes the matched sequence we were looking for. + # + # +options+ can be either a regular expression or a hash of options. + # If a regular expression, this specifies the data to wait for. + # If a hash, this can specify the following options: + # + # Match:: a regular expression, specifying the data to wait for. + # Prompt:: as for Match; used only if Match is not specified. + # String:: as for Match, except a string that will be converted + # into a regular expression. Used only if Match and + # Prompt are not specified. + # Timeout:: the number of seconds to wait for data from the host + # before raising a TimeoutError. If set to false, + # no timeout will occur. If not specified, the + # Timeout option value specified when this instance + # was created will be used, or, failing that, the + # default value of 10 seconds. + # Waittime:: the number of seconds to wait after matching against + # the input data to see if more data arrives. If more + # data arrives within this time, we will judge ourselves + # not to have matched successfully, and will continue + # trying to match. If not specified, the Waittime option + # value specified when this instance was created will be + # used, or, failing that, the default value of 0 seconds, + # which means not to wait for more input. + # FailEOF:: if true, when the remote end closes the connection then an + # EOFError will be raised. Otherwise, defaults to the old + # behaviour that the function will return whatever data + # has been received already, or nil if nothing was received. 
+ # + def waitfor(options) # :yield: recvdata + time_out = @options["Timeout"] + waittime = @options["Waittime"] + fail_eof = @options["FailEOF"] + + if options.kind_of?(Hash) + prompt = if options.has_key?("Match") + options["Match"] + elsif options.has_key?("Prompt") + options["Prompt"] + elsif options.has_key?("String") + Regexp.new( Regexp.quote(options["String"]) ) + end + time_out = options["Timeout"] if options.has_key?("Timeout") + waittime = options["Waittime"] if options.has_key?("Waittime") + fail_eof = options["FailEOF"] if options.has_key?("FailEOF") + else + prompt = options + end + + if time_out == false + time_out = nil + end + + line = '' + buf = '' + rest = '' + until(prompt === line and not IO::select([@sock], nil, nil, waittime)) + unless IO::select([@sock], nil, nil, time_out) + raise TimeoutError, "timed out while waiting for more data" + end + begin + c = @sock.readpartial(1024 * 1024) + @dumplog.log_dump('<', c) if @options.has_key?("Dump_log") + if @options["Telnetmode"] + c = rest + c + if Integer(c.rindex(/#{IAC}#{SE}/no) || 0) < + Integer(c.rindex(/#{IAC}#{SB}/no) || 0) + buf = preprocess(c[0 ... c.rindex(/#{IAC}#{SB}/no)]) + rest = c[c.rindex(/#{IAC}#{SB}/no) .. -1] + elsif pt = c.rindex(/#{IAC}[^#{IAC}#{AO}#{AYT}#{DM}#{IP}#{NOP}]?\z/no) || + c.rindex(/\r\z/no) + buf = preprocess(c[0 ... pt]) + rest = c[pt .. -1] + else + buf = preprocess(c) + rest = '' + end + else + # Not Telnetmode. + # + # We cannot use preprocess() on this data, because that + # method makes some Telnetmode-specific assumptions. + buf = rest + c + rest = '' + unless @options["Binmode"] + if pt = buf.rindex(/\r\z/no) + buf = buf[0 ... pt] + rest = buf[pt .. -1] + end + buf.gsub!(/#{EOL}/no, "\n") + end + end + @log.print(buf) if @options.has_key?("Output_log") + line += buf + yield buf if block_given? + rescue EOFError # End of file reached + raise if fail_eof + if line == '' + line = nil + yield nil if block_given? 
+ end + break + end + end + line + end + + # Write +string+ to the host. + # + # Does not perform any conversions on +string+. Will log +string+ to the + # dumplog, if the Dump_log option is set. + def write(string) + length = string.length + while 0 < length + IO::select(nil, [@sock]) + @dumplog.log_dump('>', string[-length..-1]) if @options.has_key?("Dump_log") + length -= @sock.syswrite(string[-length..-1]) + end + end + + # Sends a string to the host. + # + # This does _not_ automatically append a newline to the string. Embedded + # newlines may be converted and telnet command sequences escaped + # depending upon the values of telnetmode, binmode, and telnet options + # set by the host. + def print(string) + string = string.gsub(/#{IAC}/no, IAC + IAC) if @options["Telnetmode"] + + if @options["Binmode"] + self.write(string) + else + if @telnet_option["BINARY"] and @telnet_option["SGA"] + # IAC WILL SGA IAC DO BIN send EOL --> CR + self.write(string.gsub(/\n/n, CR)) + elsif @telnet_option["SGA"] + # IAC WILL SGA send EOL --> CR+NULL + self.write(string.gsub(/\n/n, CR + NULL)) + else + # NONE send EOL --> CR+LF + self.write(string.gsub(/\n/n, EOL)) + end + end + end + + # Sends a string to the host. + # + # Same as #print(), but appends a newline to the string. + def puts(string) + self.print(string + "\n") + end + + # Send a command to the host. + # + # More exactly, sends a string to the host, and reads in all received + # data until is sees the prompt or other matched sequence. + # + # If a block is given, the received data will be yielded to it as + # it is read in. Whether a block is given or not, the received data + # will be return as a string. Note that the received data includes + # the prompt and in most cases the host's echo of our command. + # + # +options+ is either a String, specified the string or command to + # send to the host; or it is a hash of options. 
If a hash, the + # following options can be specified: + # + # String:: the command or other string to send to the host. + # Match:: a regular expression, the sequence to look for in + # the received data before returning. If not specified, + # the Prompt option value specified when this instance + # was created will be used, or, failing that, the default + # prompt of /[$%#>] \z/n. + # Timeout:: the seconds to wait for data from the host before raising + # a Timeout error. If not specified, the Timeout option + # value specified when this instance was created will be + # used, or, failing that, the default value of 10 seconds. + # + # The command or other string will have the newline sequence appended + # to it. + def cmd(options) # :yield: recvdata + match = @options["Prompt"] + time_out = @options["Timeout"] + + if options.kind_of?(Hash) + string = options["String"] + match = options["Match"] if options.has_key?("Match") + time_out = options["Timeout"] if options.has_key?("Timeout") + else + string = options + end + + self.puts(string) + if block_given? + waitfor({"Prompt" => match, "Timeout" => time_out}){|c| yield c } + else + waitfor({"Prompt" => match, "Timeout" => time_out}) + end + end + + # Login to the host with a given username and password. + # + # The username and password can either be provided as two string + # arguments in that order, or as a hash with keys "Name" and + # "Password". + # + # This method looks for the strings "login" and "Password" from the + # host to determine when to send the username and password. If the + # login sequence does not follow this pattern (for instance, you + # are connecting to a service other than telnet), you will need + # to handle login yourself. + # + # The password can be omitted, either by only + # provided one String argument, which will be used as the username, + # or by providing a has that has no "Password" key. 
In this case,
+    # the method will not look for the "Password:" prompt; if it is
+    # sent, it will have to be dealt with by later calls.
+    #
+    # The method returns all data received during the login process from
+    # the host, including the echoed username but not the password (which
+    # the host should not echo). If a block is passed in, this received
+    # data is also yielded to the block as it is received.
+    def login(options, password = nil) # :yield: recvdata
+      # Default prompts; the hash form may replace either one through
+      # the "LoginPrompt" and "PasswordPrompt" keys.
+      login_prompt = /[Ll]ogin[: ]*\z/n
+      password_prompt = /[Pp]ass(?:word|phrase)[: ]*\z/n
+      if options.kind_of?(Hash)
+        username = options["Name"]
+        # In hash form the positional +password+ argument is overwritten
+        # (it becomes nil when the hash has no "Password" key).
+        password = options["Password"]
+        login_prompt = options["LoginPrompt"] if options["LoginPrompt"]
+        password_prompt = options["PasswordPrompt"] if options["PasswordPrompt"]
+      else
+        username = options
+      end
+
+      # The two branches differ only in whether received data is also
+      # yielded to the caller's block.
+      # NOTE(review): waitfor can return nil when EOF arrives before any
+      # data; the += below would then fail on nil -- confirm whether
+      # callers rely on that.
+      if block_given?
+        line = waitfor(login_prompt){|c| yield c }
+        if password
+          # Send the name, wait for the password prompt, then send the
+          # password and wait for the regular prompt.
+          line += cmd({"String" => username,
+                       "Match" => password_prompt}){|c| yield c }
+          line += cmd(password){|c| yield c }
+        else
+          line += cmd(username){|c| yield c }
+        end
+      else
+        line = waitfor(login_prompt)
+        if password
+          line += cmd({"String" => username,
+                       "Match" => password_prompt})
+          line += cmd(password)
+        else
+          line += cmd(username)
+        end
+      end
+      line
+    end
+
+  end # class Telnet
+end # module Net
+
diff --git a/lib/observer.rb b/lib/observer.rb
new file mode 100644
index 0000000..472a154
--- /dev/null
+++ b/lib/observer.rb
@@ -0,0 +1,193 @@
+#
+# observer.rb implements the _Observer_ object-oriented design pattern. The
+# following documentation is copied, with modifications, from "Programming
+# Ruby", by Hunt and Thomas; http://www.rubycentral.com/book/lib_patterns.html.
+#
+# == About
+#
+# The Observer pattern, also known as Publish/Subscribe, provides a simple
+# mechanism for one object to inform a set of interested third-party objects
+# when its state changes.
+# +# == Mechanism +# +# In the Ruby implementation, the notifying class mixes in the +Observable+ +# module, which provides the methods for managing the associated observer +# objects. +# +# The observers must implement the +update+ method to receive notifications. +# +# The observable object must: +# * assert that it has +changed+ +# * call +notify_observers+ +# +# == Example +# +# The following example demonstrates this nicely. A +Ticker+, when run, +# continually receives the stock +Price+ for its +@symbol+. A +Warner+ is a +# general observer of the price, and two warners are demonstrated, a +WarnLow+ +# and a +WarnHigh+, which print a warning if the price is below or above their +# set limits, respectively. +# +# The +update+ callback allows the warners to run without being explicitly +# called. The system is set up with the +Ticker+ and several observers, and the +# observers do their duty without the top-level code having to interfere. +# +# Note that the contract between publisher and subscriber (observable and +# observer) is not declared or enforced. The +Ticker+ publishes a time and a +# price, and the warners receive that. But if you don't ensure that your +# contracts are correct, nothing else can warn you. +# +# require "observer" +# +# class Ticker ### Periodically fetch a stock price. +# include Observable +# +# def initialize(symbol) +# @symbol = symbol +# end +# +# def run +# lastPrice = nil +# loop do +# price = Price.fetch(@symbol) +# print "Current price: #{price}\n" +# if price != lastPrice +# changed # notify observers +# lastPrice = price +# notify_observers(Time.now, price) +# end +# sleep 1 +# end +# end +# end +# +# class Price ### A mock class to fetch a stock price (60 - 140). +# def Price.fetch(symbol) +# 60 + rand(80) +# end +# end +# +# class Warner ### An abstract observer of Ticker objects. 
+# def initialize(ticker, limit) +# @limit = limit +# ticker.add_observer(self) +# end +# end +# +# class WarnLow < Warner +# def update(time, price) # callback for observer +# if price < @limit +# print "--- #{time.to_s}: Price below #@limit: #{price}\n" +# end +# end +# end +# +# class WarnHigh < Warner +# def update(time, price) # callback for observer +# if price > @limit +# print "+++ #{time.to_s}: Price above #@limit: #{price}\n" +# end +# end +# end +# +# ticker = Ticker.new("MSFT") +# WarnLow.new(ticker, 80) +# WarnHigh.new(ticker, 120) +# ticker.run +# +# Produces: +# +# Current price: 83 +# Current price: 75 +# --- Sun Jun 09 00:10:25 CDT 2002: Price below 80: 75 +# Current price: 90 +# Current price: 134 +# +++ Sun Jun 09 00:10:25 CDT 2002: Price above 120: 134 +# Current price: 134 +# Current price: 112 +# Current price: 79 +# --- Sun Jun 09 00:10:25 CDT 2002: Price below 80: 79 + + +# +# Implements the Observable design pattern as a mixin so that other objects can +# be notified of changes in state. See observer.rb for details and an example. +# +module Observable + + # + # Add +observer+ as an observer on this object. +observer+ will now receive + # notifications. The second optional argument specifies a method to notify + # updates, of which default value is +update+. + # + def add_observer(observer, func=:update) + @observer_peers = {} unless defined? @observer_peers + unless observer.respond_to? func + raise NoMethodError, "observer does not respond to `#{func.to_s}'" + end + @observer_peers[observer] = func + end + + # + # Delete +observer+ as an observer on this object. It will no longer receive + # notifications. + # + def delete_observer(observer) + @observer_peers.delete observer if defined? @observer_peers + end + + # + # Delete all observers associated with this object. + # + def delete_observers + @observer_peers.clear if defined? @observer_peers + end + + # + # Return the number of observers associated with this object. 
+ # + def count_observers + if defined? @observer_peers + @observer_peers.size + else + 0 + end + end + + # + # Set the changed state of this object. Notifications will be sent only if + # the changed +state+ is +true+. + # + def changed(state=true) + @observer_state = state + end + + # + # Query the changed state of this object. + # + def changed? + if defined? @observer_state and @observer_state + true + else + false + end + end + + # + # If this object's changed state is +true+, invoke the update method in each + # currently associated observer in turn, passing it the given arguments. The + # changed state is then set to +false+. + # + def notify_observers(*arg) + if defined? @observer_state and @observer_state + if defined? @observer_peers + @observer_peers.each { |k, v| + k.send v, *arg + } + end + @observer_state = false + end + end + +end diff --git a/lib/open-uri.rb b/lib/open-uri.rb new file mode 100644 index 0000000..b426455 --- /dev/null +++ b/lib/open-uri.rb @@ -0,0 +1,832 @@ +require 'uri' +require 'stringio' +require 'time' + +module Kernel + private + alias open_uri_original_open open # :nodoc: + class << self + alias open_uri_original_open open # :nodoc: + end + + # makes possible to open various resources including URIs. + # If the first argument respond to `open' method, + # the method is called with the rest arguments. + # + # If the first argument is a string which begins with xxx://, + # it is parsed by URI.parse. If the parsed object respond to `open' method, + # the method is called with the rest arguments. + # + # Otherwise original open is called. + # + # Since open-uri.rb provides URI::HTTP#open, URI::HTTPS#open and + # URI::FTP#open, + # Kernel[#.]open can accepts such URIs and strings which begins with + # http://, https:// and ftp://. + # In these case, the opened file object is extended by OpenURI::Meta. 
+ def open(name, *rest, &block) # :doc: + if name.respond_to?(:open) + name.open(*rest, &block) + elsif name.respond_to?(:to_str) && + %r{\A[A-Za-z][A-Za-z0-9+\-\.]*://} =~ name && + (uri = URI.parse(name)).respond_to?(:open) + uri.open(*rest, &block) + else + open_uri_original_open(name, *rest, &block) + end + end + module_function :open +end + +# OpenURI is an easy-to-use wrapper for net/http, net/https and net/ftp. +# +#== Example +# +# It is possible to open http/https/ftp URL as usual like opening a file: +# +# open("http://www.ruby-lang.org/") {|f| +# f.each_line {|line| p line} +# } +# +# The opened file has several methods for meta information as follows since +# it is extended by OpenURI::Meta. +# +# open("http://www.ruby-lang.org/en") {|f| +# f.each_line {|line| p line} +# p f.base_uri # +# p f.content_type # "text/html" +# p f.charset # "iso-8859-1" +# p f.content_encoding # [] +# p f.last_modified # Thu Dec 05 02:45:02 UTC 2002 +# } +# +# Additional header fields can be specified by an optional hash argument. +# +# open("http://www.ruby-lang.org/en/", +# "User-Agent" => "Ruby/#{RUBY_VERSION}", +# "From" => "foo@bar.invalid", +# "Referer" => "http://www.ruby-lang.org/") {|f| +# # ... +# } +# +# The environment variables such as http_proxy, https_proxy and ftp_proxy +# are in effect by default. :proxy => nil disables proxy. +# +# open("http://www.ruby-lang.org/en/raa.html", :proxy => nil) {|f| +# # ... +# } +# +# URI objects can be opened in a similar way. +# +# uri = URI.parse("http://www.ruby-lang.org/en/") +# uri.open {|f| +# # ... +# } +# +# URI objects can be read directly. The returned string is also extended by +# OpenURI::Meta. 
+# +# str = uri.read +# p str.base_uri +# +# Author:: Tanaka Akira + +module OpenURI + Options = { + :proxy => true, + :proxy_http_basic_authentication => true, + :progress_proc => true, + :content_length_proc => true, + :http_basic_authentication => true, + :read_timeout => true, + :ssl_ca_cert => nil, + :ssl_verify_mode => nil, + :ftp_active_mode => false, + :redirect => true, + } + + def OpenURI.check_options(options) # :nodoc: + options.each {|k, v| + next unless Symbol === k + unless Options.include? k + raise ArgumentError, "unrecognized option: #{k}" + end + } + end + + def OpenURI.scan_open_optional_arguments(*rest) # :nodoc: + if !rest.empty? && (String === rest.first || Integer === rest.first) + mode = rest.shift + if !rest.empty? && Integer === rest.first + perm = rest.shift + end + end + return mode, perm, rest + end + + def OpenURI.open_uri(name, *rest) # :nodoc: + uri = URI::Generic === name ? name : URI.parse(name) + mode, perm, rest = OpenURI.scan_open_optional_arguments(*rest) + options = rest.shift if !rest.empty? && Hash === rest.first + raise ArgumentError.new("extra arguments") if !rest.empty? + options ||= {} + OpenURI.check_options(options) + + if /\Arb?(?:\Z|:([^:]+))/ =~ mode + encoding, = $1,Encoding.find($1) if $1 + mode = nil + end + + unless mode == nil || + mode == 'r' || mode == 'rb' || + mode == File::RDONLY + raise ArgumentError.new("invalid access mode #{mode} (#{uri.class} resource is read only.)") + end + + io = open_loop(uri, options) + io.set_encoding(encoding) if encoding + if block_given? + begin + yield io + ensure + io.close + end + else + io + end + end + + def OpenURI.open_loop(uri, options) # :nodoc: + proxy_opts = [] + proxy_opts << :proxy_http_basic_authentication if options.include? :proxy_http_basic_authentication + proxy_opts << :proxy if options.include? :proxy + proxy_opts.compact! 
+ if 1 < proxy_opts.length + raise ArgumentError, "multiple proxy options specified" + end + case proxy_opts.first + when :proxy_http_basic_authentication + opt_proxy, proxy_user, proxy_pass = options.fetch(:proxy_http_basic_authentication) + proxy_user = proxy_user.to_str + proxy_pass = proxy_pass.to_str + if opt_proxy == true + raise ArgumentError.new("Invalid authenticated proxy option: #{options[:proxy_http_basic_authentication].inspect}") + end + when :proxy + opt_proxy = options.fetch(:proxy) + proxy_user = nil + proxy_pass = nil + when nil + opt_proxy = true + proxy_user = nil + proxy_pass = nil + end + case opt_proxy + when true + find_proxy = lambda {|u| pxy = u.find_proxy; pxy ? [pxy, nil, nil] : nil} + when nil, false + find_proxy = lambda {|u| nil} + when String + opt_proxy = URI.parse(opt_proxy) + find_proxy = lambda {|u| [opt_proxy, proxy_user, proxy_pass]} + when URI::Generic + find_proxy = lambda {|u| [opt_proxy, proxy_user, proxy_pass]} + else + raise ArgumentError.new("Invalid proxy option: #{opt_proxy}") + end + + uri_set = {} + buf = nil + while true + redirect = catch(:open_uri_redirect) { + buf = Buffer.new + uri.buffer_open(buf, find_proxy.call(uri), options) + nil + } + if redirect + if redirect.relative? + # Although it violates RFC2616, Location: field may have relative + # URI. It is converted to absolute URI using uri as a base URI. + redirect = uri + redirect + end + if !options.fetch(:redirect, true) + raise HTTPRedirect.new(buf.io.status.join(' '), buf.io, redirect) + end + unless OpenURI.redirectable?(uri, redirect) + raise "redirection forbidden: #{uri} -> #{redirect}" + end + if options.include? :http_basic_authentication + # send authentication only for the URI directly specified. + options = options.dup + options.delete :http_basic_authentication + end + uri = redirect + raise "HTTP redirection loop: #{uri}" if uri_set.include? 
uri.to_s + uri_set[uri.to_s] = true + else + break + end + end + io = buf.io + io.base_uri = uri + io + end + + def OpenURI.redirectable?(uri1, uri2) # :nodoc: + # This test is intended to forbid a redirection from http://... to + # file:///etc/passwd. + # https to http redirect is also forbidden intentionally. + # It avoids sending secure cookie or referer by non-secure HTTP protocol. + # (RFC 2109 4.3.1, RFC 2965 3.3, RFC 2616 15.1.3) + # However this is ad hoc. It should be extensible/configurable. + uri1.scheme.downcase == uri2.scheme.downcase || + (/\A(?:http|ftp)\z/i =~ uri1.scheme && /\A(?:http|ftp)\z/i =~ uri2.scheme) + end + + def OpenURI.open_http(buf, target, proxy, options) # :nodoc: + if proxy + proxy_uri, proxy_user, proxy_pass = proxy + raise "Non-HTTP proxy URI: #{proxy_uri}" if proxy_uri.class != URI::HTTP + end + + if target.userinfo && "1.9.0" <= RUBY_VERSION + # don't raise for 1.8 because compatibility. + raise ArgumentError, "userinfo not supported. [RFC3986]" + end + + header = {} + options.each {|k, v| header[k] = v if String === k } + + require 'net/http' + klass = Net::HTTP + if URI::HTTP === target + # HTTP or HTTPS + if proxy + if proxy_user && proxy_pass + klass = Net::HTTP::Proxy(proxy_uri.host, proxy_uri.port, proxy_user, proxy_pass) + else + klass = Net::HTTP::Proxy(proxy_uri.host, proxy_uri.port) + end + end + target_host = target.host + target_port = target.port + request_uri = target.request_uri + else + # FTP over HTTP proxy + target_host = proxy_uri.host + target_port = proxy_uri.port + request_uri = target.to_s + if proxy_user && proxy_pass + header["Proxy-Authorization"] = 'Basic ' + ["#{proxy_user}:#{proxy_pass}"].pack('m').delete("\r\n") + end + end + + http = klass.new(target_host, target_port) + if target.class == URI::HTTPS + require 'net/https' + http.use_ssl = true + http.verify_mode = options[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER + store = OpenSSL::X509::Store.new + if options[:ssl_ca_cert] + if 
File.directory? options[:ssl_ca_cert] + store.add_path options[:ssl_ca_cert] + else + store.add_file options[:ssl_ca_cert] + end + else + store.set_default_paths + end + http.cert_store = store + end + if options.include? :read_timeout + http.read_timeout = options[:read_timeout] + end + + resp = nil + http.start { + req = Net::HTTP::Get.new(request_uri, header) + if options.include? :http_basic_authentication + user, pass = options[:http_basic_authentication] + req.basic_auth user, pass + end + http.request(req) {|response| + resp = response + if options[:content_length_proc] && Net::HTTPSuccess === resp + if resp.key?('Content-Length') + options[:content_length_proc].call(resp['Content-Length'].to_i) + else + options[:content_length_proc].call(nil) + end + end + resp.read_body {|str| + buf << str + if options[:progress_proc] && Net::HTTPSuccess === resp + options[:progress_proc].call(buf.size) + end + } + } + } + io = buf.io + io.rewind + io.status = [resp.code, resp.message] + resp.each {|name,value| buf.io.meta_add_field name, value } + case resp + when Net::HTTPSuccess + when Net::HTTPMovedPermanently, # 301 + Net::HTTPFound, # 302 + Net::HTTPSeeOther, # 303 + Net::HTTPTemporaryRedirect # 307 + begin + loc_uri = URI.parse(resp['location']) + rescue URI::InvalidURIError + raise OpenURI::HTTPError.new(io.status.join(' ') + ' (Invalid Location URI)', io) + end + throw :open_uri_redirect, loc_uri + else + raise OpenURI::HTTPError.new(io.status.join(' '), io) + end + end + + class HTTPError < StandardError + def initialize(message, io) + super(message) + @io = io + end + attr_reader :io + end + + class HTTPRedirect < HTTPError + def initialize(message, io, uri) + super(message, io) + @uri = uri + end + attr_reader :uri + end + + class Buffer # :nodoc: + def initialize + @io = StringIO.new + @size = 0 + end + attr_reader :size + + StringMax = 10240 + def <<(str) + @io << str + @size += str.length + if StringIO === @io && StringMax < @size + require 'tempfile' + io = 
Tempfile.new('open-uri') + io.binmode + Meta.init io, @io if Meta === @io + io << @io.string + @io = io + end + end + + def io + Meta.init @io unless Meta === @io + @io + end + end + + # Mixin for holding meta-information. + module Meta + def Meta.init(obj, src=nil) # :nodoc: + obj.extend Meta + obj.instance_eval { + @base_uri = nil + @meta = {} + } + if src + obj.status = src.status + obj.base_uri = src.base_uri + src.meta.each {|name, value| + obj.meta_add_field(name, value) + } + end + end + + # returns an Array which consists status code and message. + attr_accessor :status + + # returns a URI which is base of relative URIs in the data. + # It may differ from the URI supplied by a user because redirection. + attr_accessor :base_uri + + # returns a Hash which represents header fields. + # The Hash keys are downcased for canonicalization. + attr_reader :meta + + def meta_setup_encoding # :nodoc: + charset = self.charset + enc = nil + if charset + begin + enc = Encoding.find(charset) + rescue ArgumentError + end + end + enc = Encoding::ASCII_8BIT unless enc + if self.respond_to? :force_encoding + self.force_encoding(enc) + elsif self.respond_to? :string + self.string.force_encoding(enc) + else # Tempfile + self.set_encoding enc + end + end + + def meta_add_field(name, value) # :nodoc: + name = name.downcase + @meta[name] = value + meta_setup_encoding if name == 'content-type' + end + + # returns a Time which represents Last-Modified field. + def last_modified + if v = @meta['last-modified'] + Time.httpdate(v) + else + nil + end + end + + RE_LWS = /[\r\n\t ]+/n + RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n + RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n + RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n + + def content_type_parse # :nodoc: + v = @meta['content-type'] + # The last (?:;#{RE_LWS}?)? matches extra ";" which violates RFC2045. 
+ if v && %r{\A#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?/(#{RE_TOKEN})#{RE_LWS}?(#{RE_PARAMETERS})(?:;#{RE_LWS}?)?\z}no =~ v + type = $1.downcase + subtype = $2.downcase + parameters = [] + $3.scan(/;#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?=#{RE_LWS}?(?:(#{RE_TOKEN})|(#{RE_QUOTED_STRING}))/no) {|att, val, qval| + val = qval.gsub(/[\r\n\t !#-\[\]-~\x80-\xff]+|(\\[\x00-\x7f])/n) { $1 ? $1[1,1] : $& } if qval + parameters << [att.downcase, val] + } + ["#{type}/#{subtype}", *parameters] + else + nil + end + end + + # returns "type/subtype" which is MIME Content-Type. + # It is downcased for canonicalization. + # Content-Type parameters are stripped. + def content_type + type, *parameters = content_type_parse + type || 'application/octet-stream' + end + + # returns a charset parameter in Content-Type field. + # It is downcased for canonicalization. + # + # If charset parameter is not given but a block is given, + # the block is called and its result is returned. + # It can be used to guess charset. + # + # If charset parameter and block is not given, + # nil is returned except text type in HTTP. + # In that case, "iso-8859-1" is returned as defined by RFC2616 3.7.1. + def charset + type, *parameters = content_type_parse + if pair = parameters.assoc('charset') + pair.last.downcase + elsif block_given? + yield + elsif type && %r{\Atext/} =~ type && + @base_uri && /\Ahttp\z/i =~ @base_uri.scheme + "iso-8859-1" # RFC2616 3.7.1 + else + nil + end + end + + # returns a list of encodings in Content-Encoding field + # as an Array of String. + # The encodings are downcased for canonicalization. + def content_encoding + v = @meta['content-encoding'] + if v && %r{\A#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?(?:,#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?)*}o =~ v + v.scan(RE_TOKEN).map {|content_coding| content_coding.downcase} + else + [] + end + end + end + + # Mixin for HTTP and FTP URIs. + module OpenRead + # OpenURI::OpenRead#open provides `open' for URI::HTTP and URI::FTP. 
+ # + # OpenURI::OpenRead#open takes optional 3 arguments as: + # OpenURI::OpenRead#open([mode [, perm]] [, options]) [{|io| ... }] + # + # `mode' and `perm' are the same as for Kernel#open. + # + # However, `mode' must be read mode because OpenURI::OpenRead#open doesn't + # support write mode (yet). + # Also `perm' is just ignored because it is meaningful only for file + # creation. + # + # `options' must be a hash. + # + # Each pair whose key is a string in the hash specifies an extra header + # field for HTTP. + # I.e. it is ignored for FTP without HTTP proxy. + # + # The hash may include other options whose key is a symbol: + # + # [:proxy] + # Synopsis: + # :proxy => "http://proxy.foo.com:8000/" + # :proxy => URI.parse("http://proxy.foo.com:8000/") + # :proxy => true + # :proxy => false + # :proxy => nil + # + # If :proxy option is specified, the value should be String, URI, + # boolean or nil. + # When String or URI is given, it is treated as proxy URI. + # When true is given or the option itself is not specified, + # environment variable `scheme_proxy' is examined. + # `scheme' is replaced by `http', `https' or `ftp'. + # When false or nil is given, the environment variables are ignored and + # connection will be made to a server directly. + # + # [:proxy_http_basic_authentication] + # Synopsis: + # :proxy_http_basic_authentication => ["http://proxy.foo.com:8000/", "proxy-user", "proxy-password"] + # :proxy_http_basic_authentication => [URI.parse("http://proxy.foo.com:8000/"), "proxy-user", "proxy-password"] + # + # If :proxy_http_basic_authentication option is specified, the value should be an Array with 3 elements. + # It should contain a proxy URI, a proxy user name and a proxy password. + # The proxy URI should be a String, a URI or nil. + # The proxy user name and password should each be a String. + # + # If nil is given for the proxy URI, this option is just ignored. + # + # If both :proxy and :proxy_http_basic_authentication are specified, + # ArgumentError is raised.
+ # + # [:http_basic_authentication] + # Synopsis: + # :http_basic_authentication=>[user, password] + # + # If :http_basic_authentication is specified, + # the value should be an array which contains 2 strings: + # username and password. + # It is used for HTTP Basic authentication defined by RFC 2617. + # + # [:content_length_proc] + # Synopsis: + # :content_length_proc => lambda {|content_length| ... } + # + # If :content_length_proc option is specified, the option value procedure + # is called before actual transfer is started. + # It takes one argument which is expected content length in bytes. + # + # If two or more transfers are done because of HTTP redirection, the procedure + # is called only once, for the last transfer. + # + # When expected content length is unknown, the procedure is called with + # nil. + # This happens when the HTTP response has no Content-Length header. + # + # [:progress_proc] + # Synopsis: + # :progress_proc => lambda {|size| ...} + # + # If :progress_proc option is specified, the proc is called with one + # argument each time when `open' gets content fragment from network. + # The argument `size' is the accumulated transferred size in bytes. + # + # If two or more transfers are done because of HTTP redirection, the procedure + # is called only for the last transfer. + # + # :progress_proc and :content_length_proc are intended to be used for + # progress bar. + # For example, it can be implemented as follows using Ruby/ProgressBar. + # + # pbar = nil + # open("http://...", + # :content_length_proc => lambda {|t| + # if t && 0 < t + # pbar = ProgressBar.new("...", t) + # pbar.file_transfer_mode + # end + # }, + # :progress_proc => lambda {|s| + # pbar.set s if pbar + # }) {|f| ... } + # + # [:read_timeout] + # Synopsis: + # :read_timeout=>nil (no timeout) + # :read_timeout=>10 (10 seconds) + # + # :read_timeout option specifies a timeout of read for http connections.
+ # + # [:ssl_ca_cert] + # Synopsis: + # :ssl_ca_cert=>filename + # + # :ssl_ca_cert is used to specify CA certificate for SSL. + # If it is given, default certificates are not used. + # + # [:ssl_verify_mode] + # Synopsis: + # :ssl_verify_mode=>mode + # + # :ssl_verify_mode is used to specify openssl verify mode. + # + # OpenURI::OpenRead#open returns an IO like object if block is not given. + # Otherwise it yields the IO object and return the value of the block. + # The IO object is extended with OpenURI::Meta. + # + # [:ftp_active_mode] + # Synopsis: + # :ftp_active_mode=>bool + # + # :ftp_active_mode=>true is used to make ftp active mode. + # Note that the active mode is default in Ruby 1.8 or prior. + # Ruby 1.9 uses passive mode by default. + # + # [:redirect] + # Synopsis: + # :redirect=>bool + # + # :redirect=>false is used to disable HTTP redirects at all. + # OpenURI::HTTPRedirect exception raised on redirection. + # It is true by default. + # The true means redirections between http and ftp is permitted. + # + def open(*rest, &block) + OpenURI.open_uri(self, *rest, &block) + end + + # OpenURI::OpenRead#read([options]) reads a content referenced by self and + # returns the content as string. + # The string is extended with OpenURI::Meta. + # The argument `options' is same as OpenURI::OpenRead#open. + def read(options={}) + self.open(options) {|f| + str = f.read + Meta.init str, f + str + } + end + end +end + +module URI + class Generic + # returns a proxy URI. + # The proxy URI is obtained from environment variables such as http_proxy, + # ftp_proxy, no_proxy, etc. + # If there is no proper proxy, nil is returned. + # + # Note that capitalized variables (HTTP_PROXY, FTP_PROXY, NO_PROXY, etc.) + # are examined too. + # + # But http_proxy and HTTP_PROXY is treated specially under CGI environment. + # It's because HTTP_PROXY may be set by Proxy: header. + # So HTTP_PROXY is not used. + # http_proxy is not used too if the variable is case insensitive. 
+ # CGI_HTTP_PROXY can be used instead. + def find_proxy + name = self.scheme.downcase + '_proxy' + proxy_uri = nil + if name == 'http_proxy' && ENV.include?('REQUEST_METHOD') # CGI? + # HTTP_PROXY conflicts with *_proxy for proxy settings and + # HTTP_* for header information in CGI. + # So it should be careful to use it. + pairs = ENV.reject {|k, v| /\Ahttp_proxy\z/i !~ k } + case pairs.length + when 0 # no proxy setting anyway. + proxy_uri = nil + when 1 + k, v = pairs.shift + if k == 'http_proxy' && ENV[k.upcase] == nil + # http_proxy is safe to use because ENV is case sensitive. + proxy_uri = ENV[name] + else + proxy_uri = nil + end + else # http_proxy is safe to use because ENV is case sensitive. + proxy_uri = ENV.to_hash[name] + end + if !proxy_uri + # Use CGI_HTTP_PROXY. cf. libwww-perl. + proxy_uri = ENV["CGI_#{name.upcase}"] + end + elsif name == 'http_proxy' + unless proxy_uri = ENV[name] + if proxy_uri = ENV[name.upcase] + warn 'The environment variable HTTP_PROXY is discouraged. Use http_proxy.' 
+ end + end + else + proxy_uri = ENV[name] || ENV[name.upcase] + end + + if proxy_uri && self.host + require 'socket' + begin + addr = IPSocket.getaddress(self.host) + proxy_uri = nil if /\A127\.|\A::1\z/ =~ addr + rescue SocketError + end + end + + if proxy_uri + proxy_uri = URI.parse(proxy_uri) + name = 'no_proxy' + if no_proxy = ENV[name] || ENV[name.upcase] + no_proxy.scan(/([^:,]*)(?::(\d+))?/) {|host, port| + if /(\A|\.)#{Regexp.quote host}\z/i =~ self.host && + (!port || self.port == port.to_i) + proxy_uri = nil + break + end + } + end + proxy_uri + else + nil + end + end + end + + class HTTP + def buffer_open(buf, proxy, options) # :nodoc: + OpenURI.open_http(buf, self, proxy, options) + end + + include OpenURI::OpenRead + end + + class FTP + def buffer_open(buf, proxy, options) # :nodoc: + if proxy + OpenURI.open_http(buf, self, proxy, options) + return + end + require 'net/ftp' + + path = self.path + path = path.sub(%r{\A/}, '%2F') # re-encode the beginning slash because uri library decodes it. + directories = path.split(%r{/}, -1) + directories.each {|d| + d.gsub!(/%([0-9A-Fa-f][0-9A-Fa-f])/) { [$1].pack("H2") } + } + unless filename = directories.pop + raise ArgumentError, "no filename: #{self.inspect}" + end + directories.each {|d| + if /[\r\n]/ =~ d + raise ArgumentError, "invalid directory: #{d.inspect}" + end + } + if /[\r\n]/ =~ filename + raise ArgumentError, "invalid filename: #{filename.inspect}" + end + typecode = self.typecode + if typecode && /\A[aid]\z/ !~ typecode + raise ArgumentError, "invalid typecode: #{typecode.inspect}" + end + + # The access sequence is defined by RFC 1738 + ftp = Net::FTP.new + ftp.connect(self.host, self.port) + ftp.passive = true if !options[:ftp_active_mode] + # todo: extract user/passwd from .netrc. 
+ user = 'anonymous' + passwd = nil + user, passwd = self.userinfo.split(/:/) if self.userinfo + ftp.login(user, passwd) + directories.each {|cwd| + ftp.voidcmd("CWD #{cwd}") + } + if typecode + # xxx: typecode D is not handled. + ftp.voidcmd("TYPE #{typecode.upcase}") + end + if options[:content_length_proc] + options[:content_length_proc].call(ftp.size(filename)) + end + ftp.retrbinary("RETR #{filename}", 4096) { |str| + buf << str + options[:progress_proc].call(buf.size) if options[:progress_proc] + } + ftp.close + buf.io.rewind + end + + include OpenURI::OpenRead + end +end diff --git a/lib/open3.rb b/lib/open3.rb new file mode 100644 index 0000000..d776de7 --- /dev/null +++ b/lib/open3.rb @@ -0,0 +1,98 @@ +# +# = open3.rb: Popen, but with stderr, too +# +# Author:: Yukihiro Matsumoto +# Documentation:: Konrad Meyer +# +# Open3 gives you access to stdin, stdout, and stderr when running other +# programs. +# + +# +# Open3 grants you access to stdin, stdout, stderr and a thread to wait the +# child process when running another program. +# +# Example: +# +# require "open3" +# include Open3 +# +# stdin, stdout, stderr, wait_thr = popen3('nroff -man') +# +# Open3.popen3 can also take a block which will receive stdin, stdout, +# stderr and wait_thr as parameters. +# This ensures stdin, stdout and stderr are closed and +# the process is terminated once the block exits. +# +# Example: +# +# require "open3" +# +# Open3.popen3('nroff -man') { |stdin, stdout, stderr, wait_thr| ... } +# + +module Open3 + # + # Open stdin, stdout, and stderr streams and start external executable. + # In addition, a thread for waiting the started process is noticed. + # The thread has a thread variable :pid which is the pid of the started + # process. + # + # Non-block form: + # + # stdin, stdout, stderr, wait_thr = Open3.popen3(cmd) + # pid = wait_thr[:pid] # pid of the started process. + # ... + # stdin.close # stdin, stdout and stderr should be closed in this form. 
+ # stdout.close + # stderr.close + # exit_status = wait_thr.value # Process::Status object returned. + # + # Block form: + # + # Open3.popen3(cmd) { |stdin, stdout, stderr, wait_thr| ... } + # + # The parameter +cmd+ is passed directly to Kernel#spawn. + # + # wait_thr.value waits the termination of the process. + # The block form also waits the process when it returns. + # + # Closing stdin, stdout and stderr does not wait the process. + # + def popen3(*cmd) + pw = IO::pipe # pipe[0] for read, pipe[1] for write + pr = IO::pipe + pe = IO::pipe + + pid = spawn(*cmd, STDIN=>pw[0], STDOUT=>pr[1], STDERR=>pe[1]) + wait_thr = Process.detach(pid) + pw[0].close + pr[1].close + pe[1].close + pi = [pw[1], pr[0], pe[0], wait_thr] + pw[1].sync = true + if defined? yield + begin + return yield(*pi) + ensure + [pw[1], pr[0], pe[0]].each{|p| p.close unless p.closed?} + wait_thr.join + end + end + pi + end + module_function :popen3 +end + +if $0 == __FILE__ + a = Open3.popen3("nroff -man") + Thread.start do + while line = gets + a[0].print line + end + a[0].close + end + while line = a[1].gets + print ":", line + end +end diff --git a/lib/optparse.rb b/lib/optparse.rb new file mode 100644 index 0000000..2fe3c3b --- /dev/null +++ b/lib/optparse.rb @@ -0,0 +1,1810 @@ +# +# optparse.rb - command-line option analysis with the OptionParser class. +# +# Author:: Nobu Nakada +# Documentation:: Nobu Nakada and Gavin Sinclair. +# +# See OptionParser for documentation. 
+# + + +# == Developer Documentation (not for RDoc output) +# +# === Class tree +# +# - OptionParser:: front end +# - OptionParser::Switch:: each switches +# - OptionParser::List:: options list +# - OptionParser::ParseError:: errors on parsing +# - OptionParser::AmbiguousOption +# - OptionParser::NeedlessArgument +# - OptionParser::MissingArgument +# - OptionParser::InvalidOption +# - OptionParser::InvalidArgument +# - OptionParser::AmbiguousArgument +# +# === Object relationship diagram +# +# +--------------+ +# | OptionParser |<>-----+ +# +--------------+ | +--------+ +# | ,-| Switch | +# on_head -------->+---------------+ / +--------+ +# accept/reject -->| List |<|>- +# | |<|>- +----------+ +# on ------------->+---------------+ `-| argument | +# : : | class | +# +---------------+ |==========| +# on_tail -------->| | |pattern | +# +---------------+ |----------| +# OptionParser.accept ->| DefaultList | |converter | +# reject |(shared between| +----------+ +# | all instances)| +# +---------------+ +# +# == OptionParser +# +# === Introduction +# +# OptionParser is a class for command-line option analysis. It is much more +# advanced, yet also easier to use, than GetoptLong, and is a more Ruby-oriented +# solution. +# +# === Features +# +# 1. The argument specification and the code to handle it are written in the +# same place. +# 2. It can output an option summary; you don't need to maintain this string +# separately. +# 3. Optional and mandatory arguments are specified very gracefully. +# 4. Arguments can be automatically converted to a specified class. +# 5. Arguments can be restricted to a certain set. +# +# All of these features are demonstrated in the examples below. +# +# === Minimal example +# +# require 'optparse' +# +# options = {} +# OptionParser.new do |opts| +# opts.banner = "Usage: example.rb [options]" +# +# opts.on("-v", "--[no-]verbose", "Run verbosely") do |v| +# options[:verbose] = v +# end +# end.parse! 
+# +# p options +# p ARGV +# +# === Complete example +# +# The following example is a complete Ruby program. You can run it and see the +# effect of specifying various options. This is probably the best way to learn +# the features of +optparse+. +# +# require 'optparse' +# require 'optparse/time' +# require 'ostruct' +# require 'pp' +# +# class OptparseExample +# +# CODES = %w[iso-2022-jp shift_jis euc-jp utf8 binary] +# CODE_ALIASES = { "jis" => "iso-2022-jp", "sjis" => "shift_jis" } +# +# # +# # Return a structure describing the options. +# # +# def self.parse(args) +# # The options specified on the command line will be collected in *options*. +# # We set default values here. +# options = OpenStruct.new +# options.library = [] +# options.inplace = false +# options.encoding = "utf8" +# options.transfer_type = :auto +# options.verbose = false +# +# opts = OptionParser.new do |opts| +# opts.banner = "Usage: example.rb [options]" +# +# opts.separator "" +# opts.separator "Specific options:" +# +# # Mandatory argument. +# opts.on("-r", "--require LIBRARY", +# "Require the LIBRARY before executing your script") do |lib| +# options.library << lib +# end +# +# # Optional argument; multi-line description. +# opts.on("-i", "--inplace [EXTENSION]", +# "Edit ARGV files in place", +# " (make backup if EXTENSION supplied)") do |ext| +# options.inplace = true +# options.extension = ext || '' +# options.extension.sub!(/\A\.?(?=.)/, ".") # Ensure extension begins with dot. +# end +# +# # Cast 'delay' argument to a Float. +# opts.on("--delay N", Float, "Delay N seconds before executing") do |n| +# options.delay = n +# end +# +# # Cast 'time' argument to a Time object. +# opts.on("-t", "--time [TIME]", Time, "Begin execution at given time") do |time| +# options.time = time +# end +# +# # Cast to octal integer. 
+# opts.on("-F", "--irs [OCTAL]", OptionParser::OctalInteger, +# "Specify record separator (default \\0)") do |rs| +# options.record_separator = rs +# end +# +# # List of arguments. +# opts.on("--list x,y,z", Array, "Example 'list' of arguments") do |list| +# options.list = list +# end +# +# # Keyword completion. We are specifying a specific set of arguments (CODES +# # and CODE_ALIASES - notice the latter is a Hash), and the user may provide +# # the shortest unambiguous text. +# code_list = (CODE_ALIASES.keys + CODES).join(',') +# opts.on("--code CODE", CODES, CODE_ALIASES, "Select encoding", +# " (#{code_list})") do |encoding| +# options.encoding = encoding +# end +# +# # Optional argument with keyword completion. +# opts.on("--type [TYPE]", [:text, :binary, :auto], +# "Select transfer type (text, binary, auto)") do |t| +# options.transfer_type = t +# end +# +# # Boolean switch. +# opts.on("-v", "--[no-]verbose", "Run verbosely") do |v| +# options.verbose = v +# end +# +# opts.separator "" +# opts.separator "Common options:" +# +# # No argument, shows at tail. This will print an options summary. +# # Try it and see! +# opts.on_tail("-h", "--help", "Show this message") do +# puts opts +# exit +# end +# +# # Another typical switch to print the version. +# opts.on_tail("--version", "Show version") do +# puts OptionParser::Version.join('.') +# exit +# end +# end +# +# opts.parse!(args) +# options +# end # parse() +# +# end # class OptparseExample +# +# options = OptparseExample.parse(ARGV) +# pp options +# +# === Further documentation +# +# The above examples should be enough to learn how to use this class. If you +# have any questions, email me (gsinclair@soyabean.com.au) and I will update +# this document. 
+# +class OptionParser + # :stopdoc: + RCSID = %w$Id: optparse.rb 23396 2009-05-11 15:05:43Z yugui $[1..-1].each {|s| s.freeze}.freeze + Version = (RCSID[1].split('.').collect {|s| s.to_i}.extend(Comparable).freeze if RCSID[1]) + LastModified = (Time.gm(*RCSID[2, 2].join('-').scan(/\d+/).collect {|s| s.to_i}) if RCSID[2]) + Release = RCSID[2] + + NoArgument = [NO_ARGUMENT = :NONE, nil].freeze + RequiredArgument = [REQUIRED_ARGUMENT = :REQUIRED, true].freeze + OptionalArgument = [OPTIONAL_ARGUMENT = :OPTIONAL, false].freeze + # :startdoc: + + # + # Keyword completion module. This allows partial arguments to be specified + # and resolved against a list of acceptable values. + # + module Completion + def complete(key, icase = false, pat = nil) + pat ||= Regexp.new('\A' + Regexp.quote(key).gsub(/\w+\b/, '\&\w*'), + icase) + canon, sw, cn = nil + candidates = [] + each do |k, *v| + (if Regexp === k + kn = nil + k === key + else + kn = defined?(k.id2name) ? k.id2name : k + pat === kn + end) or next + v << k if v.empty? + candidates << [k, v, kn] + end + candidates = candidates.sort_by {|k, v, kn| kn.size} + if candidates.size == 1 + canon, sw, * = candidates[0] + elsif candidates.size > 1 + canon, sw, cn = candidates.shift + candidates.each do |k, v, kn| + next if sw == v + if String === cn and String === kn + if cn.rindex(kn, 0) + canon, sw, cn = k, v, kn + next + elsif kn.rindex(cn, 0) + next + end + end + throw :ambiguous, key + end + end + if canon + block_given? or return key, *sw + yield(key, *sw) + end + end + + def convert(opt = nil, val = nil, *) + val + end + end + + + # + # Map from option/keyword string to object with completion. + # + class OptionMap < Hash + include Completion + end + + + # + # Individual switch class. Not important to the user. + # + # Defined within Switch are several Switch-derived classes: NoArgument, + # RequiredArgument, etc. 
+ # + class Switch + attr_reader :pattern, :conv, :short, :long, :arg, :desc, :block + + # + # Guesses argument style from +arg+. Returns corresponding + # OptionParser::Switch class (OptionalArgument, etc.). + # + def self.guess(arg) + case arg + when "" + t = self + when /\A=?\[/ + t = Switch::OptionalArgument + when /\A\s+\[/ + t = Switch::PlacedArgument + else + t = Switch::RequiredArgument + end + self >= t or incompatible_argument_styles(arg, t) + t + end + + def self.incompatible_argument_styles(arg, t) + raise(ArgumentError, "#{arg}: incompatible argument styles\n #{self}, #{t}", + ParseError.filter_backtrace(caller(2))) + end + + def self.pattern + NilClass + end + + def initialize(pattern = nil, conv = nil, + short = nil, long = nil, arg = nil, + desc = ([] if short or long), block = Proc.new) + raise if Array === pattern + @pattern, @conv, @short, @long, @arg, @desc, @block = + pattern, conv, short, long, arg, desc, block + end + + # + # Parses +arg+ and returns rest of +arg+ and matched portion to the + # argument pattern. Yields when the pattern doesn't match substring. + # + def parse_arg(arg) + pattern or return nil, [arg] + unless m = pattern.match(arg) + yield(InvalidArgument, arg) + return arg, [] + end + if String === m + m = [s = m] + else + m = m.to_a + s = m[0] + return nil, m unless String === s + end + raise InvalidArgument, arg unless arg.rindex(s, 0) + return nil, m if s.length == arg.length + yield(InvalidArgument, arg) # didn't match whole arg + return arg[s.length..-1], m + end + private :parse_arg + + # + # Parses argument, converts and returns +arg+, +block+ and result of + # conversion. Yields at semi-error condition instead of raising an + # exception. + # + def conv_arg(arg, val = []) + if conv + val = conv.call(*val) + else + val = proc {|v| v}.call(*val) + end + return arg, block, val + end + private :conv_arg + + # + # Produces the summary text. Each line of the summary is yielded to the + # block (without newline). 
+ # + # +sdone+:: Already summarized short style options keyed hash. + # +ldone+:: Already summarized long style options keyed hash. + # +width+:: Width of left side (option part). In other words, the right + # side (description part) starts after +width+ columns. + # +max+:: Maximum width of left side -> the options are filled within + # +max+ columns. + # +indent+:: Prefix string indents all summarized lines. + # + def summarize(sdone = [], ldone = [], width = 1, max = width - 1, indent = "") + sopts, lopts = [], [], nil + @short.each {|s| sdone.fetch(s) {sopts << s}; sdone[s] = true} if @short + @long.each {|s| ldone.fetch(s) {lopts << s}; ldone[s] = true} if @long + return if sopts.empty? and lopts.empty? # completely hidden + + left = [sopts.join(', ')] + right = desc.dup + + while s = lopts.shift + l = left[-1].length + s.length + l += arg.length if left.size == 1 && arg + l < max or sopts.empty? or left << '' + left[-1] << if left[-1].empty? then ' ' * 4 else ', ' end << s + end + + left[0] << arg if arg + mlen = left.collect {|ss| ss.length}.max.to_i + while mlen > width and l = left.shift + mlen = left.collect {|ss| ss.length}.max.to_i if l.length == mlen + yield(indent + l) + end + + while begin l = left.shift; r = right.shift; l or r end + l = l.to_s.ljust(width) + ' ' + r if r and !r.empty? + yield(indent + l) + end + + self + end + + def add_banner(to) # :nodoc: + unless @short or @long + s = desc.join + to << " [" + s + "]..." unless s.empty? + end + to + end + + def match_nonswitch?(str) # :nodoc: + @pattern =~ str unless @short or @long + end + + # + # Main name of the switch. + # + def switch_name + (long.first || short.first).sub(/\A-+(?:\[no-\])?/, '') + end + + # + # Switch that takes no arguments. + # + class NoArgument < self + + # + # Raises an exception if any arguments given. 
+ # + def parse(arg, argv) + yield(NeedlessArgument, arg) if arg + conv_arg(arg) + end + + def self.incompatible_argument_styles(*) + end + + def self.pattern + Object + end + end + + # + # Switch that takes an argument. + # + class RequiredArgument < self + + # + # Raises an exception if argument is not present. + # + def parse(arg, argv) + unless arg + raise MissingArgument if argv.empty? + arg = argv.shift + end + conv_arg(*parse_arg(arg, &method(:raise))) + end + end + + # + # Switch that can omit argument. + # + class OptionalArgument < self + + # + # Parses argument if given, or uses default value. + # + def parse(arg, argv, &error) + if arg + conv_arg(*parse_arg(arg, &error)) + else + conv_arg(arg) + end + end + end + + # + # Switch that takes an argument, which does not begin with '-'. + # + class PlacedArgument < self + + # + # Returns nil if argument is not present or begins with '-'. + # + def parse(arg, argv, &error) + if !(val = arg) and (argv.empty? or /\A-/ =~ (val = argv[0])) + return nil, block, nil + end + opt = (val = parse_arg(val, &error))[1] + val = conv_arg(*val) + if opt and !arg + argv.shift + else + val[0] = nil + end + val + end + end + end + + # + # Simple option list providing mapping from short and/or long option + # string to OptionParser::Switch and mapping from acceptable argument to + # matching pattern and converter pair. Also provides summary feature. + # + class List + # Map from acceptable argument types to pattern and converter pairs. + attr_reader :atype + + # Map from short style option switches to actual switch objects. + attr_reader :short + + # Map from long style option switches to actual switch objects. + attr_reader :long + + # List of all switches and summary string. + attr_reader :list + + # + # Just initializes all instance variables. + # + def initialize + @atype = {} + @short = OptionMap.new + @long = OptionMap.new + @list = [] + end + + # + # See OptionParser.accept. 
+    #
+    # +t+:: Argument class specifier used as the lookup key.
+    # +pat+:: Pattern, must respond to +match+. Defaults to a match-anything
+    #         regexp; +t+ itself is used as the pattern only when +pat+ is
+    #         passed explicitly as nil and +t+ responds to +match+.
+    # +block+:: Converter. When omitted, the pattern's +convert+ method is
+    #           used if it has one.
+    #
+    def accept(t, pat = /.*/nm, &block)
+      if pat
+        pat.respond_to?(:match) or
+          raise TypeError, "has no `match'", ParseError.filter_backtrace(caller(2))
+      else
+        pat = t if t.respond_to?(:match)
+      end
+      unless block
+        block = pat.method(:convert).to_proc if pat.respond_to?(:convert)
+      end
+      @atype[t] = [pat, block]
+    end
+
+    #
+    # See OptionParser.reject.
+    #
+    def reject(t)
+      @atype.delete(t)
+    end
+
+    #
+    # Adds +sw+ according to +sopts+, +lopts+ and +nlopts+.
+    #
+    # +sw+:: OptionParser::Switch instance to be added.
+    # +sopts+:: Short style option list.
+    # +lopts+:: Long style option list.
+    # +nsw+:: Switch registered for the negated long options.
+    # +nlopts+:: Negated long style options list.
+    #
+    def update(sw, sopts, lopts, nsw = nil, nlopts = nil)
+      sopts.each {|o| @short[o] = sw} if sopts
+      lopts.each {|o| @long[o] = sw} if lopts
+      nlopts.each {|o| @long[o] = nsw} if nsw and nlopts
+      # Drop switches from the summary list that no option maps to any more.
+      used = @short.invert.update(@long.invert)
+      @list.delete_if {|o| Switch === o and !used[o]}
+    end
+    private :update
+
+    #
+    # Inserts +switch+ at the head of the list, and associates short, long
+    # and negated long options. Arguments are:
+    #
+    # +switch+:: OptionParser::Switch instance to be inserted.
+    # +short_opts+:: List of short style options.
+    # +long_opts+:: List of long style options.
+    # +nolong_opts+:: List of long style options with "no-" prefix.
+    #
+    # prepend(switch, short_opts, long_opts, nolong_opts)
+    #
+    def prepend(*args)
+      update(*args)
+      @list.unshift(args[0])
+    end
+
+    #
+    # Appends +switch+ at the tail of the list, and associates short, long
+    # and negated long options. Arguments are:
+    #
+    # +switch+:: OptionParser::Switch instance to be inserted.
+    # +short_opts+:: List of short style options.
+    # +long_opts+:: List of long style options.
+    # +nolong_opts+:: List of long style options with "no-" prefix.
+    #
+    # append(switch, short_opts, long_opts, nolong_opts)
+    #
+    def append(*args)
+      update(*args)
+      @list.push(args[0])
+    end
+
+    #
+    # Searches +key+ in +id+ list.
The result is returned or yielded if a + # block is given. If it isn't found, nil is returned. + # + def search(id, key) + if list = __send__(id) + val = list.fetch(key) {return nil} + block_given? ? yield(val) : val + end + end + + # + # Searches list +id+ for +opt+ and the optional patterns for completion + # +pat+. If +icase+ is true, the search is case insensitive. The result + # is returned or yielded if a block is given. If it isn't found, nil is + # returned. + # + def complete(id, opt, icase = false, *pat, &block) + __send__(id).complete(opt, icase, *pat, &block) + end + + # + # Iterates over each option, passing the option to the +block+. + # + def each_option(&block) + list.each(&block) + end + + # + # Creates the summary table, passing each line to the +block+ (without + # newline). The arguments +args+ are passed along to the summarize + # method which is called on every option. + # + def summarize(*args, &block) + sum = [] + list.reverse_each do |opt| + if opt.respond_to?(:summarize) # perhaps OptionParser::Switch + s = [] + opt.summarize(*args) {|l| s << l} + sum.concat(s.reverse) + elsif !opt or opt.empty? + sum << "" + elsif opt.respond_to?(:each_line) + sum.concat([*opt.each_line].reverse) + else + sum.concat([*opt.each].reverse) + end + end + sum.reverse_each(&block) + end + + def add_banner(to) # :nodoc: + list.each do |opt| + if opt.respond_to?(:add_banner) + opt.add_banner(to) + end + end + to + end + end + + # + # Hash with completion search feature. See OptionParser::Completion. + # + class CompletingHash < Hash + include Completion + + # + # Completion for hash key. + # + def match(key) + *values = fetch(key) { + raise AmbiguousArgument, catch(:ambiguous) {return complete(key)} + } + return key, *values + end + end + + # :stopdoc: + + # + # Enumeration of acceptable argument styles. Possible values are: + # + # NO_ARGUMENT:: The switch takes no arguments. (:NONE) + # REQUIRED_ARGUMENT:: The switch requires an argument. 
+  # (:REQUIRED)
+  # OPTIONAL_ARGUMENT:: The switch takes an optional argument. (:OPTIONAL)
+  #
+  # Use like --switch=argument (long style) or -Xargument (short style). For
+  # short style, only the portion matched by the argument pattern is treated
+  # as the argument.
+  #
+  ArgumentStyle = {}
+  NoArgument.each {|el| ArgumentStyle[el] = Switch::NoArgument}
+  RequiredArgument.each {|el| ArgumentStyle[el] = Switch::RequiredArgument}
+  OptionalArgument.each {|el| ArgumentStyle[el] = Switch::OptionalArgument}
+  ArgumentStyle.freeze
+
+  #
+  # Commonly used switches such as '--'; also provides the default
+  # argument classes.
+  #
+  DefaultList = List.new
+  DefaultList.short['-'] = Switch::NoArgument.new {}
+  DefaultList.long[''] = Switch::NoArgument.new {throw :terminate}
+
+  #
+  # Default options for ARGV, which never appear in option summary.
+  #
+  # Maps an option name to a proc that builds the switch for a given parser.
+  #
+  Officious = {}
+
+  #
+  # --help
+  # Shows option summary.
+  #
+  Officious['help'] = proc do |parser|
+    Switch::NoArgument.new do
+      puts parser.help
+      exit
+    end
+  end
+
+  #
+  # --version
+  # Shows version string if Version is defined.
+  #
+  # With an argument (comma separated package names) the versions of those
+  # packages are shown through optparse/version, if that file can be loaded.
+  #
+  Officious['version'] = proc do |parser|
+    Switch::OptionalArgument.new do |pkg|
+      if pkg
+        begin
+          require 'optparse/version'
+        rescue LoadError
+          # Not available: fall through to the plain version output below.
+        else
+          show_version(*pkg.split(/,/)) or
+            abort("#{parser.program_name}: no version found in package #{pkg}")
+          exit
+        end
+      end
+      v = parser.ver or abort("#{parser.program_name}: version unknown")
+      puts v
+      exit
+    end
+  end
+
+  # :startdoc:
+
+  #
+  # Class methods
+  #
+
+  #
+  # Initializes a new instance and evaluates the optional block in context
+  # of the instance. Arguments +args+ are passed to #new, see there for
+  # description of parameters.
+  #
+  # This method is *deprecated*, its behavior corresponds to the older #new
+  # method.
+  #
+  def self.with(*args, &block)
+    opts = new(*args)
+    opts.instance_eval(&block)
+    opts
+  end
+
+  #
+  # Returns an incremented value of +default+ according to +arg+.
+  #
+  # For an Integer +arg+ the result is +arg+ itself, or nil when it is zero.
+  # For a nil +arg+ the result is +default+ converted with to_i, plus one.
+  # Any other +arg+ gives nil.
+  #
+  def self.inc(arg, default = nil)
+    case arg
+    when Integer
+      arg.nonzero?
+    when nil
+      default.to_i + 1
+    end
+  end
+  # Instance level shortcut for OptionParser.inc.
+  def inc(*args)
+    self.class.inc(*args)
+  end
+
+  #
+  # Initializes the instance and yields itself if called with a block.
+  #
+  # +banner+:: Banner message.
+  # +width+:: Summary width.
+  # +indent+:: Summary indent.
+  #
+  def initialize(banner = nil, width = 32, indent = ' ' * 4)
+    # Stack of lists: the shared DefaultList, the list used by #base
+    # (@stack[1]) and the list used by #top (@stack[-1]).
+    @stack = [DefaultList, List.new, List.new]
+    @program_name = nil
+    @banner = banner
+    @summary_width = width
+    @summary_indent = indent
+    @default_argv = ARGV
+    add_officious
+    yield self if block_given?
+  end
+
+  # Registers the Officious switches in the base list, keeping any long
+  # option that is already defined there.
+  def add_officious  # :nodoc:
+    list = base()
+    Officious.each do |opt, block|
+      list.long[opt] ||= block.call(self)
+    end
+  end
+
+  #
+  # Terminates option parsing. Optional parameter +arg+ is a string pushed
+  # back to be the first non-option argument.
+  #
+  def terminate(arg = nil)
+    self.class.terminate(arg)
+  end
+  def self.terminate(arg = nil)
+    throw :terminate, arg
+  end
+
+  # NOTE(review): instance variable of the class object itself; nothing in
+  # this part of the file reads it.
+  @stack = [DefaultList]
+  def self.top() DefaultList end
+
+  #
+  # Directs to accept specified class +t+. The argument string is passed to
+  # the block in which it should be converted to the desired class.
+  #
+  # +t+:: Argument class specifier, any object including Class.
+  # +pat+:: Pattern for argument, defaults to +t+ if it responds to match.
+  #
+  # accept(t, pat, &block)
+  #
+  def accept(*args, &blk) top.accept(*args, &blk) end
+  #
+  # See #accept.
+  #
+  def self.accept(*args, &blk) top.accept(*args, &blk) end
+
+  #
+  # Directs to reject specified class argument.
+  #
+  # +t+:: Argument class specifier, any object including Class.
+  #
+  # reject(t)
+  #
+  def reject(*args, &blk) top.reject(*args, &blk) end
+  #
+  # See #reject.
+  #
+  def self.reject(*args, &blk) top.reject(*args, &blk) end
+
+  #
+  # Instance methods
+  #
+
+  # Heading banner preceding summary.
+  attr_writer :banner
+
+  # Program name to be emitted in error message and default banner,
+  # defaults to $0.
+  attr_writer :program_name
+
+  # Width for option list portion of summary. Must be Numeric.
+  attr_accessor :summary_width
+
+  # Indentation for summary. Must be String (or have + String method).
+  attr_accessor :summary_indent
+
+  # Strings to be parsed in default.
+  attr_accessor :default_argv
+
+  #
+  # Heading banner preceding summary.
+  #
+  # When no banner has been set, a default "Usage: ..." banner is built,
+  # the lists on the stack may append to it, and the result is remembered.
+  #
+  def banner
+    unless @banner
+      @banner = "Usage: #{program_name} [options]"
+      visit(:add_banner, @banner)
+    end
+    @banner
+  end
+
+  #
+  # Program name to be emitted in error message and default banner, defaults
+  # to $0.
+  #
+  # The default is the basename of $0 with its extension removed.
+  #
+  def program_name
+    @program_name || File.basename($0, '.*')
+  end
+
+  # for experimental cascading :-)
+  alias set_banner banner=
+  alias set_program_name program_name=
+  alias set_summary_width summary_width=
+  alias set_summary_indent summary_indent=
+
+  # Version
+  attr_writer :version
+  # Release code
+  attr_writer :release
+
+  #
+  # Version
+  #
+  # Falls back to the top level constant ::Version when none was assigned.
+  #
+  def version
+    @version || (defined?(::Version) && ::Version)
+  end
+
+  #
+  # Release code
+  #
+  # Falls back to the top level constants ::Release and ::RELEASE, in that
+  # order, when none was assigned.
+  #
+  def release
+    @release || (defined?(::Release) && ::Release) || (defined?(::RELEASE) && ::RELEASE)
+  end
+
+  #
+  # Returns version string from program_name, version and release.
+  #
+  # Returns nil when no version is known.
+  #
+  def ver
+    if v = version
+      str = "#{program_name} #{[v].join('.')}"
+      # +v+ is reused for the release code here.
+      str << " (#{v})" if v = release
+      str
+    end
+  end
+
+  # Like Kernel#warn, with the program name put in front of the message.
+  def warn(mesg = $!)
+    super("#{program_name}: #{mesg}")
+  end
+
+  # Like Kernel#abort, with the program name put in front of the message.
+  def abort(mesg = $!)
+    super("#{program_name}: #{mesg}")
+  end
+
+  #
+  # Subject of #on / #on_head, #accept / #reject
+  #
+  def top
+    @stack[-1]
+  end
+
+  #
+  # Subject of #on_tail.
+  #
+  def base
+    @stack[1]
+  end
+
+  #
+  # Pushes a new List.
+  #
+  # Yields +self+ and returns the block's value if a block is given,
+  # otherwise returns +self+.
+  #
+  def new
+    @stack.push(List.new)
+    if block_given?
+      yield self
+    else
+      self
+    end
+  end
+
+  #
+  # Removes the last List.
+  #
+  def remove
+    @stack.pop
+  end
+
+  #
+  # Puts option summary into +to+ and returns +to+. Yields each line if
+  # a block is given.
+  #
+  # +to+:: Output destination, which must have method <<. Defaults to [].
+ # +width+:: Width of left side, defaults to @summary_width. + # +max+:: Maximum length allowed for left side, defaults to +width+ - 1. + # +indent+:: Indentation, defaults to @summary_indent. + # + def summarize(to = [], width = @summary_width, max = width - 1, indent = @summary_indent, &blk) + blk ||= proc {|l| to << (l.index($/, -1) ? l : l + $/)} + visit(:summarize, {}, {}, width, max, indent, &blk) + to + end + + # + # Returns option summary string. + # + def help; summarize(banner.to_s.sub(/\n?\z/, "\n")) end + alias to_s help + + # + # Returns option summary list. + # + def to_a; summarize(banner.to_a.dup) end + + # + # Checks if an argument is given twice, in which case an ArgumentError is + # raised. Called from OptionParser#switch only. + # + # +obj+:: New argument. + # +prv+:: Previously specified argument. + # +msg+:: Exception message. + # + def notwice(obj, prv, msg) + unless !prv or prv == obj + raise(ArgumentError, "argument #{msg} given twice: #{obj}", + ParseError.filter_backtrace(caller(2))) + end + obj + end + private :notwice + + SPLAT_PROC = proc {|*a| a.length <= 1 ? a.first : a} + # + # Creates an OptionParser::Switch from the parameters. The parsed argument + # value is passed to the given block, where it can be processed. + # + # See at the beginning of OptionParser for some full examples. + # + # +opts+ can include the following elements: + # + # [Argument style:] + # One of the following: + # :NONE, :REQUIRED, :OPTIONAL + # + # [Argument pattern:] + # Acceptable option argument format, must be pre-defined with + # OptionParser.accept or OptionParser#accept, or Regexp. This can appear + # once or assigned as String if not present, otherwise causes an + # ArgumentError. Examples: + # Float, Time, Array + # + # [Possible argument values:] + # Hash or Array. 
+ # [:text, :binary, :auto] + # %w[iso-2022-jp shift_jis euc-jp utf8 binary] + # { "jis" => "iso-2022-jp", "sjis" => "shift_jis" } + # + # [Long style switch:] + # Specifies a long style switch which takes a mandatory, optional or no + # argument. It's a string of the following form: + # "--switch=MANDATORY" or "--switch MANDATORY" + # "--switch[=OPTIONAL]" + # "--switch" + # + # [Short style switch:] + # Specifies short style switch which takes a mandatory, optional or no + # argument. It's a string of the following form: + # "-xMANDATORY" + # "-x[OPTIONAL]" + # "-x" + # There is also a special form which matches character range (not full + # set of regular expression): + # "-[a-z]MANDATORY" + # "-[a-z][OPTIONAL]" + # "-[a-z]" + # + # [Argument style and description:] + # Instead of specifying mandatory or optional arguments directly in the + # switch parameter, this separate parameter can be used. + # "=MANDATORY" + # "=[OPTIONAL]" + # + # [Description:] + # Description string for the option. + # "Run verbosely" + # + # [Handler:] + # Handler for the parsed argument value. Either give a block or pass a + # Proc or Method as an argument. 
+  #
+  # Returns the array [switch, short, long, negated switch or nil, nolong],
+  # which is the argument list List#append and List#prepend are called with.
+  # When +opts+ names no switch and there is no handler, the first element
+  # is the array of description strings instead of a switch.
+  #
+  def make_switch(opts, block = nil)
+    short, long, nolong, style, pattern, conv, not_pattern, not_conv, not_style = [], [], []
+    ldesc, sdesc, desc, arg = [], [], []
+    default_style = Switch::NoArgument
+    default_pattern = nil
+    klass = nil
+    n, q, a = nil
+
+    opts.each do |o|
+      # argument class
+      # (+o+ is registered with #accept: remember its pattern and converter)
+      next if search(:atype, o) do |pat, c|
+        klass = notwice(o, klass, 'type')
+        if not_style and not_style != Switch::NoArgument
+          not_pattern, not_conv = pat, c
+        else
+          default_pattern, conv = pat, c
+        end
+      end
+
+      # directly specified pattern(any object possible to match)
+      if (!(String === o || Symbol === o)) and o.respond_to?(:match)
+        pattern = notwice(o, pattern, 'pattern')
+        if pattern.respond_to?(:convert)
+          conv = pattern.method(:convert).to_proc
+        else
+          conv = SPLAT_PROC
+        end
+        next
+      end
+
+      # anything others
+      case o
+      when Proc, Method
+        # handler
+        block = notwice(o, block, 'block')
+      when Array, Hash
+        # possible argument values, merged into one CompletingHash
+        case pattern
+        when CompletingHash
+        when nil
+          pattern = CompletingHash.new
+          conv = pattern.method(:convert).to_proc if pattern.respond_to?(:convert)
+        else
+          raise ArgumentError, "argument pattern given twice"
+        end
+        o.each {|pat, *v| pattern[pat] = v.fetch(0) {pat}}
+      when Module
+        raise ArgumentError, "unsupported argument type: #{o}", ParseError.filter_backtrace(caller(4))
+      when *ArgumentStyle.keys
+        style = notwice(ArgumentStyle[o], style, 'style')
+      when /^--no-([^\[\]=\s]*)(.+)?/
+        # "--no-switch": registered under both 'no-switch' and 'switch'
+        q, a = $1, $2
+        o = notwice(a ? Object : TrueClass, klass, 'type')
+        not_pattern, not_conv = search(:atype, o) unless not_style
+        not_style = (not_style || default_style).guess(arg = a) if a
+        default_style = Switch::NoArgument
+        default_pattern, conv = search(:atype, FalseClass) unless default_pattern
+        ldesc << "--no-#{q}"
+        long << 'no-' + (q = q.downcase)
+        nolong << q
+      when /^--\[no-\]([^\[\]=\s]*)(.+)?/
+        # "--[no-]switch": the negated form gets a no-argument switch
+        q, a = $1, $2
+        o = notwice(a ? Object : TrueClass, klass, 'type')
+        if a
+          default_style = default_style.guess(arg = a)
+          default_pattern, conv = search(:atype, o) unless default_pattern
+        end
+        ldesc << "--[no-]#{q}"
+        long << (o = q.downcase)
+        not_pattern, not_conv = search(:atype, FalseClass) unless not_style
+        not_style = Switch::NoArgument
+        nolong << 'no-' + o
+      when /^--([^\[\]=\s]*)(.+)?/
+        # plain long switch, optionally followed by its argument description
+        q, a = $1, $2
+        if a
+          o = notwice(NilClass, klass, 'type')
+          default_style = default_style.guess(arg = a)
+          default_pattern, conv = search(:atype, o) unless default_pattern
+        end
+        ldesc << "--#{q}"
+        long << (o = q.downcase)
+      when /^-(\[\^?\]?(?:[^\\\]]|\\.)*\])(.+)?/
+        # short switch given as a character class, stored as a Regexp
+        q, a = $1, $2
+        o = notwice(Object, klass, 'type')
+        if a
+          default_style = default_style.guess(arg = a)
+          default_pattern, conv = search(:atype, o) unless default_pattern
+        end
+        sdesc << "-#{q}"
+        short << Regexp.new(q)
+      when /^-(.)(.+)?/
+        # single character short switch
+        q, a = $1, $2
+        if a
+          o = notwice(NilClass, klass, 'type')
+          default_style = default_style.guess(arg = a)
+          default_pattern, conv = search(:atype, o) unless default_pattern
+        end
+        sdesc << "-#{q}"
+        short << q
+      when /^=/
+        # separate argument style and description
+        style = notwice(default_style.guess(arg = o), style, 'style')
+        default_pattern, conv = search(:atype, Object) unless default_pattern
+      else
+        desc.push(o)
+      end
+    end
+
+    default_pattern, conv = search(:atype, default_style.pattern) unless default_pattern
+    if !(short.empty? and long.empty?)
+      s = (style || default_style).new(pattern || default_pattern,
+                                       conv, sdesc, ldesc, arg, desc, block)
+    elsif !block
+      # Neither switch nor handler: only description text was given.
+      if style or pattern
+        raise ArgumentError, "no switch given", ParseError.filter_backtrace(caller)
+      end
+      s = desc
+    else
+      # Handler without a switch: registered under its pattern as short key.
+      short << pattern
+      s = (style || default_style).new(pattern,
+                                       conv, nil, nil, arg, desc, block)
+    end
+    return s, short, long,
+      (not_style.new(not_pattern, not_conv, sdesc, ldesc, nil, desc, block) if not_style),
+      nolong
+  end
+
+  # Builds a switch from +opts+ and +block+, appends it to the top list and
+  # returns the switch.
+  def define(*opts, &block)
+    top.append(*(sw = make_switch(opts, block)))
+    sw[0]
+  end
+
+  #
+  # Add option switch and handler. See #make_switch for an explanation of
+  # parameters.
+  #
+  def on(*opts, &block)
+    define(*opts, &block)
+    self
+  end
+  alias def_option define
+
+  # Like #define, but the switch is put at the head of the top list.
+  def define_head(*opts, &block)
+    top.prepend(*(sw = make_switch(opts, block)))
+    sw[0]
+  end
+
+  #
+  # Add option switch like with #on, but at head of summary.
+  #
+  def on_head(*opts, &block)
+    define_head(*opts, &block)
+    self
+  end
+  alias def_head_option define_head
+
+  # Like #define, but the switch is appended to the base list.
+  def define_tail(*opts, &block)
+    base.append(*(sw = make_switch(opts, block)))
+    sw[0]
+  end
+
+  #
+  # Add option switch like with #on, but at tail of summary.
+  #
+  def on_tail(*opts, &block)
+    define_tail(*opts, &block)
+    self
+  end
+  alias def_tail_option define_tail
+
+  #
+  # Add separator in summary.
+  #
+  def separator(string)
+    top.append(string, nil, nil)
+  end
+
+  #
+  # Parses command line arguments +argv+ in order. When a block is given,
+  # each non-option argument is yielded.
+  #
+  # Returns the rest of +argv+ left unparsed.
+  #
+  def order(*argv, &block)
+    # A single array argument is copied so the caller's array is untouched.
+    argv = argv[0].dup if argv.size == 1 and Array === argv[0]
+    order!(argv, &block)
+  end
+
+  #
+  # Same as #order, but removes switches destructively.
+  #
+  def order!(argv = default_argv, &nonopt)
+    parse_in_order(argv, &nonopt)
+  end
+
+  # Parses +argv+ destructively from left to right and returns it.
+  #
+  # +setter+, when given, is called with the switch name and the value of
+  # every parsed switch. +nonopt+ is called with each non-option argument;
+  # without it the first non-option argument terminates parsing. A value
+  # thrown with :terminate is pushed back onto the front of +argv+.
+  def parse_in_order(argv = default_argv, setter = nil, &nonopt)  # :nodoc:
+    opt, arg, val, rest = nil
+    nonopt ||= proc {|a| throw :terminate, a}
+    argv.unshift(arg) if arg = catch(:terminate) {
+      while arg = argv.shift
+        case arg
+        # long option
+        when /\A--([^=]*)(?:=(.*))?/nm
+          opt, rest = $1, $2
+          begin
+            sw, = complete(:long, opt, true)
+          rescue ParseError
+            raise $!.set_option(arg, true)
+          end
+          begin
+            opt, cb, val = sw.parse(rest, argv) {|*exc| raise(*exc)}
+            val = cb.call(val) if cb
+            setter.call(sw.switch_name, val) if setter
+          rescue ParseError
+            raise $!.set_option(arg, rest)
+          end
+
+        # short option
+        when /\A-(.)((=).*|.+)?/nm
+          opt, has_arg, eq, val, rest = $1, $3, $3, $2, $2
+          begin
+            sw, = search(:short, opt)
+            unless sw
+              begin
+                sw, = complete(:short, opt)
+                # short option matched.
+                val = arg.sub(/\A-/, '')
+                has_arg = true
+              rescue InvalidOption
+                # if no short options match, try completion with long
+                # options.
+                sw, = complete(:long, opt)
+                eq ||= !rest
+              end
+            end
+          rescue ParseError
+            raise $!.set_option(arg, true)
+          end
+          begin
+            opt, cb, val = sw.parse(val, argv) {|*exc| raise(*exc) if eq}
+            raise InvalidOption, arg if has_arg and !eq and arg == "-#{opt}"
+            # Text the switch did not consume goes back as a new "-..." word.
+            argv.unshift(opt) if opt and (!rest or (opt = opt.sub(/\A-*/, '-')) != '-')
+            val = cb.call(val) if cb
+            setter.call(sw.switch_name, val) if setter
+          rescue ParseError
+            raise $!.set_option(arg, arg.length > 2)
+          end
+
+        # non-option argument
+        else
+          catch(:prune) do
+            visit(:each_option) do |sw0|
+              sw = sw0
+              sw.block.call(arg) if Switch === sw and sw.match_nonswitch?(arg)
+            end
+            nonopt.call(arg)
+          end
+        end
+      end
+
+      nil
+    }
+
+    # A switch stored under the short key nil that has no pattern gets its
+    # block called with the remaining arguments.
+    visit(:search, :short, nil) {|sw| sw.block.call(*argv) if !sw.pattern}
+
+    argv
+  end
+  private :parse_in_order
+
+  #
+  # Parses command line arguments +argv+ in permutation mode and returns
+  # list of non-option arguments.
+  #
+  def permute(*argv)
+    # A single array argument is copied so the caller's array is untouched.
+    argv = argv[0].dup if argv.size == 1 and Array === argv[0]
+    permute!(argv)
+  end
+
+  #
+  # Same as #permute, but removes switches destructively.
+  #
+  def permute!(argv = default_argv)
+    nonopts = []
+    order!(argv, &nonopts.method(:<<))
+    # Put the collected non-option arguments back at the front of +argv+.
+    argv[0, 0] = nonopts
+    argv
+  end
+
+  #
+  # Parses command line arguments +argv+ in order when environment variable
+  # POSIXLY_CORRECT is set, and in permutation mode otherwise.
+  #
+  def parse(*argv)
+    argv = argv[0].dup if argv.size == 1 and Array === argv[0]
+    parse!(argv)
+  end
+
+  #
+  # Same as #parse, but removes switches destructively.
+  #
+  def parse!(argv = default_argv)
+    if ENV.include?('POSIXLY_CORRECT')
+      order!(argv)
+    else
+      permute!(argv)
+    end
+  end
+
+  #
+  # Wrapper method for getopts.rb.
+  #
+  # params = ARGV.getopts("ab:", "foo", "bar:")
+  # # params["a"] = true # -a
+  # # params["b"] = "1" # -b1
+  # # params["foo"] = "1" # --foo
+  # # params["bar"] = "x" # --bar x
+  #
+  # The result is a hash keyed by the option names as strings. Options that
+  # do not appear keep their initial value: false for flags, nil (or the
+  # text after the colon of a long option) for options taking a value.
+  #
+  def getopts(*args)
+    argv = Array === args.first ? args.shift : default_argv
+    single_options, *long_options = *args
+
+    result = {}
+
+    single_options.scan(/(.)(:)?/) do |opt, val|
+      if val
+        result[opt] = nil
+        define("-#{opt} VAL")
+      else
+        result[opt] = false
+        define("-#{opt}")
+      end
+    end if single_options
+
+    long_options.each do |arg|
+      opt, val = arg.split(':', 2)
+      if val
+        result[opt] = val.empty? ? nil : val
+        define("--#{opt} VAL")
+      else
+        result[opt] = false
+        define("--#{opt}")
+      end
+    end
+
+    parse_in_order(argv, result.method(:[]=))
+    result
+  end
+
+  #
+  # See #getopts.
+  #
+  def self.getopts(*args)
+    new.getopts(*args)
+  end
+
+  #
+  # Traverses @stack, sending each element method +id+ with +args+ and
+  # +block+.
+  #
+  # The stack is walked from its last element to its first. Returns nil.
+  #
+  def visit(id, *args, &block)
+    @stack.reverse_each do |el|
+      el.send(id, *args, &block)
+    end
+    nil
+  end
+  private :visit
+
+  #
+  # Searches +key+ in @stack for +id+ hash and returns or yields the result.
+  #
+  def search(id, key)
+    # Remembered here because block_given? inside the block below would ask
+    # about a different scope.
+    block_given = block_given?
+    visit(:search, id, key) do |k|
+      return block_given ? yield(k) : k
+    end
+  end
+  private :search
+
+  #
+  # Completes shortened long style option switch and returns pair of
+  # canonical switch and switch descriptor OptionParser::Switch.
+  #
+  # +typ+:: Searching table.
+  # +opt+:: Searching key.
+  # +icase+:: Search case insensitive if true.
+  # +pat+:: Optional pattern for completion.
+  #
+  # Raises AmbiguousOption when :ambiguous is thrown during completion and
+  # InvalidOption when nothing matches.
+  #
+  def complete(typ, opt, icase = false, *pat)
+    if pat.empty?
+      search(typ, opt) {|sw| return [sw, opt]} # exact match or...
+    end
+    raise AmbiguousOption, catch(:ambiguous) {
+      visit(:complete, typ, opt, icase, *pat) {|o, *sw| return sw}
+      raise InvalidOption, opt
+    }
+  end
+  private :complete
+
+  #
+  # Loads options from file names as +filename+. Does nothing when the file
+  # is not present. Returns whether successfully loaded.
+  #
+  # +filename+ defaults to basename of the program without suffix in a
+  # directory ~/.options.
+  #
+  # Every line of the file, with its line end removed, is parsed as one
+  # argument.
+  #
+  def load(filename = nil)
+    begin
+      filename ||= File.expand_path(File.basename($0, '.*'), '~/.options')
+    rescue
+      return false
+    end
+    begin
+      parse(*IO.readlines(filename).each {|s| s.chomp!})
+      true
+    rescue Errno::ENOENT, Errno::ENOTDIR
+      false
+    end
+  end
+
+  #
+  # Parses environment variable +env+ or its uppercase with splitting like a
+  # shell.
+  #
+  # +env+ defaults to the basename of the program.
+  #
+  # Returns nil without parsing when neither variable is set.
+  #
+  def environment(env = File.basename($0, '.*'))
+    env = ENV[env] || ENV[env.upcase] or return
+    require 'shellwords'
+    parse(*Shellwords.shellwords(env))
+  end
+
+  #
+  # Acceptable argument classes
+  #
+
+  #
+  # Any string and no conversion. This is fall-back.
+  #
+  accept(Object) {|s,|s or s.nil?}
+
+  accept(NilClass) {|s,|s}
+
+  #
+  # Any non-empty string, and no conversion.
+  #
+  accept(String, /.+/nm) {|s,*|s}
+
+  #
+  # Ruby/C-like integer, octal for 0-7 sequence, binary for 0b, hexadecimal
+  # for 0x, and decimal for others; with optional sign prefix. Converts to
+  # Integer.
+ # + decimal = '\d+(?:_\d+)*' + binary = 'b[01]+(?:_[01]+)*' + hex = 'x[\da-f]+(?:_[\da-f]+)*' + octal = "0(?:[0-7]*(?:_[0-7]+)*|#{binary}|#{hex})" + integer = "#{octal}|#{decimal}" + accept(Integer, %r"\A[-+]?(?:#{integer})"io) {|s,| Integer(s) if s} + + # + # Float number format, and converts to Float. + # + float = "(?:#{decimal}(?:\\.(?:#{decimal})?)?|\\.#{decimal})(?:E[-+]?#{decimal})?" + floatpat = %r"\A[-+]?#{float}"io + accept(Float, floatpat) {|s,| s.to_f if s} + + # + # Generic numeric format, converts to Integer for integer format, Float + # for float format. + # + accept(Numeric, %r"\A[-+]?(?:#{octal}|#{float})"io) {|s,| eval(s) if s} + + # + # Decimal integer format, to be converted to Integer. + # + DecimalInteger = /\A[-+]?#{decimal}/io + accept(DecimalInteger) {|s,| s.to_i if s} + + # + # Ruby/C like octal/hexadecimal/binary integer format, to be converted to + # Integer. + # + OctalInteger = /\A[-+]?(?:[0-7]+(?:_[0-7]+)*|0(?:#{binary}|#{hex}))/io + accept(OctalInteger) {|s,| s.oct if s} + + # + # Decimal integer/float number format, to be converted to Integer for + # integer format, Float for float format. + # + DecimalNumeric = floatpat # decimal integer is allowed as float also. + accept(DecimalNumeric) {|s,| eval(s) if s} + + # + # Boolean switch, which means whether it is present or not, whether it is + # absent or not with prefix no-, or it takes an argument + # yes/no/true/false/+/-. + # + yesno = CompletingHash.new + %w[- no false].each {|el| yesno[el] = false} + %w[+ yes true].each {|el| yesno[el] = true} + yesno['nil'] = false # shoud be nil? + accept(TrueClass, yesno) {|arg, val| val == nil or val} + # + # Similar to TrueClass, but defaults to false. + # + accept(FalseClass, yesno) {|arg, val| val != nil and val} + + # + # List of strings separated by ",". + # + accept(Array) do |s,| + if s + s = s.split(',').collect {|ss| ss unless ss.empty?} + end + s + end + + # + # Regular expression with options. 
+ # + accept(Regexp, %r"\A/((?:\\.|[^\\])*)/([[:alpha:]]+)?\z|.*") do |all, s, o| + f = 0 + if o + f |= Regexp::IGNORECASE if /i/ =~ o + f |= Regexp::MULTILINE if /m/ =~ o + f |= Regexp::EXTENDED if /x/ =~ o + k = o.delete("^imx") + end + Regexp.new(s || all, f, k) + end + + # + # Exceptions + # + + # + # Base class of exceptions from OptionParser. + # + class ParseError < RuntimeError + # Reason which caused the error. + Reason = 'parse error'.freeze + + def initialize(*args) + @args = args + @reason = nil + end + + attr_reader :args + attr_writer :reason + + # + # Pushes back erred argument(s) to +argv+. + # + def recover(argv) + argv[0, 0] = @args + argv + end + + def self.filter_backtrace(array) + unless $DEBUG + array.delete_if(&%r"\A#{Regexp.quote(__FILE__)}:"o.method(:=~)) + end + array + end + + def set_backtrace(array) + super(self.class.filter_backtrace(array)) + end + + def set_option(opt, eq) + if eq + @args[0] = opt + else + @args.unshift(opt) + end + self + end + + # + # Returns error reason. Override this for I18N. + # + def reason + @reason || self.class::Reason + end + + def inspect + "#<#{self.class.to_s}: #{args.join(' ')}>" + end + + # + # Default stringizing method to emit standard error message. + # + def message + reason + ': ' + args.join(' ') + end + + alias to_s message + end + + # + # Raises when ambiguously completable string is encountered. + # + class AmbiguousOption < ParseError + const_set(:Reason, 'ambiguous option'.freeze) + end + + # + # Raises when there is an argument for a switch which takes no argument. + # + class NeedlessArgument < ParseError + const_set(:Reason, 'needless argument'.freeze) + end + + # + # Raises when a switch with mandatory argument has no argument. + # + class MissingArgument < ParseError + const_set(:Reason, 'missing argument'.freeze) + end + + # + # Raises when switch is undefined. 
+ # + class InvalidOption < ParseError + const_set(:Reason, 'invalid option'.freeze) + end + + # + # Raises when the given argument does not match required format. + # + class InvalidArgument < ParseError + const_set(:Reason, 'invalid argument'.freeze) + end + + # + # Raises when the given argument word can't be completed uniquely. + # + class AmbiguousArgument < InvalidArgument + const_set(:Reason, 'ambiguous argument'.freeze) + end + + # + # Miscellaneous + # + + # + # Extends command line arguments array (ARGV) to parse itself. + # + module Arguable + + # + # Sets OptionParser object, when +opt+ is +false+ or +nil+, methods + # OptionParser::Arguable#options and OptionParser::Arguable#options= are + # undefined. Thus, there is no ways to access the OptionParser object + # via the receiver object. + # + def options=(opt) + unless @optparse = opt + class << self + undef_method(:options) + undef_method(:options=) + end + end + end + + # + # Actual OptionParser object, automatically created if nonexistent. + # + # If called with a block, yields the OptionParser object and returns the + # result of the block. If an OptionParser::ParseError exception occurs + # in the block, it is rescued, a error message printed to STDERR and + # +nil+ returned. + # + def options + @optparse ||= OptionParser.new + @optparse.default_argv = self + block_given? or return @optparse + begin + yield @optparse + rescue ParseError + @optparse.warn $! + nil + end + end + + # + # Parses +self+ destructively in order and returns +self+ containing the + # rest arguments left unparsed. + # + def order!(&blk) options.order!(self, &blk) end + + # + # Parses +self+ destructively in permutation mode and returns +self+ + # containing the rest arguments left unparsed. + # + def permute!() options.permute!(self) end + + # + # Parses +self+ destructively and returns +self+ containing the + # rest arguments left unparsed. 
+    #
+    def parse!() options.parse!(self) end
+
+    #
+    # Substitution of getopts is possible as follows. Also see
+    # OptionParser#getopts.
+    #
+    # def getopts(*args)
+    # ($OPT = ARGV.getopts(*args)).each do |opt, val|
+    # eval "$OPT_#{opt.gsub(/[^A-Za-z0-9_]/, '_')} = val"
+    # end
+    # rescue OptionParser::ParseError
+    # end
+    #
+    # Passes the receiver itself, followed by +args+, to
+    # OptionParser#getopts.
+    #
+    def getopts(*args)
+      options.getopts(self, *args)
+    end
+
+    #
+    # Initializes instance variable.
+    #
+    # Both hooks set @optparse to nil: on an object being extended with this
+    # module and on a newly initialized instance.
+    #
+    def self.extend_object(obj)
+      super
+      obj.instance_eval {@optparse = nil}
+    end
+    def initialize(*args)
+      super
+      @optparse = nil
+    end
+  end
+
+  #
+  # Acceptable argument classes. Now contains DecimalInteger, OctalInteger
+  # and DecimalNumeric. See Acceptable argument classes (in source code).
+  #
+  module Acceptables
+    const_set(:DecimalInteger, OptionParser::DecimalInteger)
+    const_set(:OctalInteger, OptionParser::OctalInteger)
+    const_set(:DecimalNumeric, OptionParser::DecimalNumeric)
+  end
+end
+
+# ARGV is arguable by OptionParser
+ARGV.extend(OptionParser::Arguable)
+
+if $0 == __FILE__
+  Version = OptionParser::Version
+  ARGV.options {|q|
+    q.parse!.empty? or puts "what's #{ARGV.join(' ')}?"
+  } or abort(ARGV.options.to_s)
+end
diff --git a/lib/optparse/date.rb b/lib/optparse/date.rb
new file mode 100644
index 0000000..d680559
--- /dev/null
+++ b/lib/optparse/date.rb
@@ -0,0 +1,17 @@
+require 'optparse'
+require 'date'
+
+OptionParser.accept(DateTime) do |s,|
+  begin
+    DateTime.parse(s) if s
+  rescue ArgumentError
+    raise OptionParser::InvalidArgument, s
+  end
+end
+OptionParser.accept(Date) do |s,|
+  begin
+    Date.parse(s) if s
+  rescue ArgumentError
+    raise OptionParser::InvalidArgument, s
+  end
+end
diff --git a/lib/optparse/shellwords.rb b/lib/optparse/shellwords.rb
new file mode 100644
index 0000000..0422d7c
--- /dev/null
+++ b/lib/optparse/shellwords.rb
@@ -0,0 +1,6 @@
+# -*- ruby -*-
+
+require 'shellwords'
+require 'optparse'
+
+OptionParser.accept(Shellwords) {|s,| Shellwords.shellwords(s)}
diff --git a/lib/optparse/time.rb b/lib/optparse/time.rb
new file mode 100644
index 0000000..402cadc
--- /dev/null
+++ b/lib/optparse/time.rb
@@ -0,0 +1,10 @@
+require 'optparse'
+require 'time'
+
+OptionParser.accept(Time) do |s,|
+  begin
+    (Time.httpdate(s) rescue Time.parse(s)) if s
+  rescue
+    raise OptionParser::InvalidArgument, s
+  end
+end
diff --git a/lib/optparse/uri.rb b/lib/optparse/uri.rb
new file mode 100644
index 0000000..024dc69
--- /dev/null
+++ b/lib/optparse/uri.rb
@@ -0,0 +1,6 @@
+# -*- ruby -*-
+
+require 'optparse'
+require 'uri'
+
+OptionParser.accept(URI) {|s,| URI.parse(s) if s}
diff --git a/lib/optparse/version.rb b/lib/optparse/version.rb
new file mode 100644
index 0000000..76ed564
--- /dev/null
+++ b/lib/optparse/version.rb
@@ -0,0 +1,70 @@
+# OptionParser internal utility
+
+class << OptionParser
+  def show_version(*pkgs)
+    progname = ARGV.options.program_name
+    result = false
+    show = proc do |klass, cname, version|
+      str = "#{progname}"
+      unless klass == ::Object and cname == :VERSION
+        version = version.join(".") if Array === version
+        str << ": #{klass}" unless klass == Object
+        str << " version #{version}"
+      end
+      [:Release, :RELEASE].find do |rel|
+        if klass.const_defined?(rel)
+          str << " (#{klass.const_get(rel)})"
+        end
+      end
+      puts str
+      result = true
+    end
+    if pkgs.size == 1 and pkgs[0] == "all"
+      self.search_const(::Object, /\AV(?:ERSION|ersion)\z/) do |klass, cname, version|
+        unless cname[1] == ?e and klass.const_defined?(:Version)
+          show.call(klass, cname.intern, version)
+        end
+      end
+    else
+      pkgs.each do |pkg|
+        begin
+          pkg = pkg.split(/::|\//).inject(::Object) {|m, c| m.const_get(c)}
+          v = case
+              when pkg.const_defined?(:Version)
+                pkg.const_get(n = :Version)
+              when pkg.const_defined?(:VERSION)
+                pkg.const_get(n = :VERSION)
+              else
+                n = nil
+                "unknown"
+              end
+          show.call(pkg, n, v)
+        rescue NameError
+        end
+      end
+    end
+    result
+  end
+
+  def each_const(path, base = ::Object)
+    path.split(/::|\//).inject(base) do |klass, name|
+      raise NameError, path unless Module === klass
+      klass.constants.grep(/#{name}/i) do |c|
+        klass.const_defined?(c) or next
+        c = klass.const_get(c)
+      end
+    end
+  end
+
+  def search_const(klass, name)
+    klasses = [klass]
+    while klass = klasses.shift
+      klass.constants.each do |cname|
+        klass.const_defined?(cname) or next
+        const = klass.const_get(cname)
+        yield klass, cname, const if name === cname
+        klasses << const if Module === const and const != ::Object
+      end
+    end
+  end
+end
diff --git a/lib/ostruct.rb b/lib/ostruct.rb
new file mode 100644
index 0000000..35a14b4
--- /dev/null
+++ b/lib/ostruct.rb
@@ -0,0 +1,145 @@
+#
+# = ostruct.rb: OpenStruct implementation
+#
+# Author:: Yukihiro Matsumoto
+# Documentation:: Gavin Sinclair
+#
+# OpenStruct allows the creation of data objects with arbitrary attributes.
+# See OpenStruct for an example.
+#
+
+#
+# OpenStruct allows you to create data objects and set arbitrary attributes.
+# For example: +# +# require 'ostruct' +# +# record = OpenStruct.new +# record.name = "John Smith" +# record.age = 70 +# record.pension = 300 +# +# puts record.name # -> "John Smith" +# puts record.address # -> nil +# +# It is like a hash with a different way to access the data. In fact, it is +# implemented with a hash, and you can initialize it with one. +# +# hash = { "country" => "Australia", :population => 20_000_000 } +# data = OpenStruct.new(hash) +# +# p data # -> +# +class OpenStruct + # + # Create a new OpenStruct object. The optional +hash+, if given, will + # generate attributes and values. For example. + # + # require 'ostruct' + # hash = { "country" => "Australia", :population => 20_000_000 } + # data = OpenStruct.new(hash) + # + # p data # -> + # + # By default, the resulting OpenStruct object will have no attributes. + # + def initialize(hash=nil) + @table = {} + if hash + for k,v in hash + @table[k.to_sym] = v + new_ostruct_member(k) + end + end + end + + # Duplicate an OpenStruct object members. + def initialize_copy(orig) + super + @table = @table.dup + end + + def marshal_dump + @table + end + def marshal_load(x) + @table = x + @table.each_key{|key| new_ostruct_member(key)} + end + + def new_ostruct_member(name) + name = name.to_sym + unless self.respond_to?(name) + class << self; self; end.class_eval do + define_method(name) { @table[name] } + define_method(:"#{name}=") { |x| @table[name] = x } + end + end + end + + def method_missing(mid, *args) # :nodoc: + mname = mid.id2name + len = args.length + if mname =~ /=$/ + if len != 1 + raise ArgumentError, "wrong number of arguments (#{len} for 1)", caller(1) + end + if self.frozen? + raise TypeError, "can't modify frozen #{self.class}", caller(1) + end + mname.chop! 
+ self.new_ostruct_member(mname) + @table[mname.intern] = args[0] + elsif len == 0 + @table[mid] + else + raise NoMethodError, "undefined method `#{mname}' for #{self}", caller(1) + end + end + + # + # Remove the named field from the object. + # + def delete_field(name) + @table.delete name.to_sym + end + + InspectKey = :__inspect_key__ # :nodoc: + + # + # Returns a string containing a detailed summary of the keys and values. + # + def inspect + str = "#<#{self.class}" + + ids = (Thread.current[InspectKey] ||= []) + if ids.include?(object_id) + return str << ' ...>' + end + + ids << object_id + begin + first = true + for k,v in @table + str << "," unless first + first = false + str << " #{k}=#{v.inspect}" + end + return str << '>' + ensure + ids.pop + end + end + alias :to_s :inspect + + attr_reader :table # :nodoc: + protected :table + + # + # Compare this object and +other+ for equality. + # + def ==(other) + return false unless(other.kind_of?(OpenStruct)) + return @table == other.table + end +end diff --git a/lib/pathname.rb b/lib/pathname.rb new file mode 100644 index 0000000..fbb42a6 --- /dev/null +++ b/lib/pathname.rb @@ -0,0 +1,1099 @@ +# +# = pathname.rb +# +# Object-Oriented Pathname Class +# +# Author:: Tanaka Akira +# Documentation:: Author and Gavin Sinclair +# +# For documentation, see class Pathname. +# +# pathname.rb is distributed with Ruby since 1.8.0. +# + +# +# == Pathname +# +# Pathname represents a pathname which locates a file in a filesystem. +# The pathname depends on OS: Unix, Windows, etc. +# Pathname library works with pathnames of local OS. +# However non-Unix pathnames are supported experimentally. +# +# It does not represent the file itself. +# A Pathname can be relative or absolute. It's not until you try to +# reference the file that it even matters whether the file exists or not. +# +# Pathname is immutable. It has no method for destructive update. 
+# +# The value of this class is to manipulate file path information in a neater +# way than standard Ruby provides. The examples below demonstrate the +# difference. *All* functionality from File, FileTest, and some from Dir and +# FileUtils is included, in an unsurprising way. It is essentially a facade for +# all of these, and more. +# +# == Examples +# +# === Example 1: Using Pathname +# +# require 'pathname' +# p = Pathname.new("/usr/bin/ruby") +# size = p.size # 27662 +# isdir = p.directory? # false +# dir = p.dirname # Pathname:/usr/bin +# base = p.basename # Pathname:ruby +# dir, base = p.split # [Pathname:/usr/bin, Pathname:ruby] +# data = p.read +# p.open { |f| _ } +# p.each_line { |line| _ } +# +# === Example 2: Using standard Ruby +# +# p = "/usr/bin/ruby" +# size = File.size(p) # 27662 +# isdir = File.directory?(p) # false +# dir = File.dirname(p) # "/usr/bin" +# base = File.basename(p) # "ruby" +# dir, base = File.split(p) # ["/usr/bin", "ruby"] +# data = File.read(p) +# File.open(p) { |f| _ } +# File.foreach(p) { |line| _ } +# +# === Example 3: Special features +# +# p1 = Pathname.new("/usr/lib") # Pathname:/usr/lib +# p2 = p1 + "ruby/1.8" # Pathname:/usr/lib/ruby/1.8 +# p3 = p1.parent # Pathname:/usr +# p4 = p2.relative_path_from(p3) # Pathname:lib/ruby/1.8 +# pwd = Pathname.pwd # Pathname:/home/gavin +# pwd.absolute? # true +# p5 = Pathname.new "." # Pathname:. +# p5 = p5 + "music/../articles" # Pathname:music/../articles +# p5.cleanpath # Pathname:articles +# p5.realpath # Pathname:/home/gavin/articles +# p5.children # [Pathname:/home/gavin/articles/linux, ...] +# +# == Breakdown of functionality +# +# === Core methods +# +# These methods are effectively manipulating a String, because that's all a path +# is. Except for #mountpoint?, #children, and #realpath, they don't access the +# filesystem. +# +# - + +# - #join +# - #parent +# - #root? +# - #absolute? +# - #relative? 
+# - #relative_path_from +# - #each_filename +# - #cleanpath +# - #realpath +# - #children +# - #mountpoint? +# +# === File status predicate methods +# +# These methods are a facade for FileTest: +# - #blockdev? +# - #chardev? +# - #directory? +# - #executable? +# - #executable_real? +# - #exist? +# - #file? +# - #grpowned? +# - #owned? +# - #pipe? +# - #readable? +# - #world_readable? +# - #readable_real? +# - #setgid? +# - #setuid? +# - #size +# - #size? +# - #socket? +# - #sticky? +# - #symlink? +# - #writable? +# - #world_writable? +# - #writable_real? +# - #zero? +# +# === File property and manipulation methods +# +# These methods are a facade for File: +# - #atime +# - #ctime +# - #mtime +# - #chmod(mode) +# - #lchmod(mode) +# - #chown(owner, group) +# - #lchown(owner, group) +# - #fnmatch(pattern, *args) +# - #fnmatch?(pattern, *args) +# - #ftype +# - #make_link(old) +# - #open(*args, &block) +# - #readlink +# - #rename(to) +# - #stat +# - #lstat +# - #make_symlink(old) +# - #truncate(length) +# - #utime(atime, mtime) +# - #basename(*args) +# - #dirname +# - #extname +# - #expand_path(*args) +# - #split +# +# === Directory methods +# +# These methods are a facade for Dir: +# - Pathname.glob(*args) +# - Pathname.getwd / Pathname.pwd +# - #rmdir +# - #entries +# - #each_entry(&block) +# - #mkdir(*args) +# - #opendir(*args) +# +# === IO +# +# These methods are a facade for IO: +# - #each_line(*args, &block) +# - #read(*args) +# - #readlines(*args) +# - #sysopen(*args) +# +# === Utilities +# +# These methods are a mixture of Find, FileUtils, and others: +# - #find(&block) +# - #mkpath +# - #rmtree +# - #unlink / #delete +# +# +# == Method documentation +# +# As the above section shows, most of the methods in Pathname are facades. The +# documentation for these methods generally just says, for instance, "See +# FileTest.writable?", as you should be familiar with the original method +# anyway, and its documentation (e.g. 
through +ri+) will contain more +# information. In some cases, a brief description will follow. +# +class Pathname + + # :stopdoc: + if RUBY_VERSION < "1.9" + TO_PATH = :to_str + else + # to_path is implemented so Pathname objects are usable with File.open, etc. + TO_PATH = :to_path + end + + SAME_PATHS = if File::FNM_SYSCASE + proc {|a, b| a.casecmp(b).zero?} + else + proc {|a, b| a == b} + end + + # :startdoc: + + # + # Create a Pathname object from the given String (or String-like object). + # If +path+ contains a NUL character (\0), an ArgumentError is raised. + # + def initialize(path) + path = path.__send__(TO_PATH) if path.respond_to? TO_PATH + @path = path.dup + + if /\0/ =~ @path + raise ArgumentError, "pathname contains \\0: #{@path.inspect}" + end + + self.taint if @path.tainted? + end + + def freeze() super; @path.freeze; self end + def taint() super; @path.taint; self end + def untaint() super; @path.untaint; self end + + # + # Compare this pathname with +other+. The comparison is string-based. + # Be aware that two different paths (foo.txt and ./foo.txt) + # can refer to the same file. + # + def ==(other) + return false unless Pathname === other + other.to_s == @path + end + alias === == + alias eql? == + + # Provides for comparing pathnames, case-sensitively. + def <=>(other) + return nil unless Pathname === other + @path.tr('/', "\0") <=> other.to_s.tr('/', "\0") + end + + def hash # :nodoc: + @path.hash + end + + # Return the path as a String. + def to_s + @path.dup + end + + # to_path is implemented so Pathname objects are usable with File.open, etc. + alias_method TO_PATH, :to_s + + def inspect # :nodoc: + "#<#{self.class}:#{@path}>" + end + + # Return a pathname which is substituted by String#sub. 
+ def sub(pattern, *rest, &block) + if block + path = @path.sub(pattern, *rest) {|*args| + begin + old = Thread.current[:pathname_sub_matchdata] + Thread.current[:pathname_sub_matchdata] = $~ + eval("$~ = Thread.current[:pathname_sub_matchdata]", block.binding) + ensure + Thread.current[:pathname_sub_matchdata] = old + end + yield *args + } + else + path = @path.sub(pattern, *rest) + end + self.class.new(path) + end + + if File::ALT_SEPARATOR + SEPARATOR_LIST = "#{Regexp.quote File::ALT_SEPARATOR}#{Regexp.quote File::SEPARATOR}" + SEPARATOR_PAT = /[#{SEPARATOR_LIST}]/ + else + SEPARATOR_LIST = "#{Regexp.quote File::SEPARATOR}" + SEPARATOR_PAT = /#{Regexp.quote File::SEPARATOR}/ + end + + # Return a pathname which the extension of the basename is substituted by + # repl. + # + # If self has no extension part, repl is appended. + def sub_ext(repl) + ext = File.extname(@path) + self.class.new(@path.chomp(ext) + repl) + end + + # chop_basename(path) -> [pre-basename, basename] or nil + def chop_basename(path) + base = File.basename(path) + if /\A#{SEPARATOR_PAT}?\z/ =~ base + return nil + else + return path[0, path.rindex(base)], base + end + end + private :chop_basename + + # split_names(path) -> prefix, [name, ...] + def split_names(path) + names = [] + while r = chop_basename(path) + path, basename = r + names.unshift basename + end + return path, names + end + private :split_names + + def prepend_prefix(prefix, relpath) + if relpath.empty? + File.dirname(prefix) + elsif /#{SEPARATOR_PAT}/ =~ prefix + prefix = File.dirname(prefix) + prefix = File.join(prefix, "") if File.basename(prefix + 'a') != 'a' + prefix + relpath + else + prefix + relpath + end + end + private :prepend_prefix + + # Returns clean pathname of +self+ with consecutive slashes and useless dots + # removed. The filesystem is not accessed. + # + # If +consider_symlink+ is +true+, then a more conservative algorithm is used + # to avoid breaking symbolic linkages. This may retain more .. 
+ # entries than absolutely necessary, but without accessing the filesystem, + # this can't be avoided. See #realpath. + # + def cleanpath(consider_symlink=false) + if consider_symlink + cleanpath_conservative + else + cleanpath_aggressive + end + end + + # + # Clean the path simply by resolving and removing excess "." and ".." entries. + # Nothing more, nothing less. + # + def cleanpath_aggressive + path = @path + names = [] + pre = path + while r = chop_basename(pre) + pre, base = r + case base + when '.' + when '..' + names.unshift base + else + if names[0] == '..' + names.shift + else + names.unshift base + end + end + end + if /#{SEPARATOR_PAT}/o =~ File.basename(pre) + names.shift while names[0] == '..' + end + self.class.new(prepend_prefix(pre, File.join(*names))) + end + private :cleanpath_aggressive + + # has_trailing_separator?(path) -> bool + def has_trailing_separator?(path) + if r = chop_basename(path) + pre, basename = r + pre.length + basename.length < path.length + else + false + end + end + private :has_trailing_separator? + + # add_trailing_separator(path) -> path + def add_trailing_separator(path) + if File.basename(path + 'a') == 'a' + path + else + File.join(path, "") # xxx: Is File.join is appropriate to add separator? + end + end + private :add_trailing_separator + + def del_trailing_separator(path) + if r = chop_basename(path) + pre, basename = r + pre + basename + elsif /#{SEPARATOR_PAT}+\z/o =~ path + $` + File.dirname(path)[/#{SEPARATOR_PAT}*\z/o] + else + path + end + end + private :del_trailing_separator + + def cleanpath_conservative + path = @path + names = [] + pre = path + while r = chop_basename(pre) + pre, base = r + names.unshift base if base != '.' + end + if /#{SEPARATOR_PAT}/o =~ File.basename(pre) + names.shift while names[0] == '..' + end + if names.empty? + self.class.new(File.dirname(pre)) + else + if names.last != '..' && File.basename(path) == '.' + names << '.' 
+ end + result = prepend_prefix(pre, File.join(*names)) + if /\A(?:\.|\.\.)\z/ !~ names.last && has_trailing_separator?(path) + self.class.new(add_trailing_separator(result)) + else + self.class.new(result) + end + end + end + private :cleanpath_conservative + + def realpath_rec(prefix, unresolved, h) + resolved = [] + until unresolved.empty? + n = unresolved.shift + if n == '.' + next + elsif n == '..' + resolved.pop + else + path = prepend_prefix(prefix, File.join(*(resolved + [n]))) + if h.include? path + if h[path] == :resolving + raise Errno::ELOOP.new(path) + else + prefix, *resolved = h[path] + end + else + s = File.lstat(path) + if s.symlink? + h[path] = :resolving + link_prefix, link_names = split_names(File.readlink(path)) + if link_prefix == '' + prefix, *resolved = h[path] = realpath_rec(prefix, resolved + link_names, h) + else + prefix, *resolved = h[path] = realpath_rec(link_prefix, link_names, h) + end + else + resolved << n + h[path] = [prefix, *resolved] + end + end + end + end + return prefix, *resolved + end + private :realpath_rec + + # + # Returns a real (absolute) pathname of +self+ in the actual filesystem. + # The real pathname doesn't contain symlinks or useless dots. + # + # No arguments should be given; the old behaviour is *obsoleted*. + # + def realpath + path = @path + prefix, names = split_names(path) + if prefix == '' + prefix, names2 = split_names(Dir.pwd) + names = names2 + names + end + prefix, *names = realpath_rec(prefix, names, {}) + self.class.new(prepend_prefix(prefix, File.join(*names))) + end + + # #parent returns the parent directory. + # + # This is same as self + '..'. + def parent + self + '..' + end + + # #mountpoint? returns +true+ if self points to a mountpoint. + def mountpoint? + begin + stat1 = self.lstat + stat2 = self.parent.lstat + stat1.dev == stat2.dev && stat1.ino == stat2.ino || + stat1.dev != stat2.dev + rescue Errno::ENOENT + false + end + end + + # + # #root? is a predicate for root directories. I.e. 
it returns +true+ if the + # pathname consists of consecutive slashes. + # + # It doesn't access actual filesystem. So it may return +false+ for some + # pathnames which points to roots such as /usr/... + # + def root? + !!(chop_basename(@path) == nil && /#{SEPARATOR_PAT}/o =~ @path) + end + + # Predicate method for testing whether a path is absolute. + # It returns +true+ if the pathname begins with a slash. + def absolute? + !relative? + end + + # The opposite of #absolute? + def relative? + path = @path + while r = chop_basename(path) + path, basename = r + end + path == '' + end + + # + # Iterates over each component of the path. + # + # Pathname.new("/usr/bin/ruby").each_filename {|filename| ... } + # # yields "usr", "bin", and "ruby". + # + def each_filename # :yield: filename + return to_enum(__method__) unless block_given? + prefix, names = split_names(@path) + names.each {|filename| yield filename } + nil + end + + # Iterates over and yields a new Pathname object + # for each element in the given path in descending order. + # + # Pathname.new('/path/to/some/file.rb').descend {|v| p v} + # # + # # + # # + # # + # # + # + # Pathname.new('path/to/some/file.rb').descend {|v| p v} + # # + # # + # # + # # + # + # It doesn't access actual filesystem. + # + # This method is available since 1.8.5. + # + def descend + vs = [] + ascend {|v| vs << v } + vs.reverse_each {|v| yield v } + nil + end + + # Iterates over and yields a new Pathname object + # for each element in the given path in ascending order. + # + # Pathname.new('/path/to/some/file.rb').ascend {|v| p v} + # # + # # + # # + # # + # # + # + # Pathname.new('path/to/some/file.rb').ascend {|v| p v} + # # + # # + # # + # # + # + # It doesn't access actual filesystem. + # + # This method is available since 1.8.5. + # + def ascend + path = @path + yield self + while r = chop_basename(path) + path, name = r + break if path.empty? 
+ yield self.class.new(del_trailing_separator(path)) + end + end + + # + # Pathname#+ appends a pathname fragment to this one to produce a new Pathname + # object. + # + # p1 = Pathname.new("/usr") # Pathname:/usr + # p2 = p1 + "bin/ruby" # Pathname:/usr/bin/ruby + # p3 = p1 + "/etc/passwd" # Pathname:/etc/passwd + # + # This method doesn't access the file system; it is pure string manipulation. + # + def +(other) + other = Pathname.new(other) unless Pathname === other + Pathname.new(plus(@path, other.to_s)) + end + + def plus(path1, path2) # -> path + prefix2 = path2 + index_list2 = [] + basename_list2 = [] + while r2 = chop_basename(prefix2) + prefix2, basename2 = r2 + index_list2.unshift prefix2.length + basename_list2.unshift basename2 + end + return path2 if prefix2 != '' + prefix1 = path1 + while true + while !basename_list2.empty? && basename_list2.first == '.' + index_list2.shift + basename_list2.shift + end + break unless r1 = chop_basename(prefix1) + prefix1, basename1 = r1 + next if basename1 == '.' + if basename1 == '..' || basename_list2.empty? || basename_list2.first != '..' + prefix1 = prefix1 + basename1 + break + end + index_list2.shift + basename_list2.shift + end + r1 = chop_basename(prefix1) + if !r1 && /#{SEPARATOR_PAT}/o =~ File.basename(prefix1) + while !basename_list2.empty? && basename_list2.first == '..' + index_list2.shift + basename_list2.shift + end + end + if !basename_list2.empty? + suffix2 = path2[index_list2.first..-1] + r1 ? File.join(prefix1, suffix2) : prefix1 + suffix2 + else + r1 ? prefix1 : File.dirname(prefix1) + end + end + private :plus + + # + # Pathname#join joins pathnames. + # + # path0.join(path1, ..., pathN) is the same as + # path0 + path1 + ... + pathN. + # + def join(*args) + args.unshift self + result = args.pop + result = Pathname.new(result) unless Pathname === result + return result if result.absolute? 
+ args.reverse_each {|arg| + arg = Pathname.new(arg) unless Pathname === arg + result = arg + result + return result if result.absolute? + } + result + end + + # + # Returns the children of the directory (files and subdirectories, not + # recursive) as an array of Pathname objects. By default, the returned + # pathnames will have enough information to access the files. If you set + # +with_directory+ to +false+, then the returned pathnames will contain the + # filename only. + # + # For example: + # p = Pathname("/usr/lib/ruby/1.8") + # p.children + # # -> [ Pathname:/usr/lib/ruby/1.8/English.rb, + # Pathname:/usr/lib/ruby/1.8/Env.rb, + # Pathname:/usr/lib/ruby/1.8/abbrev.rb, ... ] + # p.children(false) + # # -> [ Pathname:English.rb, Pathname:Env.rb, Pathname:abbrev.rb, ... ] + # + # Note that the result never contain the entries . and .. in + # the directory because they are not children. + # + # This method has existed since 1.8.1. + # + def children(with_directory=true) + with_directory = false if @path == '.' + result = [] + Dir.foreach(@path) {|e| + next if e == '.' || e == '..' + if with_directory + result << self.class.new(File.join(@path, e)) + else + result << self.class.new(e) + end + } + result + end + + # + # #relative_path_from returns a relative path from the argument to the + # receiver. If +self+ is absolute, the argument must be absolute too. If + # +self+ is relative, the argument must be relative too. + # + # #relative_path_from doesn't access the filesystem. It assumes no symlinks. + # + # ArgumentError is raised when it cannot find a relative path. + # + # This method has existed since 1.8.1. + # + def relative_path_from(base_directory) + dest_directory = self.cleanpath.to_s + base_directory = base_directory.cleanpath.to_s + dest_prefix = dest_directory + dest_names = [] + while r = chop_basename(dest_prefix) + dest_prefix, basename = r + dest_names.unshift basename if basename != '.' 
+ end + base_prefix = base_directory + base_names = [] + while r = chop_basename(base_prefix) + base_prefix, basename = r + base_names.unshift basename if basename != '.' + end + unless SAME_PATHS[dest_prefix, base_prefix] + raise ArgumentError, "different prefix: #{dest_prefix.inspect} and #{base_directory.inspect}" + end + while !dest_names.empty? && + !base_names.empty? && + SAME_PATHS[dest_names.first, base_names.first] + dest_names.shift + base_names.shift + end + if base_names.include? '..' + raise ArgumentError, "base_directory has ..: #{base_directory.inspect}" + end + base_names.fill('..') + relpath_names = base_names + dest_names + if relpath_names.empty? + Pathname.new('.') + else + Pathname.new(File.join(*relpath_names)) + end + end +end + +class Pathname # * IO * + # + # #each_line iterates over the line in the file. It yields a String object + # for each line. + # + # This method has existed since 1.8.1. + # + def each_line(*args, &block) # :yield: line + IO.foreach(@path, *args, &block) + end + + # Pathname#foreachline is *obsoleted* at 1.8.1. Use #each_line. + def foreachline(*args, &block) + warn "Pathname#foreachline is obsoleted. Use Pathname#each_line." + each_line(*args, &block) + end + + # See IO.read. Returns all the bytes from the file, or the first +N+ + # if specified. + def read(*args) IO.read(@path, *args) end + + # See IO.readlines. Returns all the lines from the file. + def readlines(*args) IO.readlines(@path, *args) end + + # See IO.sysopen. + def sysopen(*args) IO.sysopen(@path, *args) end +end + + +class Pathname # * File * + + # See File.atime. Returns last access time. + def atime() File.atime(@path) end + + # See File.ctime. Returns last (directory entry, not file) change time. + def ctime() File.ctime(@path) end + + # See File.mtime. Returns last modification time. + def mtime() File.mtime(@path) end + + # See File.chmod. Changes permissions. + def chmod(mode) File.chmod(mode, @path) end + + # See File.lchmod. 
+ def lchmod(mode) File.lchmod(mode, @path) end + + # See File.chown. Change owner and group of file. + def chown(owner, group) File.chown(owner, group, @path) end + + # See File.lchown. + def lchown(owner, group) File.lchown(owner, group, @path) end + + # See File.fnmatch. Return +true+ if the receiver matches the given + # pattern. + def fnmatch(pattern, *args) File.fnmatch(pattern, @path, *args) end + + # See File.fnmatch? (same as #fnmatch). + def fnmatch?(pattern, *args) File.fnmatch?(pattern, @path, *args) end + + # See File.ftype. Returns "type" of file ("file", "directory", + # etc). + def ftype() File.ftype(@path) end + + # See File.link. Creates a hard link. + def make_link(old) File.link(old, @path) end + + # See File.open. Opens the file for reading or writing. + def open(*args, &block) # :yield: file + File.open(@path, *args, &block) + end + + # See File.readlink. Read symbolic link. + def readlink() self.class.new(File.readlink(@path)) end + + # See File.rename. Rename the file. + def rename(to) File.rename(@path, to) end + + # See File.stat. Returns a File::Stat object. + def stat() File.stat(@path) end + + # See File.lstat. + def lstat() File.lstat(@path) end + + # See File.symlink. Creates a symbolic link. + def make_symlink(old) File.symlink(old, @path) end + + # See File.truncate. Truncate the file to +length+ bytes. + def truncate(length) File.truncate(@path, length) end + + # See File.utime. Update the access and modification times. + def utime(atime, mtime) File.utime(atime, mtime, @path) end + + # See File.basename. Returns the last component of the path. + def basename(*args) self.class.new(File.basename(@path, *args)) end + + # See File.dirname. Returns all but the last component of the path. + def dirname() self.class.new(File.dirname(@path)) end + + # See File.extname. Returns the file's extension. + def extname() File.extname(@path) end + + # See File.expand_path. 
+ def expand_path(*args) self.class.new(File.expand_path(@path, *args)) end + + # See File.split. Returns the #dirname and the #basename in an + # Array. + def split() File.split(@path).map {|f| self.class.new(f) } end + + # Pathname#link is confusing and *obsoleted* because the receiver/argument + # order is inverted to corresponding system call. + def link(old) + warn 'Pathname#link is obsoleted. Use Pathname#make_link.' + File.link(old, @path) + end + + # Pathname#symlink is confusing and *obsoleted* because the receiver/argument + # order is inverted to corresponding system call. + def symlink(old) + warn 'Pathname#symlink is obsoleted. Use Pathname#make_symlink.' + File.symlink(old, @path) + end +end + + +class Pathname # * FileTest * + + # See FileTest.blockdev?. + def blockdev?() FileTest.blockdev?(@path) end + + # See FileTest.chardev?. + def chardev?() FileTest.chardev?(@path) end + + # See FileTest.executable?. + def executable?() FileTest.executable?(@path) end + + # See FileTest.executable_real?. + def executable_real?() FileTest.executable_real?(@path) end + + # See FileTest.exist?. + def exist?() FileTest.exist?(@path) end + + # See FileTest.grpowned?. + def grpowned?() FileTest.grpowned?(@path) end + + # See FileTest.directory?. + def directory?() FileTest.directory?(@path) end + + # See FileTest.file?. + def file?() FileTest.file?(@path) end + + # See FileTest.pipe?. + def pipe?() FileTest.pipe?(@path) end + + # See FileTest.socket?. + def socket?() FileTest.socket?(@path) end + + # See FileTest.owned?. + def owned?() FileTest.owned?(@path) end + + # See FileTest.readable?. + def readable?() FileTest.readable?(@path) end + + # See FileTest.world_readable?. + def world_readable?() FileTest.world_readable?(@path) end + + # See FileTest.readable_real?. + def readable_real?() FileTest.readable_real?(@path) end + + # See FileTest.setuid?. + def setuid?() FileTest.setuid?(@path) end + + # See FileTest.setgid?. 
+ def setgid?() FileTest.setgid?(@path) end + + # See FileTest.size. + def size() FileTest.size(@path) end + + # See FileTest.size?. + def size?() FileTest.size?(@path) end + + # See FileTest.sticky?. + def sticky?() FileTest.sticky?(@path) end + + # See FileTest.symlink?. + def symlink?() FileTest.symlink?(@path) end + + # See FileTest.writable?. + def writable?() FileTest.writable?(@path) end + + # See FileTest.world_writable?. + def world_writable?() FileTest.world_writable?(@path) end + + # See FileTest.writable_real?. + def writable_real?() FileTest.writable_real?(@path) end + + # See FileTest.zero?. + def zero?() FileTest.zero?(@path) end +end + + +class Pathname # * Dir * + # See Dir.glob. Returns or yields Pathname objects. + def Pathname.glob(*args) # :yield: p + if block_given? + Dir.glob(*args) {|f| yield self.new(f) } + else + Dir.glob(*args).map {|f| self.new(f) } + end + end + + # See Dir.getwd. Returns the current working directory as a Pathname. + def Pathname.getwd() self.new(Dir.getwd) end + class << self; alias pwd getwd end + + # Pathname#chdir is *obsoleted* at 1.8.1. + def chdir(&block) + warn "Pathname#chdir is obsoleted. Use Dir.chdir." + Dir.chdir(@path, &block) + end + + # Pathname#chroot is *obsoleted* at 1.8.1. + def chroot + warn "Pathname#chroot is obsoleted. Use Dir.chroot." + Dir.chroot(@path) + end + + # Return the entries (files and subdirectories) in the directory, each as a + # Pathname object. + def entries() Dir.entries(@path).map {|f| self.class.new(f) } end + + # Iterates over the entries (files and subdirectories) in the directory. It + # yields a Pathname object for each entry. + # + # This method has existed since 1.8.1. + def each_entry(&block) # :yield: p + Dir.foreach(@path) {|f| yield self.class.new(f) } + end + + # Pathname#dir_foreach is *obsoleted* at 1.8.1. + def dir_foreach(*args, &block) + warn "Pathname#dir_foreach is obsoleted. Use Pathname#each_entry." + each_entry(*args, &block) + end + + # See Dir.mkdir. 
Create the referenced directory. + def mkdir(*args) Dir.mkdir(@path, *args) end + + # See Dir.rmdir. Remove the referenced directory. + def rmdir() Dir.rmdir(@path) end + + # See Dir.open. + def opendir(&block) # :yield: dir + Dir.open(@path, &block) + end +end + + +class Pathname # * Find * + # + # Pathname#find is an iterator to traverse a directory tree in a depth first + # manner. It yields a Pathname for each file under "this" directory. + # + # Since it is implemented by find.rb, Find.prune can be used + # to control the traverse. + # + # If +self+ is ., yielded pathnames begin with a filename in the + # current directory, not ./. + # + def find(&block) # :yield: p + require 'find' + if @path == '.' + Find.find(@path) {|f| yield self.class.new(f.sub(%r{\A\./}, '')) } + else + Find.find(@path) {|f| yield self.class.new(f) } + end + end +end + + +class Pathname # * FileUtils * + # See FileUtils.mkpath. Creates a full path, including any + # intermediate directories that don't yet exist. + def mkpath + require 'fileutils' + FileUtils.mkpath(@path) + nil + end + + # See FileUtils.rm_r. Deletes a directory and all beneath it. + def rmtree + # The name "rmtree" is borrowed from File::Path of Perl. + # File::Path provides "mkpath" and "rmtree". + require 'fileutils' + FileUtils.rm_r(@path) + nil + end +end + + +class Pathname # * mixed * + # Removes a file or directory, using File.unlink or + # Dir.unlink as necessary. + def unlink() + begin + Dir.unlink @path + rescue Errno::ENOTDIR + File.unlink @path + end + end + alias delete unlink + + # This method is *obsoleted* at 1.8.1. Use #each_line or #each_entry. + def foreach(*args, &block) + warn "Pathname#foreach is obsoleted. Use each_line or each_entry." + if FileTest.directory? @path + # For polymorphism between Dir.foreach and IO.foreach, + # Pathname#foreach doesn't yield Pathname object. 
+ Dir.foreach(@path, *args, &block) + else + IO.foreach(@path, *args, &block) + end + end +end + +class Pathname + undef =~ +end + +module Kernel + # create a pathname object. + # + # This method is available since 1.8.5. + def Pathname(path) # :doc: + Pathname.new(path) + end + private :Pathname +end diff --git a/lib/pp.rb b/lib/pp.rb new file mode 100644 index 0000000..e8819d7 --- /dev/null +++ b/lib/pp.rb @@ -0,0 +1,532 @@ +# == Pretty-printer for Ruby objects. +# +# = Which seems better? +# +# non-pretty-printed output by #p is: +# #, @group_queue=#], []]>, @buffer=[], @newline="\n", @group_stack=[#], @buffer_width=0, @indent=0, @maxwidth=79, @output_width=2, @output=#> +# +# pretty-printed output by #pp is: +# #, +# @group_queue= +# #], +# []]>, +# @group_stack= +# [#], +# @indent=0, +# @maxwidth=79, +# @newline="\n", +# @output=#, +# @output_width=2> +# +# I like the latter. If you do too, this library is for you. +# +# = Usage +# +# pp(obj) +# +# output +obj+ to +$>+ in pretty printed format. +# +# It returns +nil+. +# +# = Output Customization +# To define your customized pretty printing function for your classes, +# redefine a method #pretty_print(+pp+) in the class. +# It takes an argument +pp+ which is an instance of the class PP. +# The method should use PP#text, PP#breakable, PP#nest, PP#group and +# PP#pp to print the object. +# +# = Author +# Tanaka Akira + +require 'prettyprint' + +module Kernel + # returns a pretty printed object as a string. + def pretty_inspect + PP.pp(self, '') + end + + private + # prints arguments in pretty form. + # + # pp returns nil. + def pp(*objs) # :doc: + objs.each {|obj| + PP.pp(obj) + } + nil + end + module_function :pp +end + +class PP < PrettyPrint + # Outputs +obj+ to +out+ in pretty printed format of + # +width+ columns in width. + # + # If +out+ is omitted, +$>+ is assumed. + # If +width+ is omitted, 79 is assumed. + # + # PP.pp returns +out+. 
+ def PP.pp(obj, out=$>, width=79) + q = PP.new(out, width) + q.guard_inspect_key {q.pp obj} + q.flush + #$pp = q + out << "\n" + end + + # Outputs +obj+ to +out+ like PP.pp but with no indent and + # newline. + # + # PP.singleline_pp returns +out+. + def PP.singleline_pp(obj, out=$>) + q = SingleLine.new(out) + q.guard_inspect_key {q.pp obj} + q.flush + out + end + + # :stopdoc: + def PP.mcall(obj, mod, meth, *args, &block) + mod.instance_method(meth).bind(obj).call(*args, &block) + end + # :startdoc: + + @sharing_detection = false + class << self + # Returns the sharing detection flag as a boolean value. + # It is false by default. + attr_accessor :sharing_detection + end + + module PPMethods + def guard_inspect_key + if Thread.current[:__recursive_key__] == nil + Thread.current[:__recursive_key__] = {}.untrust + end + + if Thread.current[:__recursive_key__][:inspect] == nil + Thread.current[:__recursive_key__][:inspect] = {}.untrust + end + + save = Thread.current[:__recursive_key__][:inspect] + + begin + Thread.current[:__recursive_key__][:inspect] = {}.untrust + yield + ensure + Thread.current[:__recursive_key__][:inspect] = save + end + end + + def check_inspect_key(id) + Thread.current[:__recursive_key__] && + Thread.current[:__recursive_key__][:inspect] && + Thread.current[:__recursive_key__][:inspect].include?(id) + end + def push_inspect_key(id) + Thread.current[:__recursive_key__][:inspect][id] = true + end + def pop_inspect_key(id) + Thread.current[:__recursive_key__][:inspect].delete id + end + + # Adds +obj+ to the pretty printing buffer + # using Object#pretty_print or Object#pretty_print_cycle. + # + # Object#pretty_print_cycle is used when +obj+ is already + # printed, a.k.a the object reference chain has a cycle. 
+ def pp(obj) + id = obj.object_id + + if check_inspect_key(id) + group {obj.pretty_print_cycle self} + return + end + + begin + push_inspect_key(id) + group {obj.pretty_print self} + ensure + pop_inspect_key(id) unless PP.sharing_detection + end + end + + # A convenience method which is same as follows: + # + # group(1, '#<' + obj.class.name, '>') { ... } + def object_group(obj, &block) # :yield: + group(1, '#<' + obj.class.name, '>', &block) + end + + if 0x100000000.class == Bignum + # 32bit + PointerMask = 0xffffffff + else + # 64bit + PointerMask = 0xffffffffffffffff + end + + case Object.new.inspect + when /\A\#\z/ + PointerFormat = "%0#{$1.length}x" + else + PointerFormat = "%x" + end + + def object_address_group(obj, &block) + id = PointerFormat % (obj.object_id * 2 & PointerMask) + group(1, "\#<#{obj.class}:0x#{id}", '>', &block) + end + + # A convenience method which is same as follows: + # + # text ',' + # breakable + def comma_breakable + text ',' + breakable + end + + # Adds a separated list. + # The list is separated by comma with breakable space, by default. + # + # #seplist iterates the +list+ using +iter_method+. + # It yields each object to the block given for #seplist. + # The procedure +separator_proc+ is called between each yields. + # + # If the iteration is zero times, +separator_proc+ is not called at all. + # + # If +separator_proc+ is nil or not given, + # +lambda { comma_breakable }+ is used. + # If +iter_method+ is not given, :each is used. + # + # For example, following 3 code fragments has similar effect. 
+ # + # q.seplist([1,2,3]) {|v| xxx v } + # + # q.seplist([1,2,3], lambda { q.comma_breakable }, :each) {|v| xxx v } + # + # xxx 1 + # q.comma_breakable + # xxx 2 + # q.comma_breakable + # xxx 3 + def seplist(list, sep=nil, iter_method=:each) # :yield: element + sep ||= lambda { comma_breakable } + first = true + list.__send__(iter_method) {|*v| + if first + first = false + else + sep.call + end + yield(*v) + } + end + + def pp_object(obj) + object_address_group(obj) { + seplist(obj.pretty_print_instance_variables, lambda { text ',' }) {|v| + breakable + v = v.to_s if Symbol === v + text v + text '=' + group(1) { + breakable '' + pp(obj.instance_eval(v)) + } + } + } + end + + def pp_hash(obj) + group(1, '{', '}') { + seplist(obj, nil, :each_pair) {|k, v| + group { + pp k + text '=>' + group(1) { + breakable '' + pp v + } + } + } + } + end + end + + include PPMethods + + class SingleLine < PrettyPrint::SingleLine + include PPMethods + end + + module ObjectMixin + # 1. specific pretty_print + # 2. specific inspect + # 3. specific to_s if instance variable is empty + # 4. generic pretty_print + + # A default pretty printing method for general objects. + # It calls #pretty_print_instance_variables to list instance variables. + # + # If +self+ has a customized (redefined) #inspect method, + # the result of self.inspect is used but it obviously has no + # line break hints. + # + # This module provides predefined #pretty_print methods for some of + # the most commonly used built-in classes for convenience. 
+ def pretty_print(q) + method_method = Object.instance_method(:method).bind(self) + begin + inspect_method = method_method.call(:inspect) + rescue NameError + end + begin + to_s_method = method_method.call(:to_s) + rescue NameError + end + if inspect_method && /\(Kernel\)#/ !~ inspect_method.inspect + q.text self.inspect + elsif !inspect_method && self.respond_to?(:inspect) + q.text self.inspect + elsif to_s_method && /\(Kernel\)#/ !~ to_s_method.inspect && + instance_variables.empty? + q.text self.to_s + elsif !to_s_method && self.respond_to?(:to_s) + q.text self.to_s + else + q.pp_object(self) + end + end + + # A default pretty printing method for general objects that are + # detected as part of a cycle. + def pretty_print_cycle(q) + q.object_address_group(self) { + q.breakable + q.text '...' + } + end + + # Returns a sorted array of instance variable names. + # + # This method should return an array of names of instance variables as symbols or strings as: + # +[:@a, :@b]+. + def pretty_print_instance_variables + instance_variables.sort + end + + # Is #inspect implementation using #pretty_print. + # If you implement #pretty_print, it can be used as follows. + # + # alias inspect pretty_print_inspect + # + # However, doing this requires that every class that #inspect is called on + # implement #pretty_print, or a RuntimeError will be raised. + def pretty_print_inspect + if /\(PP::ObjectMixin\)#/ =~ Object.instance_method(:method).bind(self).call(:pretty_print).inspect + raise "pretty_print is not overridden for #{self.class}" + end + PP.singleline_pp(self, '') + end + end +end + +class Array + def pretty_print(q) + q.group(1, '[', ']') { + q.seplist(self) {|v| + q.pp v + } + } + end + + def pretty_print_cycle(q) + q.text(empty? ? '[]' : '[...]') + end +end + +class Hash + def pretty_print(q) + q.pp_hash self + end + + def pretty_print_cycle(q) + q.text(empty? ? 
'{}' : '{...}') + end +end + +class << ENV + def pretty_print(q) + h = {} + ENV.keys.sort.each {|k| + h[k] = ENV[k] + } + q.pp_hash h + end +end + +class Struct + def pretty_print(q) + q.group(1, '#') { + q.seplist(PP.mcall(self, Struct, :members), lambda { q.text "," }) {|member| + q.breakable + q.text member.to_s + q.text '=' + q.group(1) { + q.breakable '' + q.pp self[member] + } + } + } + end + + def pretty_print_cycle(q) + q.text sprintf("#", PP.mcall(self, Kernel, :class).name) + end +end + +class Range + def pretty_print(q) + q.pp self.begin + q.breakable '' + q.text(self.exclude_end? ? '...' : '..') + q.breakable '' + q.pp self.end + end +end + +class File + class Stat + def pretty_print(q) + require 'etc.so' + q.object_group(self) { + q.breakable + q.text sprintf("dev=0x%x", self.dev); q.comma_breakable + q.text "ino="; q.pp self.ino; q.comma_breakable + q.group { + m = self.mode + q.text sprintf("mode=0%o", m) + q.breakable + q.text sprintf("(%s %c%c%c%c%c%c%c%c%c)", + self.ftype, + (m & 0400 == 0 ? ?- : ?r), + (m & 0200 == 0 ? ?- : ?w), + (m & 0100 == 0 ? (m & 04000 == 0 ? ?- : ?S) : + (m & 04000 == 0 ? ?x : ?s)), + (m & 0040 == 0 ? ?- : ?r), + (m & 0020 == 0 ? ?- : ?w), + (m & 0010 == 0 ? (m & 02000 == 0 ? ?- : ?S) : + (m & 02000 == 0 ? ?x : ?s)), + (m & 0004 == 0 ? ?- : ?r), + (m & 0002 == 0 ? ?- : ?w), + (m & 0001 == 0 ? (m & 01000 == 0 ? ?- : ?T) : + (m & 01000 == 0 ? 
?x : ?t))) + } + q.comma_breakable + q.text "nlink="; q.pp self.nlink; q.comma_breakable + q.group { + q.text "uid="; q.pp self.uid + begin + pw = Etc.getpwuid(self.uid) + rescue ArgumentError + end + if pw + q.breakable; q.text "(#{pw.name})" + end + } + q.comma_breakable + q.group { + q.text "gid="; q.pp self.gid + begin + gr = Etc.getgrgid(self.gid) + rescue ArgumentError + end + if gr + q.breakable; q.text "(#{gr.name})" + end + } + q.comma_breakable + q.group { + q.text sprintf("rdev=0x%x", self.rdev) + q.breakable + q.text sprintf('(%d, %d)', self.rdev_major, self.rdev_minor) + } + q.comma_breakable + q.text "size="; q.pp self.size; q.comma_breakable + q.text "blksize="; q.pp self.blksize; q.comma_breakable + q.text "blocks="; q.pp self.blocks; q.comma_breakable + q.group { + t = self.atime + q.text "atime="; q.pp t + q.breakable; q.text "(#{t.tv_sec})" + } + q.comma_breakable + q.group { + t = self.mtime + q.text "mtime="; q.pp t + q.breakable; q.text "(#{t.tv_sec})" + } + q.comma_breakable + q.group { + t = self.ctime + q.text "ctime="; q.pp t + q.breakable; q.text "(#{t.tv_sec})" + } + } + end + end +end + +class MatchData + def pretty_print(q) + nc = [] + self.regexp.named_captures.each {|name, indexes| + indexes.each {|i| nc[i] = name } + } + q.object_group(self) { + q.breakable + q.seplist(0...self.size, lambda { q.breakable }) {|i| + if i == 0 + q.pp self[i] + else + if nc[i] + q.text nc[i] + else + q.pp i + end + q.text ':' + q.pp self[i] + end + } + } + end +end + +class Object + include PP::ObjectMixin +end + +[Numeric, Symbol, FalseClass, TrueClass, NilClass, Module].each {|c| + c.class_eval { + def pretty_print_cycle(q) + q.text inspect + end + } +} + +[Numeric, FalseClass, TrueClass, Module].each {|c| + c.class_eval { + def pretty_print(q) + q.text inspect + end + } +} +# :enddoc: diff --git a/lib/prettyprint.rb b/lib/prettyprint.rb new file mode 100644 index 0000000..48f2ebf --- /dev/null +++ b/lib/prettyprint.rb @@ -0,0 +1,896 @@ +# $Id$ + +# 
This class implements a pretty printing algorithm. It finds line breaks and +# nice indentations for grouped structure. +# +# By default, the class assumes that primitive elements are strings and each +# byte in the strings has a single column in width. But it can be used for +# other situations by giving suitable arguments for some methods: +# * newline object and space generation block for PrettyPrint.new +# * optional width argument for PrettyPrint#text +# * PrettyPrint#breakable +# +# There are several candidate uses: +# * text formatting using proportional fonts +# * multibyte characters which have columns different from the number of bytes +# * non-string formatting +# +# == Bugs +# * Box based formatting? +# * Other (better) model/algorithm? +# +# == References +# Christian Lindig, Strictly Pretty, March 2000, +# http://www.st.cs.uni-sb.de/~lindig/papers/#pretty +# +# Philip Wadler, A prettier printer, March 1998, +# http://homepages.inf.ed.ac.uk/wadler/topics/language-design.html#prettier +# +# == Author +# Tanaka Akira +# +class PrettyPrint + + # This is a convenience method which is the same as follows: + # + # begin + # q = PrettyPrint.new(output, maxwidth, newline, &genspace) + # ... + # q.flush + # output + # end + # + def PrettyPrint.format(output='', maxwidth=79, newline="\n", genspace=lambda {|n| ' ' * n}) + q = PrettyPrint.new(output, maxwidth, newline, &genspace) + yield q + q.flush + output + end + + # This is similar to PrettyPrint::format but the result has no breaks. + # + # +maxwidth+, +newline+ and +genspace+ are ignored. + # + # The invocation of +breakable+ in the block doesn't break a line and is + # treated as just an invocation of +text+. + # + def PrettyPrint.singleline_format(output='', maxwidth=nil, newline=nil, genspace=nil) + q = SingleLine.new(output) + yield q + output + end + + # Creates a buffer for pretty printing. + # + # +output+ is an output target. If it is not specified, '' is assumed.
It + # should have a << method which accepts the first argument +obj+ of + # PrettyPrint#text, the first argument +sep+ of PrettyPrint#breakable, the + # first argument +newline+ of PrettyPrint.new, and the result of a given + # block for PrettyPrint.new. + # + # +maxwidth+ specifies maximum line length. If it is not specified, 79 is + # assumed. However actual outputs may overflow +maxwidth+ if long + # non-breakable texts are provided. + # + # +newline+ is used for line breaks. "\n" is used if it is not specified. + # + # The block is used to generate spaces. {|width| ' ' * width} is used if it + # is not given. + # + def initialize(output='', maxwidth=79, newline="\n", &genspace) + @output = output + @maxwidth = maxwidth + @newline = newline + @genspace = genspace || lambda {|n| ' ' * n} + + @output_width = 0 + @buffer_width = 0 + @buffer = [] + + root_group = Group.new(0) + @group_stack = [root_group] + @group_queue = GroupQueue.new(root_group) + @indent = 0 + end + attr_reader :output, :maxwidth, :newline, :genspace + attr_reader :indent, :group_queue + + def current_group + @group_stack.last + end + + # first? is a predicate to test the call is a first call to first? with + # current group. + # + # It is useful to format comma separated values as: + # + # q.group(1, '[', ']') { + # xxx.each {|yyy| + # unless q.first? + # q.text ',' + # q.breakable + # end + # ... pretty printing yyy ... + # } + # } + # + # first? is obsoleted in 1.8.2. + # + def first? + warn "PrettyPrint#first? is obsoleted at 1.8.2." + current_group.first? + end + + def break_outmost_groups + while @maxwidth < @output_width + @buffer_width + return unless group = @group_queue.deq + until group.breakables.empty? + data = @buffer.shift + @output_width = data.output(@output, @output_width) + @buffer_width -= data.width + end + while !@buffer.empty? 
&& Text === @buffer.first + text = @buffer.shift + @output_width = text.output(@output, @output_width) + @buffer_width -= text.width + end + end + end + + # This adds +obj+ as a text of +width+ columns in width. + # + # If +width+ is not specified, obj.length is used. + # + def text(obj, width=obj.length) + if @buffer.empty? + @output << obj + @output_width += width + else + text = @buffer.last + unless Text === text + text = Text.new + @buffer << text + end + text.add(obj, width) + @buffer_width += width + break_outmost_groups + end + end + + def fill_breakable(sep=' ', width=sep.length) + group { breakable sep, width } + end + + # This tells "you can break a line here if necessary", and a +width+\-column + # text +sep+ is inserted if a line is not broken at the point. + # + # If +sep+ is not specified, " " is used. + # + # If +width+ is not specified, +sep.length+ is used. You will have to + # specify this when +sep+ is a multibyte character, for example. + # + def breakable(sep=' ', width=sep.length) + group = @group_stack.last + if group.break? + flush + @output << @newline + @output << @genspace.call(@indent) + @output_width = @indent + @buffer_width = 0 + else + @buffer << Breakable.new(sep, width, self) + @buffer_width += width + break_outmost_groups + end + end + + # Groups line break hints added in the block. The line break hints are all + # to be used or not. + # + # If +indent+ is specified, the method call is regarded as nested by + # nest(indent) { ... }. + # + # If +open_obj+ is specified, text open_obj, open_width is called + # before grouping. If +close_obj+ is specified, text close_obj, + # close_width is called after grouping. 
+ # + def group(indent=0, open_obj='', close_obj='', open_width=open_obj.length, close_width=close_obj.length) + text open_obj, open_width + group_sub { + nest(indent) { + yield + } + } + text close_obj, close_width + end + + def group_sub + group = Group.new(@group_stack.last.depth + 1) + @group_stack.push group + @group_queue.enq group + begin + yield + ensure + @group_stack.pop + if group.breakables.empty? + @group_queue.delete group + end + end + end + + # Increases left margin after newline with +indent+ for line breaks added in + # the block. + # + def nest(indent) + @indent += indent + begin + yield + ensure + @indent -= indent + end + end + + # outputs buffered data. + # + def flush + @buffer.each {|data| + @output_width = data.output(@output, @output_width) + } + @buffer.clear + @buffer_width = 0 + end + + class Text + def initialize + @objs = [] + @width = 0 + end + attr_reader :width + + def output(out, output_width) + @objs.each {|obj| out << obj} + output_width + @width + end + + def add(obj, width) + @objs << obj + @width += width + end + end + + class Breakable + def initialize(sep, width, q) + @obj = sep + @width = width + @pp = q + @indent = q.indent + @group = q.current_group + @group.breakables.push self + end + attr_reader :obj, :width, :indent + + def output(out, output_width) + @group.breakables.shift + if @group.break? + out << @pp.newline + out << @pp.genspace.call(@indent) + @indent + else + @pp.group_queue.delete @group if @group.breakables.empty? + out << @obj + output_width + @width + end + end + end + + class Group + def initialize(depth) + @depth = depth + @breakables = [] + @break = false + end + attr_reader :depth, :breakables + + def break + @break = true + end + + def break? + @break + end + + def first? + if defined? 
@first + false + else + @first = false + true + end + end + end + + class GroupQueue + def initialize(*groups) + @queue = [] + groups.each {|g| enq g} + end + + def enq(group) + depth = group.depth + @queue << [] until depth < @queue.length + @queue[depth] << group + end + + def deq + @queue.each {|gs| + (gs.length-1).downto(0) {|i| + unless gs[i].breakables.empty? + group = gs.slice!(i, 1).first + group.break + return group + end + } + gs.each {|group| group.break} + gs.clear + } + return nil + end + + def delete(group) + @queue[group.depth].delete(group) + end + end + + class SingleLine + def initialize(output, maxwidth=nil, newline=nil) + @output = output + @first = [true] + end + + def text(obj, width=nil) + @output << obj + end + + def breakable(sep=' ', width=nil) + @output << sep + end + + def nest(indent) + yield + end + + def group(indent=nil, open_obj='', close_obj='', open_width=nil, close_width=nil) + @first.push true + @output << open_obj + yield + @output << close_obj + @first.pop + end + + def flush + end + + def first? 
+ result = @first[-1] + @first[-1] = false + result + end + end +end + +if __FILE__ == $0 + require 'test/unit' + + class WadlerExample < Test::Unit::TestCase # :nodoc: + def setup + @tree = Tree.new("aaaa", Tree.new("bbbbb", Tree.new("ccc"), + Tree.new("dd")), + Tree.new("eee"), + Tree.new("ffff", Tree.new("gg"), + Tree.new("hhh"), + Tree.new("ii"))) + end + + def hello(width) + PrettyPrint.format('', width) {|hello| + hello.group { + hello.group { + hello.group { + hello.group { + hello.text 'hello' + hello.breakable; hello.text 'a' + } + hello.breakable; hello.text 'b' + } + hello.breakable; hello.text 'c' + } + hello.breakable; hello.text 'd' + } + } + end + + def test_hello_00_06 + expected = <<'End'.chomp +hello +a +b +c +d +End + assert_equal(expected, hello(0)) + assert_equal(expected, hello(6)) + end + + def test_hello_07_08 + expected = <<'End'.chomp +hello a +b +c +d +End + assert_equal(expected, hello(7)) + assert_equal(expected, hello(8)) + end + + def test_hello_09_10 + expected = <<'End'.chomp +hello a b +c +d +End + out = hello(9); assert_equal(expected, out) + out = hello(10); assert_equal(expected, out) + end + + def test_hello_11_12 + expected = <<'End'.chomp +hello a b c +d +End + assert_equal(expected, hello(11)) + assert_equal(expected, hello(12)) + end + + def test_hello_13 + expected = <<'End'.chomp +hello a b c d +End + assert_equal(expected, hello(13)) + end + + def tree(width) + PrettyPrint.format('', width) {|q| @tree.show(q)} + end + + def test_tree_00_19 + expected = <<'End'.chomp +aaaa[bbbbb[ccc, + dd], + eee, + ffff[gg, + hhh, + ii]] +End + assert_equal(expected, tree(0)) + assert_equal(expected, tree(19)) + end + + def test_tree_20_22 + expected = <<'End'.chomp +aaaa[bbbbb[ccc, dd], + eee, + ffff[gg, + hhh, + ii]] +End + assert_equal(expected, tree(20)) + assert_equal(expected, tree(22)) + end + + def test_tree_23_43 + expected = <<'End'.chomp +aaaa[bbbbb[ccc, dd], + eee, + ffff[gg, hhh, ii]] +End + assert_equal(expected, tree(23)) 
+ assert_equal(expected, tree(43)) + end + + def test_tree_44 + assert_equal(<<'End'.chomp, tree(44)) +aaaa[bbbbb[ccc, dd], eee, ffff[gg, hhh, ii]] +End + end + + def tree_alt(width) + PrettyPrint.format('', width) {|q| @tree.altshow(q)} + end + + def test_tree_alt_00_18 + expected = <<'End'.chomp +aaaa[ + bbbbb[ + ccc, + dd + ], + eee, + ffff[ + gg, + hhh, + ii + ] +] +End + assert_equal(expected, tree_alt(0)) + assert_equal(expected, tree_alt(18)) + end + + def test_tree_alt_19_20 + expected = <<'End'.chomp +aaaa[ + bbbbb[ ccc, dd ], + eee, + ffff[ + gg, + hhh, + ii + ] +] +End + assert_equal(expected, tree_alt(19)) + assert_equal(expected, tree_alt(20)) + end + + def test_tree_alt_20_49 + expected = <<'End'.chomp +aaaa[ + bbbbb[ ccc, dd ], + eee, + ffff[ gg, hhh, ii ] +] +End + assert_equal(expected, tree_alt(21)) + assert_equal(expected, tree_alt(49)) + end + + def test_tree_alt_50 + expected = <<'End'.chomp +aaaa[ bbbbb[ ccc, dd ], eee, ffff[ gg, hhh, ii ] ] +End + assert_equal(expected, tree_alt(50)) + end + + class Tree # :nodoc: + def initialize(string, *children) + @string = string + @children = children + end + + def show(q) + q.group { + q.text @string + q.nest(@string.length) { + unless @children.empty? + q.text '[' + q.nest(1) { + first = true + @children.each {|t| + if first + first = false + else + q.text ',' + q.breakable + end + t.show(q) + } + } + q.text ']' + end + } + } + end + + def altshow(q) + q.group { + q.text @string + unless @children.empty? 
+ q.text '[' + q.nest(2) { + q.breakable + first = true + @children.each {|t| + if first + first = false + else + q.text ',' + q.breakable + end + t.altshow(q) + } + } + q.breakable + q.text ']' + end + } + end + + end + end + + class StrictPrettyExample < Test::Unit::TestCase # :nodoc: + def prog(width) + PrettyPrint.format('', width) {|q| + q.group { + q.group {q.nest(2) { + q.text "if"; q.breakable; + q.group { + q.nest(2) { + q.group {q.text "a"; q.breakable; q.text "=="} + q.breakable; q.text "b"}}}} + q.breakable + q.group {q.nest(2) { + q.text "then"; q.breakable; + q.group { + q.nest(2) { + q.group {q.text "a"; q.breakable; q.text "<<"} + q.breakable; q.text "2"}}}} + q.breakable + q.group {q.nest(2) { + q.text "else"; q.breakable; + q.group { + q.nest(2) { + q.group {q.text "a"; q.breakable; q.text "+"} + q.breakable; q.text "b"}}}}} + } + end + + def test_00_04 + expected = <<'End'.chomp +if + a + == + b +then + a + << + 2 +else + a + + + b +End + assert_equal(expected, prog(0)) + assert_equal(expected, prog(4)) + end + + def test_05 + expected = <<'End'.chomp +if + a + == + b +then + a + << + 2 +else + a + + b +End + assert_equal(expected, prog(5)) + end + + def test_06 + expected = <<'End'.chomp +if + a == + b +then + a << + 2 +else + a + + b +End + assert_equal(expected, prog(6)) + end + + def test_07 + expected = <<'End'.chomp +if + a == + b +then + a << + 2 +else + a + b +End + assert_equal(expected, prog(7)) + end + + def test_08 + expected = <<'End'.chomp +if + a == b +then + a << 2 +else + a + b +End + assert_equal(expected, prog(8)) + end + + def test_09 + expected = <<'End'.chomp +if a == b +then + a << 2 +else + a + b +End + assert_equal(expected, prog(9)) + end + + def test_10 + expected = <<'End'.chomp +if a == b +then + a << 2 +else a + b +End + assert_equal(expected, prog(10)) + end + + def test_11_31 + expected = <<'End'.chomp +if a == b +then a << 2 +else a + b +End + assert_equal(expected, prog(11)) + assert_equal(expected, prog(15)) + 
assert_equal(expected, prog(31)) + end + + def test_32 + expected = <<'End'.chomp +if a == b then a << 2 else a + b +End + assert_equal(expected, prog(32)) + end + + end + + class TailGroup < Test::Unit::TestCase # :nodoc: + def test_1 + out = PrettyPrint.format('', 10) {|q| + q.group { + q.group { + q.text "abc" + q.breakable + q.text "def" + } + q.group { + q.text "ghi" + q.breakable + q.text "jkl" + } + } + } + assert_equal("abc defghi\njkl", out) + end + end + + class NonString < Test::Unit::TestCase # :nodoc: + def format(width) + PrettyPrint.format([], width, 'newline', lambda {|n| "#{n} spaces"}) {|q| + q.text(3, 3) + q.breakable(1, 1) + q.text(3, 3) + } + end + + def test_6 + assert_equal([3, "newline", "0 spaces", 3], format(6)) + end + + def test_7 + assert_equal([3, 1, 3], format(7)) + end + + end + + class Fill < Test::Unit::TestCase # :nodoc: + def format(width) + PrettyPrint.format('', width) {|q| + q.group { + q.text 'abc' + q.fill_breakable + q.text 'def' + q.fill_breakable + q.text 'ghi' + q.fill_breakable + q.text 'jkl' + q.fill_breakable + q.text 'mno' + q.fill_breakable + q.text 'pqr' + q.fill_breakable + q.text 'stu' + } + } + end + + def test_00_06 + expected = <<'End'.chomp +abc +def +ghi +jkl +mno +pqr +stu +End + assert_equal(expected, format(0)) + assert_equal(expected, format(6)) + end + + def test_07_10 + expected = <<'End'.chomp +abc def +ghi jkl +mno pqr +stu +End + assert_equal(expected, format(7)) + assert_equal(expected, format(10)) + end + + def test_11_14 + expected = <<'End'.chomp +abc def ghi +jkl mno pqr +stu +End + assert_equal(expected, format(11)) + assert_equal(expected, format(14)) + end + + def test_15_18 + expected = <<'End'.chomp +abc def ghi jkl +mno pqr stu +End + assert_equal(expected, format(15)) + assert_equal(expected, format(18)) + end + + def test_19_22 + expected = <<'End'.chomp +abc def ghi jkl mno +pqr stu +End + assert_equal(expected, format(19)) + assert_equal(expected, format(22)) + end + + def test_23_26 
+ expected = <<'End'.chomp +abc def ghi jkl mno pqr +stu +End + assert_equal(expected, format(23)) + assert_equal(expected, format(26)) + end + + def test_27 + expected = <<'End'.chomp +abc def ghi jkl mno pqr stu +End + assert_equal(expected, format(27)) + end + + end +end diff --git a/lib/prime.rb b/lib/prime.rb new file mode 100644 index 0000000..8846b14 --- /dev/null +++ b/lib/prime.rb @@ -0,0 +1,471 @@ +# +# = prime.rb +# +# Prime numbers and factorization library. +# +# Copyright:: +# Copyright (c) 1998-2008 Keiju ISHITSUKA(SHL Japan Inc.) +# Copyright (c) 2008 Yuki Sonoda (Yugui) +# +# Documentation:: +# Yuki Sonoda +# + +require "singleton" +require "forwardable" + +class Integer + # Re-composes a prime factorization and returns the product. + # + # See Prime#int_from_prime_division for more details. + def Integer.from_prime_division(pd) + Prime.int_from_prime_division(pd) + end + + # Returns the factorization of +self+. + # + # See Prime#prime_division for more details. + def prime_division(generator = Prime::Generator23.new) + Prime.prime_division(self, generator) + end + + # Returns true if +self+ is a prime number, false for a composite. + def prime? + Prime.prime?(self) + end + + # Iterates the given block over all prime numbers. + # + # See +Prime+#each for more details. + def Integer.each_prime(ubound, &block) # :yields: prime + Prime.each(ubound, &block) + end +end + +# +# The set of all prime numbers. +# +# == Example +# Prime.each(100) do |prime| +# p prime #=> 2, 3, 5, 7, 11, ...., 97 +# end +# +# == Retrieving the instance +# +Prime+.new is obsolete. Now +Prime+ has the default instance and you can +# access it as +Prime+.instance. +# +# For convenience, each instance method of +Prime+.instance can be accessed +# as a class method of +Prime+. +# +# e.g. 
+# Prime.instance.prime?(2) #=> true +# Prime.prime?(2) #=> true +# +# == Generators +# A "generator" provides an implementation of enumerating pseudo-prime +# numbers and it remembers the position of enumeration and upper bound. +# Furthermore, it is an external iterator of prime enumeration which is +# compatible with an Enumerator. +# +# +Prime+::+PseudoPrimeGenerator+ is the base class for generators. +# There are a few implementations of generators. +# +# [+Prime+::+EratosthenesGenerator+] +# Uses Eratosthenes' sieve. +# [+Prime+::+TrialDivisionGenerator+] +# Uses the trial division method. +# [+Prime+::+Generator23+] +# Generates all positive integers which are not divisible by 2 or 3. +# This sequence is very bad as a pseudo-prime sequence. But this +# is faster and uses much less memory than other generators. So, +# it is suitable for factorizing an integer which is not large but +# has many prime factors. e.g. for Prime#prime? . +class Prime + include Enumerable + @the_instance = Prime.new + + # obsolete. Use +Prime+::+instance+ or class methods of +Prime+. + def initialize + @generator = EratosthenesGenerator.new + extend OldCompatibility + warn "Prime::new is obsolete. use Prime::instance or class methods of Prime." + end + + class << self + extend Forwardable + include Enumerable + # Returns the default instance of Prime. + def instance; @the_instance end + + def method_added(method) # :nodoc: + (class<< self;self;end).def_delegator :instance, method + end + end + + # Iterates the given block over all prime numbers. + # + # == Parameters + # +ubound+:: + # Optional. An arbitrary positive number. + # The upper bound of enumeration. The method enumerates + # prime numbers infinitely if +ubound+ is nil. + # +generator+:: + # Optional. An implementation of pseudo-prime generator. + # + # == Return value + # The value of the last evaluation of the given block. + # Or an enumerator which is compatible with an +Enumerator+ + # if no block is given.
+ # + # == Description + # Calls +block+ once for each prime number, passing the prime as + # a parameter. + # + # +ubound+:: + # Upper bound of prime numbers. The iterator stops after + # yielding all prime numbers p <= +ubound+. + # + # == Note + # +Prime+.+new+ returns an object extended by +Prime+::+OldCompatibility+ + # for compatibility with Ruby 1.8, and +Prime+#each is overwritten + # by +Prime+::+OldCompatibility+#+each+. + # + # +Prime+.+new+ is now obsolete. Use +Prime+.+instance+.+each+ or simply + # +Prime+.+each+. + def each(ubound = nil, generator = EratosthenesGenerator.new, &block) + generator.upper_bound = ubound + generator.each(&block) + end + + + # Returns true if +value+ is prime, false for a composite. + # + # == Parameters + # +value+:: an arbitrary integer to be checked. + # +generator+:: optional. A pseudo-prime generator. + def prime?(value, generator = Prime::Generator23.new) + value = -value if value < 0 + return false if value < 2 + for num in generator + q,r = value.divmod num + return true if q < num + return false if r == 0 + end + end + + # Re-composes a prime factorization and returns the product. + # + # == Parameters + # +pd+:: Array of pairs of integers. Each inner + # pair consists of a prime number -- a prime factor -- + # and a natural number -- an exponent. + # + # == Example + # For [[p_1, e_1], [p_2, e_2], ...., [p_n, e_n]], it returns + # p_1**e_1 * p_2**e_2 * .... * p_n**e_n. + # + # Prime.int_from_prime_division([[2,2], [3,1]]) #=> 12 + def int_from_prime_division(pd) + pd.inject(1){|value, (prime, index)| + value *= prime**index + } + end + + # Returns the factorization of +value+. + # + # == Parameters + # +value+:: An arbitrary integer. + # +generator+:: Optional. A pseudo-prime generator. + # +generator+.succ must return the next + # pseudo-prime number in ascending + # order. It must generate all prime numbers, + # but may generate non-prime numbers.
+ # + # === Exceptions + # +ZeroDivisionError+:: when +value+ is zero. + # + # == Example + # For an arbitrary integer + # n = p_1**e_1 * p_2**e_2 * .... * p_n**e_n, + # prime_division(n) returns + # [[p_1, e_1], [p_2, e_2], ...., [p_n, e_n]]. + # + # Prime.prime_division(12) #=> [[2,2], [3,1]] + # + def prime_division(value, generator= Prime::Generator23.new) + raise ZeroDivisionError if value == 0 + if value < 0 + value = -value + pv = [[-1, 1]] + else + pv = [] + end + for prime in generator + count = 0 + while (value1, mod = value.divmod(prime) + mod) == 0 + value = value1 + count += 1 + end + if count != 0 + pv.push [prime, count] + end + break if value1 <= prime + end + if value > 1 + pv.push [value, 1] + end + return pv + end + + # An abstract class for enumerating pseudo-prime numbers. + # + # Concrete subclasses should override succ, next, rewind. + class PseudoPrimeGenerator + include Enumerable + + def initialize(ubound = nil) + @ubound = ubound + end + + def upper_bound=(ubound) + @ubound = ubound + end + def upper_bound + @ubound + end + + # returns the next pseudo-prime number, and move the internal + # position forward. + # + # +PseudoPrimeGenerator+#succ raises +NotImplementedError+. + def succ + raise NotImplementedError, "need to define `succ'" + end + + # alias of +succ+. + def next + raise NotImplementedError, "need to define `next'" + end + + # Rewinds the internal position for enumeration. + # + # See +Enumerator+#rewind. + def rewind + raise NotImplementedError, "need to define `rewind'" + end + + # Iterates the given block for each prime numbers. + def each(&block) + return self.dup unless block + if @ubound + last_value = nil + loop do + prime = succ + break last_value if prime > @ubound + last_value = block.call(prime) + end + else + loop do + block.call(succ) + end + end + end + + # see +Enumerator+#with_index. + alias with_index each_with_index + + # see +Enumerator+#with_object. 
+ def with_object(obj) + return enum_for(:with_object) unless block_given? + each do |prime| + yield prime, obj + end + end + end + + # An implementation of +PseudoPrimeGenerator+. + # + # Uses +EratosthenesSieve+. + class EratosthenesGenerator < PseudoPrimeGenerator + def initialize + @last_prime = nil + super + end + + def succ + @last_prime = @last_prime ? EratosthenesSieve.instance.next_to(@last_prime) : 2 + end + def rewind + initialize + end + alias next succ + end + + # An implementation of +PseudoPrimeGenerator+ which uses + # a prime table generated by trial division. + class TrialDivisionGenerator= @primes.length + # Only check for prime factors up to the square root of the potential primes, + # but without the performance hit of an actual square root calculation. + if @next_to_check + 4 > @ulticheck_next_squared + @ulticheck_index += 1 + @ulticheck_next_squared = @primes.at(@ulticheck_index + 1) ** 2 + end + # Only check numbers congruent to one and five, modulo six. All others + + # are divisible by two or three. This also allows us to skip checking against + # two and three. + @primes.push @next_to_check if @primes[2..@ulticheck_index].find {|prime| @next_to_check % prime == 0 }.nil? + @next_to_check += 4 + @primes.push @next_to_check if @primes[2..@ulticheck_index].find {|prime| @next_to_check % prime == 0 }.nil? + @next_to_check += 2 + end + return @primes[index] + end + end + + # Internal use. An implementation of eratosthenes's sieve + class EratosthenesSieve + include Singleton + + def initialize # :nodoc: + # bitmap for odd prime numbers less than 256. + # For an arbitrary odd number n, @table[i][j] is 1 when n is prime where i,j = n.divmod(32) . + @table = [0xcb6e, 0x64b4, 0x129a, 0x816d, 0x4c32, 0x864a, 0x820d, 0x2196] + end + + # returns the least odd prime number which is greater than +n+. 
+ def next_to(n) + n = (n-1).div(2)*2+3 # the next odd number of given n + i,j = n.divmod(32) + loop do + extend_table until @table.length > i + if !@table[i].zero? + (j...32).step(2) do |k| + return 32*i+k if !@table[i][k.div(2)].zero? + end + end + i += 1; j = 1 + end + end + + private + def extend_table + orig_len = @table.length + new_len = [orig_len**2, orig_len+256].min + lbound = orig_len*32 + ubound = new_len*32 + @table.fill(0xFFFF, orig_len...new_len) + (3..Integer(Math.sqrt(ubound))).step(2) do |p| + i, j = p.divmod(32) + next if @table[i][j.div(2)].zero? + + start = (lbound.div(2*p)*2+1)*p # odd multiple of p which is greater than or equal to lbound + (start...ubound).step(2*p) do |n| + i, j = n.divmod(32) + @table[i] &= 0xFFFF ^ (1<<(j.div(2))) + end + end + end + end + + # Provides a +Prime+ object with compatibility to Ruby 1.8 when instanciated via +Prime+.+new+. + module OldCompatibility + # Returns the next prime number and forwards internal pointer. + def succ + @generator.succ + end + alias next succ + + # Overwrites Prime#each. + # + # Iterates the given block over all prime numbers. Note that enumeration starts from + # the current position of internal pointer, not rewound. + def each(&block) + return @generator.dup unless block_given? 
+ loop do + yield succ + end + end + end +end diff --git a/lib/profile.rb b/lib/profile.rb new file mode 100644 index 0000000..2aeecce --- /dev/null +++ b/lib/profile.rb @@ -0,0 +1,10 @@ +require 'profiler' + +RubyVM::InstructionSequence.compile_option = { + :trace_instruction => true, + :specialized_instruction => false +} +END { + Profiler__::print_profile(STDERR) +} +Profiler__::start_profile diff --git a/lib/profiler.rb b/lib/profiler.rb new file mode 100644 index 0000000..a4b8889 --- /dev/null +++ b/lib/profiler.rb @@ -0,0 +1,59 @@ +module Profiler__ + # internal values + @@start = @@stack = @@map = nil + PROFILE_PROC = proc{|event, file, line, id, binding, klass| + case event + when "call", "c-call" + now = Process.times[0] + @@stack.push [now, 0.0] + when "return", "c-return" + now = Process.times[0] + key = [klass, id] + if tick = @@stack.pop + data = (@@map[key] ||= [0, 0.0, 0.0, key]) + data[0] += 1 + cost = now - tick[0] + data[1] += cost + data[2] += cost - tick[1] + @@stack[-1][1] += cost if @@stack[-1] + end + end + } +module_function + def start_profile + @@start = Process.times[0] + @@stack = [] + @@map = {} + set_trace_func PROFILE_PROC + end + def stop_profile + set_trace_func nil + end + def print_profile(f) + stop_profile + total = Process.times[0] - @@start + if total == 0 then total = 0.01 end + data = @@map.values + data = data.sort_by{|x| -x[2]} + sum = 0 + f.printf " %% cumulative self self total\n" + f.printf " time seconds seconds calls ms/call ms/call name\n" + for d in data + sum += d[2] + f.printf "%6.2f %8.2f %8.2f %8d ", d[2]/total*100, sum, d[2], d[0] + f.printf "%8.2f %8.2f %s\n", d[2]*1000/d[0], d[1]*1000/d[0], get_name(*d[3]) + end + f.printf "%6.2f %8.2f %8.2f %8d ", 0.0, total, 0.0, 1 # ??? + f.printf "%8.2f %8.2f %s\n", 0.0, total*1000, "#toplevel" # ??? + end + def get_name(klass, id) + name = klass.to_s || "" + if klass.kind_of? Class + name += "#" + else + name += "." 
+ end + name + id.id2name + end + private :get_name +end diff --git a/lib/pstore.rb b/lib/pstore.rb new file mode 100644 index 0000000..947dc1d --- /dev/null +++ b/lib/pstore.rb @@ -0,0 +1,543 @@ +# = PStore -- Transactional File Storage for Ruby Objects +# +# pstore.rb - +# originally by matz +# documentation by Kev Jackson and James Edward Gray II +# improved by Hongli Lai +# +# See PStore for documentation. + + +require "fileutils" +require "digest/md5" +require "thread" + +# +# PStore implements a file based persistence mechanism based on a Hash. User +# code can store hierarchies of Ruby objects (values) into the data store file +# by name (keys). An object hierarchy may be just a single object. User code +# may later read values back from the data store or even update data, as needed. +# +# The transactional behavior ensures that any changes succeed or fail together. +# This can be used to ensure that the data store is not left in a transitory +# state, where some values were updated but others were not. +# +# Behind the scenes, Ruby objects are stored to the data store file with +# Marshal. That carries the usual limitations. Proc objects cannot be +# marshalled, for example. +# +# == Usage example: +# +# require "pstore" +# +# # a mock wiki object... +# class WikiPage +# def initialize( page_name, author, contents ) +# @page_name = page_name +# @revisions = Array.new +# +# add_revision(author, contents) +# end +# +# attr_reader :page_name +# +# def add_revision( author, contents ) +# @revisions << { :created => Time.now, +# :author => author, +# :contents => contents } +# end +# +# def wiki_page_references +# [@page_name] + @revisions.last[:contents].scan(/\b(?:[A-Z]+[a-z]+){2,}/) +# end +# +# # ... +# end +# +# # create a new page... +# home_page = WikiPage.new( "HomePage", "James Edward Gray II", +# "A page about the JoysOfDocumentation..." ) +# +# # then we want to update page data and the index together, or not at all... 
+# wiki = PStore.new("wiki_pages.pstore") +# wiki.transaction do # begin transaction; do all of this or none of it +# # store page... +# wiki[home_page.page_name] = home_page +# # ensure that an index has been created... +# wiki[:wiki_index] ||= Array.new +# # update wiki index... +# wiki[:wiki_index].push(*home_page.wiki_page_references) +# end # commit changes to wiki data store file +# +# ### Some time later... ### +# +# # read wiki data... +# wiki.transaction(true) do # begin read-only transaction, no changes allowed +# wiki.roots.each do |data_root_name| +# p data_root_name +# p wiki[data_root_name] +# end +# end +# +# == Transaction modes +# +# By default, file integrity is only ensured as long as the operating system +# (and the underlying hardware) doesn't raise any unexpected I/O errors. If an +# I/O error occurs while PStore is writing to its file, then the file will +# become corrupted. +# +# You can prevent this by setting pstore.ultra_safe = true. +# However, this results in a minor performance loss, and only works on platforms +# that support atomic file renames. Please consult the documentation for +# +ultra_safe+ for details. +# +# Needless to say, if you're storing valuable data with PStore, then you should +# backup the PStore files from time to time. +class PStore + binmode = defined?(File::BINARY) ? File::BINARY : 0 + RDWR_ACCESS = File::RDWR | File::CREAT | binmode + RD_ACCESS = File::RDONLY | binmode + WR_ACCESS = File::WRONLY | File::CREAT | File::TRUNC | binmode + + # The error type thrown by all PStore methods. + class Error < StandardError + end + + # Whether PStore should do its best to prevent file corruptions, even when under + # unlikely-to-occur error conditions such as out-of-space conditions and other + # unusual OS filesystem errors. Setting this flag comes at the price in the form + # of a performance loss. + # + # This flag only has effect on platforms on which file renames are atomic (e.g. 
+ # all POSIX platforms: Linux, MacOS X, FreeBSD, etc). The default value is false. + attr_accessor :ultra_safe + + # + # To construct a PStore object, pass in the _file_ path where you would like + # the data to be stored. + # + # PStore objects are always reentrant. But if _thread_safe_ is set to true, + # then it will become thread-safe at the cost of a minor performance hit. + # + def initialize(file, thread_safe = false) + dir = File::dirname(file) + unless File::directory? dir + raise PStore::Error, format("directory %s does not exist", dir) + end + if File::exist? file and not File::readable? file + raise PStore::Error, format("file %s not readable", file) + end + @transaction = false + @filename = file + @abort = false + @ultra_safe = false + if @thread_safe + @lock = Mutex.new + else + @lock = DummyMutex.new + end + end + + # Raises PStore::Error if the calling code is not in a PStore#transaction. + def in_transaction + raise PStore::Error, "not in transaction" unless @transaction + end + # + # Raises PStore::Error if the calling code is not in a PStore#transaction or + # if the code is in a read-only PStore#transaction. + # + def in_transaction_wr() + in_transaction() + raise PStore::Error, "in read-only transaction" if @rdonly + end + private :in_transaction, :in_transaction_wr + + # + # Retrieves a value from the PStore file data, by _name_. The hierarchy of + # Ruby objects stored under that root _name_ will be returned. + # + # *WARNING*: This method is only valid in a PStore#transaction. It will + # raise PStore::Error if called at any other time. + # + def [](name) + in_transaction + @table[name] + end + # + # This method is just like PStore#[], save that you may also provide a + # _default_ value for the object. In the event the specified _name_ is not + # found in the data store, your _default_ will be returned instead. If you do + # not specify a default, PStore::Error will be raised if the object is not + # found. 
+ # + # *WARNING*: This method is only valid in a PStore#transaction. It will + # raise PStore::Error if called at any other time. + # + def fetch(name, default=PStore::Error) + in_transaction + unless @table.key? name + if default == PStore::Error + raise PStore::Error, format("undefined root name `%s'", name) + else + return default + end + end + @table[name] + end + # + # Stores an individual Ruby object or a hierarchy of Ruby objects in the data + # store file under the root _name_. Assigning to a _name_ already in the data + # store clobbers the old data. + # + # == Example: + # + # require "pstore" + # + # store = PStore.new("data_file.pstore") + # store.transaction do # begin transaction + # # load some data into the store... + # store[:single_object] = "My data..." + # store[:obj_heirarchy] = { "Kev Jackson" => ["rational.rb", "pstore.rb"], + # "James Gray" => ["erb.rb", "pstore.rb"] } + # end # commit changes to data store file + # + # *WARNING*: This method is only valid in a PStore#transaction and it cannot + # be read-only. It will raise PStore::Error if called at any other time. + # + def []=(name, value) + in_transaction_wr() + @table[name] = value + end + # + # Removes an object hierarchy from the data store, by _name_. + # + # *WARNING*: This method is only valid in a PStore#transaction and it cannot + # be read-only. It will raise PStore::Error if called at any other time. + # + def delete(name) + in_transaction_wr() + @table.delete name + end + + # + # Returns the names of all object hierarchies currently in the store. + # + # *WARNING*: This method is only valid in a PStore#transaction. It will + # raise PStore::Error if called at any other time. + # + def roots + in_transaction + @table.keys + end + # + # Returns true if the supplied _name_ is currently in the data store. + # + # *WARNING*: This method is only valid in a PStore#transaction. It will + # raise PStore::Error if called at any other time. 
+ # + def root?(name) + in_transaction + @table.key? name + end + # Returns the path to the data store file. + def path + @filename + end + + # + # Ends the current PStore#transaction, committing any changes to the data + # store immediately. + # + # == Example: + # + # require "pstore" + # + # store = PStore.new("data_file.pstore") + # store.transaction do # begin transaction + # # load some data into the store... + # store[:one] = 1 + # store[:two] = 2 + # + # store.commit # end transaction here, committing changes + # + # store[:three] = 3 # this change is never reached + # end + # + # *WARNING*: This method is only valid in a PStore#transaction. It will + # raise PStore::Error if called at any other time. + # + def commit + in_transaction + @abort = false + throw :pstore_abort_transaction + end + # + # Ends the current PStore#transaction, discarding any changes to the data + # store. + # + # == Example: + # + # require "pstore" + # + # store = PStore.new("data_file.pstore") + # store.transaction do # begin transaction + # store[:one] = 1 # this change is not applied, see below... + # store[:two] = 2 # this change is not applied, see below... + # + # store.abort # end transaction here, discard all changes + # + # store[:three] = 3 # this change is never reached + # end + # + # *WARNING*: This method is only valid in a PStore#transaction. It will + # raise PStore::Error if called at any other time. + # + def abort + in_transaction + @abort = true + throw :pstore_abort_transaction + end + + # + # Opens a new transaction for the data store. Code executed inside a block + # passed to this method may read and write data to and from the data store + # file. + # + # At the end of the block, changes are committed to the data store + # automatically. You may exit the transaction early with a call to either + # PStore#commit or PStore#abort. See those methods for details about how + # changes are handled. 
Raising an uncaught Exception in the block is + # equivalent to calling PStore#abort. + # + # If _read_only_ is set to +true+, you will only be allowed to read from the + # data store during the transaction and any attempts to change the data will + # raise a PStore::Error. + # + # Note that PStore does not support nested transactions. + # + def transaction(read_only = false, &block) # :yields: pstore + value = nil + raise PStore::Error, "nested transaction" if @transaction + @lock.synchronize do + @rdonly = read_only + @transaction = true + @abort = false + file = open_and_lock_file(@filename, read_only) + if file + begin + @table, checksum, original_data_size = load_data(file, read_only) + + catch(:pstore_abort_transaction) do + value = yield(self) + end + + if !@abort && !read_only + save_data(checksum, original_data_size, file) + end + ensure + file.close if !file.closed? + end + else + # This can only occur if read_only == true. + @table = {} + catch(:pstore_abort_transaction) do + value = yield(self) + end + end + end + value + ensure + @transaction = false + end + + private + # Constant for relieving Ruby's garbage collector. + EMPTY_STRING = "" + EMPTY_MARSHAL_DATA = Marshal.dump({}) + EMPTY_MARSHAL_CHECKSUM = Digest::MD5.digest(EMPTY_MARSHAL_DATA) + + class DummyMutex + def synchronize + yield + end + end + + # + # Open the specified filename (either in read-only mode or in + # read-write mode) and lock it for reading or writing. + # + # The opened File object will be returned. If _read_only_ is true, + # and the file does not exist, then nil will be returned. + # + # All exceptions are propagated. 
+ # + def open_and_lock_file(filename, read_only) + if read_only + begin + file = File.new(filename, RD_ACCESS) + begin + file.flock(File::LOCK_SH) + return file + rescue + file.close + raise + end + rescue Errno::ENOENT + return nil + end + else + file = File.new(filename, RDWR_ACCESS) + file.flock(File::LOCK_EX) + return file + end + end + + # Load the given PStore file. + # If +read_only+ is true, the unmarshalled Hash will be returned. + # If +read_only+ is false, a 3-tuple will be returned: the unmarshalled + # Hash, an MD5 checksum of the data, and the size of the data. + def load_data(file, read_only) + if read_only + begin + table = load(file) + if !table.is_a?(Hash) + raise Error, "PStore file seems to be corrupted." + end + rescue EOFError + # This seems to be a newly-created file. + table = {} + end + table + else + data = file.read + if data.empty? + # This seems to be a newly-created file. + table = {} + checksum = empty_marshal_checksum + size = empty_marshal_data.size + else + table = load(data) + checksum = Digest::MD5.digest(data) + size = data.size + if !table.is_a?(Hash) + raise Error, "PStore file seems to be corrupted." + end + end + data.replace(EMPTY_STRING) + [table, checksum, size] + end + end + + def on_windows? + is_windows = RUBY_PLATFORM =~ /mswin/ || + RUBY_PLATFORM =~ /mingw/ || + RUBY_PLATFORM =~ /bccwin/ || + RUBY_PLATFORM =~ /wince/ + self.class.__send__(:define_method, :on_windows?) do + is_windows + end + is_windows + end + + # Check whether Marshal.dump supports the 'canonical' option. This option + # makes sure that Marshal.dump always dumps data structures in the same order. + # This is important because otherwise, the checksums that we generate may differ. + def marshal_dump_supports_canonical_option? + begin + Marshal.dump(nil, -1, true) + result = true + rescue + result = false + end + self.class.__send__(:define_method, :marshal_dump_supports_canonical_option?) 
do + result + end + result + end + + def save_data(original_checksum, original_file_size, file) + # We only want to save the new data if the size or checksum has changed. + # This results in less filesystem calls, which is good for performance. + if marshal_dump_supports_canonical_option? + new_data = Marshal.dump(@table, -1, true) + else + new_data = dump(@table) + end + new_checksum = Digest::MD5.digest(new_data) + + if new_data.size != original_file_size || new_checksum != original_checksum + if @ultra_safe && !on_windows? + # Windows doesn't support atomic file renames. + save_data_with_atomic_file_rename_strategy(new_data, file) + else + save_data_with_fast_strategy(new_data, file) + end + end + + new_data.replace(EMPTY_STRING) + end + + def save_data_with_atomic_file_rename_strategy(data, file) + temp_filename = "#{@filename}.tmp.#{Process.pid}.#{rand 1000000}" + temp_file = File.new(temp_filename, WR_ACCESS) + begin + temp_file.flock(File::LOCK_EX) + temp_file.write(data) + temp_file.flush + File.rename(temp_filename, @filename) + rescue + File.unlink(temp_file) rescue nil + raise + ensure + temp_file.close + end + end + + def save_data_with_fast_strategy(data, file) + file.rewind + file.truncate(0) + file.write(data) + end + + + # This method is just a wrapped around Marshal.dump + # to allow subclass overriding used in YAML::Store. + def dump(table) # :nodoc: + Marshal::dump(table) + end + + # This method is just a wrapped around Marshal.load. + # to allow subclass overriding used in YAML::Store. 
+ def load(content) # :nodoc: + Marshal::load(content) + end + + def empty_marshal_data + EMPTY_MARSHAL_DATA + end + def empty_marshal_checksum + EMPTY_MARSHAL_CHECKSUM + end +end + +# :enddoc: + +if __FILE__ == $0 + db = PStore.new("/tmp/foo") + db.transaction do + p db.roots + ary = db["root"] = [1,2,3,4] + ary[1] = [1,1.5] + end + + 1000.times do + db.transaction do + db["root"][0] += 1 + p db["root"][0] + end + end + + db.transaction(true) do + p db["root"] + end +end diff --git a/lib/racc/parser.rb b/lib/racc/parser.rb new file mode 100644 index 0000000..e87a250 --- /dev/null +++ b/lib/racc/parser.rb @@ -0,0 +1,441 @@ +# +# $originalId: parser.rb,v 1.8 2006/07/06 11:42:07 aamine Exp $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the same terms of ruby. +# +# As a special exception, when this code is copied by Racc +# into a Racc output file, you may use that output file +# without restriction. +# + +unless defined?(NotImplementedError) + NotImplementedError = NotImplementError +end + +module Racc + class ParseError < StandardError; end +end +unless defined?(::ParseError) + ParseError = Racc::ParseError +end + +module Racc + + unless defined?(Racc_No_Extentions) + Racc_No_Extentions = false + end + + class Parser + + Racc_Runtime_Version = '1.4.5' + Racc_Runtime_Revision = '$originalRevision: 1.8 $'.split[1] + + Racc_Runtime_Core_Version_R = '1.4.5' + Racc_Runtime_Core_Revision_R = '$originalRevision: 1.8 $'.split[1] + begin + require 'racc/cparse' + # Racc_Runtime_Core_Version_C = (defined in extention) + Racc_Runtime_Core_Revision_C = Racc_Runtime_Core_Id_C.split[2] + unless new.respond_to?(:_racc_do_parse_c, true) + raise LoadError, 'old cparse.so' + end + if Racc_No_Extentions + raise LoadError, 'selecting ruby version of racc runtime core' + end + + Racc_Main_Parsing_Routine = :_racc_do_parse_c + Racc_YY_Parse_Method = :_racc_yyparse_c + Racc_Runtime_Core_Version = 
Racc_Runtime_Core_Version_C + Racc_Runtime_Core_Revision = Racc_Runtime_Core_Revision_C + Racc_Runtime_Type = 'c' + rescue LoadError + Racc_Main_Parsing_Routine = :_racc_do_parse_rb + Racc_YY_Parse_Method = :_racc_yyparse_rb + Racc_Runtime_Core_Version = Racc_Runtime_Core_Version_R + Racc_Runtime_Core_Revision = Racc_Runtime_Core_Revision_R + Racc_Runtime_Type = 'ruby' + end + + def Parser.racc_runtime_type + Racc_Runtime_Type + end + + private + + def _racc_setup + @yydebug = false unless self.class::Racc_debug_parser + @yydebug = false unless defined?(@yydebug) + if @yydebug + @racc_debug_out = $stderr unless defined?(@racc_debug_out) + @racc_debug_out ||= $stderr + end + arg = self.class::Racc_arg + arg[13] = true if arg.size < 14 + arg + end + + def _racc_init_sysvars + @racc_state = [0] + @racc_tstack = [] + @racc_vstack = [] + + @racc_t = nil + @racc_val = nil + + @racc_read_next = true + + @racc_user_yyerror = false + @racc_error_status = 0 + end + + ### + ### do_parse + ### + + def do_parse + __send__(Racc_Main_Parsing_Routine, _racc_setup(), false) + end + + def next_token + raise NotImplementedError, "#{self.class}\#next_token is not defined" + end + + def _racc_do_parse_rb(arg, in_debug) + action_table, action_check, action_default, action_pointer, + goto_table, goto_check, goto_default, goto_pointer, + nt_base, reduce_table, token_table, shift_n, + reduce_n, use_result, * = arg + + _racc_init_sysvars + tok = act = i = nil + nerr = 0 + + catch(:racc_end_parse) { + while true + if i = action_pointer[@racc_state[-1]] + if @racc_read_next + if @racc_t != 0 # not EOF + tok, @racc_val = next_token() + unless tok # EOF + @racc_t = 0 + else + @racc_t = (token_table[tok] or 1) # error token + end + racc_read_token(@racc_t, tok, @racc_val) if @yydebug + @racc_read_next = false + end + end + i += @racc_t + unless i >= 0 and + act = action_table[i] and + action_check[i] == @racc_state[-1] + act = action_default[@racc_state[-1]] + end + else + act = 
action_default[@racc_state[-1]] + end + while act = _racc_evalact(act, arg) + ; + end + end + } + end + + ### + ### yyparse + ### + + def yyparse(recv, mid) + __send__(Racc_YY_Parse_Method, recv, mid, _racc_setup(), true) + end + + def _racc_yyparse_rb(recv, mid, arg, c_debug) + action_table, action_check, action_default, action_pointer, + goto_table, goto_check, goto_default, goto_pointer, + nt_base, reduce_table, token_table, shift_n, + reduce_n, use_result, * = arg + + _racc_init_sysvars + act = nil + i = nil + nerr = 0 + + catch(:racc_end_parse) { + until i = action_pointer[@racc_state[-1]] + while act = _racc_evalact(action_default[@racc_state[-1]], arg) + ; + end + end + recv.__send__(mid) do |tok, val| + unless tok + @racc_t = 0 + else + @racc_t = (token_table[tok] or 1) # error token + end + @racc_val = val + @racc_read_next = false + + i += @racc_t + unless i >= 0 and + act = action_table[i] and + action_check[i] == @racc_state[-1] + act = action_default[@racc_state[-1]] + end + while act = _racc_evalact(act, arg) + ; + end + + while not(i = action_pointer[@racc_state[-1]]) or + not @racc_read_next or + @racc_t == 0 # $ + unless i and i += @racc_t and + i >= 0 and + act = action_table[i] and + action_check[i] == @racc_state[-1] + act = action_default[@racc_state[-1]] + end + while act = _racc_evalact(act, arg) + ; + end + end + end + } + end + + ### + ### common + ### + + def _racc_evalact(act, arg) + action_table, action_check, action_default, action_pointer, + goto_table, goto_check, goto_default, goto_pointer, + nt_base, reduce_table, token_table, shift_n, + reduce_n, use_result, * = arg + nerr = 0 # tmp + + if act > 0 and act < shift_n + # + # shift + # + if @racc_error_status > 0 + @racc_error_status -= 1 unless @racc_t == 1 # error token + end + @racc_vstack.push @racc_val + @racc_state.push act + @racc_read_next = true + if @yydebug + @racc_tstack.push @racc_t + racc_shift @racc_t, @racc_tstack, @racc_vstack + end + + elsif act < 0 and act > 
-reduce_n + # + # reduce + # + code = catch(:racc_jump) { + @racc_state.push _racc_do_reduce(arg, act) + false + } + if code + case code + when 1 # yyerror + @racc_user_yyerror = true # user_yyerror + return -reduce_n + when 2 # yyaccept + return shift_n + else + raise '[Racc Bug] unknown jump code' + end + end + + elsif act == shift_n + # + # accept + # + racc_accept if @yydebug + throw :racc_end_parse, @racc_vstack[0] + + elsif act == -reduce_n + # + # error + # + case @racc_error_status + when 0 + unless arg[21] # user_yyerror + nerr += 1 + on_error @racc_t, @racc_val, @racc_vstack + end + when 3 + if @racc_t == 0 # is $ + throw :racc_end_parse, nil + end + @racc_read_next = true + end + @racc_user_yyerror = false + @racc_error_status = 3 + while true + if i = action_pointer[@racc_state[-1]] + i += 1 # error token + if i >= 0 and + (act = action_table[i]) and + action_check[i] == @racc_state[-1] + break + end + end + throw :racc_end_parse, nil if @racc_state.size <= 1 + @racc_state.pop + @racc_vstack.pop + if @yydebug + @racc_tstack.pop + racc_e_pop @racc_state, @racc_tstack, @racc_vstack + end + end + return act + + else + raise "[Racc Bug] unknown action #{act.inspect}" + end + + racc_next_state(@racc_state[-1], @racc_state) if @yydebug + + nil + end + + def _racc_do_reduce(arg, act) + action_table, action_check, action_default, action_pointer, + goto_table, goto_check, goto_default, goto_pointer, + nt_base, reduce_table, token_table, shift_n, + reduce_n, use_result, * = arg + state = @racc_state + vstack = @racc_vstack + tstack = @racc_tstack + + i = act * -3 + len = reduce_table[i] + reduce_to = reduce_table[i+1] + method_id = reduce_table[i+2] + void_array = [] + + tmp_t = tstack[-len, len] if @yydebug + tmp_v = vstack[-len, len] + tstack[-len, len] = void_array if @yydebug + vstack[-len, len] = void_array + state[-len, len] = void_array + + # tstack must be updated AFTER method call + if use_result + vstack.push __send__(method_id, tmp_v, vstack, tmp_v[0]) 
+ else + vstack.push __send__(method_id, tmp_v, vstack) + end + tstack.push reduce_to + + racc_reduce(tmp_t, reduce_to, tstack, vstack) if @yydebug + + k1 = reduce_to - nt_base + if i = goto_pointer[k1] + i += state[-1] + if i >= 0 and (curstate = goto_table[i]) and goto_check[i] == k1 + return curstate + end + end + goto_default[k1] + end + + def on_error(t, val, vstack) + raise ParseError, sprintf("\nparse error on value %s (%s)", + val.inspect, token_to_str(t) || '?') + end + + def yyerror + throw :racc_jump, 1 + end + + def yyaccept + throw :racc_jump, 2 + end + + def yyerrok + @racc_error_status = 0 + end + + # + # for debugging output + # + + def racc_read_token(t, tok, val) + @racc_debug_out.print 'read ' + @racc_debug_out.print tok.inspect, '(', racc_token2str(t), ') ' + @racc_debug_out.puts val.inspect + @racc_debug_out.puts + end + + def racc_shift(tok, tstack, vstack) + @racc_debug_out.puts "shift #{racc_token2str tok}" + racc_print_stacks tstack, vstack + @racc_debug_out.puts + end + + def racc_reduce(toks, sim, tstack, vstack) + out = @racc_debug_out + out.print 'reduce ' + if toks.empty? 
+ out.print ' ' + else + toks.each {|t| out.print ' ', racc_token2str(t) } + end + out.puts " --> #{racc_token2str(sim)}" + + racc_print_stacks tstack, vstack + @racc_debug_out.puts + end + + def racc_accept + @racc_debug_out.puts 'accept' + @racc_debug_out.puts + end + + def racc_e_pop(state, tstack, vstack) + @racc_debug_out.puts 'error recovering mode: pop token' + racc_print_states state + racc_print_stacks tstack, vstack + @racc_debug_out.puts + end + + def racc_next_state(curstate, state) + @racc_debug_out.puts "goto #{curstate}" + racc_print_states state + @racc_debug_out.puts + end + + def racc_print_stacks(t, v) + out = @racc_debug_out + out.print ' [' + t.each_index do |i| + out.print ' (', racc_token2str(t[i]), ' ', v[i].inspect, ')' + end + out.puts ' ]' + end + + def racc_print_states(s) + out = @racc_debug_out + out.print ' [' + s.each {|st| out.print ' ', st } + out.puts ' ]' + end + + def racc_token2str(tok) + self.class::Racc_token_to_s_table[tok] or + raise "[Racc Bug] can't convert token #{tok} to string" + end + + def token_to_str(t) + self.class::Racc_token_to_s_table[t] + end + + end + +end diff --git a/lib/rake.rb b/lib/rake.rb new file mode 100644 index 0000000..d46c49d --- /dev/null +++ b/lib/rake.rb @@ -0,0 +1,2465 @@ +#!/usr/bin/env ruby + +#-- + +# Copyright (c) 2003, 2004, 2005, 2006, 2007 Jim Weirich +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +#++ +# +# = Rake -- Ruby Make +# +# This is the main file for the Rake application. Normally it is referenced +# as a library via a require statement, but it can be distributed +# independently as an application. + +RAKEVERSION = '0.8.3' + +require 'rbconfig' +require 'fileutils' +require 'singleton' +require 'monitor' +require 'optparse' +require 'ostruct' + +require 'rake/win32' + +###################################################################### +# Rake extensions to Module. +# +class Module + # Check for an existing method in the current class before extending. IF + # the method already exists, then a warning is printed and the extension is + # not added. Otherwise the block is yielded and any definitions in the + # block will take effect. + # + # Usage: + # + # class String + # rake_extension("xyz") do + # def xyz + # ... + # end + # end + # end + # + def rake_extension(method) + if method_defined?(method) + $stderr.puts "WARNING: Possible conflict with Rake extension: #{self}##{method} already exists" + else + yield + end + end +end # module Module + + +###################################################################### +# User defined methods to be added to String. +# +class String + rake_extension("ext") do + # Replace the file extension with +newext+. If there is no extenson on + # the string, append the new extension to the end. If the new extension + # is not given, or is the empty string, remove any existing extension. 
+ # + # +ext+ is a user added method for the String class. + def ext(newext='') + return self.dup if ['.', '..'].include? self + if newext != '' + newext = (newext =~ /^\./) ? newext : ("." + newext) + end + self.chomp(File.extname(self)) << newext + end + end + + rake_extension("pathmap") do + # Explode a path into individual components. Used by +pathmap+. + def pathmap_explode + head, tail = File.split(self) + return [self] if head == self + return [tail] if head == '.' || tail == '/' + return [head, tail] if head == '/' + return head.pathmap_explode + [tail] + end + protected :pathmap_explode + + # Extract a partial path from the path. Include +n+ directories from the + # front end (left hand side) if +n+ is positive. Include |+n+| + # directories from the back end (right hand side) if +n+ is negative. + def pathmap_partial(n) + dirs = File.dirname(self).pathmap_explode + partial_dirs = + if n > 0 + dirs[0...n] + elsif n < 0 + dirs.reverse[0...-n].reverse + else + "." + end + File.join(partial_dirs) + end + protected :pathmap_partial + + # Preform the pathmap replacement operations on the given path. The + # patterns take the form 'pat1,rep1;pat2,rep2...'. + def pathmap_replace(patterns, &block) + result = self + patterns.split(';').each do |pair| + pattern, replacement = pair.split(',') + pattern = Regexp.new(pattern) + if replacement == '*' && block_given? + result = result.sub(pattern, &block) + elsif replacement + result = result.sub(pattern, replacement) + else + result = result.sub(pattern, '') + end + end + result + end + protected :pathmap_replace + + # Map the path according to the given specification. The specification + # controls the details of the mapping. The following special patterns are + # recognized: + # + # * %p -- The complete path. + # * %f -- The base file name of the path, with its file extension, + # but without any directories. + # * %n -- The file name of the path without its file extension. + # * %d -- The directory list of the path. 
+ # * %x -- The file extension of the path. An empty string if there + # is no extension. + # * %X -- Everything *but* the file extension. + # * %s -- The alternate file separater if defined, otherwise use + # the standard file separator. + # * %% -- A percent sign. + # + # The %d specifier can also have a numeric prefix (e.g. '%2d'). If the + # number is positive, only return (up to) +n+ directories in the path, + # starting from the left hand side. If +n+ is negative, return (up to) + # |+n+| directories from the right hand side of the path. + # + # Examples: + # + # 'a/b/c/d/file.txt'.pathmap("%2d") => 'a/b' + # 'a/b/c/d/file.txt'.pathmap("%-2d") => 'c/d' + # + # Also the %d, %p, %f, %n, %x, and %X operators can take a + # pattern/replacement argument to perform simple string substititions on a + # particular part of the path. The pattern and replacement are speparated + # by a comma and are enclosed by curly braces. The replacement spec comes + # after the % character but before the operator letter. (e.g. + # "%{old,new}d"). Muliple replacement specs should be separated by + # semi-colons (e.g. "%{old,new;src,bin}d"). + # + # Regular expressions may be used for the pattern, and back refs may be + # used in the replacement text. Curly braces, commas and semi-colons are + # excluded from both the pattern and replacement text (let's keep parsing + # reasonable). + # + # For example: + # + # "src/org/onestepback/proj/A.java".pathmap("%{^src,bin}X.class") + # + # returns: + # + # "bin/org/onestepback/proj/A.class" + # + # If the replacement text is '*', then a block may be provided to perform + # some arbitrary calculation for the replacement. + # + # For example: + # + # "/path/to/file.TXT".pathmap("%X%{.*,*}x") { |ext| + # ext.downcase + # } + # + # Returns: + # + # "/path/to/file.txt" + # + def pathmap(spec=nil, &block) + return self if spec.nil? 
+ result = '' + spec.scan(/%\{[^}]*\}-?\d*[sdpfnxX%]|%-?\d+d|%.|[^%]+/) do |frag| + case frag + when '%f' + result << File.basename(self) + when '%n' + result << File.basename(self, '.*') + when '%d' + result << File.dirname(self) + when '%x' + result << File.extname(self) + when '%X' + result << self.ext + when '%p' + result << self + when '%s' + result << (File::ALT_SEPARATOR || File::SEPARATOR) + when '%-' + # do nothing + when '%%' + result << "%" + when /%(-?\d+)d/ + result << pathmap_partial($1.to_i) + when /^%\{([^}]*)\}(\d*[dpfnxX])/ + patterns, operator = $1, $2 + result << pathmap('%' + operator).pathmap_replace(patterns, &block) + when /^%/ + fail ArgumentError, "Unknown pathmap specifier #{frag} in '#{spec}'" + else + result << frag + end + end + result + end + end +end # class String + +############################################################################## +module Rake + + # Errors ----------------------------------------------------------- + + # Error indicating an ill-formed task declaration. + class TaskArgumentError < ArgumentError + end + + # Error indicating a recursion overflow error in task selection. + class RuleRecursionOverflowError < StandardError + def initialize(*args) + super + @targets = [] + end + + def add_target(target) + @targets << target + end + + def message + super + ": [" + @targets.reverse.join(' => ') + "]" + end + end + + # -------------------------------------------------------------------------- + # Rake module singleton methods. + # + class << self + # Current Rake Application + def application + @application ||= Rake::Application.new + end + + # Set the current Rake application object. + def application=(app) + @application = app + end + + # Return the original directory where the Rake application was started. + def original_dir + application.original_dir + end + + end + + # ########################################################################## + # Mixin for creating easily cloned objects. 
+ # + module Cloneable + # Clone an object by making a new object and setting all the instance + # variables to the same values. + def dup + sibling = self.class.new + instance_variables.each do |ivar| + value = self.instance_variable_get(ivar) + new_value = value.clone rescue value + sibling.instance_variable_set(ivar, new_value) + end + sibling.taint if tainted? + sibling + end + + def clone + sibling = dup + sibling.freeze if frozen? + sibling + end + end + + #################################################################### + # TaskAguments manage the arguments passed to a task. + # + class TaskArguments + include Enumerable + + attr_reader :names + + # Create a TaskArgument object with a list of named arguments + # (given by :names) and a set of associated values (given by + # :values). :parent is the parent argument object. + def initialize(names, values, parent=nil) + @names = names + @parent = parent + @hash = {} + names.each_with_index { |name, i| + @hash[name.to_sym] = values[i] unless values[i].nil? + } + end + + # Create a new argument scope using the prerequisite argument + # names. + def new_scope(names) + values = names.collect { |n| self[n] } + self.class.new(names, values, self) + end + + # Find an argument value by name or index. + def [](index) + lookup(index.to_sym) + end + + # Specify a hash of default values for task arguments. Use the + # defaults only if there is no specific value for the given + # argument. 
+ def with_defaults(defaults) + @hash = defaults.merge(@hash) + end + + def each(&block) + @hash.each(&block) + end + + def method_missing(sym, *args, &block) + lookup(sym.to_sym) + end + + def to_hash + @hash + end + + def to_s + @hash.inspect + end + + def inspect + to_s + end + + protected + + def lookup(name) + if @hash.has_key?(name) + @hash[name] + elsif ENV.has_key?(name.to_s) + ENV[name.to_s] + elsif ENV.has_key?(name.to_s.upcase) + ENV[name.to_s.upcase] + elsif @parent + @parent.lookup(name) + end + end + end + + EMPTY_TASK_ARGS = TaskArguments.new([], []) + + #################################################################### + # InvocationChain tracks the chain of task invocations to detect + # circular dependencies. + class InvocationChain + def initialize(value, tail) + @value = value + @tail = tail + end + + def member?(obj) + @value == obj || @tail.member?(obj) + end + + def append(value) + if member?(value) + fail RuntimeError, "Circular dependency detected: #{to_s} => #{value}" + end + self.class.new(value, self) + end + + def to_s + "#{prefix}#{@value}" + end + + def self.append(value, chain) + chain.append(value) + end + + private + + def prefix + "#{@tail.to_s} => " + end + + class EmptyInvocationChain + def member?(obj) + false + end + def append(value) + InvocationChain.new(value, self) + end + def to_s + "TOP" + end + end + + EMPTY = EmptyInvocationChain.new + + end # class InvocationChain + +end # module Rake + +module Rake + + # ######################################################################### + # A Task is the basic unit of work in a Rakefile. Tasks have associated + # actions (possibly more than one) and a list of prerequisites. When + # invoked, a task will first ensure that all of its prerequisites have an + # opportunity to run and then it will execute its own actions. + # + # Tasks are not usually created directly using the new method, but rather + # use the +file+ and +task+ convenience methods. 
+ # + class Task + # List of prerequisites for a task. + attr_reader :prerequisites + + # List of actions attached to a task. + attr_reader :actions + + # Application owning this task. + attr_accessor :application + + # Comment for this task. Restricted to a single line of no more than 50 + # characters. + attr_reader :comment + + # Full text of the (possibly multi-line) comment. + attr_reader :full_comment + + # Array of nested namespaces names used for task lookup by this task. + attr_reader :scope + + # Return task name + def to_s + name + end + + def inspect + "<#{self.class} #{name} => [#{prerequisites.join(', ')}]>" + end + + # List of sources for task. + attr_writer :sources + def sources + @sources ||= [] + end + + # First source from a rule (nil if no sources) + def source + @sources.first if defined?(@sources) + end + + # Create a task named +task_name+ with no actions or prerequisites. Use + # +enhance+ to add actions and prerequisites. + def initialize(task_name, app) + @name = task_name.to_s + @prerequisites = [] + @actions = [] + @already_invoked = false + @full_comment = nil + @comment = nil + @lock = Monitor.new + @application = app + @scope = app.current_scope + @arg_names = nil + end + + # Enhance a task with prerequisites or actions. Returns self. + def enhance(deps=nil, &block) + @prerequisites |= deps if deps + @actions << block if block_given? + self + end + + # Name of the task, including any namespace qualifiers. + def name + @name.to_s + end + + # Name of task with argument list description. + def name_with_args # :nodoc: + if arg_description + "#{name}#{arg_description}" + else + name + end + end + + # Argument description (nil if none). + def arg_description # :nodoc: + @arg_names ? "[#{(arg_names || []).join(',')}]" : nil + end + + # Name of arguments for this task. + def arg_names + @arg_names || [] + end + + # Reenable the task, allowing its tasks to be executed if the task + # is invoked again. 
+ def reenable + @already_invoked = false + end + + # Clear the existing prerequisites and actions of a rake task. + def clear + clear_prerequisites + clear_actions + self + end + + # Clear the existing prerequisites of a rake task. + def clear_prerequisites + prerequisites.clear + self + end + + # Clear the existing actions on a rake task. + def clear_actions + actions.clear + self + end + + # Invoke the task if it is needed. Prerequites are invoked first. + def invoke(*args) + task_args = TaskArguments.new(arg_names, args) + invoke_with_call_chain(task_args, InvocationChain::EMPTY) + end + + # Same as invoke, but explicitly pass a call chain to detect + # circular dependencies. + def invoke_with_call_chain(task_args, invocation_chain) # :nodoc: + new_chain = InvocationChain.append(self, invocation_chain) + @lock.synchronize do + if application.options.trace + puts "** Invoke #{name} #{format_trace_flags}" + end + return if @already_invoked + @already_invoked = true + invoke_prerequisites(task_args, new_chain) + execute(task_args) if needed? + end + end + protected :invoke_with_call_chain + + # Invoke all the prerequisites of a task. + def invoke_prerequisites(task_args, invocation_chain) # :nodoc: + @prerequisites.each { |n| + prereq = application[n, @scope] + prereq_args = task_args.new_scope(prereq.arg_names) + prereq.invoke_with_call_chain(prereq_args, invocation_chain) + } + end + + # Format the trace flags for display. + def format_trace_flags + flags = [] + flags << "first_time" unless @already_invoked + flags << "not_needed" unless needed? + flags.empty? ? "" : "(" + flags.join(", ") + ")" + end + private :format_trace_flags + + # Execute the actions associated with this task. + def execute(args=nil) + args ||= EMPTY_TASK_ARGS + if application.options.dryrun + puts "** Execute (dry run) #{name}" + return + end + if application.options.trace + puts "** Execute #{name}" + end + application.enhance_with_matching_rule(name) if @actions.empty? 
+ @actions.each do |act| + case act.arity + when 1 + act.call(self) + else + act.call(self, args) + end + end + end + + # Is this task needed? + def needed? + true + end + + # Timestamp for this task. Basic tasks return the current time for their + # time stamp. Other tasks can be more sophisticated. + def timestamp + @prerequisites.collect { |p| application[p].timestamp }.max || Time.now + end + + # Add a description to the task. The description can consist of an option + # argument list (enclosed brackets) and an optional comment. + def add_description(description) + return if ! description + comment = description.strip + add_comment(comment) if comment && ! comment.empty? + end + + # Writing to the comment attribute is the same as adding a description. + def comment=(description) + add_description(description) + end + + # Add a comment to the task. If a comment alread exists, separate + # the new comment with " / ". + def add_comment(comment) + if @full_comment + @full_comment << " / " + else + @full_comment = '' + end + @full_comment << comment + if @full_comment =~ /\A([^.]+?\.)( |$)/ + @comment = $1 + else + @comment = @full_comment + end + end + private :add_comment + + # Set the names of the arguments for this task. +args+ should be + # an array of symbols, one for each argument name. + def set_arg_names(args) + @arg_names = args.map { |a| a.to_sym } + end + + # Return a string describing the internal state of a task. Useful for + # debugging. + def investigation + result = "------------------------------\n" + result << "Investigating #{name}\n" + result << "class: #{self.class}\n" + result << "task needed: #{needed?}\n" + result << "timestamp: #{timestamp}\n" + result << "pre-requisites: \n" + prereqs = @prerequisites.collect {|name| application[name]} + prereqs.sort! 
{|a,b| a.timestamp <=> b.timestamp} + prereqs.each do |p| + result << "--#{p.name} (#{p.timestamp})\n" + end + latest_prereq = @prerequisites.collect{|n| application[n].timestamp}.max + result << "latest-prerequisite time: #{latest_prereq}\n" + result << "................................\n\n" + return result + end + + # ---------------------------------------------------------------- + # Rake Module Methods + # + class << self + + # Clear the task list. This cause rake to immediately forget all the + # tasks that have been assigned. (Normally used in the unit tests.) + def clear + Rake.application.clear + end + + # List of all defined tasks. + def tasks + Rake.application.tasks + end + + # Return a task with the given name. If the task is not currently + # known, try to synthesize one from the defined rules. If no rules are + # found, but an existing file matches the task name, assume it is a file + # task with no dependencies or actions. + def [](task_name) + Rake.application[task_name] + end + + # TRUE if the task name is already defined. + def task_defined?(task_name) + Rake.application.lookup(task_name) != nil + end + + # Define a task given +args+ and an option block. If a rule with the + # given name already exists, the prerequisites and actions are added to + # the existing task. Returns the defined task. + def define_task(*args, &block) + Rake.application.define_task(self, *args, &block) + end + + # Define a rule for synthesizing tasks. + def create_rule(*args, &block) + Rake.application.create_rule(*args, &block) + end + + # Apply the scope to the task name according to the rules for + # this kind of task. Generic tasks will accept the scope as + # part of the name. + def scope_name(scope, task_name) + (scope + [task_name]).join(':') + end + + end # class << Rake::Task + end # class Rake::Task + + + # ######################################################################### + # A FileTask is a task that includes time based dependencies. 
If any of a + # FileTask's prerequisites have a timestamp that is later than the file + # represented by this task, then the file must be rebuilt (using the + # supplied actions). + # + class FileTask < Task + + # Is this file task needed? Yes if it doesn't exist, or if its time stamp + # is out of date. + def needed? + return true unless File.exist?(name) + return true if out_of_date?(timestamp) + false + end + + # Time stamp for file task. + def timestamp + if File.exist?(name) + File.mtime(name.to_s) + else + Rake::EARLY + end + end + + private + + # Are there any prerequisites with a later time than the given time stamp? + def out_of_date?(stamp) + @prerequisites.any? { |n| application[n].timestamp > stamp} + end + + # ---------------------------------------------------------------- + # Task class methods. + # + class << self + # Apply the scope to the task name according to the rules for this kind + # of task. File based tasks ignore the scope when creating the name. + def scope_name(scope, task_name) + task_name + end + end + end # class Rake::FileTask + + # ######################################################################### + # A FileCreationTask is a file task that when used as a dependency will be + # needed if and only if the file has not been created. Once created, it is + # not re-triggered if any of its dependencies are newer, nor does trigger + # any rebuilds of tasks that depend on it whenever it is updated. + # + class FileCreationTask < FileTask + # Is this file task needed? Yes if it doesn't exist. + def needed? + ! File.exist?(name) + end + + # Time stamp for file creation task. This time stamp is earlier + # than any other time stamp. + def timestamp + Rake::EARLY + end + end + + # ######################################################################### + # Same as a regular task, but the immediate prerequisites are done in + # parallel using Ruby threads. 
+ # + class MultiTask < Task + private + def invoke_prerequisites(args, invocation_chain) + threads = @prerequisites.collect { |p| + Thread.new(p) { |r| application[r].invoke_with_call_chain(args, invocation_chain) } + } + threads.each { |t| t.join } + end + end +end # module Rake + +# ########################################################################### +# Task Definition Functions ... + +# Declare a basic task. +# +# Example: +# task :clobber => [:clean] do +# rm_rf "html" +# end +# +def task(*args, &block) + Rake::Task.define_task(*args, &block) +end + + +# Declare a file task. +# +# Example: +# file "config.cfg" => ["config.template"] do +# open("config.cfg", "w") do |outfile| +# open("config.template") do |infile| +# while line = infile.gets +# outfile.puts line +# end +# end +# end +# end +# +def file(*args, &block) + Rake::FileTask.define_task(*args, &block) +end + +# Declare a file creation task. +# (Mainly used for the directory command). +def file_create(args, &block) + Rake::FileCreationTask.define_task(args, &block) +end + +# Declare a set of files tasks to create the given directories on demand. +# +# Example: +# directory "testdata/doc" +# +def directory(dir) + Rake.each_dir_parent(dir) do |d| + file_create d do |t| + mkdir_p t.name if ! File.exist?(t.name) + end + end +end + +# Declare a task that performs its prerequisites in parallel. Multitasks does +# *not* guarantee that its prerequisites will execute in any given order +# (which is obvious when you think about it) +# +# Example: +# multitask :deploy => [:deploy_gem, :deploy_rdoc] +# +def multitask(args, &block) + Rake::MultiTask.define_task(args, &block) +end + +# Create a new rake namespace and use it for evaluating the given block. +# Returns a NameSpace object that can be used to lookup tasks defined in the +# namespace. +# +# E.g. +# +# ns = namespace "nested" do +# task :run +# end +# task_run = ns[:run] # find :run in the given namespace. 
#
def namespace(name=nil, &block)
  Rake.application.in_namespace(name, &block)
end

# Declare a rule for auto-tasks.
#
# Example:
#  rule '.o' => '.c' do |t|
#    sh %{cc -o #{t.name} #{t.source}}
#  end
#
def rule(*args, &block)
  Rake::Task.create_rule(*args, &block)
end

# Describe the next rake task.  The text is stored on the application as
# its +last_description+.
#
# Example:
#   desc "Run the Unit Tests"
#   task :test => [:build] do
#     runtests
#   end
#
def desc(description)
  Rake.application.last_description = description
end

# Import the partial Rakefiles +fn+.  Imported files are loaded _after_ the
# current file is completely loaded.  This allows the import statement to
# appear anywhere in the importing file, and yet allowing the imported files
# to depend on objects defined in the importing file.
#
# A common use of the import statement is to include files containing
# dependency declarations.
#
# See also the --rakelibdir command line option.
#
# Example:
#   import ".depend", "my_rules"
#
def import(*fns)
  fns.each do |fn|
    Rake.application.add_import(fn)
  end
end

# ###########################################################################
# This a FileUtils extension that defines several additional commands to be
# added to the FileUtils utility functions.
#
module FileUtils
  # Path of the installed Ruby interpreter, built from the +bindir+ and
  # +ruby_install_name+ configuration entries.  The whole path is wrapped in
  # double quotes when it contains whitespace so that it survives being
  # interpolated into a single command string.
  RUBY = File.join(RbConfig::CONFIG['bindir'], RbConfig::CONFIG['ruby_install_name']).
    sub(/.*\s.*/m, '"\&"')

  # Register the options accepted by the two commands added below.
  OPT_TABLE['sh']  = %w(noop verbose)
  OPT_TABLE['ruby'] = %w(noop verbose)

  # Run the system command +cmd+.  If multiple arguments are given the command
  # is not run with the shell (same semantics as Kernel::exec and
  # Kernel::system).
  #
  # A trailing Hash argument is taken as options (+:noop+ and +:verbose+).
  # When a block is given it is called with the result of the command and the
  # process status; without a block a failing command raises a RuntimeError
  # whose message shows the command, shortened to 42 characters plus "..."
  # when it is longer than 45 characters.
  #
  # Example:
  #   sh %{ls -ltr}
  #
  #   sh 'ls', 'file with spaces'
  #
  #   # check exit status after command runs
  #   sh %{grep pattern file} do |ok, res|
  #     if ! ok
  #       puts "pattern not found (status = #{res.exitstatus})"
  #     end
  #   end
  #
  def sh(*cmd, &block)
    options = (Hash === cmd.last) ? cmd.pop : {}
    unless block_given?
      show_command = cmd.join(" ")
      # Only abbreviate commands that are actually long; short commands are
      # reported verbatim.
      show_command = show_command[0,42] + "..." if show_command.length > 45
      block = lambda { |ok, status|
        ok or fail "Command failed with status (#{status.exitstatus}): [#{show_command}]"
      }
    end
    if RakeFileUtils.verbose_flag == :default
      options[:verbose] = false
    else
      options[:verbose] ||= RakeFileUtils.verbose_flag
    end
    options[:noop]    ||= RakeFileUtils.nowrite_flag
    rake_check_options options, :noop, :verbose
    rake_output_message cmd.join(" ") if options[:verbose]
    unless options[:noop]
      res = rake_system(*cmd)
      block.call(res, $?)
    end
  end

  # Thin wrapper around Kernel#system, kept separate so the actual process
  # launch happens in exactly one place.
  def rake_system(*cmd)
    system(*cmd)
  end
  private :rake_system

  # Run a Ruby interpreter with the given arguments.  A trailing Hash is
  # passed through to +sh+ as its options.  With more than one argument the
  # interpreter and its arguments are passed to +sh+ as separate words;
  # otherwise a single command string is built.
  #
  # Example:
  #   ruby %{-pe '$_.upcase!' <README}
  #
  def ruby(*args,&block)
    options = (Hash === args.last) ? args.pop : {}
    if args.length > 1 then
      sh(*([RUBY] + args + [options]), &block)
    else
      sh("#{RUBY} #{args.first}", options, &block)
    end
  end

  # Single-element array used as a mutable flag: set to false the first time
  # linking fails so that later calls go straight to copying.
  LN_SUPPORTED = [true]

  #  Attempt to do a normal file link, but fall back to a copy if the link
  #  fails.
  def safe_ln(*args)
    unless LN_SUPPORTED[0]
      cp(*args)
    else
      begin
        ln(*args)
      rescue StandardError, NotImplementedError
        LN_SUPPORTED[0] = false
        cp(*args)
      end
    end
  end

  # Split a file path into individual directory names.
  #
  # Example:
  #   split_all("a/b/c") =>  ['a', 'b', 'c']
  #
  def split_all(path)
    head, tail = File.split(path)
    return [tail] if head == '.' || tail == '/'
    return [head, tail] if head == '/'
    return split_all(head) + [tail]
  end
end

# ###########################################################################
# RakeFileUtils provides a custom version of the FileUtils methods that
# respond to the verbose and nowrite commands.
#
module RakeFileUtils
  include FileUtils

  # Module-level switches consulted by every wrapped FileUtils command.
  class << self
    attr_accessor :verbose_flag, :nowrite_flag
  end
  RakeFileUtils.verbose_flag = :default
  RakeFileUtils.nowrite_flag = false

  $fileutils_verbose = true
  $fileutils_nowrite = false

  # For every FileUtils command that accepts :verbose and/or :noop, define a
  # wrapper that merges the current flag values into the call's options and
  # then defers to the FileUtils implementation via +super+.
  FileUtils::OPT_TABLE.each do |command, accepted|
    injected = []
    if accepted.include?(:verbose) || accepted.include?("verbose")
      injected.push(':verbose => RakeFileUtils.verbose_flag')
    end
    if accepted.include?(:noop) || accepted.include?("noop")
      injected.push(':noop => RakeFileUtils.nowrite_flag')
    end
    next if injected.empty?

    module_eval(<<-EOS, __FILE__, __LINE__ + 1)
    def #{command}( *args, &block )
      super(
        *rake_merge_option(args,
          #{injected.join(', ')}
          ), &block)
    end
    EOS
  end

  # Get/set the verbose flag controlling output from the FileUtils utilities.
  # If verbose is true, then the utility method is echoed to standard output.
  #
  # Examples:
  #    verbose              # return the current value of the verbose flag
  #    verbose(v)           # set the verbose flag to _v_.
  #    verbose(v) { code }  # Execute code with the verbose flag set temporarily to _v_.
  #                         # Return to the original value when code is done.
  #
  # Always returns the value of the flag as it stands when the method exits.
  def verbose(value=nil)
    saved = RakeFileUtils.verbose_flag
    RakeFileUtils.verbose_flag = value unless value.nil?
    return RakeFileUtils.verbose_flag unless block_given?

    begin
      yield
    ensure
      RakeFileUtils.verbose_flag = saved
    end
    RakeFileUtils.verbose_flag
  end

  # Get/set the nowrite flag consulted by the FileUtils utilities (it is
  # passed to them as their :noop option).
  #
  # Examples:
  #    nowrite              # return the current value of the nowrite flag
  #    nowrite(v)           # set the nowrite flag to _v_.
  #    nowrite(v) { code }  # Execute code with the nowrite flag set temporarily to _v_.
  #                         # Return to the original value when code is done.
  #
  # Always returns the value the flag had on entry.
  def nowrite(value=nil)
    saved = RakeFileUtils.nowrite_flag
    RakeFileUtils.nowrite_flag = value unless value.nil?
    return saved unless block_given?

    begin
      yield
    ensure
      RakeFileUtils.nowrite_flag = saved
    end
    saved
  end

  # Use this function to prevent potentially destructive ruby code from
  # running when the :nowrite flag is set.
  #
  # Example:
  #
  #    when_writing("Building Project") do
  #      project.build
  #    end
  #
  # The preceding code will build the project under normal conditions.  If the
  # nowrite(true) flag is set, then the example will print:
  #      DRYRUN: Building Project
  # instead of actually building the project.
  #
  def when_writing(msg=nil)
    return yield unless RakeFileUtils.nowrite_flag
    puts "DRYRUN: #{msg}" if msg
  end

  # Merge the given options with the default values.  A trailing Hash in
  # +args+ is removed and overrides +defaults+; the merged Hash is then
  # appended to +args+, which is returned.
  def rake_merge_option(args, defaults)
    defaults.update(args.pop) if Hash === args.last
    args.push(defaults)
  end
  private :rake_merge_option

  # Send the message to the default rake output (which is $stderr).
  def rake_output_message(message)
    $stderr.puts(message)
  end
  private :rake_output_message

  # Check that the options do not contain options not listed in +optdecl+.  An
  # ArgumentError exception is thrown if non-declared options are found.
  def rake_check_options(options, *optdecl)
    unknown = options.keys - optdecl
    raise ArgumentError, "no such option: #{unknown.join(' ')}" unless unknown.empty?
  end
  private :rake_check_options

  extend self
end

# ###########################################################################
# Include the FileUtils file manipulation functions in the top level module,
# but mark them private so that they don't unintentionally define methods on
# other objects.
+ +include RakeFileUtils +private(*FileUtils.instance_methods(false)) +private(*RakeFileUtils.instance_methods(false)) + +###################################################################### +module Rake + + # ######################################################################### + # A FileList is essentially an array with a few helper methods defined to + # make file manipulation a bit easier. + # + # FileLists are lazy. When given a list of glob patterns for possible files + # to be included in the file list, instead of searching the file structures + # to find the files, a FileList holds the pattern for latter use. + # + # This allows us to define a number of FileList to match any number of + # files, but only search out the actual files when then FileList itself is + # actually used. The key is that the first time an element of the + # FileList/Array is requested, the pending patterns are resolved into a real + # list of file names. + # + class FileList + + include Cloneable + + # == Method Delegation + # + # The lazy evaluation magic of FileLists happens by implementing all the + # array specific methods to call +resolve+ before delegating the heavy + # lifting to an embedded array object (@items). + # + # In addition, there are two kinds of delegation calls. The regular kind + # delegates to the @items array and returns the result directly. Well, + # almost directly. It checks if the returned value is the @items object + # itself, and if so will return the FileList object instead. + # + # The second kind of delegation call is used in methods that normally + # return a new Array object. We want to capture the return value of these + # methods and wrap them in a new FileList object. We enumerate these + # methods in the +SPECIAL_RETURN+ list below. + + # List of array methods (that are not in +Object+) that need to be + # delegated. 
+ ARRAY_METHODS = (Array.instance_methods - Object.instance_methods).map { |n| n.to_s } + + # List of additional methods that must be delegated. + MUST_DEFINE = %w[to_a inspect] + + # List of methods that should not be delegated here (we define special + # versions of them explicitly below). + MUST_NOT_DEFINE = %w[to_a to_ary partition *] + + # List of delegated methods that return new array values which need + # wrapping. + SPECIAL_RETURN = %w[ + map collect sort sort_by select find_all reject grep + compact flatten uniq values_at + + - & | + ] + + DELEGATING_METHODS = (ARRAY_METHODS + MUST_DEFINE - MUST_NOT_DEFINE).collect{ |s| s.to_s }.sort.uniq + + # Now do the delegation. + DELEGATING_METHODS.each_with_index do |sym, i| + if SPECIAL_RETURN.include?(sym) + ln = __LINE__+1 + class_eval %{ + def #{sym}(*args, &block) + resolve + result = @items.send(:#{sym}, *args, &block) + FileList.new.import(result) + end + }, __FILE__, ln + else + ln = __LINE__+1 + class_eval %{ + def #{sym}(*args, &block) + resolve + result = @items.send(:#{sym}, *args, &block) + result.object_id == @items.object_id ? self : result + end + }, __FILE__, ln + end + end + + # Create a file list from the globbable patterns given. If you wish to + # perform multiple includes or excludes at object build time, use the + # "yield self" pattern. + # + # Example: + # file_list = FileList.new('lib/**/*.rb', 'test/test*.rb') + # + # pkg_files = FileList.new('lib/**/*') do |fl| + # fl.exclude(/\bCVS\b/) + # end + # + def initialize(*patterns) + @pending_add = [] + @pending = false + @exclude_patterns = DEFAULT_IGNORE_PATTERNS.dup + @exclude_procs = DEFAULT_IGNORE_PROCS.dup + @exclude_re = nil + @items = [] + patterns.each { |pattern| include(pattern) } + yield self if block_given? + end + + # Add file names defined by glob patterns to the file list. If an array + # is given, add each element of the array. 
+ # + # Example: + # file_list.include("*.java", "*.cfg") + # file_list.include %w( math.c lib.h *.o ) + # + def include(*filenames) + # TODO: check for pending + filenames.each do |fn| + if fn.respond_to? :to_ary + include(*fn.to_ary) + else + @pending_add << fn + end + end + @pending = true + self + end + alias :add :include + + # Register a list of file name patterns that should be excluded from the + # list. Patterns may be regular expressions, glob patterns or regular + # strings. In addition, a block given to exclude will remove entries that + # return true when given to the block. + # + # Note that glob patterns are expanded against the file system. If a file + # is explicitly added to a file list, but does not exist in the file + # system, then an glob pattern in the exclude list will not exclude the + # file. + # + # Examples: + # FileList['a.c', 'b.c'].exclude("a.c") => ['b.c'] + # FileList['a.c', 'b.c'].exclude(/^a/) => ['b.c'] + # + # If "a.c" is a file, then ... + # FileList['a.c', 'b.c'].exclude("a.*") => ['b.c'] + # + # If "a.c" is not a file, then ... + # FileList['a.c', 'b.c'].exclude("a.*") => ['a.c', 'b.c'] + # + def exclude(*patterns, &block) + patterns.each do |pat| + @exclude_patterns << pat + end + if block_given? + @exclude_procs << block + end + resolve_exclude if ! @pending + self + end + + + # Clear all the exclude patterns so that we exclude nothing. + def clear_exclude + @exclude_patterns = [] + @exclude_procs = [] + calculate_exclude_regexp if ! @pending + self + end + + # Define equality. + def ==(array) + to_ary == array + end + + # Return the internal array object. + def to_a + resolve + @items + end + + # Return the internal array object. + def to_ary + to_a + end + + # Lie about our class. + def is_a?(klass) + klass == Array || super(klass) + end + alias kind_of? is_a? + + # Redefine * to return either a string or a new file list. 
+ def *(other) + result = @items * other + case result + when Array + FileList.new.import(result) + else + result + end + end + + # Resolve all the pending adds now. + def resolve + if @pending + @pending = false + @pending_add.each do |fn| resolve_add(fn) end + @pending_add = [] + resolve_exclude + end + self + end + + def calculate_exclude_regexp + ignores = [] + @exclude_patterns.each do |pat| + case pat + when Regexp + ignores << pat + when /[*?]/ + Dir[pat].each do |p| ignores << p end + else + ignores << Regexp.quote(pat) + end + end + if ignores.empty? + @exclude_re = /^$/ + else + re_str = ignores.collect { |p| "(" + p.to_s + ")" }.join("|") + @exclude_re = Regexp.new(re_str) + end + end + + def resolve_add(fn) + case fn + when %r{[*?\[\{]} + add_matching(fn) + else + self << fn + end + end + private :resolve_add + + def resolve_exclude + calculate_exclude_regexp + reject! { |fn| exclude?(fn) } + self + end + private :resolve_exclude + + # Return a new FileList with the results of running +sub+ against each + # element of the oringal list. + # + # Example: + # FileList['a.c', 'b.c'].sub(/\.c$/, '.o') => ['a.o', 'b.o'] + # + def sub(pat, rep) + inject(FileList.new) { |res, fn| res << fn.sub(pat,rep) } + end + + # Return a new FileList with the results of running +gsub+ against each + # element of the original list. + # + # Example: + # FileList['lib/test/file', 'x/y'].gsub(/\//, "\\") + # => ['lib\\test\\file', 'x\\y'] + # + def gsub(pat, rep) + inject(FileList.new) { |res, fn| res << fn.gsub(pat,rep) } + end + + # Same as +sub+ except that the oringal file list is modified. + def sub!(pat, rep) + each_with_index { |fn, i| self[i] = fn.sub(pat,rep) } + self + end + + # Same as +gsub+ except that the original file list is modified. + def gsub!(pat, rep) + each_with_index { |fn, i| self[i] = fn.gsub(pat,rep) } + self + end + + # Apply the pathmap spec to each of the included file names, returning a + # new file list with the modified paths. 
(See String#pathmap for
+    # details.)
+    def pathmap(spec=nil)
+      collect { |fn| fn.pathmap(spec) }
+    end
+
+    # Return a new file list with the String#ext method applied to each
+    # member of the list.
+    #
+    # This method is a shortcut for:
+    #
+    #    list.collect { |item| item.ext(newext) }
+    #
+    def ext(newext='')
+      collect { |fn| fn.ext(newext) }
+    end
+
+
+    # Grep each of the files in the filelist using the given pattern. If a
+    # block is given, call the block on each matching line, passing the file
+    # name, line number, and the matching line of text. If no block is given,
+    # a standard emacs style file:linenumber:line message will be printed to
+    # standard out.  Any extra arguments are passed on to +open+.
+    def egrep(pattern, *opt)
+      each do |fn|
+        open(fn, "rb", *opt) do |inf|
+          count = 0
+          inf.each do |line|
+            count += 1
+            if pattern.match(line)
+              if block_given?
+                yield fn, count, line
+              else
+                puts "#{fn}:#{count}:#{line}"
+              end
+            end
+          end
+        end
+      end
+    end
+
+    # Return a new file list that only contains file names from the current
+    # file list that exist on the file system.
+    def existing
+      select { |fn| File.exist?(fn) }
+    end
+
+    # Modify the current file list so that it contains only file names that
+    # exist on the file system.
+    def existing!
+      resolve
+      @items = @items.select { |fn| File.exist?(fn) }
+      self
+    end
+
+    # FileList version of partition. Needed because the nested arrays should
+    # be FileLists in this version.
+    def partition(&block) # :nodoc:
+      resolve
+      result = @items.partition(&block)
+      [
+        FileList.new.import(result[0]),
+        FileList.new.import(result[1]),
+      ]
+    end
+
+    # Convert a FileList to a string by joining all elements with a space.
+    def to_s
+      resolve
+      self.join(' ')
+    end
+
+    # Add the files matching the glob pattern, skipping excluded names.
+    def add_matching(pattern)
+      Dir[pattern].each do |fn|
+        self << fn unless exclude?(fn)
+      end
+    end
+    private :add_matching
+
+    # Should the given file name be excluded?
+ def exclude?(fn) + calculate_exclude_regexp unless @exclude_re + fn =~ @exclude_re || @exclude_procs.any? { |p| p.call(fn) } + end + + DEFAULT_IGNORE_PATTERNS = [ + /(^|[\/\\])CVS([\/\\]|$)/, + /(^|[\/\\])\.svn([\/\\]|$)/, + /\.bak$/, + /~$/ + ] + DEFAULT_IGNORE_PROCS = [ + proc { |fn| fn =~ /(^|[\/\\])core$/ && ! File.directory?(fn) } + ] +# @exclude_patterns = DEFAULT_IGNORE_PATTERNS.dup + + def import(array) + @items = array + self + end + + class << self + # Create a new file list including the files listed. Similar to: + # + # FileList.new(*args) + def [](*args) + new(*args) + end + end + end # FileList +end + +module Rake + class << self + + # Yield each file or directory component. + def each_dir_parent(dir) # :nodoc: + old_length = nil + while dir != '.' && dir.length != old_length + yield(dir) + old_length = dir.length + dir = File.dirname(dir) + end + end + end +end # module Rake + +# Alias FileList to be available at the top level. +FileList = Rake::FileList + +# ########################################################################### +module Rake + + # Default Rakefile loader used by +import+. + class DefaultLoader + def load(fn) + Kernel.load(File.expand_path(fn)) + end + end + + # EarlyTime is a fake timestamp that occurs _before_ any other time value. + class EarlyTime + include Comparable + include Singleton + + def <=>(other) + -1 + end + + def to_s + "" + end + end + + EARLY = EarlyTime.instance +end # module Rake + +# ########################################################################### +# Extensions to time to allow comparisons with an early time class. +# +class Time + alias rake_original_time_compare :<=> + def <=>(other) + if Rake::EarlyTime === other + - other.<=>(self) + else + rake_original_time_compare(other) + end + end +end # class Time + +module Rake + + #################################################################### + # The NameSpace class will lookup task names in the the scope + # defined by a +namespace+ command. 
+ # + class NameSpace + + # Create a namespace lookup object using the given task manager + # and the list of scopes. + def initialize(task_manager, scope_list) + @task_manager = task_manager + @scope = scope_list.dup + end + + # Lookup a task named +name+ in the namespace. + def [](name) + @task_manager.lookup(name, @scope) + end + + # Return the list of tasks defined in this namespace. + def tasks + @task_manager.tasks + end + end # NameSpace + + + #################################################################### + # The TaskManager module is a mixin for managing tasks. + module TaskManager + # Track the last comment made in the Rakefile. + attr_accessor :last_description + alias :last_comment :last_description # Backwards compatibility + + def initialize + super + @tasks = Hash.new + @rules = Array.new + @scope = Array.new + @last_description = nil + end + + def create_rule(*args, &block) + pattern, arg_names, deps = resolve_args(args) + pattern = Regexp.new(Regexp.quote(pattern) + '$') if String === pattern + @rules << [pattern, deps, block] + end + + def define_task(task_class, *args, &block) + task_name, arg_names, deps = resolve_args(args) + task_name = task_class.scope_name(@scope, task_name) + deps = [deps] unless deps.respond_to?(:to_ary) + deps = deps.collect {|d| d.to_s } + task = intern(task_class, task_name) + task.set_arg_names(arg_names) unless arg_names.empty? + task.add_description(@last_description) + @last_description = nil + task.enhance(deps, &block) + task + end + + # Lookup a task. Return an existing task if found, otherwise + # create a task of the current type. + def intern(task_class, task_name) + @tasks[task_name.to_s] ||= task_class.new(task_name, self) + end + + # Find a matching task for +task_name+. 
+ def [](task_name, scopes=nil) + task_name = task_name.to_s + self.lookup(task_name, scopes) or + enhance_with_matching_rule(task_name) or + synthesize_file_task(task_name) or + fail "Don't know how to build task '#{task_name}'" + end + + def synthesize_file_task(task_name) + return nil unless File.exist?(task_name) + define_task(Rake::FileTask, task_name) + end + + # Resolve the arguments for a task/rule. Returns a triplet of + # [task_name, arg_name_list, prerequisites]. + def resolve_args(args) + if args.last.is_a?(Hash) + deps = args.pop + resolve_args_with_dependencies(args, deps) + else + resolve_args_without_dependencies(args) + end + end + + # Resolve task arguments for a task or rule when there are no + # dependencies declared. + # + # The patterns recognized by this argument resolving function are: + # + # task :t + # task :t, [:a] + # task :t, :a (deprecated) + # + def resolve_args_without_dependencies(args) + task_name = args.shift + if args.size == 1 && args.first.respond_to?(:to_ary) + arg_names = args.first.to_ary + else + arg_names = args + end + [task_name, arg_names, []] + end + private :resolve_args_without_dependencies + + # Resolve task arguments for a task or rule when there are + # dependencies declared. + # + # The patterns recognized by this argument resolving function are: + # + # task :t => [:d] + # task :t, [a] => [:d] + # task :t, :needs => [:d] (deprecated) + # task :t, :a, :needs => [:d] (deprecated) + # + def resolve_args_with_dependencies(args, hash) # :nodoc: + fail "Task Argument Error" if hash.size != 1 + key, value = hash.map { |k, v| [k,v] }.first + if args.empty? 
+        # task :t => [:d]
+        task_name = key
+        arg_names = []
+        deps = value
+      elsif key == :needs
+        # task :t, :a, :needs => [:d] (deprecated form)
+        task_name = args.shift
+        arg_names = args
+        deps = value
+      else
+        # task :t, [a] => [:d]
+        task_name = args.shift
+        arg_names = key
+        deps = value
+      end
+      deps = [deps] unless deps.respond_to?(:to_ary)
+      [task_name, arg_names, deps]
+    end
+    private :resolve_args_with_dependencies
+
+    # If a rule can be found that matches the task name, enhance the
+    # task with the prerequisites and actions from the rule. Set the
+    # source attribute of the task appropriately for the rule. Return
+    # the enhanced task or nil if no rule was found.
+    #
+    # Fails with Rake::RuleRecursionOverflowError once +level+ reaches 16;
+    # each enclosing call adds its own task name to that error before
+    # re-raising it.
+    def enhance_with_matching_rule(task_name, level=0)
+      fail Rake::RuleRecursionOverflowError,
+        "Rule Recursion Too Deep" if level >= 16
+      @rules.each do |pattern, extensions, block|
+        if md = pattern.match(task_name)
+          task = attempt_rule(task_name, extensions, block, level)
+          return task if task
+        end
+      end
+      nil
+    rescue Rake::RuleRecursionOverflowError => ex
+      ex.add_target(task_name)
+      fail ex
+    end
+
+    # List of all defined tasks in this application, sorted by name.
+    def tasks
+      @tasks.values.sort_by { |t| t.name }
+    end
+
+    # Clear all tasks and rules in this application.
+    def clear
+      @tasks.clear
+      @rules.clear
+    end
+
+    # Lookup a task, using scope and the scope hints in the task name.
+    # This method performs straight lookups without trying to
+    # synthesize file tasks or rules. Special scope names (e.g. '^')
+    # are recognized. If no scope argument is supplied, use the
+    # current scope. Return nil if the task cannot be found.
+ def lookup(task_name, initial_scope=nil) + initial_scope ||= @scope + task_name = task_name.to_s + if task_name =~ /^rake:/ + scopes = [] + task_name = task_name.sub(/^rake:/, '') + elsif task_name =~ /^(\^+)/ + scopes = initial_scope[0, initial_scope.size - $1.size] + task_name = task_name.sub(/^(\^+)/, '') + else + scopes = initial_scope + end + lookup_in_scope(task_name, scopes) + end + + # Lookup the task name + def lookup_in_scope(name, scope) + n = scope.size + while n >= 0 + tn = (scope[0,n] + [name]).join(':') + task = @tasks[tn] + return task if task + n -= 1 + end + nil + end + private :lookup_in_scope + + # Return the list of scope names currently active in the task + # manager. + def current_scope + @scope.dup + end + + # Evaluate the block in a nested namespace named +name+. Create + # an anonymous namespace if +name+ is nil. + def in_namespace(name) + name ||= generate_name + @scope.push(name) + ns = NameSpace.new(self, @scope) + yield(ns) + ns + ensure + @scope.pop + end + + private + + # Generate an anonymous namespace name. + def generate_name + @seed ||= 0 + @seed += 1 + "_anon_#{@seed}" + end + + def trace_rule(level, message) + puts "#{" "*level}#{message}" if Rake.application.options.trace_rules + end + + # Attempt to create a rule given the list of prerequisites. + def attempt_rule(task_name, extensions, block, level) + sources = make_sources(task_name, extensions) + prereqs = sources.collect { |source| + trace_rule level, "Attempting Rule #{task_name} => #{source}" + if File.exist?(source) || Rake::Task.task_defined?(source) + trace_rule level, "(#{task_name} => #{source} ... EXIST)" + source + elsif parent = enhance_with_matching_rule(source, level+1) + trace_rule level, "(#{task_name} => #{source} ... ENHANCE)" + parent.name + else + trace_rule level, "(#{task_name} => #{source} ... 
FAIL)" + return nil + end + } + task = FileTask.define_task({task_name => prereqs}, &block) + task.sources = prereqs + task + end + + # Make a list of sources from the list of file name extensions / + # translation procs. + def make_sources(task_name, extensions) + extensions.collect { |ext| + case ext + when /%/ + task_name.pathmap(ext) + when %r{/} + ext + when /^\./ + task_name.ext(ext) + when String + ext + when Proc + if ext.arity == 1 + ext.call(task_name) + else + ext.call + end + else + fail "Don't know how to handle rule dependent: #{ext.inspect}" + end + }.flatten + end + + end # TaskManager + + ###################################################################### + # Rake main application object. When invoking +rake+ from the + # command line, a Rake::Application object is created and run. + # + class Application + include TaskManager + + # The name of the application (typically 'rake') + attr_reader :name + + # The original directory where rake was invoked. + attr_reader :original_dir + + # Name of the actual rakefile used. + attr_reader :rakefile + + # List of the top level task names (task names from the command line). + attr_reader :top_level_tasks + + DEFAULT_RAKEFILES = ['rakefile', 'Rakefile', 'rakefile.rb', 'Rakefile.rb'].freeze + + # Initialize a Rake::Application object. + def initialize + super + @name = 'rake' + @rakefiles = DEFAULT_RAKEFILES.dup + @rakefile = nil + @pending_imports = [] + @imported = [] + @loaders = {} + @default_loader = Rake::DefaultLoader.new + @original_dir = Dir.pwd + @top_level_tasks = [] + add_loader('rb', DefaultLoader.new) + add_loader('rf', DefaultLoader.new) + add_loader('rake', DefaultLoader.new) + @tty_output = STDOUT.tty? + end + + # Run the Rake application. The run method performs the following three steps: + # + # * Initialize the command line options (+init+). + # * Define the tasks (+load_rakefile+). + # * Run the top level tasks (+run_tasks+). 
+    #
+    # If you wish to build a custom rake command, you should call +init+ on your
+    # application. Then define any tasks. Finally, call +top_level+ to run your top
+    # level tasks.
+    def run
+      standard_exception_handling do
+        init
+        load_rakefile
+        top_level
+      end
+    end
+
+    # Initialize the command line parameters and app name.
+    def init(app_name='rake')
+      standard_exception_handling do
+        @name = app_name
+        collect_tasks handle_options
+      end
+    end
+
+    # Find the rakefile and then load it and any pending imports.
+    def load_rakefile
+      standard_exception_handling do
+        raw_load_rakefile
+      end
+    end
+
+    # Run the top level tasks of a Rake application.  When the show_tasks
+    # or show_prereqs option is set, the corresponding listing is printed
+    # instead of invoking the tasks.
+    def top_level
+      standard_exception_handling do
+        if options.show_tasks
+          display_tasks_and_comments
+        elsif options.show_prereqs
+          display_prerequisites
+        else
+          top_level_tasks.each { |task_name| invoke_task(task_name) }
+        end
+      end
+    end
+
+    # Add a loader to handle imported files ending in the extension
+    # +ext+.  A leading dot is added to +ext+ when it is missing.
+    def add_loader(ext, loader)
+      ext = ".#{ext}" unless ext =~ /^\./
+      @loaders[ext] = loader
+    end
+
+    # Application options from the command line
+    def options
+      @options ||= OpenStruct.new
+    end
+
+    # private ----------------------------------------------------------------
+
+    # Invoke the task described by +task_string+, which may carry
+    # arguments in brackets (e.g. "name[a,b]").
+    def invoke_task(task_string)
+      name, args = parse_task_string(task_string)
+      t = self[name]
+      t.invoke(*args)
+    end
+
+    # Split "name[arg1,arg2]" into the task name and the list of argument
+    # strings.  A string without a bracketed suffix yields an empty list.
+    def parse_task_string(string)
+      if string =~ /^([^\[]+)(\[(.*)\])$/
+        name = $1
+        args = $3.split(/\s*,\s*/)
+      else
+        name = string
+        args = []
+      end
+      [name, args]
+    end
+
+    # Provide standard exception handling for the given block.
+    def standard_exception_handling
+      begin
+        yield
+      rescue SystemExit => ex
+        # Exit silently with current status
+        raise
+      rescue OptionParser::InvalidOption => ex
+        # Exit silently
+        exit(false)
+      rescue Exception => ex
+        # Exit with error message
+        $stderr.puts "rake aborted!"
+ $stderr.puts ex.message + if options.trace + $stderr.puts ex.backtrace.join("\n") + else + $stderr.puts ex.backtrace.find {|str| str =~ /#{@rakefile}/ } || "" + $stderr.puts "(See full trace by running task with --trace)" + end + exit(false) + end + end + + # True if one of the files in RAKEFILES is in the current directory. + # If a match is found, it is copied into @rakefile. + def have_rakefile + @rakefiles.each do |fn| + if File.exist?(fn) || fn == '' + return fn + end + end + return nil + end + + # True if we are outputting to TTY, false otherwise + def tty_output? + @tty_output + end + + # Override the detected TTY output state (mostly for testing) + def tty_output=( tty_output_state ) + @tty_output = tty_output_state + end + + # We will truncate output if we are outputting to a TTY or if we've been + # given an explicit column width to honor + def truncate_output? + tty_output? || ENV['RAKE_COLUMNS'] + end + + # Display the tasks and dependencies. + def display_tasks_and_comments + displayable_tasks = tasks.select { |t| + t.comment && t.name =~ options.show_task_pattern + } + if options.full_description + displayable_tasks.each do |t| + puts "rake #{t.name_with_args}" + t.full_comment.split("\n").each do |line| + puts " #{line}" + end + puts + end + else + width = displayable_tasks.collect { |t| t.name_with_args.length }.max || 10 + max_column = truncate_output? ? terminal_width - name.size - width - 7 : nil + displayable_tasks.each do |t| + printf "#{name} %-#{width}s # %s\n", + t.name_with_args, max_column ? truncate(t.comment, max_column) : t.comment + end + end + end + + def terminal_width + if ENV['RAKE_COLUMNS'] + result = ENV['RAKE_COLUMNS'].to_i + else + result = unix? ? dynamic_width : 80 + end + (result < 10) ? 80 : result + rescue + 80 + end + + # Calculate the dynamic width of the + def dynamic_width + @dynamic_width ||= (dynamic_width_stty.nonzero? 
|| dynamic_width_tput) + end + + def dynamic_width_stty + %x{stty size 2>/dev/null}.split[1].to_i + end + + def dynamic_width_tput + %x{tput cols 2>/dev/null}.to_i + end + + def unix? + RUBY_PLATFORM =~ /(aix|darwin|linux|(net|free|open)bsd|cygwin|solaris|irix|hpux)/i + end + + def windows? + Win32.windows? + end + + def truncate(string, width) + if string.length <= width + string + else + ( string[0, width-3] || "" ) + "..." + end + end + + # Display the tasks and prerequisites + def display_prerequisites + tasks.each do |t| + puts "rake #{t.name}" + t.prerequisites.each { |pre| puts " #{pre}" } + end + end + + # A list of all the standard options used in rake, suitable for + # passing to OptionParser. + def standard_rake_options + [ + ['--classic-namespace', '-C', "Put Task and FileTask in the top level namespace", + lambda { |value| + require 'rake/classic_namespace' + options.classic_namespace = true + } + ], + ['--describe', '-D [PATTERN]', "Describe the tasks (matching optional PATTERN), then exit.", + lambda { |value| + options.show_tasks = true + options.full_description = true + options.show_task_pattern = Regexp.new(value || '') + } + ], + ['--dry-run', '-n', "Do a dry run without executing actions.", + lambda { |value| + verbose(true) + nowrite(true) + options.dryrun = true + options.trace = true + } + ], + ['--execute', '-e CODE', "Execute some Ruby code and exit.", + lambda { |value| + eval(value) + exit + } + ], + ['--execute-print', '-p CODE', "Execute some Ruby code, print the result, then exit.", + lambda { |value| + puts eval(value) + exit + } + ], + ['--execute-continue', '-E CODE', + "Execute some Ruby code, then continue with normal task processing.", + lambda { |value| eval(value) } + ], + ['--libdir', '-I LIBDIR', "Include LIBDIR in the search path for required modules.", + lambda { |value| $:.push(value) } + ], + ['--prereqs', '-P', "Display the tasks and dependencies, then exit.", + lambda { |value| options.show_prereqs = true } + ], + 
['--quiet', '-q', "Do not log messages to standard output.", + lambda { |value| verbose(false) } + ], + ['--rakefile', '-f [FILE]', "Use FILE as the rakefile.", + lambda { |value| + value ||= '' + @rakefiles.clear + @rakefiles << value + } + ], + ['--rakelibdir', '--rakelib', '-R RAKELIBDIR', + "Auto-import any .rake files in RAKELIBDIR. (default is 'rakelib')", + lambda { |value| options.rakelib = value.split(':') } + ], + ['--require', '-r MODULE', "Require MODULE before executing rakefile.", + lambda { |value| + begin + require value + rescue LoadError => ex + begin + rake_require value + rescue LoadError => ex2 + raise ex + end + end + } + ], + ['--rules', "Trace the rules resolution.", + lambda { |value| options.trace_rules = true } + ], + ['--no-search', '--nosearch', '-N', "Do not search parent directories for the Rakefile.", + lambda { |value| options.nosearch = true } + ], + ['--silent', '-s', "Like --quiet, but also suppresses the 'in directory' announcement.", + lambda { |value| + verbose(false) + options.silent = true + } + ], + ['--system', '-g', + "Using system wide (global) rakefiles (usually '~/.rake/*.rake').", + lambda { |value| options.load_system = true } + ], + ['--no-system', '--nosystem', '-G', + "Use standard project Rakefile search paths, ignore system wide rakefiles.", + lambda { |value| options.ignore_system = true } + ], + ['--tasks', '-T [PATTERN]', "Display the tasks (matching optional PATTERN) with descriptions, then exit.", + lambda { |value| + options.show_tasks = true + options.show_task_pattern = Regexp.new(value || '') + options.full_description = false + } + ], + ['--trace', '-t', "Turn on invoke/execute tracing, enable full backtrace.", + lambda { |value| + options.trace = true + verbose(true) + } + ], + ['--verbose', '-v', "Log message to standard output (default).", + lambda { |value| verbose(true) } + ], + ['--version', '-V', "Display the program version.", + lambda { |value| + puts "rake, version #{RAKEVERSION}" + exit + } 
+ ] + ] + end + + # Read and handle the command line options. + def handle_options + options.rakelib = ['rakelib'] + + opts = OptionParser.new + opts.banner = "rake [-f rakefile] {options} targets..." + opts.separator "" + opts.separator "Options are ..." + + opts.on_tail("-h", "--help", "-H", "Display this help message.") do + puts opts + exit + end + + standard_rake_options.each { |args| opts.on(*args) } + parsed_argv = opts.parse(ARGV) + + # If class namespaces are requested, set the global options + # according to the values in the options structure. + if options.classic_namespace + $show_tasks = options.show_tasks + $show_prereqs = options.show_prereqs + $trace = options.trace + $dryrun = options.dryrun + $silent = options.silent + end + parsed_argv + end + + # Similar to the regular Ruby +require+ command, but will check + # for *.rake files in addition to *.rb files. + def rake_require(file_name, paths=$LOAD_PATH, loaded=$") + return false if loaded.include?(file_name) + paths.each do |path| + fn = file_name + ".rake" + full_path = File.join(path, fn) + if File.exist?(full_path) + load full_path + loaded << fn + return true + end + end + fail LoadError, "Can't find #{file_name}" + end + + def find_rakefile_location + here = Dir.pwd + while ! (fn = have_rakefile) + Dir.chdir("..") + if Dir.pwd == here || options.nosearch + return nil + end + here = Dir.pwd + end + [fn, here] + ensure + Dir.chdir(Rake.original_dir) + end + + def raw_load_rakefile # :nodoc: + rakefile, location = find_rakefile_location + if (! options.ignore_system) && + (options.load_system || rakefile.nil?) && + system_dir && File.directory?(system_dir) + puts "(in #{Dir.pwd})" unless options.silent + glob("#{system_dir}/*.rake") do |name| + add_import name + end + else + fail "No Rakefile found (looking for: #{@rakefiles.join(', ')})" if + rakefile.nil? 
+ @rakefile = rakefile + Dir.chdir(location) + puts "(in #{Dir.pwd})" unless options.silent + $rakefile = @rakefile if options.classic_namespace + load File.expand_path(@rakefile) if @rakefile && @rakefile != '' + options.rakelib.each do |rlib| + glob("#{rlib}/*.rake") do |name| + add_import name + end + end + end + load_imports + end + + def glob(path, &block) + Dir[path.gsub("\\", '/')].each(&block) + end + private :glob + + # The directory path containing the system wide rakefiles. + def system_dir + @system_dir ||= + begin + if ENV['RAKE_SYSTEM'] + ENV['RAKE_SYSTEM'] + else + standard_system_dir + end + end + end + + # The standard directory containing system wide rake files. + if Win32.windows? + def standard_system_dir #:nodoc: + Win32.win32_system_dir + end + else + def standard_system_dir #:nodoc: + File.expand_path('.rake', '~') + end + end + private :standard_system_dir + + # Collect the list of tasks on the command line. If no tasks are + # given, return a list containing only the default task. + # Environmental assignments are processed at this time as well. + def collect_tasks(argv) + @top_level_tasks = [] + argv.each do |arg| + if arg =~ /^(\w+)=(.*)$/ + ENV[$1] = $2 + else + @top_level_tasks << arg unless arg =~ /^-/ + end + end + @top_level_tasks.push("default") if @top_level_tasks.size == 0 + end + + # Add a file to the list of files to be imported. + def add_import(fn) + @pending_imports << fn + end + + # Load the pending list of imported files. + def load_imports + while fn = @pending_imports.shift + next if @imported.member?(fn) + if fn_task = lookup(fn) + fn_task.invoke + end + ext = File.extname(fn) + loader = @loaders[ext] || @default_loader + loader.load(fn) + @imported << fn + end + end + + # Warn about deprecated use of top level constant names. + def const_warning(const_name) + @const_warning ||= false + if ! 
@const_warning + $stderr.puts %{WARNING: Deprecated reference to top-level constant '#{const_name}' } + + %{found at: #{rakefile_location}} # ' + $stderr.puts %{ Use --classic-namespace on rake command} + $stderr.puts %{ or 'require "rake/classic_namespace"' in Rakefile} + end + @const_warning = true + end + + def rakefile_location + begin + fail + rescue RuntimeError => ex + ex.backtrace.find {|str| str =~ /#{@rakefile}/ } || "" + end + end + end +end + + +class Module + # Rename the original handler to make it available. + alias :rake_original_const_missing :const_missing + + # Check for deprecated uses of top level (i.e. in Object) uses of + # Rake class names. If someone tries to reference the constant + # name, display a warning and return the proper object. Using the + # --classic-namespace command line option will define these + # constants in Object and avoid this handler. + def const_missing(const_name) + case const_name + when :Task + Rake.application.const_warning(const_name) + Rake::Task + when :FileTask + Rake.application.const_warning(const_name) + Rake::FileTask + when :FileCreationTask + Rake.application.const_warning(const_name) + Rake::FileCreationTask + when :RakeApp + Rake.application.const_warning(const_name) + Rake::Application + else + rake_original_const_missing(const_name) + end + end +end diff --git a/lib/rake/classic_namespace.rb b/lib/rake/classic_namespace.rb new file mode 100644 index 0000000..feb7569 --- /dev/null +++ b/lib/rake/classic_namespace.rb @@ -0,0 +1,8 @@ +# The following classes used to be in the top level namespace. +# Loading this file enables compatibility with older Rakefile that +# referenced Task from the top level. 
+ +Task = Rake::Task +FileTask = Rake::FileTask +FileCreationTask = Rake::FileCreationTask +RakeApp = Rake::Application diff --git a/lib/rake/clean.rb b/lib/rake/clean.rb new file mode 100644 index 0000000..4ee2c5a --- /dev/null +++ b/lib/rake/clean.rb @@ -0,0 +1,33 @@ +#!/usr/bin/env ruby + +# The 'rake/clean' file defines two file lists (CLEAN and CLOBBER) and +# two rake tasks (:clean and :clobber). +# +# [:clean] Clean up the project by deleting scratch files and backup +# files. Add files to the CLEAN file list to have the :clean +# target handle them. +# +# [:clobber] Clobber all generated and non-source files in a project. +# The task depends on :clean, so all the clean files will +# be deleted as well as files in the CLOBBER file list. +# The intent of this task is to return a project to its +# pristine, just unpacked state. + +require 'rake' + +CLEAN = Rake::FileList["**/*~", "**/*.bak", "**/core"] +CLEAN.clear_exclude.exclude { |fn| + fn.pathmap("%f") == 'core' && File.directory?(fn) +} + +desc "Remove any temporary products." +task :clean do + CLEAN.each { |fn| rm_r fn rescue nil } +end + +CLOBBER = Rake::FileList.new + +desc "Remove any generated file." +task :clobber => [:clean] do + CLOBBER.each { |fn| rm_r fn rescue nil } +end diff --git a/lib/rake/gempackagetask.rb b/lib/rake/gempackagetask.rb new file mode 100644 index 0000000..1e4632a --- /dev/null +++ b/lib/rake/gempackagetask.rb @@ -0,0 +1,97 @@ +#!/usr/bin/env ruby + +# Define a package task library to aid in the definition of GEM +# packages. + +require 'rubygems' +require 'rake' +require 'rake/packagetask' +require 'rubygems/user_interaction' +require 'rubygems/builder' + +module Rake + + # Create a package based upon a Gem spec. Gem packages, as well as + # zip files and tar/gzipped packages can be produced by this task. 
+ # + # In addition to the Rake targets generated by PackageTask, a + # GemPackageTask will also generate the following tasks: + # + # ["package_dir/name-version.gem"] + # Create a Ruby GEM package with the given name and version. + # + # Example using a Ruby GEM spec: + # + # require 'rubygems' + # + # spec = Gem::Specification.new do |s| + # s.platform = Gem::Platform::RUBY + # s.summary = "Ruby based make-like utility." + # s.name = 'rake' + # s.version = PKG_VERSION + # s.requirements << 'none' + # s.require_path = 'lib' + # s.autorequire = 'rake' + # s.files = PKG_FILES + # s.description = < [:gem] + desc "Build the gem file #{gem_file}" + task :gem => ["#{package_dir}/#{gem_file}"] + file "#{package_dir}/#{gem_file}" => [package_dir] + @gem_spec.files do + when_writing("Creating GEM") { + Gem::Builder.new(gem_spec).build + verbose(true) { + mv gem_file, "#{package_dir}/#{gem_file}" + } + } + end + end + + def gem_file + if @gem_spec.platform == Gem::Platform::RUBY + "#{package_name}.gem" + else + "#{package_name}-#{@gem_spec.platform}.gem" + end + end + + end +end diff --git a/lib/rake/loaders/makefile.rb b/lib/rake/loaders/makefile.rb new file mode 100644 index 0000000..9ade098 --- /dev/null +++ b/lib/rake/loaders/makefile.rb @@ -0,0 +1,35 @@ +#!/usr/bin/env ruby + +module Rake + + # Makefile loader to be used with the import file loader. + class MakefileLoader + + # Load the makefile dependencies in +fn+. + def load(fn) + open(fn) do |mf| + lines = mf.read + lines.gsub!(/#[^\n]*\n/m, "") + lines.gsub!(/\\\n/, ' ') + lines.split("\n").each do |line| + process_line(line) + end + end + end + + private + + # Process one logical line of makefile data. + def process_line(line) + file_tasks, args = line.split(':') + return if args.nil? 
+ dependents = args.split + file_tasks.strip.split.each do |file_task| + file file_task => dependents + end + end + end + + # Install the handler + Rake.application.add_loader('mf', MakefileLoader.new) +end diff --git a/lib/rake/packagetask.rb b/lib/rake/packagetask.rb new file mode 100644 index 0000000..6158eaf --- /dev/null +++ b/lib/rake/packagetask.rb @@ -0,0 +1,185 @@ +#!/usr/bin/env ruby + +# Define a package task libarary to aid in the definition of +# redistributable package files. + +require 'rake' +require 'rake/tasklib' + +module Rake + + # Create a packaging task that will package the project into + # distributable files (e.g zip archive or tar files). + # + # The PackageTask will create the following targets: + # + # [:package] + # Create all the requested package files. + # + # [:clobber_package] + # Delete all the package files. This target is automatically + # added to the main clobber target. + # + # [:repackage] + # Rebuild the package files from scratch, even if they are not out + # of date. + # + # ["package_dir/name-version.tgz"] + # Create a gzipped tar package (if need_tar is true). + # + # ["package_dir/name-version.tar.gz"] + # Create a gzipped tar package (if need_tar_gz is true). + # + # ["package_dir/name-version.tar.bz2"] + # Create a bzip2'd tar package (if need_tar_bz2 is true). + # + # ["package_dir/name-version.zip"] + # Create a zip package archive (if need_zip is true). + # + # Example: + # + # Rake::PackageTask.new("rake", "1.2.3") do |p| + # p.need_tar = true + # p.package_files.include("lib/**/*.rb") + # end + # + class PackageTask < TaskLib + # Name of the package. + attr_accessor :name + + # Version of the package (e.g. '1.3.2'). + attr_accessor :version + + # Directory used to store the package files (default is 'pkg'). + attr_accessor :package_dir + + # True if a gzipped tar file (tgz) should be produced (default is false). 
+ attr_accessor :need_tar + + # True if a gzipped tar file (tar.gz) should be produced (default is false). + attr_accessor :need_tar_gz + + # True if a bzip2'd tar file (tar.bz2) should be produced (default is false). + attr_accessor :need_tar_bz2 + + # True if a zip file should be produced (default is false) + attr_accessor :need_zip + + # List of files to be included in the package. + attr_accessor :package_files + + # Tar command for gzipped or bzip2ed archives. The default is 'tar'. + attr_accessor :tar_command + + # Zip command for zipped archives. The default is 'zip'. + attr_accessor :zip_command + + # Create a Package Task with the given name and version. + def initialize(name=nil, version=nil) + init(name, version) + yield self if block_given? + define unless name.nil? + end + + # Initialization that bypasses the "yield self" and "define" step. + def init(name, version) + @name = name + @version = version + @package_files = Rake::FileList.new + @package_dir = 'pkg' + @need_tar = false + @need_tar_gz = false + @need_tar_bz2 = false + @need_zip = false + @tar_command = 'tar' + @zip_command = 'zip' + end + + # Create the tasks defined by this task library. + def define + fail "Version required (or :noversion)" if @version.nil? 
+ @version = nil if :noversion == @version + + desc "Build all the packages" + task :package + + desc "Force a rebuild of the package files" + task :repackage => [:clobber_package, :package] + + desc "Remove package products" + task :clobber_package do + rm_r package_dir rescue nil + end + + task :clobber => [:clobber_package] + + [ + [need_tar, tgz_file, "z"], + [need_tar_gz, tar_gz_file, "z"], + [need_tar_bz2, tar_bz2_file, "j"] + ].each do |(need, file, flag)| + if need + task :package => ["#{package_dir}/#{file}"] + file "#{package_dir}/#{file}" => [package_dir_path] + package_files do + chdir(package_dir) do + sh %{env} + sh %{#{@tar_command} #{flag}cvf #{file} #{package_name}} + end + end + end + end + + if need_zip + task :package => ["#{package_dir}/#{zip_file}"] + file "#{package_dir}/#{zip_file}" => [package_dir_path] + package_files do + chdir(package_dir) do + sh %{#{@zip_command} -r #{zip_file} #{package_name}} + end + end + end + + directory package_dir + + file package_dir_path => @package_files do + mkdir_p package_dir rescue nil + @package_files.each do |fn| + f = File.join(package_dir_path, fn) + fdir = File.dirname(f) + mkdir_p(fdir) if !File.exist?(fdir) + if File.directory?(fn) + mkdir_p(f) + else + rm_f f + safe_ln(fn, f) + end + end + end + self + end + + def package_name + @version ? "#{@name}-#{@version}" : @name + end + + def package_dir_path + "#{package_dir}/#{package_name}" + end + + def tgz_file + "#{package_name}.tgz" + end + + def tar_gz_file + "#{package_name}.tar.gz" + end + + def tar_bz2_file + "#{package_name}.tar.bz2" + end + + def zip_file + "#{package_name}.zip" + end + end + +end diff --git a/lib/rake/rake_test_loader.rb b/lib/rake/rake_test_loader.rb new file mode 100644 index 0000000..8d7dad3 --- /dev/null +++ b/lib/rake/rake_test_loader.rb @@ -0,0 +1,5 @@ +#!/usr/bin/env ruby + +# Load the test files from the command line. 
+ +ARGV.each { |f| load f unless f =~ /^-/ } diff --git a/lib/rake/rdoctask.rb b/lib/rake/rdoctask.rb new file mode 100644 index 0000000..6cfbda1 --- /dev/null +++ b/lib/rake/rdoctask.rb @@ -0,0 +1,147 @@ +#!/usr/bin/env ruby + +require 'rake' +require 'rake/tasklib' + +module Rake + + # Create a documentation task that will generate the RDoc files for + # a project. + # + # The RDocTask will create the following targets: + # + # [rdoc] + # Main task for this RDOC task. + # + # [:clobber_rdoc] + # Delete all the rdoc files. This target is automatically + # added to the main clobber target. + # + # [:rerdoc] + # Rebuild the rdoc files from scratch, even if they are not out + # of date. + # + # Simple Example: + # + # Rake::RDocTask.new do |rd| + # rd.main = "README.rdoc" + # rd.rdoc_files.include("README.rdoc", "lib/**/*.rb") + # end + # + # You may wish to give the task a different name, such as if you are + # generating two sets of documentation. For instance, if you want to have a + # development set of documentation including private methods: + # + # Rake::RDocTask.new(:rdoc_dev) do |rd| + # rd.main = "README.doc" + # rd.rdoc_files.include("README.rdoc", "lib/**/*.rb") + # rd.options << "--all" + # end + # + # The tasks would then be named :rdoc_dev, :clobber_rdoc_dev, and + # :rerdoc_dev. + # + class RDocTask < TaskLib + # Name of the main, top level task. (default is :rdoc) + attr_accessor :name + + # Name of directory to receive the html output files. (default is "html") + attr_accessor :rdoc_dir + + # Title of RDoc documentation. (default is none) + attr_accessor :title + + # Name of file to be used as the main, top level file of the + # RDoc. (default is none) + attr_accessor :main + + # Name of template to be used by rdoc. (defaults to rdoc's default) + attr_accessor :template + + # List of files to be included in the rdoc generation. (default is []) + attr_accessor :rdoc_files + + # List of options to be passed rdoc. 
(default is []) + attr_accessor :options + + # Run the rdoc process as an external shell (default is false) + attr_accessor :external + + # Create an RDoc task named rdoc. Default task name is +rdoc+. + def initialize(name=:rdoc) # :yield: self + @name = name + @rdoc_files = Rake::FileList.new + @rdoc_dir = 'html' + @main = nil + @title = nil + @template = nil + @external = false + @options = [] + yield self if block_given? + define + end + + # Create the tasks defined by this task lib. + def define + if name.to_s != "rdoc" + desc "Build the RDOC HTML Files" + end + + desc "Build the #{name} HTML Files" + task name + + desc "Force a rebuild of the RDOC files" + task "re#{name}" => ["clobber_#{name}", name] + + desc "Remove rdoc products" + task "clobber_#{name}" do + rm_r rdoc_dir rescue nil + end + + task :clobber => ["clobber_#{name}"] + + directory @rdoc_dir + task name => [rdoc_target] + file rdoc_target => @rdoc_files + [Rake.application.rakefile] do + rm_r @rdoc_dir rescue nil + args = option_list + @rdoc_files + if @external + argstring = args.join(' ') + sh %{ruby -Ivendor vender/rd #{argstring}} + else + require 'rdoc/rdoc' + RDoc::RDoc.new.document(args) + end + end + self + end + + def option_list + result = @options.dup + result << "-o" << @rdoc_dir + result << "--main" << quote(main) if main + result << "--title" << quote(title) if title + result << "-T" << quote(template) if template + result + end + + def quote(str) + if @external + "'#{str}'" + else + str + end + end + + def option_string + option_list.join(' ') + end + + private + + def rdoc_target + "#{rdoc_dir}/index.html" + end + + end +end diff --git a/lib/rake/runtest.rb b/lib/rake/runtest.rb new file mode 100644 index 0000000..3f1d205 --- /dev/null +++ b/lib/rake/runtest.rb @@ -0,0 +1,23 @@ +#!/usr/bin/env ruby + +require 'test/unit' +require 'test/unit/assertions' + +module Rake + include Test::Unit::Assertions + + def run_tests(pattern='test/test*.rb', log_enabled=false) + 
Dir["#{pattern}"].each { |fn| + puts fn if log_enabled + begin + load fn + rescue Exception => ex + puts "Error in #{fn}: #{ex.message}" + puts ex.backtrace + assert false + end + } + end + + extend self +end diff --git a/lib/rake/tasklib.rb b/lib/rake/tasklib.rb new file mode 100644 index 0000000..c7fd981 --- /dev/null +++ b/lib/rake/tasklib.rb @@ -0,0 +1,23 @@ +#!/usr/bin/env ruby + +require 'rake' + +module Rake + + # Base class for Task Libraries. + class TaskLib + include Cloneable + + # Make a symbol by pasting two strings together. + # + # NOTE: DEPRECATED! This method is kinda stupid. I don't know why + # I didn't just use string interpolation. But now other task + # libraries depend on this so I can't remove it without breaking + # other people's code. So for now it stays for backwards + # compatibility. BUT DON'T USE IT. + def paste(a,b) # :nodoc: + (a.to_s + b.to_s).intern + end + end + +end diff --git a/lib/rake/testtask.rb b/lib/rake/testtask.rb new file mode 100644 index 0000000..79154e4 --- /dev/null +++ b/lib/rake/testtask.rb @@ -0,0 +1,161 @@ +#!/usr/bin/env ruby + +# Define a task library for running unit tests. + +require 'rake' +require 'rake/tasklib' + +module Rake + + # Create a task that runs a set of tests. + # + # Example: + # + # Rake::TestTask.new do |t| + # t.libs << "test" + # t.test_files = FileList['test/test*.rb'] + # t.verbose = true + # end + # + # If rake is invoked with a "TEST=filename" command line option, + # then the list of test files will be overridden to include only the + # filename specified on the command line. This provides an easy way + # to run just one test. + # + # If rake is invoked with a "TESTOPTS=options" command line option, + # then the given options are passed to the test process after a + # '--'. This allows Test::Unit options to be passed to the test + # suite. + # + # Examples: + # + # rake test # run tests normally + # rake test TEST=just_one_file.rb # run just one test file. 
+ # rake test TESTOPTS="-v" # run in verbose mode + # rake test TESTOPTS="--runner=fox" # use the fox test runner + # + class TestTask < TaskLib + + # Name of test task. (default is :test) + attr_accessor :name + + # List of directories to added to $LOAD_PATH before running the + # tests. (default is 'lib') + attr_accessor :libs + + # True if verbose test output desired. (default is false) + attr_accessor :verbose + + # Test options passed to the test suite. An explicit + # TESTOPTS=opts on the command line will override this. (default + # is NONE) + attr_accessor :options + + # Request that the tests be run with the warning flag set. + # E.g. warning=true implies "ruby -w" used to run the tests. + attr_accessor :warning + + # Glob pattern to match test files. (default is 'test/test*.rb') + attr_accessor :pattern + + # Style of test loader to use. Options are: + # + # * :rake -- Rake provided test loading script (default). + # * :testrb -- Ruby provided test loading script. + # * :direct -- Load tests using command line loader. + # + attr_accessor :loader + + # Array of commandline options to pass to ruby when running test loader. + attr_accessor :ruby_opts + + # Explicitly define the list of test files to be included in a + # test. +list+ is expected to be an array of file names (a + # FileList is acceptable). If both +pattern+ and +test_files+ are + # used, then the list of test files is the union of the two. + def test_files=(list) + @test_files = list + end + + # Create a testing task. + def initialize(name=:test) + @name = name + @libs = ["lib"] + @pattern = nil + @options = nil + @test_files = nil + @verbose = false + @warning = false + @loader = :rake + @ruby_opts = [] + yield self if block_given? + @pattern = 'test/test*.rb' if @pattern.nil? && @test_files.nil? + define + end + + # Create the tasks defined by this task lib. + def define + lib_path = @libs.join(File::PATH_SEPARATOR) + desc "Run tests" + (@name==:test ? 
"" : " for #{@name}") + task @name do + run_code = '' + RakeFileUtils.verbose(@verbose) do + run_code = + case @loader + when :direct + "-e 'ARGV.each{|f| load f}'" + when :testrb + "-S testrb #{fix}" + when :rake + rake_loader + end + @ruby_opts.unshift( "-I#{lib_path}" ) + @ruby_opts.unshift( "-w" ) if @warning + ruby @ruby_opts.join(" ") + + " \"#{run_code}\" " + + file_list.collect { |fn| "\"#{fn}\"" }.join(' ') + + " #{option_list}" + end + end + self + end + + def option_list # :nodoc: + ENV['TESTOPTS'] || @options || "" + end + + def file_list # :nodoc: + if ENV['TEST'] + FileList[ ENV['TEST'] ] + else + result = [] + result += @test_files.to_a if @test_files + result += FileList[ @pattern ].to_a if @pattern + FileList[result] + end + end + + def fix # :nodoc: + case RUBY_VERSION + when '1.8.2' + find_file 'rake/ruby182_test_unit_fix' + else + nil + end || '' + end + + def rake_loader # :nodoc: + find_file('rake/rake_test_loader') or + fail "unable to find rake test loader" + end + + def find_file(fn) # :nodoc: + $LOAD_PATH.each do |path| + file_path = File.join(path, "#{fn}.rb") + return file_path if File.exist? file_path + end + nil + end + + end +end diff --git a/lib/rake/win32.rb b/lib/rake/win32.rb new file mode 100644 index 0000000..96f66d6 --- /dev/null +++ b/lib/rake/win32.rb @@ -0,0 +1,34 @@ +module Rake + + # Win 32 interface methods for Rake. Windows specific functionality + # will be placed here to collect that knowledge in one spot. + module Win32 + class << self + # True if running on a windows system. + def windows? + # assume other DOSish systems are extinct. + File::ALT_SEPARATOR == '\\' + end + end + + class << self + # The standard directory containing system wide rake files on + # Win 32 systems. Try the following environment variables (in + # order): + # + # * APPDATA + # * HOME + # * HOMEDRIVE + HOMEPATH + # * USERPROFILE + # + # If the above are not defined, retruns the personal folder. 
+ def win32_system_dir #:nodoc: + win32_shared_path = ENV['APPDATA'] + if !win32_shared_path or win32_shared_path.empty? + win32_shared_path = '~' + end + File.expand_path('Rake', win32_shared_path) + end + end if windows? + end +end diff --git a/lib/rational.rb b/lib/rational.rb new file mode 100644 index 0000000..8bed856 --- /dev/null +++ b/lib/rational.rb @@ -0,0 +1,19 @@ +class Fixnum + + alias quof fdiv + alias rdiv quo + + alias power! ** unless method_defined? :power! + alias rpower ** + +end + +class Bignum + + alias quof fdiv + alias rdiv quo + + alias power! ** unless method_defined? :power! + alias rpower ** + +end diff --git a/lib/rbconfig/datadir.rb b/lib/rbconfig/datadir.rb new file mode 100644 index 0000000..5b8f077 --- /dev/null +++ b/lib/rbconfig/datadir.rb @@ -0,0 +1,24 @@ +#!/usr/bin/env ruby +#-- +# Copyright 2006 by Chad Fowler, Rich Kilmer, Jim Weirich and others. +# All rights reserved. +# See LICENSE.txt for permissions. +#++ + + +module Config + + # Only define datadir if it doesn't already exist. + unless Config.respond_to?(:datadir) + + # Return the path to the data directory associated with the given + # package name. Normally this is just + # "#{Config::CONFIG['datadir']}/#{package_name}", but may be + # modified by packages like RubyGems to handle versioned data + # directories. + def Config.datadir(package_name) + File.join(CONFIG['datadir'], package_name) + end + + end +end diff --git a/lib/rdoc.rb b/lib/rdoc.rb new file mode 100644 index 0000000..f4fc386 --- /dev/null +++ b/lib/rdoc.rb @@ -0,0 +1,395 @@ +$DEBUG_RDOC = nil + +## +# = \RDoc - Ruby Documentation System +# +# This package contains RDoc and RDoc::Markup. RDoc is an application that +# produces documentation for one or more Ruby source files. It works similarly +# to JavaDoc, parsing the source, and extracting the definition for classes, +# modules, and methods (along with includes and requires). 
It associates with +# these optional documentation contained in the immediately preceding comment +# block, and then renders the result using a pluggable output formatter. +# RDoc::Markup is a library that converts plain text into various output +# formats. The markup library is used to interpret the comment blocks that +# RDoc uses to document methods, classes, and so on. +# +# == Roadmap +# +# * If you want to use RDoc to create documentation for your Ruby source files, +# read on. +# * If you want to include extensions written in C, see RDoc::Parser::C +# * If you want to drive RDoc programmatically, see RDoc::RDoc. +# * If you want to use the library to format text blocks into HTML, have a look +# at RDoc::Markup. +# * If you want to try writing your own HTML output template, see +# RDoc::Generator::HTML +# +# == Summary +# +# Once installed, you can create documentation using the +rdoc+ command +# +# % rdoc [options] [names...] +# +# For an up-to-date option summary, type +# % rdoc --help +# +# A typical use might be to generate documentation for a package of Ruby +# source (such as RDoc itself). +# +# % rdoc +# +# This command generates documentation for all the Ruby and C source +# files in and below the current directory. These will be stored in a +# documentation tree starting in the subdirectory +doc+. +# +# You can make this slightly more useful for your readers by having the +# index page contain the documentation for the primary file. In our +# case, we could type +# +# % rdoc --main rdoc.rb +# +# You'll find information on the various formatting tricks you can use +# in comment blocks in the documentation this generates. +# +# RDoc uses file extensions to determine how to process each file. File names +# ending +.rb+ and +.rbw+ are assumed to be Ruby source. Files +# ending +.c+ are parsed as C files. All other files are assumed to +# contain just Markup-style markup (with or without leading '#' comment +# markers). 
If directory names are passed to RDoc, they are scanned +# recursively for C and Ruby source files only. +# +# == \Options +# rdoc can be passed a variety of command-line options. In addition, +# options can be specified via the +RDOCOPT+ environment variable, which +# functions similarly to the +RUBYOPT+ environment variable. +# +# % export RDOCOPT="-S" +# +# will make rdoc default to inline method source code. Command-line options +# always will override those in +RDOCOPT+. +# +# Run +# +# % rdoc --help +# +# for full details on rdoc's options. +# +# Here are some of the most commonly used options. +# [-d, --diagram] +# Generate diagrams showing modules and +# classes. You need dot V1.8.6 or later to +# use the --diagram option correctly. Dot is +# available from http://graphviz.org +# +# [-S, --inline-source] +# Show method source code inline, rather than via a popup link. +# +# [-T, --template=NAME] +# Set the template used when generating output. +# +# == Documenting Source Code +# +# Comment blocks can be written fairly naturally, either using +#+ on +# successive lines of the comment, or by including the comment in +# a =begin/=end block. If you use the latter form, the =begin line must be +# flagged with an RDoc tag: +# +# =begin rdoc +# Documentation to be processed by RDoc. +# +# ... +# =end +# +# RDoc stops processing comments if it finds a comment line containing +# a --. This can be used to separate external from internal +# comments, or to stop a comment being associated with a method, class, or +# module. Commenting can be turned back on with a line that starts with a +# ++. +# +# ## +# # Extract the age and calculate the date-of-birth. +# #-- +# # FIXME: fails if the birthday falls on February 29th +# #++ +# # The DOB is returned as a Time object. +# +# def get_dob(person) +# # ... 
+# end +# +# Names of classes, files, and any method names containing an +# underscore or preceded by a hash character are automatically hyperlinked +# from comment text to their description. +# +# Method parameter lists are extracted and displayed with the method +# description. If a method calls +yield+, then the parameters passed to yield +# will also be displayed: +# +# def fred +# ... +# yield line, address +# +# This will get documented as: +# +# fred() { |line, address| ... } +# +# You can override this using a comment containing ':yields: ...' immediately +# after the method definition +# +# def fred # :yields: index, position +# # ... +# +# yield line, address +# +# which will get documented as +# +# fred() { |index, position| ... } +# +# +:yields:+ is an example of a documentation directive. These appear +# immediately after the start of the document element they are modifying. +# +# == \Markup +# +# * The markup engine looks for a document's natural left margin. This is +# used as the initial margin for the document. +# +# * Consecutive lines starting at this margin are considered to be a +# paragraph. +# +# * If a paragraph starts with a "*", "-", or with "<digit>.", then it is +# taken to be the start of a list. The margin is increased to be the first +# non-space following the list start flag. Subsequent lines should be +# indented to this new margin until the list ends. For example: +# +# * this is a list with three paragraphs in +# the first item. This is the first paragraph. +# +# And this is the second paragraph. +# +# 1. This is an indented, numbered list. +# 2. This is the second item in that list +# +# This is the third conventional paragraph in the +# first list item. +# +# * This is the second item in the original list +# +# * You can also construct labeled lists, sometimes called description +# or definition lists.
Do this by putting the label in square brackets +# and indenting the list body: +# +# [cat] a small furry mammal +# that seems to sleep a lot +# +# [ant] a little insect that is known +# to enjoy picnics +# +# A minor variation on labeled lists uses two colons to separate the +# label from the list body: +# +# cat:: a small furry mammal +# that seems to sleep a lot +# +# ant:: a little insect that is known +# to enjoy picnics +# +# This latter style guarantees that the list bodies' left margins are +# aligned: think of them as a two column table. +# +# * Any line that starts to the right of the current margin is treated +# as verbatim text. This is useful for code listings. The example of a +# list above is also verbatim text. +# +# * A line starting with an equals sign (=) is treated as a +# heading. Level one headings have one equals sign, level two headings +# have two, and so on. +# +# * A line starting with three or more hyphens (at the current indent) +# generates a horizontal rule. The more hyphens, the thicker the rule +# (within reason, and if supported by the output device) +# +# * You can use markup within text (except verbatim) to change the +# appearance of parts of that text. Out of the box, RDoc::Markup +# supports word-based and general markup. +# +# Word-based markup uses flag characters around individual words: +# +# [\*word*] displays word in a *bold* font +# [\_word_] displays word in an _emphasized_ font +# [\+word+] displays word in a +code+ font +# +# General markup affects text between a start delimiter and an end +# delimiter. Not surprisingly, these delimiters look like HTML markup. +# +# [\<b>text...</b>] displays word in a *bold* font +# [\<em>text...</em>] displays word in an _emphasized_ font +# [\\<i>text...</i>] displays word in an <i>italicized</i> font +# [\<tt>text...</tt>] displays word in a +code+ font +# +# Unlike conventional Wiki markup, general markup can cross line +# boundaries.
You can turn off the interpretation of markup by +# preceding the first character with a backslash. This only works for +# simple markup, not HTML-style markup. +# +# * Hyperlinks to the web starting http:, mailto:, ftp:, or www. are +# recognized. An HTTP url that references an external image file is +# converted into an inline <IMG..>. Hyperlinks starting 'link:' are +# assumed to refer to local files whose path is relative to the --op +# directory. +# +# Hyperlinks can also be of the form label[url], in which +# case the label is used in the displayed text, and +url+ is +# used as the target. If +label+ contains multiple words, +# put it in braces: {multi word label}[url]. +# +# == Directives +# +# [+:nodoc:+ / +:nodoc:+ all] +# This directive prevents documentation for the element from +# being generated. For classes and modules, the methods, aliases, +# constants, and attributes directly within the affected class or +# module also will be omitted. By default, though, modules and +# classes within that class or module _will_ be documented. This is +# turned off by adding the +all+ modifier. +# +# module MyModule # :nodoc: +# class Input +# end +# end +# +# module OtherModule # :nodoc: all +# class Output +# end +# end +# +# In the above code, only class MyModule::Input will be documented. +# The +:nodoc:+ directive is global across all files for the class or module +# to which it applies, so use +:stopdoc:+/+:startdoc:+ to suppress +# documentation only for a particular set of methods, etc. +# +# [+:doc:+] +# Forces a method or attribute to be documented even if it wouldn't be +# otherwise. Useful if, for example, you want to include documentation of a +# particular private method. +# +# [+:notnew:+] +# Only applicable to the +initialize+ instance method. Normally RDoc +# assumes that the documentation and parameters for +initialize+ are +# actually for the +new+ method, and so fakes out a +new+ for the class. +# The +:notnew:+ modifier stops this.
Remember that +initialize+ is private, +# so you won't see the documentation unless you use the +-a+ command line +# option. +# +# Comment blocks can contain other directives: +# +# [:section: title] +# Starts a new section in the output. The title following +:section:+ is +# used as the section heading, and the remainder of the comment containing +# the section is used as introductory text. Subsequent methods, aliases, +# attributes, and classes will be documented in this section. A :section: +# comment block may have one or more lines before the :section: directive. +# These will be removed, and any identical lines at the end of the block are +# also removed. This allows you to add visual cues such as: +# +# # ---------------------------------------- +# # :section: My Section +# # This is the section that I wrote. +# # See it glisten in the noon-day sun. +# # ---------------------------------------- +# +# [+:call-seq:+] +# Lines up to the next blank line in the comment are treated as the method's +# calling sequence, overriding the default parsing of method parameters and +# yield arguments. +# +# [+:include:+ _filename_] +# \Include the contents of the named file at this point. The file will be +# searched for in the directories listed by the +--include+ option, or in +# the current directory by default. The contents of the file will be +# shifted to have the same indentation as the ':' at the start of +# the :include: directive. +# +# [+:title:+ _text_] +# Sets the title for the document. Equivalent to the --title +# command line parameter. (The command line parameter overrides any :title: +# directive in the source). +# +# [+:enddoc:+] +# Document nothing further at the current level. +# +# [+:main:+ _name_] +# Equivalent to the --main command line parameter. +# +# [+:stopdoc:+ / +:startdoc:+] +# Stop and start adding new documentation elements to the current container. 
+# For example, if a class has a number of constants that you don't want to +# document, put a +:stopdoc:+ before the first, and a +:startdoc:+ after the +# last. If you don't specify a +:startdoc:+ by the end of the container, +# the +:stopdoc:+ disables documentation for the entire class or module. +# +# == Other stuff +# +# RDoc is currently being maintained by Eric Hodel +# +# Dave Thomas is the original author of RDoc. +# +# == Credits +# +# * The Ruby parser in rdoc/parse.rb is based heavily on the outstanding +# work of Keiju ISHITSUKA of Nippon Rational Inc, who produced the Ruby +# parser for irb and the rtags package. +# +# * Code to diagram classes and modules was written by Sergey A Yanovitsky +# (Jah) of Enticla. +# +# * Charset patch from MoonWolf. +# +# * Rich Kilmer wrote the kilmer.rb output template. +# +# * Dan Brickley led the design of the RDF format. +# +# == License +# +# RDoc is Copyright (c) 2001-2003 Dave Thomas, The Pragmatic Programmers. It +# is free software, and may be redistributed under the terms specified +# in the README file of the Ruby distribution. +# +# == Warranty +# +# This software is provided "as is" and without any express or implied +# warranties, including, without limitation, the implied warranties of +# merchantability and fitness for a particular purpose. + +module RDoc + + ## + # Exception thrown by any rdoc error.
+ + class Error < RuntimeError; end + + RDocError = Error # :nodoc: + + ## + # RDoc version you are using + + VERSION = "2.2.2" + + ## + # Name of the dotfile that contains the description of files to be processed + # in the current directory + + DOT_DOC_FILENAME = ".document" + + GENERAL_MODIFIERS = %w[nodoc].freeze + + CLASS_MODIFIERS = GENERAL_MODIFIERS + + ATTR_MODIFIERS = GENERAL_MODIFIERS + + CONSTANT_MODIFIERS = GENERAL_MODIFIERS + + METHOD_MODIFIERS = GENERAL_MODIFIERS + + %w[arg args yield yields notnew not-new not_new doc] + +end + diff --git a/lib/resolv-replace.rb b/lib/resolv-replace.rb new file mode 100644 index 0000000..63d58ce --- /dev/null +++ b/lib/resolv-replace.rb @@ -0,0 +1,63 @@ +require 'socket' +require 'resolv' + +class << IPSocket + alias original_resolv_getaddress getaddress + def getaddress(host) + begin + return Resolv.getaddress(host).to_s + rescue Resolv::ResolvError + raise SocketError, "Hostname not known: #{host}" + end + end +end + +class TCPSocket + alias original_resolv_initialize initialize + def initialize(host, serv, *rest) + rest[0] = IPSocket.getaddress(rest[0]) unless rest.empty? 
+ original_resolv_initialize(IPSocket.getaddress(host), serv, *rest) + end +end + +class UDPSocket + alias original_resolv_bind bind + def bind(host, port) + host = IPSocket.getaddress(host) if host != "" + original_resolv_bind(host, port) + end + + alias original_resolv_connect connect + def connect(host, port) + original_resolv_connect(IPSocket.getaddress(host), port) + end + + alias original_resolv_send send + def send(mesg, flags, *rest) + if rest.length == 2 + host, port = rest + begin + addrs = Resolv.getaddresses(host) + rescue Resolv::ResolvError + raise SocketError, "Hostname not known: #{host}" + end + err = nil + addrs[0...-1].each {|addr| + begin + return original_resolv_send(mesg, flags, addr, port) + rescue SystemCallError + end + } + original_resolv_send(mesg, flags, addrs[-1], port) + else + original_resolv_send(mesg, flags, *rest) + end + end +end + +class SOCKSSocket + alias original_resolv_initialize initialize + def initialize(host, serv) + original_resolv_initialize(IPSocket.getaddress(host), port) + end +end if defined? SOCKSSocket diff --git a/lib/resolv.rb b/lib/resolv.rb new file mode 100644 index 0000000..b201fcf --- /dev/null +++ b/lib/resolv.rb @@ -0,0 +1,2262 @@ +require 'socket' +require 'fcntl' +require 'timeout' +require 'thread' + +begin + require 'securerandom' +rescue LoadError +end + +# Resolv is a thread-aware DNS resolver library written in Ruby. Resolv can +# handle multiple DNS requests concurrently without blocking. The ruby +# interpreter. +# +# See also resolv-replace.rb to replace the libc resolver with # Resolv. +# +# Resolv can look up various DNS resources using the DNS module directly. 
+# +# Examples: +# +# p Resolv.getaddress "www.ruby-lang.org" +# p Resolv.getname "210.251.121.214" +# +# Resolv::DNS.open do |dns| +# ress = dns.getresources "www.ruby-lang.org", Resolv::DNS::Resource::IN::A +# p ress.map { |r| r.address } +# ress = dns.getresources "ruby-lang.org", Resolv::DNS::Resource::IN::MX +# p ress.map { |r| [r.exchange.to_s, r.preference] } +# end +# +# +# == Bugs +# +# * NIS is not supported. +# * /etc/nsswitch.conf is not supported. + +class Resolv + + ## + # Looks up the first IP address for +name+. + + def self.getaddress(name) + DefaultResolver.getaddress(name) + end + + ## + # Looks up all IP address for +name+. + + def self.getaddresses(name) + DefaultResolver.getaddresses(name) + end + + ## + # Iterates over all IP addresses for +name+. + + def self.each_address(name, &block) + DefaultResolver.each_address(name, &block) + end + + ## + # Looks up the hostname of +address+. + + def self.getname(address) + DefaultResolver.getname(address) + end + + ## + # Looks up all hostnames for +address+. + + def self.getnames(address) + DefaultResolver.getnames(address) + end + + ## + # Iterates over all hostnames for +address+. + + def self.each_name(address, &proc) + DefaultResolver.each_name(address, &proc) + end + + ## + # Creates a new Resolv using +resolvers+. + + def initialize(resolvers=[Hosts.new, DNS.new]) + @resolvers = resolvers + end + + ## + # Looks up the first IP address for +name+. + + def getaddress(name) + each_address(name) {|address| return address} + raise ResolvError.new("no address for #{name}") + end + + ## + # Looks up all IP address for +name+. + + def getaddresses(name) + ret = [] + each_address(name) {|address| ret << address} + return ret + end + + ## + # Iterates over all IP addresses for +name+. 
+ + def each_address(name) + if AddressRegex =~ name + yield name + return + end + yielded = false + @resolvers.each {|r| + r.each_address(name) {|address| + yield address.to_s + yielded = true + } + return if yielded + } + end + + ## + # Looks up the hostname of +address+. + + def getname(address) + each_name(address) {|name| return name} + raise ResolvError.new("no name for #{address}") + end + + ## + # Looks up all hostnames for +address+. + + def getnames(address) + ret = [] + each_name(address) {|name| ret << name} + return ret + end + + ## + # Iterates over all hostnames for +address+. + + def each_name(address) + yielded = false + @resolvers.each {|r| + r.each_name(address) {|name| + yield name.to_s + yielded = true + } + return if yielded + } + end + + ## + # Indicates a failure to resolve a name or address. + + class ResolvError < StandardError; end + + ## + # Indicates a timeout resolving a name or address. + + class ResolvTimeout < TimeoutError; end + + ## + # DNS::Hosts is a hostname resolver that uses the system hosts file. + + class Hosts + if /mswin|mingw|bccwin/ =~ RUBY_PLATFORM + require 'win32/resolv' + DefaultFileName = Win32::Resolv.get_hosts_path + else + DefaultFileName = '/etc/hosts' + end + + ## + # Creates a new DNS::Hosts, using +filename+ for its data source. + + def initialize(filename = DefaultFileName) + @filename = filename + @mutex = Mutex.new + @initialized = nil + end + + def lazy_initialize # :nodoc: + @mutex.synchronize { + unless @initialized + @name2addr = {} + @addr2name = {} + open(@filename) {|f| + f.each {|line| + line.sub!(/#.*/, '') + addr, hostname, *aliases = line.split(/\s+/) + next unless addr + addr.untaint + hostname.untaint + @addr2name[addr] = [] unless @addr2name.include? addr + @addr2name[addr] << hostname + @addr2name[addr] += aliases + @name2addr[hostname] = [] unless @name2addr.include? hostname + @name2addr[hostname] << addr + aliases.each {|n| + n.untaint + @name2addr[n] = [] unless @name2addr.include? 
n + @name2addr[n] << addr + } + } + } + @name2addr.each {|name, arr| arr.reverse!} + @initialized = true + end + } + self + end + + ## + # Gets the IP address of +name+ from the hosts file. + + def getaddress(name) + each_address(name) {|address| return address} + raise ResolvError.new("#{@filename} has no name: #{name}") + end + + ## + # Gets all IP addresses for +name+ from the hosts file. + + def getaddresses(name) + ret = [] + each_address(name) {|address| ret << address} + return ret + end + + ## + # Iterates over all IP addresses for +name+ retrieved from the hosts file. + + def each_address(name, &proc) + lazy_initialize + if @name2addr.include?(name) + @name2addr[name].each(&proc) + end + end + + ## + # Gets the hostname of +address+ from the hosts file. + + def getname(address) + each_name(address) {|name| return name} + raise ResolvError.new("#{@filename} has no address: #{address}") + end + + ## + # Gets all hostnames for +address+ from the hosts file. + + def getnames(address) + ret = [] + each_name(address) {|name| ret << name} + return ret + end + + ## + # Iterates over all hostnames for +address+ retrieved from the hosts file. + + def each_name(address, &proc) + lazy_initialize + if @addr2name.include?(address) + @addr2name[address].each(&proc) + end + end + end + + ## + # Resolv::DNS is a DNS stub resolver. + # + # Information taken from the following places: + # + # * STD0013 + # * RFC 1035 + # * ftp://ftp.isi.edu/in-notes/iana/assignments/dns-parameters + # * etc. + + class DNS + + ## + # Default DNS Port + + Port = 53 + + ## + # Default DNS UDP packet size + + UDPSize = 512 + + ## + # Creates a new DNS resolver. See Resolv::DNS.new for argument details. + # + # Yields the created DNS resolver to the block, if given, otherwise + # returns it. + + def self.open(*args) + dns = new(*args) + return dns unless block_given? + begin + yield dns + ensure + dns.close + end + end + + ## + # Creates a new DNS resolver. 
+ # + # +config_info+ can be: + # + # nil:: Uses /etc/resolv.conf. + # String:: Path to a file using /etc/resolv.conf's format. + # Hash:: Must contain :nameserver, :search and :ndots keys. + # + # Example: + # + # Resolv::DNS.new(:nameserver => ['210.251.121.21'], + # :search => ['ruby-lang.org'], + # :ndots => 1) + + def initialize(config_info=nil) + @mutex = Mutex.new + @config = Config.new(config_info) + @initialized = nil + end + + def lazy_initialize # :nodoc: + @mutex.synchronize { + unless @initialized + @config.lazy_initialize + @initialized = true + end + } + self + end + + ## + # Closes the DNS resolver. + + def close + @mutex.synchronize { + if @initialized + @initialized = false + end + } + end + + ## + # Gets the IP address of +name+ from the DNS resolver. + # + # +name+ can be a Resolv::DNS::Name or a String. Retrieved address will + # be a Resolv::IPv4 or Resolv::IPv6 + + def getaddress(name) + each_address(name) {|address| return address} + raise ResolvError.new("DNS result has no information for #{name}") + end + + ## + # Gets all IP addresses for +name+ from the DNS resolver. + # + # +name+ can be a Resolv::DNS::Name or a String. Retrieved addresses will + # be a Resolv::IPv4 or Resolv::IPv6 + + def getaddresses(name) + ret = [] + each_address(name) {|address| ret << address} + return ret + end + + ## + # Iterates over all IP addresses for +name+ retrieved from the DNS + # resolver. + # + # +name+ can be a Resolv::DNS::Name or a String. Retrieved addresses will + # be a Resolv::IPv4 or Resolv::IPv6 + + def each_address(name) + each_resource(name, Resource::IN::A) {|resource| yield resource.address} + each_resource(name, Resource::IN::AAAA) {|resource| yield resource.address} + end + + ## + # Gets the hostname for +address+ from the DNS resolver. + # + # +address+ must be a Resolv::IPv4, Resolv::IPv6 or a String. Retrieved + # name will be a Resolv::DNS::Name. 
+ + def getname(address) + each_name(address) {|name| return name} + raise ResolvError.new("DNS result has no information for #{address}") + end + + ## + # Gets all hostnames for +address+ from the DNS resolver. + # + # +address+ must be a Resolv::IPv4, Resolv::IPv6 or a String. Retrieved + # names will be Resolv::DNS::Name instances. + + def getnames(address) + ret = [] + each_name(address) {|name| ret << name} + return ret + end + + ## + # Iterates over all hostnames for +address+ retrieved from the DNS + # resolver. + # + # +address+ must be a Resolv::IPv4, Resolv::IPv6 or a String. Retrieved + # names will be Resolv::DNS::Name instances. + + def each_name(address) + case address + when Name + ptr = address + when IPv4::Regex + ptr = IPv4.create(address).to_name + when IPv6::Regex + ptr = IPv6.create(address).to_name + else + raise ResolvError.new("cannot interpret as address: #{address}") + end + each_resource(ptr, Resource::IN::PTR) {|resource| yield resource.name} + end + + ## + # Look up the +typeclass+ DNS resource of +name+. + # + # +name+ must be a Resolv::DNS::Name or a String. + # + # +typeclass+ should be one of the following: + # + # * Resolv::DNS::Resource::IN::A + # * Resolv::DNS::Resource::IN::AAAA + # * Resolv::DNS::Resource::IN::ANY + # * Resolv::DNS::Resource::IN::CNAME + # * Resolv::DNS::Resource::IN::HINFO + # * Resolv::DNS::Resource::IN::MINFO + # * Resolv::DNS::Resource::IN::MX + # * Resolv::DNS::Resource::IN::NS + # * Resolv::DNS::Resource::IN::PTR + # * Resolv::DNS::Resource::IN::SOA + # * Resolv::DNS::Resource::IN::TXT + # * Resolv::DNS::Resource::IN::WKS + # + # Returned resource is represented as a Resolv::DNS::Resource instance, + # i.e. Resolv::DNS::Resource::IN::A. + + def getresource(name, typeclass) + each_resource(name, typeclass) {|resource| return resource} + raise ResolvError.new("DNS result has no information for #{name}") + end + + ## + # Looks up all +typeclass+ DNS resources for +name+. 
See #getresource for + # argument details. + + def getresources(name, typeclass) + ret = [] + each_resource(name, typeclass) {|resource| ret << resource} + return ret + end + + ## + # Iterates over all +typeclass+ DNS resources for +name+. See + # #getresource for argument details. + + def each_resource(name, typeclass, &proc) + lazy_initialize + requester = make_requester + senders = {} + begin + @config.resolv(name) {|candidate, tout, nameserver| + msg = Message.new + msg.rd = 1 + msg.add_question(candidate, typeclass) + unless sender = senders[[candidate, nameserver]] + sender = senders[[candidate, nameserver]] = + requester.sender(msg, candidate, nameserver) + end + reply, reply_name = requester.request(sender, tout) + case reply.rcode + when RCode::NoError + extract_resources(reply, reply_name, typeclass, &proc) + return + when RCode::NXDomain + raise Config::NXDomain.new(reply_name.to_s) + else + raise Config::OtherResolvError.new(reply_name.to_s) + end + } + ensure + requester.close + end + end + + def make_requester # :nodoc: + if nameserver = @config.single? + Requester::ConnectedUDP.new(nameserver) + else + Requester::UnconnectedUDP.new + end + end + + def extract_resources(msg, name, typeclass) # :nodoc: + if typeclass < Resource::ANY + n0 = Name.create(name) + msg.each_answer {|n, ttl, data| + yield data if n0 == n + } + end + yielded = false + n0 = Name.create(name) + msg.each_answer {|n, ttl, data| + if n0 == n + case data + when typeclass + yield data + yielded = true + when Resource::CNAME + n0 = data.name + end + end + } + return if yielded + msg.each_answer {|n, ttl, data| + if n0 == n + case data + when typeclass + yield data + end + end + } + end + + if defined? 
SecureRandom + def self.random(arg) # :nodoc: + begin + SecureRandom.random_number(arg) + rescue NotImplementedError + rand(arg) + end + end + else + def self.random(arg) # :nodoc: + rand(arg) + end + end + + + def self.rangerand(range) # :nodoc: + base = range.begin + len = range.end - range.begin + if !range.exclude_end? + len += 1 + end + base + random(len) + end + + RequestID = {} + RequestIDMutex = Mutex.new + + def self.allocate_request_id(host, port) # :nodoc: + id = nil + RequestIDMutex.synchronize { + h = (RequestID[[host, port]] ||= {}) + begin + id = rangerand(0x0000..0xffff) + end while h[id] + h[id] = true + } + id + end + + def self.free_request_id(host, port, id) # :nodoc: + RequestIDMutex.synchronize { + key = [host, port] + if h = RequestID[key] + h.delete id + if h.empty? + RequestID.delete key + end + end + } + end + + def self.bind_random_port(udpsock) # :nodoc: + begin + port = rangerand(1024..65535) + udpsock.bind("", port) + rescue Errno::EADDRINUSE + retry + end + end + + class Requester # :nodoc: + def initialize + @senders = {} + @sock = nil + end + + def request(sender, tout) + timelimit = Time.now + tout + sender.send + while (now = Time.now) < timelimit + timeout = timelimit - now + if !IO.select([@sock], nil, nil, timeout) + raise ResolvTimeout + end + reply, from = recv_reply + begin + msg = Message.decode(reply) + rescue DecodeError + next # broken DNS message ignored + end + if s = @senders[[from,msg.id]] + break + else + # unexpected DNS message ignored + end + end + return msg, s.data + end + + def close + sock = @sock + @sock = nil + sock.close if sock + end + + class Sender # :nodoc: + def initialize(msg, data, sock) + @msg = msg + @data = data + @sock = sock + end + end + + class UnconnectedUDP < Requester # :nodoc: + def initialize + super() + @sock = UDPSocket.new + @sock.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) if defined? 
Fcntl::F_SETFD + DNS.bind_random_port(@sock) + end + + def recv_reply + reply, from = @sock.recvfrom(UDPSize) + return reply, [from[3],from[1]] + end + + def sender(msg, data, host, port=Port) + service = [host, port] + id = DNS.allocate_request_id(host, port) + request = msg.encode + request[0,2] = [id].pack('n') + return @senders[[service, id]] = + Sender.new(request, data, @sock, host, port) + end + + def close + super + @senders.each_key {|service, id| + DNS.free_request_id(service[0], service[1], id) + } + end + + class Sender < Requester::Sender # :nodoc: + def initialize(msg, data, sock, host, port) + super(msg, data, sock) + @host = host + @port = port + end + attr_reader :data + + def send + @sock.send(@msg, 0, @host, @port) + end + end + end + + class ConnectedUDP < Requester # :nodoc: + def initialize(host, port=Port) + super() + @host = host + @port = port + @sock = UDPSocket.new(host.index(':') ? Socket::AF_INET6 : Socket::AF_INET) + DNS.bind_random_port(@sock) + @sock.connect(host, port) + @sock.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) if defined? Fcntl::F_SETFD + end + + def recv_reply + reply = @sock.recv(UDPSize) + return reply, nil + end + + def sender(msg, data, host=@host, port=@port) + unless host == @host && port == @port + raise RequestError.new("host/port don't match: #{host}:#{port}") + end + id = DNS.allocate_request_id(@host, @port) + request = msg.encode + request[0,2] = [id].pack('n') + return @senders[[nil,id]] = Sender.new(request, data, @sock) + end + + def close + super + @senders.each_key {|from, id| + DNS.free_request_id(@host, @port, id) + } + end + + class Sender < Requester::Sender # :nodoc: + def send + @sock.send(@msg, 0) + end + attr_reader :data + end + end + + class TCP < Requester # :nodoc: + def initialize(host, port=Port) + super() + @host = host + @port = port + @sock = TCPSocket.new(@host, @port) + @sock.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) if defined? 
Fcntl::F_SETFD + @senders = {} + end + + def recv_reply + len = @sock.read(2).unpack('n')[0] + reply = @sock.read(len) + return reply, nil + end + + def sender(msg, data, host=@host, port=@port) + unless host == @host && port == @port + raise RequestError.new("host/port don't match: #{host}:#{port}") + end + id = DNS.allocate_request_id(@host, @port) + request = msg.encode + request[0,2] = [request.length, id].pack('nn') + return @senders[[nil,id]] = Sender.new(request, data, @sock) + end + + class Sender < Requester::Sender # :nodoc: + def send + @sock.print(@msg) + @sock.flush + end + attr_reader :data + end + + def close + super + @senders.each_key {|from,id| + DNS.free_request_id(@host, @port, id) + } + end + end + + ## + # Indicates a problem with the DNS request. + + class RequestError < StandardError + end + end + + class Config # :nodoc: + def initialize(config_info=nil) + @mutex = Mutex.new + @config_info = config_info + @initialized = nil + end + + def Config.parse_resolv_conf(filename) + nameserver = [] + search = nil + ndots = 1 + open(filename) {|f| + f.each {|line| + line.sub!(/[#;].*/, '') + keyword, *args = line.split(/\s+/) + args.each { |arg| + arg.untaint + } + next unless keyword + case keyword + when 'nameserver' + nameserver += args + when 'domain' + next if args.empty? + search = [args[0]] + when 'search' + next if args.empty? + search = args + when 'options' + args.each {|arg| + case arg + when /\Andots:(\d+)\z/ + ndots = $1.to_i + end + } + end + } + } + return { :nameserver => nameserver, :search => search, :ndots => ndots } + end + + def Config.default_config_hash(filename="/etc/resolv.conf") + if File.exist? 
filename + config_hash = Config.parse_resolv_conf(filename) + else + if /mswin|cygwin|mingw|bccwin/ =~ RUBY_PLATFORM + require 'win32/resolv' + search, nameserver = Win32::Resolv.get_resolv_info + config_hash = {} + config_hash[:nameserver] = nameserver if nameserver + config_hash[:search] = [search].flatten if search + end + end + config_hash + end + + def lazy_initialize + @mutex.synchronize { + unless @initialized + @nameserver = [] + @search = nil + @ndots = 1 + case @config_info + when nil + config_hash = Config.default_config_hash + when String + config_hash = Config.parse_resolv_conf(@config_info) + when Hash + config_hash = @config_info.dup + if String === config_hash[:nameserver] + config_hash[:nameserver] = [config_hash[:nameserver]] + end + if String === config_hash[:search] + config_hash[:search] = [config_hash[:search]] + end + else + raise ArgumentError.new("invalid resolv configuration: #{@config_info.inspect}") + end + @nameserver = config_hash[:nameserver] if config_hash.include? :nameserver + @search = config_hash[:search] if config_hash.include? :search + @ndots = config_hash[:ndots] if config_hash.include? :ndots + + @nameserver = ['0.0.0.0'] if @nameserver.empty? + if @search + @search = @search.map {|arg| Label.split(arg) } + else + hostname = Socket.gethostname + if /\./ =~ hostname + @search = [Label.split($')] + else + @search = [[]] + end + end + + if !@nameserver.kind_of?(Array) || + !@nameserver.all? {|ns| String === ns } + raise ArgumentError.new("invalid nameserver config: #{@nameserver.inspect}") + end + + if !@search.kind_of?(Array) || + !@search.all? {|ls| ls.all? {|l| Label::Str === l } } + raise ArgumentError.new("invalid search config: #{@search.inspect}") + end + + if !@ndots.kind_of?(Integer) + raise ArgumentError.new("invalid ndots config: #{@ndots.inspect}") + end + + @initialized = true + end + } + self + end + + def single? 
+ lazy_initialize + if @nameserver.length == 1 + return @nameserver[0] + else + return nil + end + end + + def generate_candidates(name) + candidates = nil + name = Name.create(name) + if name.absolute? + candidates = [name] + else + if @ndots <= name.length - 1 + candidates = [Name.new(name.to_a)] + else + candidates = [] + end + candidates.concat(@search.map {|domain| Name.new(name.to_a + domain)}) + end + return candidates + end + + InitialTimeout = 5 + + def generate_timeouts + ts = [InitialTimeout] + ts << ts[-1] * 2 / @nameserver.length + ts << ts[-1] * 2 + ts << ts[-1] * 2 + return ts + end + + def resolv(name) + candidates = generate_candidates(name) + timeouts = generate_timeouts + begin + candidates.each {|candidate| + begin + timeouts.each {|tout| + @nameserver.each {|nameserver| + begin + yield candidate, tout, nameserver + rescue ResolvTimeout + end + } + } + raise ResolvError.new("DNS resolv timeout: #{name}") + rescue NXDomain + end + } + rescue ResolvError + end + end + + ## + # Indicates no such domain was found. + + class NXDomain < ResolvError + end + + ## + # Indicates some other unhandled resolver error was encountered. + + class OtherResolvError < ResolvError + end + end + + module OpCode # :nodoc: + Query = 0 + IQuery = 1 + Status = 2 + Notify = 4 + Update = 5 + end + + module RCode # :nodoc: + NoError = 0 + FormErr = 1 + ServFail = 2 + NXDomain = 3 + NotImp = 4 + Refused = 5 + YXDomain = 6 + YXRRSet = 7 + NXRRSet = 8 + NotAuth = 9 + NotZone = 10 + BADVERS = 16 + BADSIG = 16 + BADKEY = 17 + BADTIME = 18 + BADMODE = 19 + BADNAME = 20 + BADALG = 21 + end + + ## + # Indicates that the DNS response was unable to be decoded. + + class DecodeError < StandardError + end + + ## + # Indicates that the DNS request was unable to be encoded. 
+ + class EncodeError < StandardError + end + + module Label # :nodoc: + def self.split(arg) + labels = [] + arg.scan(/[^\.]+/) {labels << Str.new($&)} + return labels + end + + class Str # :nodoc: + def initialize(string) + @string = string + @downcase = string.downcase + end + attr_reader :string, :downcase + + def to_s + return @string + end + + def inspect + return "#<#{self.class} #{self.to_s}>" + end + + def ==(other) + return @downcase == other.downcase + end + + def eql?(other) + return self == other + end + + def hash + return @downcase.hash + end + end + end + + ## + # A representation of a DNS name. + + class Name + + ## + # Creates a new DNS name from +arg+. +arg+ can be: + # + # Name:: returns +arg+. + # String:: Creates a new Name. + + def self.create(arg) + case arg + when Name + return arg + when String + return Name.new(Label.split(arg), /\.\z/ =~ arg ? true : false) + else + raise ArgumentError.new("cannot interpret as DNS name: #{arg.inspect}") + end + end + + def initialize(labels, absolute=true) # :nodoc: + @labels = labels + @absolute = absolute + end + + def inspect # :nodoc: + "#<#{self.class}: #{self.to_s}#{@absolute ? '.' : ''}>" + end + + ## + # True if this name is absolute. + + def absolute? + return @absolute + end + + def ==(other) # :nodoc: + return false unless Name === other + return @labels.join == other.to_a.join && @absolute == other.absolute? + end + + alias eql? == # :nodoc: + + ## + # Returns true if +other+ is a subdomain. 
+ # + # Example: + # + # domain = Resolv::DNS::Name.create("y.z") + # p Resolv::DNS::Name.create("w.x.y.z").subdomain_of?(domain) #=> true + # p Resolv::DNS::Name.create("x.y.z").subdomain_of?(domain) #=> true + # p Resolv::DNS::Name.create("y.z").subdomain_of?(domain) #=> false + # p Resolv::DNS::Name.create("z").subdomain_of?(domain) #=> false + # p Resolv::DNS::Name.create("x.y.z.").subdomain_of?(domain) #=> false + # p Resolv::DNS::Name.create("w.z").subdomain_of?(domain) #=> false + # + + def subdomain_of?(other) + raise ArgumentError, "not a domain name: #{other.inspect}" unless Name === other + return false if @absolute != other.absolute? + other_len = other.length + return false if @labels.length <= other_len + return @labels[-other_len, other_len] == other.to_a + end + + def hash # :nodoc: + return @labels.hash ^ @absolute.hash + end + + def to_a # :nodoc: + return @labels + end + + def length # :nodoc: + return @labels.length + end + + def [](i) # :nodoc: + return @labels[i] + end + + ## + # returns the domain name as a string. + # + # The domain name doesn't have a trailing dot even if the name object is + # absolute. 
+ # + # Example: + # + # p Resolv::DNS::Name.create("x.y.z.").to_s #=> "x.y.z" + # p Resolv::DNS::Name.create("x.y.z").to_s #=> "x.y.z" + + def to_s + return @labels.join('.') + end + end + + class Message # :nodoc: + @@identifier = -1 + + def initialize(id = (@@identifier += 1) & 0xffff) + @id = id + @qr = 0 + @opcode = 0 + @aa = 0 + @tc = 0 + @rd = 0 # recursion desired + @ra = 0 # recursion available + @rcode = 0 + @question = [] + @answer = [] + @authority = [] + @additional = [] + end + + attr_accessor :id, :qr, :opcode, :aa, :tc, :rd, :ra, :rcode + attr_reader :question, :answer, :authority, :additional + + def ==(other) + return @id == other.id && + @qr == other.qr && + @opcode == other.opcode && + @aa == other.aa && + @tc == other.tc && + @rd == other.rd && + @ra == other.ra && + @rcode == other.rcode && + @question == other.question && + @answer == other.answer && + @authority == other.authority && + @additional == other.additional + end + + def add_question(name, typeclass) + @question << [Name.create(name), typeclass] + end + + def each_question + @question.each {|name, typeclass| + yield name, typeclass + } + end + + def add_answer(name, ttl, data) + @answer << [Name.create(name), ttl, data] + end + + def each_answer + @answer.each {|name, ttl, data| + yield name, ttl, data + } + end + + def add_authority(name, ttl, data) + @authority << [Name.create(name), ttl, data] + end + + def each_authority + @authority.each {|name, ttl, data| + yield name, ttl, data + } + end + + def add_additional(name, ttl, data) + @additional << [Name.create(name), ttl, data] + end + + def each_additional + @additional.each {|name, ttl, data| + yield name, ttl, data + } + end + + def each_resource + each_answer {|name, ttl, data| yield name, ttl, data} + each_authority {|name, ttl, data| yield name, ttl, data} + each_additional {|name, ttl, data| yield name, ttl, data} + end + + def encode + return MessageEncoder.new {|msg| + msg.put_pack('nnnnnn', + @id, + (@qr & 1) << 15 | + 
(@opcode & 15) << 11 | + (@aa & 1) << 10 | + (@tc & 1) << 9 | + (@rd & 1) << 8 | + (@ra & 1) << 7 | + (@rcode & 15), + @question.length, + @answer.length, + @authority.length, + @additional.length) + @question.each {|q| + name, typeclass = q + msg.put_name(name) + msg.put_pack('nn', typeclass::TypeValue, typeclass::ClassValue) + } + [@answer, @authority, @additional].each {|rr| + rr.each {|r| + name, ttl, data = r + msg.put_name(name) + msg.put_pack('nnN', data.class::TypeValue, data.class::ClassValue, ttl) + msg.put_length16 {data.encode_rdata(msg)} + } + } + }.to_s + end + + class MessageEncoder # :nodoc: + def initialize + @data = '' + @names = {} + yield self + end + + def to_s + return @data + end + + def put_bytes(d) + @data << d + end + + def put_pack(template, *d) + @data << d.pack(template) + end + + def put_length16 + length_index = @data.length + @data << "\0\0" + data_start = @data.length + yield + data_end = @data.length + @data[length_index, 2] = [data_end - data_start].pack("n") + end + + def put_string(d) + self.put_pack("C", d.length) + @data << d + end + + def put_string_list(ds) + ds.each {|d| + self.put_string(d) + } + end + + def put_name(d) + put_labels(d.to_a) + end + + def put_labels(d) + d.each_index {|i| + domain = d[i..-1] + if idx = @names[domain] + self.put_pack("n", 0xc000 | idx) + return + else + @names[domain] = @data.length + self.put_label(d[i]) + end + } + @data << "\0" + end + + def put_label(d) + self.put_string(d.to_s) + end + end + + def Message.decode(m) + o = Message.new(0) + MessageDecoder.new(m) {|msg| + id, flag, qdcount, ancount, nscount, arcount = + msg.get_unpack('nnnnnn') + o.id = id + o.qr = (flag >> 15) & 1 + o.opcode = (flag >> 11) & 15 + o.aa = (flag >> 10) & 1 + o.tc = (flag >> 9) & 1 + o.rd = (flag >> 8) & 1 + o.ra = (flag >> 7) & 1 + o.rcode = flag & 15 + (1..qdcount).each { + name, typeclass = msg.get_question + o.add_question(name, typeclass) + } + (1..ancount).each { + name, ttl, data = msg.get_rr + 
o.add_answer(name, ttl, data) + } + (1..nscount).each { + name, ttl, data = msg.get_rr + o.add_authority(name, ttl, data) + } + (1..arcount).each { + name, ttl, data = msg.get_rr + o.add_additional(name, ttl, data) + } + } + return o + end + + class MessageDecoder # :nodoc: + def initialize(data) + @data = data + @index = 0 + @limit = data.length + yield self + end + + def get_length16 + len, = self.get_unpack('n') + save_limit = @limit + @limit = @index + len + d = yield(len) + if @index < @limit + raise DecodeError.new("junk exists") + elsif @limit < @index + raise DecodeError.new("limit exceeded") + end + @limit = save_limit + return d + end + + def get_bytes(len = @limit - @index) + d = @data[@index, len] + @index += len + return d + end + + def get_unpack(template) + len = 0 + template.each_byte {|byte| + byte = "%c" % byte + case byte + when ?c, ?C + len += 1 + when ?n + len += 2 + when ?N + len += 4 + else + raise StandardError.new("unsupported template: '#{byte.chr}' in '#{template}'") + end + } + raise DecodeError.new("limit exceeded") if @limit < @index + len + arr = @data.unpack("@#{@index}#{template}") + @index += len + return arr + end + + def get_string + len = @data[@index].ord + raise DecodeError.new("limit exceeded") if @limit < @index + 1 + len + d = @data[@index + 1, len] + @index += 1 + len + return d + end + + def get_string_list + strings = [] + while @index < @limit + strings << self.get_string + end + strings + end + + def get_name + return Name.new(self.get_labels) + end + + def get_labels(limit=nil) + limit = @index if !limit || @index < limit + d = [] + while true + case @data[@index].ord + when 0 + @index += 1 + return d + when 192..255 + idx = self.get_unpack('n')[0] & 0x3fff + if limit <= idx + raise DecodeError.new("non-backward name pointer") + end + save_index = @index + @index = idx + d += self.get_labels(limit) + @index = save_index + return d + else + d << self.get_label + end + end + return d + end + + def get_label + return 
Label::Str.new(self.get_string) + end + + def get_question + name = self.get_name + type, klass = self.get_unpack("nn") + return name, Resource.get_class(type, klass) + end + + def get_rr + name = self.get_name + type, klass, ttl = self.get_unpack('nnN') + typeclass = Resource.get_class(type, klass) + res = self.get_length16 { typeclass.decode_rdata self } + res.instance_variable_set :@ttl, ttl + return name, ttl, res + end + end + end + + ## + # A DNS query abstract class. + + class Query + def encode_rdata(msg) # :nodoc: + raise EncodeError.new("#{self.class} is query.") + end + + def self.decode_rdata(msg) # :nodoc: + raise DecodeError.new("#{self.class} is query.") + end + end + + ## + # A DNS resource abstract class. + + class Resource < Query + + ## + # Remaining Time To Live for this Resource. + + attr_reader :ttl + + ClassHash = {} # :nodoc: + + def encode_rdata(msg) # :nodoc: + raise NotImplementedError.new + end + + def self.decode_rdata(msg) # :nodoc: + raise NotImplementedError.new + end + + def ==(other) # :nodoc: + return false unless self.class == other.class + s_ivars = self.instance_variables + s_ivars.sort! + s_ivars.delete "@ttl" + o_ivars = other.instance_variables + o_ivars.sort! + o_ivars.delete "@ttl" + return s_ivars == o_ivars && + s_ivars.collect {|name| self.instance_variable_get name} == + o_ivars.collect {|name| other.instance_variable_get name} + end + + def eql?(other) # :nodoc: + return self == other + end + + def hash # :nodoc: + h = 0 + vars = self.instance_variables + vars.delete "@ttl" + vars.each {|name| + h ^= self.instance_variable_get(name).hash + } + return h + end + + def self.get_class(type_value, class_value) # :nodoc: + return ClassHash[[type_value, class_value]] || + Generic.create(type_value, class_value) + end + + ## + # A generic resource abstract class. + + class Generic < Resource + + ## + # Creates a new generic resource. + + def initialize(data) + @data = data + end + + ## + # Data for this generic resource. 
+ + attr_reader :data + + def encode_rdata(msg) # :nodoc: + msg.put_bytes(data) + end + + def self.decode_rdata(msg) # :nodoc: + return self.new(msg.get_bytes) + end + + def self.create(type_value, class_value) # :nodoc: + c = Class.new(Generic) + c.const_set(:TypeValue, type_value) + c.const_set(:ClassValue, class_value) + Generic.const_set("Type#{type_value}_Class#{class_value}", c) + ClassHash[[type_value, class_value]] = c + return c + end + end + + ## + # Domain Name resource abstract class. + + class DomainName < Resource + + ## + # Creates a new DomainName from +name+. + + def initialize(name) + @name = name + end + + ## + # The name of this DomainName. + + attr_reader :name + + def encode_rdata(msg) # :nodoc: + msg.put_name(@name) + end + + def self.decode_rdata(msg) # :nodoc: + return self.new(msg.get_name) + end + end + + # Standard (class generic) RRs + + ClassValue = nil # :nodoc: + + ## + # An authoritative name server. + + class NS < DomainName + TypeValue = 2 # :nodoc: + end + + ## + # The canonical name for an alias. + + class CNAME < DomainName + TypeValue = 5 # :nodoc: + end + + ## + # Start Of Authority resource. + + class SOA < Resource + + TypeValue = 6 # :nodoc: + + ## + # Creates a new SOA record. See the attr documentation for the + # details of each argument. + + def initialize(mname, rname, serial, refresh, retry_, expire, minimum) + @mname = mname + @rname = rname + @serial = serial + @refresh = refresh + @retry = retry_ + @expire = expire + @minimum = minimum + end + + ## + # Name of the host where the master zone file for this zone resides. + + attr_reader :mname + + ## + # The person responsible for this domain name. + + attr_reader :rname + + ## + # The version number of the zone file. + + attr_reader :serial + + ## + # How often, in seconds, a secondary name server is to check for + # updates from the primary name server. 
+ + attr_reader :refresh + + ## + # How often, in seconds, a secondary name server is to retry after a + # failure to check for a refresh. + + attr_reader :retry + + ## + # Time in seconds that a secondary name server is to use the data + # before refreshing from the primary name server. + + attr_reader :expire + + ## + # The minimum number of seconds to be used for TTL values in RRs. + + attr_reader :minimum + + def encode_rdata(msg) # :nodoc: + msg.put_name(@mname) + msg.put_name(@rname) + msg.put_pack('NNNNN', @serial, @refresh, @retry, @expire, @minimum) + end + + def self.decode_rdata(msg) # :nodoc: + mname = msg.get_name + rname = msg.get_name + serial, refresh, retry_, expire, minimum = msg.get_unpack('NNNNN') + return self.new( + mname, rname, serial, refresh, retry_, expire, minimum) + end + end + + ## + # A Pointer to another DNS name. + + class PTR < DomainName + TypeValue = 12 # :nodoc: + end + + ## + # Host Information resource. + + class HINFO < Resource + + TypeValue = 13 # :nodoc: + + ## + # Creates a new HINFO running +os+ on +cpu+. + + def initialize(cpu, os) + @cpu = cpu + @os = os + end + + ## + # CPU architecture for this resource. + + attr_reader :cpu + + ## + # Operating system for this resource. + + attr_reader :os + + def encode_rdata(msg) # :nodoc: + msg.put_string(@cpu) + msg.put_string(@os) + end + + def self.decode_rdata(msg) # :nodoc: + cpu = msg.get_string + os = msg.get_string + return self.new(cpu, os) + end + end + + ## + # Mailing list or mailbox information. + + class MINFO < Resource + + TypeValue = 14 # :nodoc: + + def initialize(rmailbx, emailbx) + @rmailbx = rmailbx + @emailbx = emailbx + end + + ## + # Domain name responsible for this mail list or mailbox. + + attr_reader :rmailbx + + ## + # Mailbox to use for error messages related to the mail list or mailbox. 
+ + attr_reader :emailbx + + def encode_rdata(msg) # :nodoc: + msg.put_name(@rmailbx) + msg.put_name(@emailbx) + end + + def self.decode_rdata(msg) # :nodoc: + rmailbx = msg.get_string + emailbx = msg.get_string + return self.new(rmailbx, emailbx) + end + end + + ## + # Mail Exchanger resource. + + class MX < Resource + + TypeValue= 15 # :nodoc: + + ## + # Creates a new MX record with +preference+, accepting mail at + # +exchange+. + + def initialize(preference, exchange) + @preference = preference + @exchange = exchange + end + + ## + # The preference for this MX. + + attr_reader :preference + + ## + # The host of this MX. + + attr_reader :exchange + + def encode_rdata(msg) # :nodoc: + msg.put_pack('n', @preference) + msg.put_name(@exchange) + end + + def self.decode_rdata(msg) # :nodoc: + preference, = msg.get_unpack('n') + exchange = msg.get_name + return self.new(preference, exchange) + end + end + + ## + # Unstructured text resource. + + class TXT < Resource + + TypeValue = 16 # :nodoc: + + def initialize(first_string, *rest_strings) + @strings = [first_string, *rest_strings] + end + + ## + # Returns an Array of Strings for this TXT record. + + attr_reader :strings + + ## + # Returns the first string from +strings+. + + def data + @strings[0] + end + + def encode_rdata(msg) # :nodoc: + msg.put_string_list(@strings) + end + + def self.decode_rdata(msg) # :nodoc: + strings = msg.get_string_list + return self.new(*strings) + end + end + + ## + # A Query type requesting any RR. + + class ANY < Query + TypeValue = 255 # :nodoc: + end + + ClassInsensitiveTypes = [ # :nodoc: + NS, CNAME, SOA, PTR, HINFO, MINFO, MX, TXT, ANY + ] + + ## + # module IN contains ARPA Internet specific RRs. 
+ + module IN + + ClassValue = 1 # :nodoc: + + ClassInsensitiveTypes.each {|s| + c = Class.new(s) + c.const_set(:TypeValue, s::TypeValue) + c.const_set(:ClassValue, ClassValue) + ClassHash[[s::TypeValue, ClassValue]] = c + self.const_set(s.name.sub(/.*::/, ''), c) + } + + ## + # IPv4 Address resource + + class A < Resource + TypeValue = 1 + ClassValue = IN::ClassValue + ClassHash[[TypeValue, ClassValue]] = self # :nodoc: + + ## + # Creates a new A for +address+. + + def initialize(address) + @address = IPv4.create(address) + end + + ## + # The Resolv::IPv4 address for this A. + + attr_reader :address + + def encode_rdata(msg) # :nodoc: + msg.put_bytes(@address.address) + end + + def self.decode_rdata(msg) # :nodoc: + return self.new(IPv4.new(msg.get_bytes(4))) + end + end + + ## + # Well Known Service resource. + + class WKS < Resource + TypeValue = 11 + ClassValue = IN::ClassValue + ClassHash[[TypeValue, ClassValue]] = self # :nodoc: + + def initialize(address, protocol, bitmap) + @address = IPv4.create(address) + @protocol = protocol + @bitmap = bitmap + end + + ## + # The host these services run on. + + attr_reader :address + + ## + # IP protocol number for these services. + + attr_reader :protocol + + ## + # A bit map of enabled services on this host. + # + # If protocol is 6 (TCP) then the 26th bit corresponds to the SMTP + # service (port 25). If this bit is set, then an SMTP server should + # be listening on TCP port 25; if zero, SMTP service is not + # supported. + + attr_reader :bitmap + + def encode_rdata(msg) # :nodoc: + msg.put_bytes(@address.address) + msg.put_pack("n", @protocol) + msg.put_bytes(@bitmap) + end + + def self.decode_rdata(msg) # :nodoc: + address = IPv4.new(msg.get_bytes(4)) + protocol, = msg.get_unpack("n") + bitmap = msg.get_bytes + return self.new(address, protocol, bitmap) + end + end + + ## + # An IPv6 address record. 
+ + class AAAA < Resource + TypeValue = 28 + ClassValue = IN::ClassValue + ClassHash[[TypeValue, ClassValue]] = self # :nodoc: + + ## + # Creates a new AAAA for +address+. + + def initialize(address) + @address = IPv6.create(address) + end + + ## + # The Resolv::IPv6 address for this AAAA. + + attr_reader :address + + def encode_rdata(msg) # :nodoc: + msg.put_bytes(@address.address) + end + + def self.decode_rdata(msg) # :nodoc: + return self.new(IPv6.new(msg.get_bytes(16))) + end + end + + ## + # SRV resource record defined in RFC 2782 + # + # These records identify the hostname and port that a service is + # available at. + + class SRV < Resource + TypeValue = 33 + ClassValue = IN::ClassValue + ClassHash[[TypeValue, ClassValue]] = self # :nodoc: + + # Create a SRV resource record. + # + # See the documentation for #priority, #weight, #port and #target + # for +priority+, +weight+, +port and +target+ respectively. + + def initialize(priority, weight, port, target) + @priority = priority.to_int + @weight = weight.to_int + @port = port.to_int + @target = Name.create(target) + end + + # The priority of this target host. + # + # A client MUST attempt to contact the target host with the + # lowest-numbered priority it can reach; target hosts with the same + # priority SHOULD be tried in an order defined by the weight field. + # The range is 0-65535. Note that it is not widely implemented and + # should be set to zero. + + attr_reader :priority + + # A server selection mechanism. + # + # The weight field specifies a relative weight for entries with the + # same priority. Larger weights SHOULD be given a proportionately + # higher probability of being selected. The range of this number is + # 0-65535. Domain administrators SHOULD use Weight 0 when there + # isn't any server selection to do, to make the RR easier to read + # for humans (less noisy). Note that it is not widely implemented + # and should be set to zero. 
+ + attr_reader :weight + + # The port on this target host of this service. + # + # The range is 0-65535. + + attr_reader :port + + # The domain name of the target host. + # + # A target of "." means that the service is decidedly not available + # at this domain. + + attr_reader :target + + def encode_rdata(msg) # :nodoc: + msg.put_pack("n", @priority) + msg.put_pack("n", @weight) + msg.put_pack("n", @port) + msg.put_name(@target) + end + + def self.decode_rdata(msg) # :nodoc: + priority, = msg.get_unpack("n") + weight, = msg.get_unpack("n") + port, = msg.get_unpack("n") + target = msg.get_name + return self.new(priority, weight, port, target) + end + end + end + end + end + + ## + # A Resolv::DNS IPv4 address. + + class IPv4 + + ## + # Regular expression IPv4 addresses must match. + + Regex = /\A(\d+)\.(\d+)\.(\d+)\.(\d+)\z/ + + def self.create(arg) + case arg + when IPv4 + return arg + when Regex + if (0..255) === (a = $1.to_i) && + (0..255) === (b = $2.to_i) && + (0..255) === (c = $3.to_i) && + (0..255) === (d = $4.to_i) + return self.new([a, b, c, d].pack("CCCC")) + else + raise ArgumentError.new("IPv4 address with invalid value: " + arg) + end + else + raise ArgumentError.new("cannot interpret as IPv4 address: #{arg.inspect}") + end + end + + def initialize(address) # :nodoc: + unless address.kind_of?(String) && address.length == 4 + raise ArgumentError.new('IPv4 address must be 4 bytes') + end + @address = address + end + + ## + # A String representation of this IPv4 address. + + ## + # The raw IPv4 address as a String. + + attr_reader :address + + def to_s # :nodoc: + return sprintf("%d.%d.%d.%d", *@address.unpack("CCCC")) + end + + def inspect # :nodoc: + return "#<#{self.class} #{self.to_s}>" + end + + ## + # Turns this IPv4 address into a Resolv::DNS::Name. + + def to_name + return DNS::Name.create( + '%d.%d.%d.%d.in-addr.arpa.' 
% @address.unpack('CCCC').reverse) + end + + def ==(other) # :nodoc: + return @address == other.address + end + + def eql?(other) # :nodoc: + return self == other + end + + def hash # :nodoc: + return @address.hash + end + end + + ## + # A Resolv::DNS IPv6 address. + + class IPv6 + + ## + # IPv6 address format a:b:c:d:e:f:g:h + Regex_8Hex = /\A + (?:[0-9A-Fa-f]{1,4}:){7} + [0-9A-Fa-f]{1,4} + \z/x + + ## + # Compressed IPv6 address format a::b + + Regex_CompressedHex = /\A + ((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?) :: + ((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?) + \z/x + + ## + # IPv4 mapped IPv6 address format a:b:c:d:e:f:w.x.y.z + + Regex_6Hex4Dec = /\A + ((?:[0-9A-Fa-f]{1,4}:){6,6}) + (\d+)\.(\d+)\.(\d+)\.(\d+) + \z/x + + ## + # Compressed IPv4 mapped IPv6 address format a::b:w.x.y.z + + Regex_CompressedHex4Dec = /\A + ((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?) :: + ((?:[0-9A-Fa-f]{1,4}:)*) + (\d+)\.(\d+)\.(\d+)\.(\d+) + \z/x + + ## + # A composite IPv6 address Regexp. + + Regex = / + (?:#{Regex_8Hex}) | + (?:#{Regex_CompressedHex}) | + (?:#{Regex_6Hex4Dec}) | + (?:#{Regex_CompressedHex4Dec})/x + + ## + # Creates a new IPv6 address from +arg+ which may be: + # + # IPv6:: returns +arg+. 
+ # String:: +arg+ must match one of the IPv6::Regex* constants + + def self.create(arg) + case arg + when IPv6 + return arg + when String + address = '' + if Regex_8Hex =~ arg + arg.scan(/[0-9A-Fa-f]+/) {|hex| address << [hex.hex].pack('n')} + elsif Regex_CompressedHex =~ arg + prefix = $1 + suffix = $2 + a1 = '' + a2 = '' + prefix.scan(/[0-9A-Fa-f]+/) {|hex| a1 << [hex.hex].pack('n')} + suffix.scan(/[0-9A-Fa-f]+/) {|hex| a2 << [hex.hex].pack('n')} + omitlen = 16 - a1.length - a2.length + address << a1 << "\0" * omitlen << a2 + elsif Regex_6Hex4Dec =~ arg + prefix, a, b, c, d = $1, $2.to_i, $3.to_i, $4.to_i, $5.to_i + if (0..255) === a && (0..255) === b && (0..255) === c && (0..255) === d + prefix.scan(/[0-9A-Fa-f]+/) {|hex| address << [hex.hex].pack('n')} + address << [a, b, c, d].pack('CCCC') + else + raise ArgumentError.new("not numeric IPv6 address: " + arg) + end + elsif Regex_CompressedHex4Dec =~ arg + prefix, suffix, a, b, c, d = $1, $2, $3.to_i, $4.to_i, $5.to_i, $6.to_i + if (0..255) === a && (0..255) === b && (0..255) === c && (0..255) === d + a1 = '' + a2 = '' + prefix.scan(/[0-9A-Fa-f]+/) {|hex| a1 << [hex.hex].pack('n')} + suffix.scan(/[0-9A-Fa-f]+/) {|hex| a2 << [hex.hex].pack('n')} + omitlen = 12 - a1.length - a2.length + address << a1 << "\0" * omitlen << a2 << [a, b, c, d].pack('CCCC') + else + raise ArgumentError.new("not numeric IPv6 address: " + arg) + end + else + raise ArgumentError.new("not numeric IPv6 address: " + arg) + end + return IPv6.new(address) + else + raise ArgumentError.new("cannot interpret as IPv6 address: #{arg.inspect}") + end + end + + def initialize(address) # :nodoc: + unless address.kind_of?(String) && address.length == 16 + raise ArgumentError.new('IPv6 address must be 16 bytes') + end + @address = address + end + + ## + # The raw IPv6 address as a String. 
+ + attr_reader :address + + def to_s # :nodoc: + address = sprintf("%X:%X:%X:%X:%X:%X:%X:%X", *@address.unpack("nnnnnnnn")) + unless address.sub!(/(^|:)0(:0)+(:|$)/, '::') + address.sub!(/(^|:)0(:|$)/, '::') + end + return address + end + + def inspect # :nodoc: + return "#<#{self.class} #{self.to_s}>" + end + + ## + # Turns this IPv6 address into a Resolv::DNS::Name. + #-- + # ip6.arpa should be searched too. [RFC3152] + + def to_name + return DNS::Name.new( + @address.unpack("H32")[0].split(//).reverse + ['ip6', 'arpa']) + end + + def ==(other) # :nodoc: + return @address == other.address + end + + def eql?(other) # :nodoc: + return self == other + end + + def hash # :nodoc: + return @address.hash + end + end + + ## + # Default resolver to use for Resolv class methods. + + DefaultResolver = self.new + + ## + # Address Regexp to use for matching IP addresses. + + AddressRegex = /(?:#{IPv4::Regex})|(?:#{IPv6::Regex})/ + +end + diff --git a/lib/rinda/rinda.rb b/lib/rinda/rinda.rb new file mode 100644 index 0000000..6c59e68 --- /dev/null +++ b/lib/rinda/rinda.rb @@ -0,0 +1,283 @@ +require 'drb/drb' +require 'thread' + +## +# A module to implement the Linda distributed computing paradigm in Ruby. +# +# Rinda is part of DRb (dRuby). +# +# == Example(s) +# +# See the sample/drb/ directory in the Ruby distribution, from 1.8.2 onwards. +# +#-- +# TODO +# == Introduction to Linda/rinda? +# +# == Why is this library separate from DRb? + +module Rinda + + ## + # Rinda error base class + + class RindaError < RuntimeError; end + + ## + # Raised when a hash-based tuple has an invalid key. + + class InvalidHashTupleKey < RindaError; end + + ## + # Raised when trying to use a canceled tuple. + + class RequestCanceledError < ThreadError; end + + ## + # Raised when trying to use an expired tuple. + + class RequestExpiredError < ThreadError; end + + ## + # A tuple is the elementary object in Rinda programming. 
+ # Tuples may be matched against templates if the tuple and + # the template are the same size. + + class Tuple + + ## + # Creates a new Tuple from +ary_or_hash+ which must be an Array or Hash. + + def initialize(ary_or_hash) + if hash?(ary_or_hash) + init_with_hash(ary_or_hash) + else + init_with_ary(ary_or_hash) + end + end + + ## + # The number of elements in the tuple. + + def size + @tuple.size + end + + ## + # Accessor method for elements of the tuple. + + def [](k) + @tuple[k] + end + + ## + # Fetches item +k+ from the tuple. + + def fetch(k) + @tuple.fetch(k) + end + + ## + # Iterate through the tuple, yielding the index or key, and the + # value, thus ensuring arrays are iterated similarly to hashes. + + def each # FIXME + if Hash === @tuple + @tuple.each { |k, v| yield(k, v) } + else + @tuple.each_with_index { |v, k| yield(k, v) } + end + end + + ## + # Return the tuple itself + def value + @tuple + end + + private + + def hash?(ary_or_hash) + ary_or_hash.respond_to?(:keys) + end + + ## + # Munges +ary+ into a valid Tuple. + + def init_with_ary(ary) + @tuple = Array.new(ary.size) + @tuple.size.times do |i| + @tuple[i] = ary[i] + end + end + + ## + # Ensures +hash+ is a valid Tuple. + + def init_with_hash(hash) + @tuple = Hash.new + hash.each do |k, v| + raise InvalidHashTupleKey unless String === k + @tuple[k] = v + end + end + + end + + ## + # Templates are used to match tuples in Rinda. + + class Template < Tuple + + ## + # Matches this template against +tuple+. The +tuple+ must be the same + # size as the template. An element with a +nil+ value in a template acts + # as a wildcard, matching any value in the corresponding position in the + # tuple. Elements of the template match the +tuple+ if the are #== or + # #===. 
+ # + # Template.new([:foo, 5]).match Tuple.new([:foo, 5]) # => true + # Template.new([:foo, nil]).match Tuple.new([:foo, 5]) # => true + # Template.new([String]).match Tuple.new(['hello']) # => true + # + # Template.new([:foo]).match Tuple.new([:foo, 5]) # => false + # Template.new([:foo, 6]).match Tuple.new([:foo, 5]) # => false + # Template.new([:foo, nil]).match Tuple.new([:foo]) # => false + # Template.new([:foo, 6]).match Tuple.new([:foo]) # => false + + def match(tuple) + return false unless tuple.respond_to?(:size) + return false unless tuple.respond_to?(:fetch) + return false unless self.size == tuple.size + each do |k, v| + begin + it = tuple.fetch(k) + rescue + return false + end + next if v.nil? + next if v == it + next if v === it + return false + end + return true + end + + ## + # Alias for #match. + + def ===(tuple) + match(tuple) + end + + end + + ## + # Documentation? + + class DRbObjectTemplate + + ## + # Creates a new DRbObjectTemplate that will match against +uri+ and +ref+. + + def initialize(uri=nil, ref=nil) + @drb_uri = uri + @drb_ref = ref + end + + ## + # This DRbObjectTemplate matches +ro+ if the remote object's drburi and + # drbref are the same. +nil+ is used as a wildcard. + + def ===(ro) + return true if super(ro) + unless @drb_uri.nil? + return false unless (@drb_uri === ro.__drburi rescue false) + end + unless @drb_ref.nil? + return false unless (@drb_ref === ro.__drbref rescue false) + end + true + end + + end + + ## + # TupleSpaceProxy allows a remote Tuplespace to appear as local. + + class TupleSpaceProxy + + ## + # Creates a new TupleSpaceProxy to wrap +ts+. + + def initialize(ts) + @ts = ts + end + + ## + # Adds +tuple+ to the proxied TupleSpace. See TupleSpace#write. + + def write(tuple, sec=nil) + @ts.write(tuple, sec) + end + + ## + # Takes +tuple+ from the proxied TupleSpace. See TupleSpace#take. 
+ + def take(tuple, sec=nil, &block) + port = [] + @ts.move(DRbObject.new(port), tuple, sec, &block) + port[0] + end + + ## + # Reads +tuple+ from the proxied TupleSpace. See TupleSpace#read. + + def read(tuple, sec=nil, &block) + @ts.read(tuple, sec, &block) + end + + ## + # Reads all tuples matching +tuple+ from the proxied TupleSpace. See + # TupleSpace#read_all. + + def read_all(tuple) + @ts.read_all(tuple) + end + + ## + # Registers for notifications of event +ev+ on the proxied TupleSpace. + # See TupleSpace#notify + + def notify(ev, tuple, sec=nil) + @ts.notify(ev, tuple, sec) + end + + end + + ## + # An SimpleRenewer allows a TupleSpace to check if a TupleEntry is still + # alive. + + class SimpleRenewer + + include DRbUndumped + + ## + # Creates a new SimpleRenewer that keeps an object alive for another +sec+ + # seconds. + + def initialize(sec=180) + @sec = sec + end + + ## + # Called by the TupleSpace to check if the object is still alive. + + def renew + @sec + end + end + +end + diff --git a/lib/rinda/ring.rb b/lib/rinda/ring.rb new file mode 100644 index 0000000..4dc7c7d --- /dev/null +++ b/lib/rinda/ring.rb @@ -0,0 +1,271 @@ +# +# Note: Rinda::Ring API is unstable. +# +require 'drb/drb' +require 'rinda/rinda' +require 'thread' + +module Rinda + + ## + # The default port Ring discovery will use. + + Ring_PORT = 7647 + + ## + # A RingServer allows a Rinda::TupleSpace to be located via UDP broadcasts. + # Service location uses the following steps: + # + # 1. A RingServer begins listening on the broadcast UDP address. + # 2. A RingFinger sends a UDP packet containing the DRb URI where it will + # listen for a reply. + # 3. The RingServer receives the UDP packet and connects back to the + # provided DRb URI with the DRb service. + + class RingServer + + include DRbUndumped + + ## + # Advertises +ts+ on the UDP broadcast address at +port+. 
+ + def initialize(ts, port=Ring_PORT) + @ts = ts + @soc = UDPSocket.open + @soc.bind('', port) + @w_service = write_service + @r_service = reply_service + end + + ## + # Creates a thread that picks up UDP packets and passes them to do_write + # for decoding. + + def write_service + Thread.new do + loop do + msg = @soc.recv(1024) + do_write(msg) + end + end + end + + ## + # Extracts the response URI from +msg+ and adds it to TupleSpace where it + # will be picked up by +reply_service+ for notification. + + def do_write(msg) + Thread.new do + begin + tuple, sec = Marshal.load(msg) + @ts.write(tuple, sec) + rescue + end + end + end + + ## + # Creates a thread that notifies waiting clients from the TupleSpace. + + def reply_service + Thread.new do + loop do + do_reply + end + end + end + + ## + # Pulls lookup tuples out of the TupleSpace and sends their DRb object the + # address of the local TupleSpace. + + def do_reply + tuple = @ts.take([:lookup_ring, DRbObject]) + Thread.new { tuple[1].call(@ts) rescue nil} + rescue + end + + end + + ## + # RingFinger is used by RingServer clients to discover the RingServer's + # TupleSpace. Typically, all a client needs to do is call + # RingFinger.primary to retrieve the remote TupleSpace, which it can then + # begin using. + + class RingFinger + + @@broadcast_list = ['', 'localhost'] + + @@finger = nil + + ## + # Creates a singleton RingFinger and looks for a RingServer. Returns the + # created RingFinger. + + def self.finger + unless @@finger + @@finger = self.new + @@finger.lookup_ring_any + end + @@finger + end + + ## + # Returns the first advertised TupleSpace. + + def self.primary + finger.primary + end + + ## + # Contains all discovered TupleSpaces except for the primary. + + def self.to_a + finger.to_a + end + + ## + # The list of addresses where RingFinger will send query packets. + + attr_accessor :broadcast_list + + ## + # The port that RingFinger will send query packets to. 
+ + attr_accessor :port + + ## + # Contain the first advertised TupleSpace after lookup_ring_any is called. + + attr_accessor :primary + + ## + # Creates a new RingFinger that will look for RingServers at +port+ on + # the addresses in +broadcast_list+. + + def initialize(broadcast_list=@@broadcast_list, port=Ring_PORT) + @broadcast_list = broadcast_list || ['localhost'] + @port = port + @primary = nil + @rings = [] + end + + ## + # Contains all discovered TupleSpaces except for the primary. + + def to_a + @rings + end + + ## + # Iterates over all discovered TupleSpaces starting with the primary. + + def each + lookup_ring_any unless @primary + return unless @primary + yield(@primary) + @rings.each { |x| yield(x) } + end + + ## + # Looks up RingServers waiting +timeout+ seconds. RingServers will be + # given +block+ as a callback, which will be called with the remote + # TupleSpace. + + def lookup_ring(timeout=5, &block) + return lookup_ring_any(timeout) unless block_given? + + msg = Marshal.dump([[:lookup_ring, DRbObject.new(block)], timeout]) + @broadcast_list.each do |it| + soc = UDPSocket.open + begin + soc.setsockopt(Socket::SOL_SOCKET, Socket::SO_BROADCAST, true) + soc.send(msg, 0, it, @port) + rescue + nil + ensure + soc.close + end + end + sleep(timeout) + end + + ## + # Returns the first found remote TupleSpace. Any further recovered + # TupleSpaces can be found by calling +to_a+. + + def lookup_ring_any(timeout=5) + queue = Queue.new + + th = Thread.new do + self.lookup_ring(timeout) do |ts| + queue.push(ts) + end + queue.push(nil) + while it = queue.pop + @rings.push(it) + end + end + + @primary = queue.pop + raise('RingNotFound') if @primary.nil? + @primary + end + + end + + ## + # RingProvider uses a RingServer advertised TupleSpace as a name service. + # TupleSpace clients can register themselves with the remote TupleSpace and + # look up other provided services via the remote TupleSpace. 
+ # + # Services are registered with a tuple of the format [:name, klass, + # DRbObject, description]. + + class RingProvider + + ## + # Creates a RingProvider that will provide a +klass+ service running on + # +front+, with a +description+. +renewer+ is optional. + + def initialize(klass, front, desc, renewer = nil) + @tuple = [:name, klass, front, desc] + @renewer = renewer || Rinda::SimpleRenewer.new + end + + ## + # Advertises this service on the primary remote TupleSpace. + + def provide + ts = Rinda::RingFinger.primary + ts.write(@tuple, @renewer) + end + + end + +end + +if __FILE__ == $0 + DRb.start_service + case ARGV.shift + when 's' + require 'rinda/tuplespace' + ts = Rinda::TupleSpace.new + place = Rinda::RingServer.new(ts) + $stdin.gets + when 'w' + finger = Rinda::RingFinger.new(nil) + finger.lookup_ring do |ts2| + p ts2 + ts2.write([:hello, :world]) + end + when 'r' + finger = Rinda::RingFinger.new(nil) + finger.lookup_ring do |ts2| + p ts2 + p ts2.take([nil, nil]) + end + end +end + diff --git a/lib/rinda/tuplespace.rb b/lib/rinda/tuplespace.rb new file mode 100644 index 0000000..6ca30a7 --- /dev/null +++ b/lib/rinda/tuplespace.rb @@ -0,0 +1,642 @@ +require 'monitor' +require 'thread' +require 'drb/drb' +require 'rinda/rinda' +require 'enumerator' +require 'forwardable' + +module Rinda + + ## + # A TupleEntry is a Tuple (i.e. a possible entry in some Tuplespace) + # together with expiry and cancellation data. + + class TupleEntry + + include DRbUndumped + + attr_accessor :expires + + ## + # Creates a TupleEntry based on +ary+ with an optional renewer or expiry + # time +sec+. + # + # A renewer must implement the +renew+ method which returns a Numeric, + # nil, or true to indicate when the tuple has expired. + + def initialize(ary, sec=nil) + @cancel = false + @expires = nil + @tuple = make_tuple(ary) + @renewer = nil + renew(sec) + end + + ## + # Marks this TupleEntry as canceled. 
+ + def cancel + @cancel = true + end + + ## + # A TupleEntry is dead when it is canceled or expired. + + def alive? + !canceled? && !expired? + end + + ## + # Return the object which makes up the tuple itself: the Array + # or Hash. + + def value; @tuple.value; end + + ## + # Returns the canceled status. + + def canceled?; @cancel; end + + ## + # Has this tuple expired? (true/false). + # + # A tuple has expired when its expiry timer based on the +sec+ argument to + # #initialize runs out. + + def expired? + return true unless @expires + return false if @expires > Time.now + return true if @renewer.nil? + renew(@renewer) + return true unless @expires + return @expires < Time.now + end + + ## + # Reset the expiry time according to +sec_or_renewer+. + # + # +nil+:: it is set to expire in the far future. + # +false+:: it has expired. + # Numeric:: it will expire in that many seconds. + # + # Otherwise the argument refers to some kind of renewer object + # which will reset its expiry time. + + def renew(sec_or_renewer) + sec, @renewer = get_renewer(sec_or_renewer) + @expires = make_expires(sec) + end + + ## + # Returns an expiry Time based on +sec+ which can be one of: + # Numeric:: +sec+ seconds into the future + # +true+:: the expiry time is the start of 1970 (i.e. expired) + # +nil+:: it is Tue Jan 19 03:14:07 GMT Standard Time 2038 (i.e. when + # UNIX clocks will die) + + def make_expires(sec=nil) + case sec + when Numeric + Time.now + sec + when true + Time.at(1) + when nil + Time.at(2**31-1) + end + end + + ## + # Retrieves +key+ from the tuple. + + def [](key) + @tuple[key] + end + + ## + # Fetches +key+ from the tuple. + + def fetch(key) + @tuple.fetch(key) + end + + ## + # The size of the tuple. + + def size + @tuple.size + end + + ## + # Creates a Rinda::Tuple for +ary+. + + def make_tuple(ary) + Rinda::Tuple.new(ary) + end + + private + + ## + # Returns a valid argument to make_expires and the renewer or nil. 
+ # + # Given +true+, +nil+, or Numeric, returns that value and +nil+ (no actual + # renewer). Otherwise it returns an expiry value from calling +it.renew+ + # and the renewer. + + def get_renewer(it) + case it + when Numeric, true, nil + return it, nil + else + begin + return it.renew, it + rescue Exception + return it, nil + end + end + end + + end + + ## + # A TemplateEntry is a Template together with expiry and cancellation data. + + class TemplateEntry < TupleEntry + ## + # Matches this TemplateEntry against +tuple+. See Template#match for + # details on how a Template matches a Tuple. + + def match(tuple) + @tuple.match(tuple) + end + + alias === match + + def make_tuple(ary) # :nodoc: + Rinda::Template.new(ary) + end + + end + + ## + # Documentation? + + class WaitTemplateEntry < TemplateEntry + + attr_reader :found + + def initialize(place, ary, expires=nil) + super(ary, expires) + @place = place + @cond = place.new_cond + @found = nil + end + + def cancel + super + signal + end + + def wait + @cond.wait + end + + def read(tuple) + @found = tuple + signal + end + + def signal + @place.synchronize do + @cond.signal + end + end + + end + + ## + # A NotifyTemplateEntry is returned by TupleSpace#notify and is notified of + # TupleSpace changes. You may receive either your subscribed event or the + # 'close' event when iterating over notifications. + # + # See TupleSpace#notify_event for valid notification types. + # + # == Example + # + # ts = Rinda::TupleSpace.new + # observer = ts.notify 'write', [nil] + # + # Thread.start do + # observer.each { |t| p t } + # end + # + # 3.times { |i| ts.write [i] } + # + # Outputs: + # + # ['write', [0]] + # ['write', [1]] + # ['write', [2]] + + class NotifyTemplateEntry < TemplateEntry + + ## + # Creates a new NotifyTemplateEntry that watches +place+ for +event+s that + # match +tuple+. 
+ + def initialize(place, event, tuple, expires=nil) + ary = [event, Rinda::Template.new(tuple)] + super(ary, expires) + @queue = Queue.new + @done = false + end + + ## + # Called by TupleSpace to notify this NotifyTemplateEntry of a new event. + + def notify(ev) + @queue.push(ev) + end + + ## + # Retrieves a notification. Raises RequestExpiredError when this + # NotifyTemplateEntry expires. + + def pop + raise RequestExpiredError if @done + it = @queue.pop + @done = true if it[0] == 'close' + return it + end + + ## + # Yields event/tuple pairs until this NotifyTemplateEntry expires. + + def each # :yields: event, tuple + while !@done + it = pop + yield(it) + end + rescue + ensure + cancel + end + + end + + ## + # TupleBag is an unordered collection of tuples. It is the basis + # of Tuplespace. + + class TupleBag + class TupleBin + extend Forwardable + def_delegators '@bin', :find_all, :delete_if, :each, :empty? + + def initialize + @bin = [] + end + + def add(tuple) + @bin.push(tuple) + end + + def delete(tuple) + idx = @bin.rindex(tuple) + @bin.delete_at(idx) if idx + end + + def find(&blk) + @bin.reverse_each do |x| + return x if yield(x) + end + nil + end + end + + def initialize # :nodoc: + @hash = {} + @enum = enum_for(:each_entry) + end + + ## + # Returns a true value if any entry in the TupleBag has its +expires+ set. + + def has_expires? + @enum.find do |tuple| + tuple.expires + end + end + + ## + # Add +tuple+ to the TupleBag. + + def push(tuple) + key = bin_key(tuple) + @hash[key] ||= TupleBin.new + @hash[key].add(tuple) + end + + ## + # Removes +tuple+ from the TupleBag. + + def delete(tuple) + key = bin_key(tuple) + bin = @hash[key] + return nil unless bin + bin.delete(tuple) + @hash.delete(key) if bin.empty? + tuple + end + + ## + # Finds all live tuples that match +template+. + def find_all(template) + bin_for_find(template).find_all do |tuple| + tuple.alive? && template.match(tuple) + end + end + + ## + # Finds a live tuple that matches +template+.
+ + def find(template) + bin_for_find(template).find do |tuple| + tuple.alive? && template.match(tuple) + end + end + + ## + # Finds all tuples in the TupleBag which when treated as templates, match + # +tuple+ and are alive. + + def find_all_template(tuple) + @enum.find_all do |template| + template.alive? && template.match(tuple) + end + end + + ## + # Deletes dead tuples from the TupleBag, returning the deleted + # tuples. + + def delete_unless_alive + deleted = [] + @hash.each do |key, bin| + bin.delete_if do |tuple| + if tuple.alive? + false + else + deleted.push(tuple) + true + end + end + end + deleted + end + + private + def each_entry(&blk) + @hash.each do |k, v| + v.each(&blk) + end + end + + def bin_key(tuple) + head = tuple[0] + if head.class == Symbol + return head + else + false + end + end + + def bin_for_find(template) + key = bin_key(template) + key ? @hash.fetch(key, []) : @enum + end + end + + ## + # The Tuplespace manages access to the tuples it contains, + # ensuring mutual exclusion requirements are met. + # + # The +sec+ option for the write, take, move, read and notify methods may + # either be a number of seconds or a Renewer object. + + class TupleSpace + + include DRbUndumped + include MonitorMixin + + ## + # Creates a new TupleSpace. +period+ is used to control how often to look + # for dead tuples after modifications to the TupleSpace. + # + # If no dead tuples are found +period+ seconds after the last + # modification, the TupleSpace will stop looking for dead tuples. + + def initialize(period=60) + super() + @bag = TupleBag.new + @read_waiter = TupleBag.new + @take_waiter = TupleBag.new + @notify_waiter = TupleBag.new + @period = period + @keeper = nil + end + + ## + # Adds +tuple+ + + def write(tuple, sec=nil) + entry = create_entry(tuple, sec) + synchronize do + if entry.expired?
+ @read_waiter.find_all_template(entry).each do |template| + template.read(tuple) + end + notify_event('write', entry.value) + notify_event('delete', entry.value) + else + @bag.push(entry) + start_keeper if entry.expires + @read_waiter.find_all_template(entry).each do |template| + template.read(tuple) + end + @take_waiter.find_all_template(entry).each do |template| + template.signal + end + notify_event('write', entry.value) + end + end + entry + end + + ## + # Removes +tuple+ + + def take(tuple, sec=nil, &block) + move(nil, tuple, sec, &block) + end + + ## + # Moves +tuple+ to +port+. + + def move(port, tuple, sec=nil) + template = WaitTemplateEntry.new(self, tuple, sec) + yield(template) if block_given? + synchronize do + entry = @bag.find(template) + if entry + port.push(entry.value) if port + @bag.delete(entry) + notify_event('take', entry.value) + return entry.value + end + raise RequestExpiredError if template.expired? + + begin + @take_waiter.push(template) + start_keeper if template.expires + while true + raise RequestCanceledError if template.canceled? + raise RequestExpiredError if template.expired? + entry = @bag.find(template) + if entry + port.push(entry.value) if port + @bag.delete(entry) + notify_event('take', entry.value) + return entry.value + end + template.wait + end + ensure + @take_waiter.delete(template) + end + end + end + + ## + # Reads +tuple+, but does not remove it. + + def read(tuple, sec=nil) + template = WaitTemplateEntry.new(self, tuple, sec) + yield(template) if block_given? + synchronize do + entry = @bag.find(template) + return entry.value if entry + raise RequestExpiredError if template.expired? + + begin + @read_waiter.push(template) + start_keeper if template.expires + template.wait + raise RequestCanceledError if template.canceled? + raise RequestExpiredError if template.expired? + return template.found + ensure + @read_waiter.delete(template) + end + end + end + + ## + # Returns all tuples matching +tuple+. 
Does not remove the found tuples. + + def read_all(tuple) + template = WaitTemplateEntry.new(self, tuple, nil) + synchronize do + entry = @bag.find_all(template) + entry.collect do |e| + e.value + end + end + end + + ## + # Registers for notifications of +event+. Returns a NotifyTemplateEntry. + # See NotifyTemplateEntry for examples of how to listen for notifications. + # + # +event+ can be: + # 'write':: A tuple was added + # 'take':: A tuple was taken or moved + # 'delete':: A tuple was lost after being overwritten or expiring + # + # The TupleSpace will also notify you of the 'close' event when the + # NotifyTemplateEntry has expired. + + def notify(event, tuple, sec=nil) + template = NotifyTemplateEntry.new(self, event, tuple, sec) + synchronize do + @notify_waiter.push(template) + end + template + end + + private + + def create_entry(tuple, sec) + TupleEntry.new(tuple, sec) + end + + ## + # Removes dead tuples. + + def keep_clean + synchronize do + @read_waiter.delete_unless_alive.each do |e| + e.signal + end + @take_waiter.delete_unless_alive.each do |e| + e.signal + end + @notify_waiter.delete_unless_alive.each do |e| + e.notify(['close']) + end + @bag.delete_unless_alive.each do |e| + notify_event('delete', e.value) + end + end + end + + ## + # Notifies all registered listeners for +event+ of a status change of + # +tuple+. + + def notify_event(event, tuple) + ev = [event, tuple] + @notify_waiter.find_all_template(ev).each do |template| + template.notify(ev) + end + end + + ## + # Creates a thread that scans the tuplespace for expired tuples. + + def start_keeper + return if @keeper && @keeper.alive? + @keeper = Thread.new do + while true + sleep(@period) + synchronize do + break unless need_keeper? + keep_clean + end + end + end + end + + ## + # Checks the tuplespace to see if it needs cleaning. + + def need_keeper? + return true if @bag.has_expires? + return true if @read_waiter.has_expires? + return true if @take_waiter.has_expires? 
+ return true if @notify_waiter.has_expires? + end + + end + +end + diff --git a/lib/rss.rb b/lib/rss.rb new file mode 100644 index 0000000..a1d0f76 --- /dev/null +++ b/lib/rss.rb @@ -0,0 +1,19 @@ +# Copyright (c) 2003-2007 Kouhei Sutou. You can redistribute it and/or +# modify it under the same terms as Ruby. +# +# Author:: Kouhei Sutou +# Tutorial:: http://www.cozmixng.org/~rwiki/?cmd=view;name=RSS+Parser%3A%3ATutorial.en + +require 'rss/1.0' +require 'rss/2.0' +require 'rss/atom' +require 'rss/content' +require 'rss/dublincore' +require 'rss/image' +require 'rss/itunes' +require 'rss/slash' +require 'rss/syndication' +require 'rss/taxonomy' +require 'rss/trackback' + +require "rss/maker" diff --git a/lib/rubygems.rb b/lib/rubygems.rb new file mode 100644 index 0000000..9913b59 --- /dev/null +++ b/lib/rubygems.rb @@ -0,0 +1,889 @@ +# -*- ruby -*- +#-- +# Copyright 2006 by Chad Fowler, Rich Kilmer, Jim Weirich and others. +# All rights reserved. +# See LICENSE.txt for permissions. +#++ + +require 'rubygems/rubygems_version' +require 'rubygems/defaults' +require 'thread' + +module Gem + class LoadError < ::LoadError + attr_accessor :name, :version_requirement + end +end + +module Kernel + + ## + # Use Kernel#gem to activate a specific version of +gem_name+. + # + # +version_requirements+ is a list of version requirements that the + # specified gem must match, most commonly "= example.version.number". See + # Gem::Requirement for how to specify a version requirement. + # + # If you will be activating the latest version of a gem, there is no need to + # call Kernel#gem, Kernel#require will do the right thing for you. + # + # Kernel#gem returns true if the gem was activated, otherwise false. If the + # gem could not be found, didn't match the version requirements, or a + # different version was already activated, an exception will be raised. 
+ # + # Kernel#gem should be called *before* any require statements (otherwise + # RubyGems may load a conflicting library version). + # + # In older RubyGems versions, the environment variable GEM_SKIP could be + # used to skip activation of specified gems, for example to test out changes + # that haven't been installed yet. Now RubyGems defers to -I and the + # RUBYLIB environment variable to skip activation of a gem. + # + # Example: + # + # GEM_SKIP=libA:libB ruby -I../libA -I../libB ./mycode.rb + + def gem(gem_name, *version_requirements) # :doc: + skip_list = (ENV['GEM_SKIP'] || "").split(/:/) + raise Gem::LoadError, "skipping #{gem_name}" if skip_list.include? gem_name + Gem.activate(gem_name, *version_requirements) + end + + private :gem + +end + +## +# Main module to hold all RubyGem classes/modules. + +module Gem + + ConfigMap = {} unless defined?(ConfigMap) + require 'rbconfig' + RbConfig = Config unless defined? ::RbConfig + + ConfigMap.merge!( + :BASERUBY => RbConfig::CONFIG["BASERUBY"], + :EXEEXT => RbConfig::CONFIG["EXEEXT"], + :RUBY_INSTALL_NAME => RbConfig::CONFIG["RUBY_INSTALL_NAME"], + :RUBY_SO_NAME => RbConfig::CONFIG["RUBY_SO_NAME"], + :arch => RbConfig::CONFIG["arch"], + :bindir => RbConfig::CONFIG["bindir"], + :datadir => RbConfig::CONFIG["datadir"], + :libdir => RbConfig::CONFIG["libdir"], + :ruby_install_name => RbConfig::CONFIG["ruby_install_name"], + :ruby_version => RbConfig::CONFIG["ruby_version"], + :sitedir => RbConfig::CONFIG["sitedir"], + :sitelibdir => RbConfig::CONFIG["sitelibdir"], + :vendordir => RbConfig::CONFIG["vendordir"] , + :vendorlibdir => RbConfig::CONFIG["vendorlibdir"] + ) + + DIRECTORIES = %w[cache doc gems specifications] unless defined?(DIRECTORIES) + + MUTEX = Mutex.new + + RubyGemsPackageVersion = RubyGemsVersion + + ## + # An Array of Regexps that match windows ruby platforms. 
+ + WIN_PATTERNS = [ + /bccwin/i, + /cygwin/i, + /djgpp/i, + /mingw/i, + /mswin/i, + /wince/i, + ] + + @@source_index = nil + @@win_platform = nil + + @configuration = nil + @loaded_specs = {} + @platforms = [] + @ruby = nil + @sources = [] + + @post_install_hooks ||= [] + @post_uninstall_hooks ||= [] + @pre_uninstall_hooks ||= [] + @pre_install_hooks ||= [] + + ## + # Activates an installed gem matching +gem+. The gem must satisfy + # +version_requirements+. + # + # Returns true if the gem is activated, false if it is already + # loaded, or an exception otherwise. + # + # Gem#activate adds the library paths in +gem+ to $LOAD_PATH. Before a Gem + # is activated its required Gems are activated. If the version information + # is omitted, the highest version Gem of the supplied name is loaded. If a + # Gem is not found that meets the version requirements or a required Gem is + # not found, a Gem::LoadError is raised. + # + # More information on version requirements can be found in the + # Gem::Requirement and Gem::Version documentation. + + def self.activate(gem, *version_requirements) + if version_requirements.empty? then + version_requirements = Gem::Requirement.default + end + + unless gem.respond_to?(:name) and + gem.respond_to?(:version_requirements) then + gem = Gem::Dependency.new(gem, version_requirements) + end + + matches = Gem.source_index.find_name(gem.name, gem.version_requirements) + report_activate_error(gem) if matches.empty? + + if @loaded_specs[gem.name] then + # This gem is already loaded. If the currently loaded gem is not in the + # list of candidate gems, then we have a version conflict. + existing_spec = @loaded_specs[gem.name] + + unless matches.any? { |spec| spec.version == existing_spec.version } then + raise Gem::Exception, + "can't activate #{gem}, already activated #{existing_spec.full_name}" + end + + return false + end + + # new load + spec = matches.last + return false if spec.loaded? 
+ + spec.loaded = true + @loaded_specs[spec.name] = spec + + # Load dependent gems first + spec.runtime_dependencies.each do |dep_gem| + activate dep_gem + end + + # bin directory must come before library directories + spec.require_paths.unshift spec.bindir if spec.bindir + + require_paths = spec.require_paths.map do |path| + File.join spec.full_gem_path, path + end + + sitelibdir = ConfigMap[:sitelibdir] + + # gem directories must come after -I and ENV['RUBYLIB'] + insert_index = load_path_insert_index + + if insert_index then + # gem directories must come after -I and ENV['RUBYLIB'] + $LOAD_PATH.insert(insert_index, *require_paths) + else + # we are probably testing in core, -I and RUBYLIB don't apply + $LOAD_PATH.unshift(*require_paths) + end + + return true + end + + ## + # An Array of all possible load paths for all versions of all gems in the + # Gem installation. + + def self.all_load_paths + result = [] + + Gem.path.each do |gemdir| + each_load_path all_partials(gemdir) do |load_path| + result << load_path + end + end + + result + end + + ## + # Return all the partial paths in +gemdir+. + + def self.all_partials(gemdir) + Dir[File.join(gemdir, 'gems/*')] + end + + private_class_method :all_partials + + ## + # See if a given gem is available. + + def self.available?(gem, *requirements) + requirements = Gem::Requirement.default if requirements.empty? + + unless gem.respond_to?(:name) and + gem.respond_to?(:version_requirements) then + gem = Gem::Dependency.new gem, requirements + end + + !Gem.source_index.search(gem).empty? + end + + ## + # The mode needed to read a file as straight binary. + + def self.binary_mode + @binary_mode ||= RUBY_VERSION > '1.9' ? 'rb:ascii-8bit' : 'rb' + end + + ## + # The path where gem executables are to be installed. + + def self.bindir(install_dir=Gem.dir) + return File.join(install_dir, 'bin') unless + install_dir.to_s == Gem.default_dir + Gem.default_bindir + end + + ## + # Reset the +dir+ and +path+ values. 
The next time +dir+ or +path+ + # is requested, the values will be calculated from scratch. This is + # mainly used by the unit tests to provide test isolation. + + def self.clear_paths + @gem_home = nil + @gem_path = nil + @user_home = nil + + @@source_index = nil + + MUTEX.synchronize do + @searcher = nil + end + end + + ## + # The path to the standard location of the user's .gemrc file. + + def self.config_file + File.join Gem.user_home, '.gemrc' + end + + ## + # The standard configuration object for gems. + + def self.configuration + @configuration ||= Gem::ConfigFile.new [] + end + + ## + # Use the given configuration object (which implements the ConfigFile + # protocol) as the standard configuration object. + + def self.configuration=(config) + @configuration = config + end + + ## + # The path to the data directory specified by the gem name. If the + # package is not available as a gem, return nil. + + def self.datadir(gem_name) + spec = @loaded_specs[gem_name] + return nil if spec.nil? + File.join(spec.full_gem_path, 'data', gem_name) + end + + ## + # A Zlib::Deflate.deflate wrapper + + def self.deflate(data) + require 'zlib' + Zlib::Deflate.deflate data + end + + ## + # The path where gems are to be installed. + + def self.dir + @gem_home ||= nil + set_home(ENV['GEM_HOME'] || Gem.configuration.home || default_dir) unless @gem_home + @gem_home + end + + ## + # Expand each partial gem path with each of the required paths specified + # in the Gem spec. Each expanded path is yielded.
+ + def self.each_load_path(partials) + partials.each do |gp| + base = File.basename(gp) + specfn = File.join(dir, "specifications", base + ".gemspec") + if File.exist?(specfn) + spec = eval(File.read(specfn)) + spec.require_paths.each do |rp| + yield(File.join(gp, rp)) + end + else + filename = File.join(gp, 'lib') + yield(filename) if File.exist?(filename) + end + end + end + + private_class_method :each_load_path + + ## + # Quietly ensure the named Gem directory contains all the proper + # subdirectories. If we can't create a directory due to a permission + # problem, then we will silently continue. + + def self.ensure_gem_subdirectories(gemdir) + require 'fileutils' + + Gem::DIRECTORIES.each do |filename| + fn = File.join gemdir, filename + FileUtils.mkdir_p fn rescue nil unless File.exist? fn + end + end + + ## + # Returns a list of paths matching +file+ that can be used by a gem to pick + # up features from other gems. For example: + # + # Gem.find_files('rdoc/discover').each do |path| load path end + # + # find_files does not search $LOAD_PATH for files, only gems. + + def self.find_files(path) + specs = searcher.find_all path + + specs.map do |spec| + searcher.matching_files spec, path + end.flatten + end + + ## + # Finds the user's home directory. + #-- + # Some comments from the ruby-talk list regarding finding the home + # directory: + # + # I have HOME, USERPROFILE and HOMEDRIVE + HOMEPATH. Ruby seems + # to be depending on HOME in those code samples. I propose that + # it should fallback to USERPROFILE and HOMEDRIVE + HOMEPATH (at + # least on Win32). 
+ + def self.find_home + ['HOME', 'USERPROFILE'].each do |homekey| + return ENV[homekey] if ENV[homekey] + end + + if ENV['HOMEDRIVE'] && ENV['HOMEPATH'] then + return "#{ENV['HOMEDRIVE']}#{ENV['HOMEPATH']}" + end + + begin + File.expand_path("~") + rescue + if File::ALT_SEPARATOR then + "C:/" + else + "/" + end + end + end + + private_class_method :find_home + + ## + # Zlib::GzipReader wrapper that unzips +data+. + + def self.gunzip(data) + require 'stringio' + require 'zlib' + data = StringIO.new data + + Zlib::GzipReader.new(data).read + end + + ## + # Zlib::GzipWriter wrapper that zips +data+. + + def self.gzip(data) + require 'stringio' + require 'zlib' + zipped = StringIO.new + + Zlib::GzipWriter.wrap zipped do |io| io.write data end + + zipped.string + end + + ## + # A Zlib::Inflate#inflate wrapper + + def self.inflate(data) + require 'zlib' + Zlib::Inflate.inflate data + end + + ## + # Return a list of all possible load paths for the latest version for all + # gems in the Gem installation. + + def self.latest_load_paths + result = [] + + Gem.path.each do |gemdir| + each_load_path(latest_partials(gemdir)) do |load_path| + result << load_path + end + end + + result + end + + ## + # Return only the latest partial paths in the given +gemdir+. + + def self.latest_partials(gemdir) + latest = {} + all_partials(gemdir).each do |gp| + base = File.basename(gp) + if base =~ /(.*)-((\d+\.)*\d+)/ then + name, version = $1, $2 + ver = Gem::Version.new(version) + if latest[name].nil? || ver > latest[name][0] + latest[name] = [ver, gp] + end + end + end + latest.collect { |k,v| v[1] } + end + + private_class_method :latest_partials + + ## + # The index to insert activated gem paths into the $LOAD_PATH. + # + # Defaults to the site lib directory unless gem_prelude.rb has loaded paths, + # then it inserts the activated gem's paths before the gem_prelude.rb paths + # so you can override the gem_prelude.rb default $LOAD_PATH paths. 
+ + def self.load_path_insert_index + index = $LOAD_PATH.index ConfigMap[:sitelibdir] + + $LOAD_PATH.each_with_index do |path, i| + if path.instance_variables.include?(:@gem_prelude_index) or + path.instance_variables.include?('@gem_prelude_index') then + index = i + break + end + end + + index + end + + ## + # The file name and line number of the caller of the caller of this method. + + def self.location_of_caller + caller[1] =~ /(.*?):(\d+)$/i + file = $1 + lineno = $2.to_i + + [file, lineno] + end + + ## + # manage_gems is useless and deprecated. Don't call it anymore. + + def self.manage_gems # :nodoc: + file, lineno = location_of_caller + + warn "#{file}:#{lineno}:Warning: Gem::manage_gems is deprecated and will be removed on or after March 2009." + end + + ## + # The version of the Marshal format for your Ruby. + + def self.marshal_version + "#{Marshal::MAJOR_VERSION}.#{Marshal::MINOR_VERSION}" + end + + ## + # Array of paths to search for Gems. + + def self.path + @gem_path ||= nil + + unless @gem_path then + paths = [ENV['GEM_PATH'] || Gem.configuration.path || default_path] + + if defined?(APPLE_GEM_HOME) and not ENV['GEM_PATH'] then + paths << APPLE_GEM_HOME + end + + set_paths paths.compact.join(File::PATH_SEPARATOR) + end + + @gem_path + end + + ## + # Set array of platforms this RubyGems supports (primarily for testing). + + def self.platforms=(platforms) + @platforms = platforms + end + + ## + # Array of platforms this RubyGems supports. + + def self.platforms + @platforms ||= [] + if @platforms.empty? 
+ @platforms = [Gem::Platform::RUBY, Gem::Platform.local] + end + @platforms + end + + ## + # Adds a post-install hook that will be passed an Gem::Installer instance + # when Gem::Installer#install is called + + def self.post_install(&hook) + @post_install_hooks << hook + end + + ## + # Adds a post-uninstall hook that will be passed a Gem::Uninstaller instance + # and the spec that was uninstalled when Gem::Uninstaller#uninstall is + # called + + def self.post_uninstall(&hook) + @post_uninstall_hooks << hook + end + + ## + # Adds a pre-install hook that will be passed an Gem::Installer instance + # when Gem::Installer#install is called + + def self.pre_install(&hook) + @pre_install_hooks << hook + end + + ## + # Adds a pre-uninstall hook that will be passed an Gem::Uninstaller instance + # and the spec that will be uninstalled when Gem::Uninstaller#uninstall is + # called + + def self.pre_uninstall(&hook) + @pre_uninstall_hooks << hook + end + + ## + # The directory prefix this RubyGems was installed at. + + def self.prefix + prefix = File.dirname File.expand_path(__FILE__) + + if File.dirname(prefix) == File.expand_path(ConfigMap[:sitelibdir]) or + File.dirname(prefix) == File.expand_path(ConfigMap[:libdir]) or + 'lib' != File.basename(prefix) then + nil + else + File.dirname prefix + end + end + + ## + # Refresh source_index from disk and clear searcher. + + def self.refresh + source_index.refresh! + + MUTEX.synchronize do + @searcher = nil + end + end + + ## + # Safely read a file in binary mode on all platforms. + + def self.read_binary(path) + File.open path, binary_mode do |f| f.read end + end + + ## + # Report a load error during activation. The message of load error + # depends on whether it was a version mismatch or if there are not gems of + # any version by the requested name. + + def self.report_activate_error(gem) + matches = Gem.source_index.find_name(gem.name) + + if matches.empty? 
then + error = Gem::LoadError.new( + "Could not find RubyGem #{gem.name} (#{gem.version_requirements})\n") + else + error = Gem::LoadError.new( + "RubyGem version error: " + + "#{gem.name}(#{matches.first.version} not #{gem.version_requirements})\n") + end + + error.name = gem.name + error.version_requirement = gem.version_requirements + raise error + end + + private_class_method :report_activate_error + + def self.required_location(gemname, libfile, *version_constraints) + version_constraints = Gem::Requirement.default if version_constraints.empty? + matches = Gem.source_index.find_name(gemname, version_constraints) + return nil if matches.empty? + spec = matches.last + spec.require_paths.each do |path| + result = File.join(spec.full_gem_path, path, libfile) + return result if File.exist?(result) + end + nil + end + + ## + # The path to the running Ruby interpreter. + + def self.ruby + if @ruby.nil? then + @ruby = File.join(ConfigMap[:bindir], + ConfigMap[:ruby_install_name]) + @ruby << ConfigMap[:EXEEXT] + + # escape string in case path to ruby executable contain spaces. + @ruby.sub!(/.*\s.*/m, '"\&"') + end + + @ruby + end + + ## + # A Gem::Version for the currently running ruby. + + def self.ruby_version + return @ruby_version if defined? @ruby_version + version = RUBY_VERSION.dup + version << ".#{RUBY_PATCHLEVEL}" if defined? RUBY_PATCHLEVEL + @ruby_version = Gem::Version.new version + end + + ## + # The GemPathSearcher object used to search for matching installed gems. + + def self.searcher + MUTEX.synchronize do + @searcher ||= Gem::GemPathSearcher.new + end + end + + ## + # Set the Gem home directory (as reported by Gem.dir). + + def self.set_home(home) + home = home.gsub(File::ALT_SEPARATOR, File::SEPARATOR) if File::ALT_SEPARATOR + @gem_home = home + ensure_gem_subdirectories(@gem_home) + end + + private_class_method :set_home + + ## + # Set the Gem search path (as reported by Gem.path). 
+ + def self.set_paths(gpaths) + if gpaths + @gem_path = gpaths.split(File::PATH_SEPARATOR) + + if File::ALT_SEPARATOR then + @gem_path.map! do |path| + path.gsub File::ALT_SEPARATOR, File::SEPARATOR + end + end + + @gem_path << Gem.dir + else + # TODO: should this be Gem.default_path instead? + @gem_path = [Gem.dir] + end + + @gem_path.uniq! + @gem_path.each do |path| + if 0 == File.expand_path(path).index(Gem.user_home) + next unless File.directory? Gem.user_home + unless win_platform? then + # only create by matching user + next if Etc.getpwuid.uid != File::Stat.new(Gem.user_home).uid + end + end + ensure_gem_subdirectories path + end + end + + private_class_method :set_paths + + ## + # Returns the Gem::SourceIndex of specifications that are in the Gem.path + + def self.source_index + @@source_index ||= SourceIndex.from_installed_gems + end + + ## + # Returns an Array of sources to fetch remote gems from. If the sources + # list is empty, attempts to load the "sources" gem, then uses + # default_sources if it is not installed. + + def self.sources + if @sources.empty? then + begin + gem 'sources', '> 0.0.1' + require 'sources' + rescue LoadError + @sources = default_sources + end + end + + @sources + end + + ## + # Need to be able to set the sources without calling + # Gem.sources.replace since that would cause an infinite loop. + + def self.sources=(new_sources) + @sources = new_sources + end + + ## + # Glob pattern for require-able path suffixes. + + def self.suffix_pattern + @suffix_pattern ||= "{#{suffixes.join(',')}}" + end + + ## + # Suffixes for require-able paths. + + def self.suffixes + ['', '.rb', '.rbw', '.so', '.bundle', '.dll', '.sl', '.jar'] + end + + ## + # Use the +home+ and +paths+ values for Gem.dir and Gem.path. Used mainly + # by the unit tests to provide environment isolation. 
+ + def self.use_paths(home, paths=[]) + clear_paths + set_home(home) if home + set_paths(paths.join(File::PATH_SEPARATOR)) if paths + end + + ## + # The home directory for the user. + + def self.user_home + @user_home ||= find_home + end + + ## + # Is this a windows platform? + + def self.win_platform? + if @@win_platform.nil? then + @@win_platform = !!WIN_PATTERNS.find { |r| RUBY_PLATFORM =~ r } + end + + @@win_platform + end + + class << self + + attr_reader :loaded_specs + + ## + # The list of hooks to be run after Gem::Installer#install is finished + + attr_reader :post_install_hooks + + ## + # The list of hooks to be run after Gem::Uninstaller#uninstall is + # finished + + attr_reader :post_uninstall_hooks + + ## + # The list of hooks to be run before Gem::Installer#install does any work + + attr_reader :pre_install_hooks + + ## + # The list of hooks to be run before Gem::Uninstaller#uninstall does any work + + attr_reader :pre_uninstall_hooks + + # :stopdoc: + + alias cache source_index # an alias for the old name + + # :startdoc: + + end + + MARSHAL_SPEC_DIR = "quick/Marshal.#{Gem.marshal_version}/" + + YAML_SPEC_DIR = 'quick/' + +end + +module Config + # :stopdoc: + class << self + # Return the path to the data directory associated with the named + # package. If the package is loaded as a gem, return the gem + # specific data directory. Otherwise return a path to the share + # area as defined by "#{ConfigMap[:datadir]}/#{package_name}". + def datadir(package_name) + Gem.datadir(package_name) || + File.join(Gem::ConfigMap[:datadir], package_name) + end + end + # :startdoc: +end + +require 'rubygems/exceptions' +require 'rubygems/version' +require 'rubygems/requirement' +require 'rubygems/dependency' +require 'rubygems/gem_path_searcher' # Needed for Kernel#gem +require 'rubygems/source_index' # Needed for Kernel#gem +require 'rubygems/platform' +require 'rubygems/builder' # HACK: Needed for rake's package task.
+ +begin + require 'rubygems/defaults/operating_system' +rescue LoadError +end + +if defined?(RUBY_ENGINE) then + begin + require "rubygems/defaults/#{RUBY_ENGINE}" + rescue LoadError + end +end + +require 'rubygems/config_file' + +if RUBY_VERSION < '1.9' then + require 'rubygems/custom_require' +end + +Gem.clear_paths diff --git a/lib/scanf.rb b/lib/scanf.rb new file mode 100644 index 0000000..549f540 --- /dev/null +++ b/lib/scanf.rb @@ -0,0 +1,703 @@ +# scanf for Ruby +# +# $Release Version: 1.1.2 $ +# $Revision: 19094 $ +# $Id: scanf.rb 19094 2008-09-03 12:54:13Z dblack $ +# $Author: dblack $ +# +# A product of the Austin Ruby Codefest (Austin, Texas, August 2002) + +=begin + +=scanf for Ruby + +==Description + +scanf for Ruby is an implementation of the C function scanf(3), +modified as necessary for Ruby compatibility. + +The methods provided are String#scanf, IO#scanf, and +Kernel#scanf. Kernel#scanf is a wrapper around STDIN.scanf. IO#scanf +can be used on any IO stream, including file handles and sockets. +scanf can be called either with or without a block. + +scanf for Ruby scans an input string or stream according to a +format, as described below ("Conversions"), and returns an +array of matches between the format and the input. The format is +defined in a string, and is similar (though not identical) to the +formats used in Kernel#printf and Kernel#sprintf. + +The format may contain conversion specifiers, which tell scanf +what form (type) each particular matched substring should be converted +to (e.g., decimal integer, floating point number, literal string, +etc.) The matches and conversions take place from left to right, and +the conversions themselves are returned as an array. + +The format string may also contain characters other than those in the +conversion specifiers. White space (blanks, tabs, or newlines) in the +format string matches any amount of white space, including none, in +the input. Everything else matches only itself. 
+ +Scanning stops, and scanf returns, when any input character fails to +match the specifications in the format string, or when input is +exhausted, or when everything in the format string has been +matched. All matches found up to the stopping point are returned in +the return array (or yielded to the block, if a block was given). + + +==Basic usage + + require 'scanf.rb' + + # String#scanf and IO#scanf take a single argument (a format string) + array = aString.scanf("%d%s") + array = anIO.scanf("%d%s") + + # Kernel#scanf reads from STDIN + array = scanf("%d%s") + +==Block usage + +When called with a block, scanf keeps scanning the input, cycling back +to the beginning of the format string, and yields a new array of +conversions to the block every time the format string is matched +(including partial matches, but not including complete failures). The +actual return value of scanf when called with a block is an array +containing the results of all the executions of the block. + + str = "123 abc 456 def 789 ghi" + str.scanf("%d%s") { |num,str| [ num * 2, str.upcase ] } + # => [[246, "ABC"], [912, "DEF"], [1578, "GHI"]] + +==Conversions + +The single argument to scanf is a format string, which generally +includes one or more conversion specifiers. Conversion specifiers +begin with the percent character ('%') and include information about +what scanf should next scan for (string, decimal number, single +character, etc.). + +There may be an optional maximum field width, expressed as a decimal +integer, between the % and the conversion. If no width is given, a +default of `infinity' is used (with the exception of the %c specifier; +see below). Otherwise, given a field width of n for a given +conversion, at most n characters are scanned in processing +that conversion. Before conversion begins, most conversions skip +white space in the input string; this white space is not counted +against the field width. + +The following conversions are available. 
(See the files EXAMPLES +and tests/scanftests.rb for examples.) + +[%] + Matches a literal `%'. That is, `%%' in the format string matches a + single input `%' character. No conversion is done, and the resulting + '%' is not included in the return array. + +[d] + Matches an optionally signed decimal integer. + +[u] + Same as d. + +[i] + Matches an optionally signed integer. The integer is read in base + 16 if it begins with `0x' or `0X', in base 8 if it begins with `0', + and in base 10 otherwise. Only characters that correspond to the + base are recognized. + +[o] + Matches an optionally signed octal integer. + +[x,X] + Matches an optionally signed hexadecimal integer. + +[f,g,e,E] + Matches an optionally signed floating-point number. + +[s] + Matches a sequence of non-white-space characters. The input string stops at + white space or at the maximum field width, whichever occurs first. + +[c] + Matches a single character, or a sequence of n characters if a + field width of n is specified. The usual skip of leading white + space is suppressed. To skip white space first, use an explicit space in + the format. + +[[] + Matches a nonempty sequence of characters from the specified set + of accepted characters. The usual skip of leading white space is + suppressed. This bracketed sub-expression is interpreted exactly like a + character class in a Ruby regular expression. (In fact, it is placed as-is + in a regular expression.) The matching against the input string ends with + the appearance of a character not in (or, with a circumflex, in) the set, + or when the field width runs out, whichever comes first. + +===Assignment suppression + +To require that a particular match occur, but without including the result +in the return array, place the assignment suppression flag, which is +the star character ('*'), immediately after the leading '%' of a format +specifier (just before the field width, if any). + +==Examples + +See the files EXAMPLES and tests/scanftests.rb.
+ +==scanf for Ruby compared with scanf in C + +scanf for Ruby is based on the C function scanf(3), but with modifications, +dictated mainly by the underlying differences between the languages. + +===Unimplemented flags and specifiers + +* The only flag implemented in scanf for Ruby is '*' (ignore + upcoming conversion). Many of the flags available in C versions of scanf(3) + have to do with the type of upcoming pointer arguments, and are literally + meaningless in Ruby. + +* The n specifier (store number of characters consumed so far in + next pointer) is not implemented. + +* The p specifier (match a pointer value) is not implemented. + +===Altered specifiers + +[o,u,x,X] + In scanf for Ruby, all of these specifiers scan for an optionally signed + integer, rather than for an unsigned integer like their C counterparts. + +===Return values + +scanf for Ruby returns an array of successful conversions, whereas +scanf(3) returns the number of conversions successfully +completed. (See below for more details on scanf for Ruby's return +values.) + +==Return values + +Without a block, scanf returns an array containing all the conversions +it has found. If none are found, scanf will return an empty array. An +unsuccessful match is never ignored, but rather always signals the end +of the scanning operation. If the first unsuccessful match takes place +after one or more successful matches have already taken place, the +returned array will contain the results of those successful matches. + +With a block scanf returns a 'map'-like array of transformations from +the block -- that is, an array reflecting what the block did with each +yielded result from the iterative scanf operation. (See "Block +usage", above.) + +==Test suite + +scanf for Ruby includes a suite of unit tests (requiring the +TestUnit package), which can be run with the command ruby +tests/scanftests.rb or the command make test.
+ +==Current limitations and bugs + +When using IO#scanf under Windows, make sure you open your files in +binary mode: + + File.open("filename", "rb") + +so that scanf can keep track of characters correctly. + +Support for character classes is reasonably complete (since it +essentially piggy-backs on Ruby's regular expression handling of +character classes), but users are advised that character class testing +has not been exhaustive, and that they should exercise some caution +in using any of the more complex and/or arcane character class +idioms. + + +==Technical notes + +===Rationale behind scanf for Ruby + +The impetus for a scanf implementation in Ruby comes chiefly from the fact +that existing pattern matching operations, such as Regexp#match and +String#scan, return all results as strings, which have to be converted to +integers or floats explicitly in cases where what's ultimately wanted are +integer or float values. + +===Design of scanf for Ruby + +scanf for Ruby is essentially a format-string-to-regular-expression converter. + +When scanf is called, a FormatString object is generated from the +format string ("%d%s...") argument. The FormatString object breaks the +format string down into atoms ("%d", "%5f", "blah", etc.), and from +each atom it creates a FormatSpecifier object, which it +saves. + +Each FormatSpecifier has a regular expression fragment and a "handler" +associated with it. For example, the regular expression fragment +associated with the format "%d" is "([-+]?\d+)", and the handler +associated with it is a wrapper around String#to_i. scanf itself calls +FormatString#match, passing in the input string. FormatString#match +iterates through its FormatSpecifiers; for each one, it matches the +corresponding regular expression fragment against the string. If +there's a match, it sends the matched string to the handler associated +with the FormatSpecifier.
+ +Thus, to follow up the "%d" example: if "123" occurs in the input +string when a FormatSpecifier consisting of "%d" is reached, the "123" +will be matched against "([-+]?\d+)", and the matched string will be +rendered into an integer by a call to to_i. + +The rendered match is then saved to an accumulator array, and the +input string is reduced to the post-match substring. Thus the string +is "eaten" from the left as the FormatSpecifiers are applied in +sequence. (This is done to a duplicate string; the original string is +not altered.) + +As soon as a regular expression fragment fails to match the string, or +when the FormatString object runs out of FormatSpecifiers, scanning +stops and results accumulated so far are returned in an array. + +==License and copyright + +Copyright:: (c) 2002-2003 David Alan Black +License:: Distributed on the same licensing terms as Ruby itself + +==Warranty disclaimer + +This software is provided "as is" and without any express or implied +warranties, including, without limitation, the implied warranties of +merchantability and fitness for a particular purpose. + +==Credits and acknowledgements + +scanf for Ruby was developed as the major activity of the Austin +Ruby Codefest (Austin, Texas, August 2002). + +Principal author:: David Alan Black (mailto:dblack@superlink.net) +Co-author:: Hal Fulton (mailto:hal9000@hypermetrics.com) +Project contributors:: Nolan Darilek, Jason Johnston + +Thanks to Hal Fulton for hosting the Codefest. + +Thanks to Matz for suggestions about the class design. + +Thanks to Gavin Sinclair for some feedback on the documentation. + +The text for parts of this document, especially the Description and +Conversions sections, above, was adapted from the Linux Programmer's +Manual manpage for scanf(3), dated 1995-11-01. + +==Bugs and bug reports + +scanf for Ruby is based on something of an amalgam of C scanf +implementations and documentation, rather than on a single canonical +description.
Suggestions for features and behaviors which appear in +other scanfs, and would be meaningful in Ruby, are welcome, as are +reports of suspicious behaviors and/or bugs. (Please see "Credits and +acknowledgements", above, for email addresses.) + +=end + +module Scanf + + class FormatSpecifier + + attr_reader :re_string, :matched_string, :conversion, :matched + + private + + def skip; /^\s*%\*/.match(@spec_string); end + + def extract_float(s); s.to_f if s &&! skip; end + def extract_decimal(s); s.to_i if s &&! skip; end + def extract_hex(s); s.hex if s &&! skip; end + def extract_octal(s); s.oct if s &&! skip; end + def extract_integer(s); Integer(s) if s &&! skip; end + def extract_plain(s); s unless skip; end + + def nil_proc(s); nil; end + + public + + def to_s + @spec_string + end + + def count_space? + /(?:\A|\S)%\*?\d*c|%\d*\[/.match(@spec_string) + end + + def initialize(str) + @spec_string = str + h = '[A-Fa-f0-9]' + + @re_string, @handler = + case @spec_string + + # %[[:...:]] + when /%\*?(\[\[:[a-z]+:\]\])/ + [ "(#{$1}+)", :extract_plain ] + + # %5[[:...:]] + when /%\*?(\d+)(\[\[:[a-z]+:\]\])/ + [ "(#{$2}{1,#{$1}})", :extract_plain ] + + # %[...] + when /%\*?\[([^\]]*)\]/ + yes = $1 + if /^\^/.match(yes) then no = yes[1..-1] else no = '^' + yes end + [ "([#{yes}]+)(?=[#{no}]|\\z)", :extract_plain ] + + # %5[...] 
+ when /%\*?(\d+)\[([^\]]*)\]/ + yes = $2 + w = $1 + [ "([#{yes}]{1,#{w}})", :extract_plain ] + + # %i + when /%\*?i/ + [ "([-+]?(?:(?:0[0-7]+)|(?:0[Xx]#{h}+)|(?:[1-9]\\d*)))", :extract_integer ] + + # %5i + when /%\*?(\d+)i/ + n = $1.to_i + s = "(" + if n > 1 then s += "[1-9]\\d{1,#{n-1}}|" end + if n > 1 then s += "0[0-7]{1,#{n-1}}|" end + if n > 2 then s += "[-+]0[0-7]{1,#{n-2}}|" end + if n > 2 then s += "[-+][1-9]\\d{1,#{n-2}}|" end + if n > 2 then s += "0[Xx]#{h}{1,#{n-2}}|" end + if n > 3 then s += "[-+]0[Xx]#{h}{1,#{n-3}}|" end + s += "\\d" + s += ")" + [ s, :extract_integer ] + + # %d, %u + when /%\*?[du]/ + [ '([-+]?\d+)', :extract_decimal ] + + # %5d, %5u + when /%\*?(\d+)[du]/ + n = $1.to_i + s = "(" + if n > 1 then s += "[-+]\\d{1,#{n-1}}|" end + s += "\\d{1,#{$1}})" + [ s, :extract_decimal ] + + # %x + when /%\*?[Xx]/ + [ "([-+]?(?:0[Xx])?#{h}+)", :extract_hex ] + + # %5x + when /%\*?(\d+)[Xx]/ + n = $1.to_i + s = "(" + if n > 3 then s += "[-+]0[Xx]#{h}{1,#{n-3}}|" end + if n > 2 then s += "0[Xx]#{h}{1,#{n-2}}|" end + if n > 1 then s += "[-+]#{h}{1,#{n-1}}|" end + s += "#{h}{1,#{n}}" + s += ")" + [ s, :extract_hex ] + + # %o + when /%\*?o/ + [ '([-+]?[0-7]+)', :extract_octal ] + + # %5o + when /%\*?(\d+)o/ + [ "([-+][0-7]{1,#{$1.to_i-1}}|[0-7]{1,#{$1}})", :extract_octal ] + + # %f + when /%\*?f/ + [ '([-+]?((\d+(?>(?=[^\d.]|$)))|(\d*(\.(\d*([eE][-+]?\d+)?)))))', :extract_float ] + + # %5f + when /%\*?(\d+)f/ + [ "(\\S{1,#{$1}})", :extract_float ] + + # %5s + when /%\*?(\d+)s/ + [ "(\\S{1,#{$1}})", :extract_plain ] + + # %s + when /%\*?s/ + [ '(\S+)', :extract_plain ] + + # %c + when /\s%\*?c/ + [ "\\s*(.)", :extract_plain ] + + # %c + when /%\*?c/ + [ "(.)", :extract_plain ] + + # %5c (whitespace issues are handled by the count_*_space? 
methods) + when /%\*?(\d+)c/ + [ "(.{1,#{$1}})", :extract_plain ] + + # %% + when /%%/ + [ '(\s*%)', :nil_proc ] + + # literal characters + else + [ "(#{Regexp.escape(@spec_string)})", :nil_proc ] + end + + @re_string = '\A' + @re_string + end + + def to_re + Regexp.new(@re_string,Regexp::MULTILINE) + end + + def match(str) + @matched = false + s = str.dup + s.sub!(/\A\s+/,'') unless count_space? + res = to_re.match(s) + if res + @conversion = send(@handler, res[1]) + @matched_string = @conversion.to_s + @matched = true + end + res + end + + def letter + @spec_string[/%\*?\d*([a-z\[])/, 1] + end + + def width + w = @spec_string[/%\*?(\d+)/, 1] + w && w.to_i + end + + def mid_match? + return false unless @matched + cc_no_width = letter == '[' &&! width + c_or_cc_width = (letter == 'c' || letter == '[') && width + width_left = c_or_cc_width && (matched_string.size < width) + + return width_left || cc_no_width + end + + end + + class FormatString + + attr_reader :string_left, :last_spec_tried, + :last_match_tried, :matched_count, :space + + SPECIFIERS = 'diuXxofeEgsc' + REGEX = / + # possible space, followed by... + (?:\s* + # percent sign, followed by... + % + # another percent sign, or... + (?:%| + # optional assignment suppression flag + \*? + # optional maximum field width + \d* + # named character class, ... + (?:\[\[:\w+:\]\]| + # traditional character class, or... + \[[^\]]*\]| + # specifier letter. 
+ [#{SPECIFIERS}])))| + # or miscellaneous characters + [^%\s]+/ix + + def initialize(str) + @specs = [] + @i = 1 + s = str.to_s + return unless /\S/.match(s) + @space = true if /\s\z/.match(s) + @specs.replace s.scan(REGEX).map {|spec| FormatSpecifier.new(spec) } + end + + def to_s + @specs.join('') + end + + def prune(n=matched_count) + n.times { @specs.shift } + end + + def spec_count + @specs.size + end + + def last_spec + @i == spec_count - 1 + end + + def match(str) + accum = [] + @string_left = str + @matched_count = 0 + + @specs.each_with_index do |spec,i| + @i=i + @last_spec_tried = spec + @last_match_tried = spec.match(@string_left) + break unless @last_match_tried + @matched_count += 1 + + accum << spec.conversion + + @string_left = @last_match_tried.post_match + break if @string_left.empty? + end + return accum.compact + end + end +end + +class IO + +# The trick here is doing a match where you grab one *line* +# of input at a time. The linebreak may or may not occur +# at the boundary where the string matches a format specifier. +# And if it does, some rule about whitespace may or may not +# be in effect... +# +# That's why this is much more elaborate than the string +# version. +# +# For each line: +# Match succeeds (non-emptily) +# and the last attempted spec/string sub-match succeeded: +# +# could the last spec keep matching? +# yes: save interim results and continue (next line) +# +# The last attempted spec/string did not match: +# +# are we on the next-to-last spec in the string? +# yes: +# is fmt_string.string_left all spaces? +# yes: does current spec care about input space? 
+# yes: fatal failure +# no: save interim results and continue +# no: continue [this state could be analyzed further] +# +# + + def scanf(str,&b) + return block_scanf(str,&b) if b + return [] unless str.size > 0 + + start_position = pos rescue 0 + matched_so_far = 0 + source_buffer = "" + result_buffer = [] + final_result = [] + + fstr = Scanf::FormatString.new(str) + + loop do + if eof || (tty? &&! fstr.match(source_buffer)) + final_result.concat(result_buffer) + break + end + + source_buffer << gets + + current_match = fstr.match(source_buffer) + + spec = fstr.last_spec_tried + + if spec.matched + if spec.mid_match? + result_buffer.replace(current_match) + next + end + + elsif (fstr.matched_count == fstr.spec_count - 1) + if /\A\s*\z/.match(fstr.string_left) + break if spec.count_space? + result_buffer.replace(current_match) + next + end + end + + final_result.concat(current_match) + + matched_so_far += source_buffer.size + source_buffer.replace(fstr.string_left) + matched_so_far -= source_buffer.size + break if fstr.last_spec + fstr.prune + end + seek(start_position + matched_so_far, IO::SEEK_SET) rescue Errno::ESPIPE + soak_up_spaces if fstr.last_spec && fstr.space + + return final_result + end + + private + + def soak_up_spaces + c = getc + ungetc(c) if c + until eof ||! c || /\S/.match(c.chr) + c = getc + end + ungetc(c) if (c && /\S/.match(c.chr)) + end + + def block_scanf(str) + final = [] +# Sub-ideal, since another FS gets created in scanf. +# But used here to determine the number of specifiers. + fstr = Scanf::FormatString.new(str) + last_spec = fstr.last_spec + begin + current = scanf(str) + break if current.empty? + final.push(yield(current)) + end until eof || fstr.last_spec_tried == last_spec + return final + end +end + +class String + + def scanf(fstr,&b) + if b + block_scanf(fstr,&b) + else + fs = + if fstr.is_a? 
Scanf::FormatString + fstr + else + Scanf::FormatString.new(fstr) + end + fs.match(self) + end + end + + def block_scanf(fstr,&b) + fs = Scanf::FormatString.new(fstr) + str = self.dup + final = [] + begin + current = str.scanf(fs) + final.push(yield(current)) unless current.empty? + str = fs.string_left + end until current.empty? || str.empty? + return final + end +end + +module Kernel + private + def scanf(fs,&b) + STDIN.scanf(fs,&b) + end +end diff --git a/lib/securerandom.rb b/lib/securerandom.rb new file mode 100644 index 0000000..0f7c05b --- /dev/null +++ b/lib/securerandom.rb @@ -0,0 +1,182 @@ +# = Secure random number generator interface. +# +# This library is an interface for secure random number generator which is +# suitable for generating session key in HTTP cookies, etc. +# +# It supports following secure random number generators. +# +# * openssl +# * /dev/urandom +# * Win32 +# +# == Example +# +# # random hexadecimal string. +# p SecureRandom.hex(10) #=> "52750b30ffbc7de3b362" +# p SecureRandom.hex(10) #=> "92b15d6c8dc4beb5f559" +# p SecureRandom.hex(11) #=> "6aca1b5c58e4863e6b81b8" +# p SecureRandom.hex(12) #=> "94b2fff3e7fd9b9c391a2306" +# p SecureRandom.hex(13) #=> "39b290146bea6ce975c37cfc23" +# ... +# +# # random base64 string. +# p SecureRandom.base64(10) #=> "EcmTPZwWRAozdA==" +# p SecureRandom.base64(10) #=> "9b0nsevdwNuM/w==" +# p SecureRandom.base64(10) #=> "KO1nIU+p9DKxGg==" +# p SecureRandom.base64(11) #=> "l7XEiFja+8EKEtY=" +# p SecureRandom.base64(12) #=> "7kJSM/MzBJI+75j8" +# p SecureRandom.base64(13) #=> "vKLJ0tXBHqQOuIcSIg==" +# ... +# +# # random binary string. +# p SecureRandom.random_bytes(10) #=> "\016\t{\370g\310pbr\301" +# p SecureRandom.random_bytes(10) #=> "\323U\030TO\234\357\020\a\337" +# ... + +begin + require 'openssl' +rescue LoadError +end + +module SecureRandom + # SecureRandom.random_bytes generates a random binary string. + # + # The argument n specifies the length of the result string. 
+ # + # If n is not specified, 16 is assumed. + # It may be larger in future. + # + # If secure random number generator is not available, + # NotImplementedError is raised. + def self.random_bytes(n=nil) + n ||= 16 + + if defined? OpenSSL::Random + return OpenSSL::Random.random_bytes(n) + end + + if !defined?(@has_urandom) || @has_urandom + flags = File::RDONLY + flags |= File::NONBLOCK if defined? File::NONBLOCK + flags |= File::NOCTTY if defined? File::NOCTTY + flags |= File::NOFOLLOW if defined? File::NOFOLLOW + begin + File.open("/dev/urandom", flags) {|f| + unless f.stat.chardev? + raise Errno::ENOENT + end + @has_urandom = true + ret = f.readpartial(n) + if ret.length != n + raise NotImplementedError, "Unexpected partial read from random device" + end + return ret + } + rescue Errno::ENOENT + @has_urandom = false + end + end + + if !defined?(@has_win32) + begin + require 'Win32API' + + crypt_acquire_context = Win32API.new("advapi32", "CryptAcquireContext", 'PPPII', 'L') + @crypt_gen_random = Win32API.new("advapi32", "CryptGenRandom", 'LIP', 'L') + + hProvStr = " " * 4 + prov_rsa_full = 1 + crypt_verifycontext = 0xF0000000 + + if crypt_acquire_context.call(hProvStr, nil, nil, prov_rsa_full, crypt_verifycontext) == 0 + raise SystemCallError, "CryptAcquireContext failed: #{lastWin32ErrorMessage}" + end + @hProv, = hProvStr.unpack('L') + + @has_win32 = true + rescue LoadError + @has_win32 = false + end + end + if @has_win32 + bytes = " ".force_encoding("ASCII-8BIT") * n + if @crypt_gen_random.call(@hProv, bytes.size, bytes) == 0 + raise SystemCallError, "CryptGenRandom failed: #{lastWin32ErrorMessage}" + end + return bytes + end + + raise NotImplementedError, "No random device" + end + + # SecureRandom.hex generates a random hex string. + # + # The argument n specifies the length of the random length. + # The length of the result string is twice of n. + # + # If n is not specified, 16 is assumed. + # It may be larger in future. 
+ # + # If secure random number generator is not available, + # NotImplementedError is raised. + def self.hex(n=nil) + random_bytes(n).unpack("H*")[0] + end + + # SecureRandom.base64 generates a random base64 string. + # + # The argument n specifies the length of the random length. + # The length of the result string is about 4/3 of n. + # + # If n is not specified, 16 is assumed. + # It may be larger in future. + # + # If secure random number generator is not available, + # NotImplementedError is raised. + def self.base64(n=nil) + [random_bytes(n)].pack("m*").delete("\n") + end + + # SecureRandom.random_number generates a random number. + # + # If an positive integer is given as n, + # SecureRandom.random_number returns an integer: + # 0 <= SecureRandom.random_number(n) < n. + # + # If 0 is given or an argument is not given, + # SecureRandom.random_number returns an float: + # 0.0 <= SecureRandom.random_number() < 1.0. + def self.random_number(n=0) + if 0 < n + hex = n.to_s(16) + hex = '0' + hex if (hex.length & 1) == 1 + bin = [hex].pack("H*") + mask = bin[0].ord + mask |= mask >> 1 + mask |= mask >> 2 + mask |= mask >> 4 + begin + rnd = SecureRandom.random_bytes(bin.length) + rnd[0] = (rnd[0].ord & mask).chr + end until rnd < bin + rnd.unpack("H*")[0].hex + else + # assumption: Float::MANT_DIG <= 64 + i64 = SecureRandom.random_bytes(8).unpack("Q")[0] + Math.ldexp(i64 >> (64-Float::MANT_DIG), -Float::MANT_DIG) + end + end + + # Following code is based on David Garamond's GUID library for Ruby. 
+ def self.lastWin32ErrorMessage # :nodoc: + get_last_error = Win32API.new("kernel32", "GetLastError", '', 'L') + format_message = Win32API.new("kernel32", "FormatMessageA", 'LPLLPLPPPPPPPP', 'L') + format_message_ignore_inserts = 0x00000200 + format_message_from_system = 0x00001000 + + code = get_last_error.call + msg = "\0" * 1024 + len = format_message.call(format_message_ignore_inserts + format_message_from_system, 0, code, 0, msg, 1024, nil, nil, nil, nil, nil, nil, nil, nil) + msg[0, len].tr("\r", '').chomp + end +end diff --git a/lib/set.rb b/lib/set.rb new file mode 100644 index 0000000..635652b --- /dev/null +++ b/lib/set.rb @@ -0,0 +1,1274 @@ +#!/usr/bin/env ruby +#-- +# set.rb - defines the Set class +#++ +# Copyright (c) 2002-2008 Akinori MUSHA +# +# Documentation by Akinori MUSHA and Gavin Sinclair. +# +# All rights reserved. You can redistribute and/or modify it under the same +# terms as Ruby. +# +# $Id: set.rb 18571 2008-08-13 08:03:30Z knu $ +# +# == Overview +# +# This library provides the Set class, which deals with a collection +# of unordered values with no duplicates. It is a hybrid of Array's +# intuitive inter-operation facilities and Hash's fast lookup. If you +# need to keep values ordered, use the SortedSet class. +# +# The method +to_set+ is added to Enumerable for convenience. +# +# See the Set class for an example of usage. + + +# +# Set implements a collection of unordered values with no duplicates. +# This is a hybrid of Array's intuitive inter-operation facilities and +# Hash's fast lookup. +# +# The equality of each couple of elements is determined according to +# Object#eql? and Object#hash, since Set uses Hash as storage. +# +# Set is easy to use with Enumerable objects (implementing +each+). +# Most of the initializer methods and binary operators accept generic +# Enumerable objects besides sets and arrays. An Enumerable object +# can be converted to Set using the +to_set+ method. 
+# +# == Example +# +# require 'set' +# s1 = Set.new [1, 2] # -> # +# s2 = [1, 2].to_set # -> # +# s1 == s2 # -> true +# s1.add("foo") # -> # +# s1.merge([2, 6]) # -> # +# s1.subset? s2 # -> false +# s2.subset? s1 # -> true +# +# == Contact +# +# - Akinori MUSHA (current maintainer) +# +class Set + include Enumerable + + # Creates a new set containing the given objects. + def self.[](*ary) + new(ary) + end + + # Creates a new set containing the elements of the given enumerable + # object. + # + # If a block is given, the elements of enum are preprocessed by the + # given block. + def initialize(enum = nil, &block) # :yields: o + @hash ||= Hash.new + + enum.nil? and return + + if block + enum.each { |o| add(block[o]) } + else + merge(enum) + end + end + + # Copy internal hash. + def initialize_copy(orig) + @hash = orig.instance_eval{@hash}.dup + end + + def freeze # :nodoc: + super + @hash.freeze + self + end + + def taint # :nodoc: + super + @hash.taint + self + end + + def untaint # :nodoc: + super + @hash.untaint + self + end + + # Returns the number of elements. + def size + @hash.size + end + alias length size + + # Returns true if the set contains no elements. + def empty? + @hash.empty? + end + + # Removes all elements and returns self. + def clear + @hash.clear + self + end + + # Replaces the contents of the set with the contents of the given + # enumerable object and returns self. + def replace(enum) + if enum.class == self.class + @hash.replace(enum.instance_eval { @hash }) + else + clear + enum.each { |o| add(o) } + end + + self + end + + # Converts the set to an array. The order of elements is uncertain. 
+ def to_a + @hash.keys + end + + def flatten_merge(set, seen = Set.new) + set.each { |e| + if e.is_a?(Set) + if seen.include?(e_id = e.object_id) + raise ArgumentError, "tried to flatten recursive Set" + end + + seen.add(e_id) + flatten_merge(e, seen) + seen.delete(e_id) + else + add(e) + end + } + + self + end + protected :flatten_merge + + # Returns a new set that is a copy of the set, flattening each + # containing set recursively. + def flatten + self.class.new.flatten_merge(self) + end + + # Equivalent to Set#flatten, but replaces the receiver with the + # result in place. Returns nil if no modifications were made. + def flatten! + if detect { |e| e.is_a?(Set) } + replace(flatten()) + else + nil + end + end + + # Returns true if the set contains the given object. + def include?(o) + @hash.include?(o) + end + alias member? include? + + # Returns true if the set is a superset of the given set. + def superset?(set) + set.is_a?(Set) or raise ArgumentError, "value must be a set" + return false if size < set.size + set.all? { |o| include?(o) } + end + + # Returns true if the set is a proper superset of the given set. + def proper_superset?(set) + set.is_a?(Set) or raise ArgumentError, "value must be a set" + return false if size <= set.size + set.all? { |o| include?(o) } + end + + # Returns true if the set is a subset of the given set. + def subset?(set) + set.is_a?(Set) or raise ArgumentError, "value must be a set" + return false if set.size < size + all? { |o| set.include?(o) } + end + + # Returns true if the set is a proper subset of the given set. + def proper_subset?(set) + set.is_a?(Set) or raise ArgumentError, "value must be a set" + return false if set.size <= size + all? { |o| set.include?(o) } + end + + # Calls the given block once for each element in the set, passing + # the element as parameter. Returns an enumerator if no block is + # given. + def each + block_given? 
or return enum_for(__method__) + @hash.each_key { |o| yield(o) } + self + end + + # Adds the given object to the set and returns self. Use +merge+ to + # add many elements at once. + def add(o) + @hash[o] = true + self + end + alias << add + + # Adds the given object to the set and returns self. If the + # object is already in the set, returns nil. + def add?(o) + if include?(o) + nil + else + add(o) + end + end + + # Deletes the given object from the set and returns self. Use +subtract+ to + # delete many items at once. + def delete(o) + @hash.delete(o) + self + end + + # Deletes the given object from the set and returns self. If the + # object is not in the set, returns nil. + def delete?(o) + if include?(o) + delete(o) + else + nil + end + end + + # Deletes every element of the set for which block evaluates to + # true, and returns self. + def delete_if + block_given? or return enum_for(__method__) + to_a.each { |o| @hash.delete(o) if yield(o) } + self + end + + # Replaces the elements with ones returned by collect(). + def collect! + block_given? or return enum_for(__method__) + set = self.class.new + each { |o| set << yield(o) } + replace(set) + end + alias map! collect! + + # Equivalent to Set#delete_if, but returns nil if no changes were + # made. + def reject! + block_given? or return enum_for(__method__) + n = size + delete_if { |o| yield(o) } + size == n ? nil : self + end + + # Merges the elements of the given enumerable object to the set and + # returns self. + def merge(enum) + if enum.is_a?(Set) + @hash.update(enum.instance_eval { @hash }) + else + enum.each { |o| add(o) } + end + + self + end + + # Deletes every element that appears in the given enumerable object + # and returns self. + def subtract(enum) + enum.each { |o| delete(o) } + self + end + + # Returns a new set built by merging the set and the elements of the + # given enumerable object. 
+ def |(enum) + dup.merge(enum) + end + alias + | ## + alias union | ## + + # Returns a new set built by duplicating the set, removing every + # element that appears in the given enumerable object. + def -(enum) + dup.subtract(enum) + end + alias difference - ## + + # Returns a new set containing elements common to the set and the + # given enumerable object. + def &(enum) + n = self.class.new + enum.each { |o| n.add(o) if include?(o) } + n + end + alias intersection & ## + + # Returns a new set containing elements exclusive between the set + # and the given enumerable object. (set ^ enum) is equivalent to + # ((set | enum) - (set & enum)). + def ^(enum) + n = Set.new(enum) + each { |o| if n.include?(o) then n.delete(o) else n.add(o) end } + n + end + + # Returns true if two sets are equal. The equality of each couple + # of elements is defined according to Object#eql?. + def ==(set) + equal?(set) and return true + + set.is_a?(Set) && size == set.size or return false + + hash = @hash.dup + set.all? { |o| hash.include?(o) } + end + + def hash # :nodoc: + @hash.hash + end + + def eql?(o) # :nodoc: + return false unless o.is_a?(Set) + @hash.eql?(o.instance_eval{@hash}) + end + + # Classifies the set by the return value of the given block and + # returns a hash of {value => set of elements} pairs. The block is + # called once for each element of the set, passing the element as + # parameter. + # + # e.g.: + # + # require 'set' + # files = Set.new(Dir.glob("*.rb")) + # hash = files.classify { |f| File.mtime(f).year } + # p hash # => {2000=>#, + # # 2001=>#, + # # 2002=>#} + def classify # :yields: o + block_given? or return enum_for(__method__) + + h = {} + + each { |i| + x = yield(i) + (h[x] ||= self.class.new).add(i) + } + + h + end + + # Divides the set into a set of subsets according to the commonality + # defined by the given block. + # + # If the arity of the block is 2, elements o1 and o2 are in common + # if block.call(o1, o2) is true. 
Otherwise, elements o1 and o2 are + # in common if block.call(o1) == block.call(o2). + # + # e.g.: + # + # require 'set' + # numbers = Set[1, 3, 4, 6, 9, 10, 11] + # set = numbers.divide { |i,j| (i - j).abs == 1 } + # p set # => #, + # # #, + # # #, + # # #}> + def divide(&func) + func or return enum_for(__method__) + + if func.arity == 2 + require 'tsort' + + class << dig = {} # :nodoc: + include TSort + + alias tsort_each_node each_key + def tsort_each_child(node, &block) + fetch(node).each(&block) + end + end + + each { |u| + dig[u] = a = [] + each{ |v| func.call(u, v) and a << v } + } + + set = Set.new() + dig.each_strongly_connected_component { |css| + set.add(self.class.new(css)) + } + set + else + Set.new(classify(&func).values) + end + end + + InspectKey = :__inspect_key__ # :nodoc: + + # Returns a string containing a human-readable representation of the + # set. ("#") + def inspect + ids = (Thread.current[InspectKey] ||= []) + + if ids.include?(object_id) + return sprintf('#<%s: {...}>', self.class.name) + end + + begin + ids << object_id + return sprintf('#<%s: {%s}>', self.class, to_a.inspect[1..-2]) + ensure + ids.pop + end + end + + def pretty_print(pp) # :nodoc: + pp.text sprintf('#<%s: {', self.class.name) + pp.nest(1) { + pp.seplist(self) { |o| + pp.pp o + } + } + pp.text "}>" + end + + def pretty_print_cycle(pp) # :nodoc: + pp.text sprintf('#<%s: {%s}>', self.class.name, empty? ? '' : '...') + end +end + +# SortedSet implements a set which elements are sorted in order. See Set. 
+class SortedSet < Set + @@setup = false + + class << self + def [](*ary) # :nodoc: + new(ary) + end + + def setup # :nodoc: + @@setup and return + + module_eval { + # a hack to shut up warning + alias old_init initialize + remove_method :old_init + } + begin + require 'rbtree' + + module_eval %{ + def initialize(*args, &block) + @hash = RBTree.new + super + end + } + rescue LoadError + module_eval %{ + def initialize(*args, &block) + @keys = nil + super + end + + def clear + @keys = nil + super + end + + def replace(enum) + @keys = nil + super + end + + def add(o) + @keys = nil + @hash[o] = true + self + end + alias << add + + def delete(o) + @keys = nil + @hash.delete(o) + self + end + + def delete_if + block_given? or return enum_for(__method__) + n = @hash.size + super + @keys = nil if @hash.size != n + self + end + + def merge(enum) + @keys = nil + super + end + + def each + block_given? or return enum_for(__method__) + to_a.each { |o| yield(o) } + self + end + + def to_a + (@keys = @hash.keys).sort! unless @keys + @keys + end + } + end + + @@setup = true + end + end + + def initialize(*args, &block) # :nodoc: + SortedSet.setup + initialize(*args, &block) + end +end + +module Enumerable + # Makes a set from the enumerable object with given arguments. + # Needs to +require "set"+ to use this method. + def to_set(klass = Set, *args, &block) + klass.new(self, *args, &block) + end +end + +# =begin +# == RestricedSet class +# RestricedSet implements a set with restrictions defined by a given +# block. +# +# === Super class +# Set +# +# === Class Methods +# --- RestricedSet::new(enum = nil) { |o| ... } +# --- RestricedSet::new(enum = nil) { |rset, o| ... } +# Creates a new restricted set containing the elements of the given +# enumerable object. Restrictions are defined by the given block. +# +# If the block's arity is 2, it is called with the RestrictedSet +# itself and an object to see if the object is allowed to be put in +# the set. 
+# +# Otherwise, the block is called with an object to see if the object +# is allowed to be put in the set. +# +# === Instance Methods +# --- restriction_proc +# Returns the restriction procedure of the set. +# +# =end +# +# class RestricedSet < Set +# def initialize(*args, &block) +# @proc = block or raise ArgumentError, "missing a block" +# +# if @proc.arity == 2 +# instance_eval %{ +# def add(o) +# @hash[o] = true if @proc.call(self, o) +# self +# end +# alias << add +# +# def add?(o) +# if include?(o) || !@proc.call(self, o) +# nil +# else +# @hash[o] = true +# self +# end +# end +# +# def replace(enum) +# clear +# enum.each { |o| add(o) } +# +# self +# end +# +# def merge(enum) +# enum.each { |o| add(o) } +# +# self +# end +# } +# else +# instance_eval %{ +# def add(o) +# if @proc.call(o) +# @hash[o] = true +# end +# self +# end +# alias << add +# +# def add?(o) +# if include?(o) || !@proc.call(o) +# nil +# else +# @hash[o] = true +# self +# end +# end +# } +# end +# +# super(*args) +# end +# +# def restriction_proc +# @proc +# end +# end + +if $0 == __FILE__ + eval DATA.read, nil, $0, __LINE__+4 +end + +__END__ + +require 'test/unit' + +class TC_Set < Test::Unit::TestCase + def test_aref + assert_nothing_raised { + Set[] + Set[nil] + Set[1,2,3] + } + + assert_equal(0, Set[].size) + assert_equal(1, Set[nil].size) + assert_equal(1, Set[[]].size) + assert_equal(1, Set[[nil]].size) + + set = Set[2,4,6,4] + assert_equal(Set.new([2,4,6]), set) + end + + def test_s_new + assert_nothing_raised { + Set.new() + Set.new(nil) + Set.new([]) + Set.new([1,2]) + Set.new('a'..'c') + } + assert_raises(NoMethodError) { + Set.new(false) + } + assert_raises(NoMethodError) { + Set.new(1) + } + assert_raises(ArgumentError) { + Set.new(1,2) + } + + assert_equal(0, Set.new().size) + assert_equal(0, Set.new(nil).size) + assert_equal(0, Set.new([]).size) + assert_equal(1, Set.new([nil]).size) + + ary = [2,4,6,4] + set = Set.new(ary) + ary.clear + assert_equal(false, set.empty?) 
+ assert_equal(3, set.size) + + ary = [1,2,3] + + s = Set.new(ary) { |o| o * 2 } + assert_equal([2,4,6], s.sort) + end + + def test_clone + set1 = Set.new + set2 = set1.clone + set1 << 'abc' + assert_equal(Set.new, set2) + end + + def test_dup + set1 = Set[1,2] + set2 = set1.dup + + assert_not_same(set1, set2) + + assert_equal(set1, set2) + + set1.add(3) + + assert_not_equal(set1, set2) + end + + def test_size + assert_equal(0, Set[].size) + assert_equal(2, Set[1,2].size) + assert_equal(2, Set[1,2,1].size) + end + + def test_empty? + assert_equal(true, Set[].empty?) + assert_equal(false, Set[1, 2].empty?) + end + + def test_clear + set = Set[1,2] + ret = set.clear + + assert_same(set, ret) + assert_equal(true, set.empty?) + end + + def test_replace + set = Set[1,2] + ret = set.replace('a'..'c') + + assert_same(set, ret) + assert_equal(Set['a','b','c'], set) + end + + def test_to_a + set = Set[1,2,3,2] + ary = set.to_a + + assert_equal([1,2,3], ary.sort) + end + + def test_flatten + # test1 + set1 = Set[ + 1, + Set[ + 5, + Set[7, + Set[0] + ], + Set[6,2], + 1 + ], + 3, + Set[3,4] + ] + + set2 = set1.flatten + set3 = Set.new(0..7) + + assert_not_same(set2, set1) + assert_equal(set3, set2) + + # test2; destructive + orig_set1 = set1 + set1.flatten! + + assert_same(orig_set1, set1) + assert_equal(set3, set1) + + # test3; multiple occurrences of a set in an set + set1 = Set[1, 2] + set2 = Set[set1, Set[set1, 4], 3] + + assert_nothing_raised { + set2.flatten! + } + + assert_equal(Set.new(1..4), set2) + + # test4; recursion + set2 = Set[] + set1 = Set[1, set2] + set2.add(set1) + + assert_raises(ArgumentError) { + set1.flatten! + } + + # test5; miscellaneous + empty = Set[] + set = Set[Set[empty, "a"],Set[empty, "b"]] + + assert_nothing_raised { + set.flatten + } + + set1 = empty.merge(Set["no_more", set]) + + assert_nil(Set.new(0..31).flatten!) + + x = Set[Set[],Set[1,2]].flatten! + y = Set[1,2] + + assert_equal(x, y) + end + + def test_include? 
+ set = Set[1,2,3] + + assert_equal(true, set.include?(1)) + assert_equal(true, set.include?(2)) + assert_equal(true, set.include?(3)) + assert_equal(false, set.include?(0)) + assert_equal(false, set.include?(nil)) + + set = Set["1",nil,"2",nil,"0","1",false] + assert_equal(true, set.include?(nil)) + assert_equal(true, set.include?(false)) + assert_equal(true, set.include?("1")) + assert_equal(false, set.include?(0)) + assert_equal(false, set.include?(true)) + end + + def test_superset? + set = Set[1,2,3] + + assert_raises(ArgumentError) { + set.superset?() + } + + assert_raises(ArgumentError) { + set.superset?(2) + } + + assert_raises(ArgumentError) { + set.superset?([2]) + } + + assert_equal(true, set.superset?(Set[])) + assert_equal(true, set.superset?(Set[1,2])) + assert_equal(true, set.superset?(Set[1,2,3])) + assert_equal(false, set.superset?(Set[1,2,3,4])) + assert_equal(false, set.superset?(Set[1,4])) + + assert_equal(true, Set[].superset?(Set[])) + end + + def test_proper_superset? + set = Set[1,2,3] + + assert_raises(ArgumentError) { + set.proper_superset?() + } + + assert_raises(ArgumentError) { + set.proper_superset?(2) + } + + assert_raises(ArgumentError) { + set.proper_superset?([2]) + } + + assert_equal(true, set.proper_superset?(Set[])) + assert_equal(true, set.proper_superset?(Set[1,2])) + assert_equal(false, set.proper_superset?(Set[1,2,3])) + assert_equal(false, set.proper_superset?(Set[1,2,3,4])) + assert_equal(false, set.proper_superset?(Set[1,4])) + + assert_equal(false, Set[].proper_superset?(Set[])) + end + + def test_subset? 
+ set = Set[1,2,3] + + assert_raises(ArgumentError) { + set.subset?() + } + + assert_raises(ArgumentError) { + set.subset?(2) + } + + assert_raises(ArgumentError) { + set.subset?([2]) + } + + assert_equal(true, set.subset?(Set[1,2,3,4])) + assert_equal(true, set.subset?(Set[1,2,3])) + assert_equal(false, set.subset?(Set[1,2])) + assert_equal(false, set.subset?(Set[])) + + assert_equal(true, Set[].subset?(Set[1])) + assert_equal(true, Set[].subset?(Set[])) + end + + def test_proper_subset? + set = Set[1,2,3] + + assert_raises(ArgumentError) { + set.proper_subset?() + } + + assert_raises(ArgumentError) { + set.proper_subset?(2) + } + + assert_raises(ArgumentError) { + set.proper_subset?([2]) + } + + assert_equal(true, set.proper_subset?(Set[1,2,3,4])) + assert_equal(false, set.proper_subset?(Set[1,2,3])) + assert_equal(false, set.proper_subset?(Set[1,2])) + assert_equal(false, set.proper_subset?(Set[])) + + assert_equal(false, Set[].proper_subset?(Set[])) + end + + def test_each + ary = [1,3,5,7,10,20] + set = Set.new(ary) + + ret = set.each { |o| } + assert_same(set, ret) + + e = set.each + assert_instance_of(Enumerator, e) + + assert_nothing_raised { + set.each { |o| + ary.delete(o) or raise "unexpected element: #{o}" + } + + ary.empty? 
or raise "forgotten elements: #{ary.join(', ')}" + } + end + + def test_add + set = Set[1,2,3] + + ret = set.add(2) + assert_same(set, ret) + assert_equal(Set[1,2,3], set) + + ret = set.add?(2) + assert_nil(ret) + assert_equal(Set[1,2,3], set) + + ret = set.add(4) + assert_same(set, ret) + assert_equal(Set[1,2,3,4], set) + + ret = set.add?(5) + assert_same(set, ret) + assert_equal(Set[1,2,3,4,5], set) + end + + def test_delete + set = Set[1,2,3] + + ret = set.delete(4) + assert_same(set, ret) + assert_equal(Set[1,2,3], set) + + ret = set.delete?(4) + assert_nil(ret) + assert_equal(Set[1,2,3], set) + + ret = set.delete(2) + assert_equal(set, ret) + assert_equal(Set[1,3], set) + + ret = set.delete?(1) + assert_equal(set, ret) + assert_equal(Set[3], set) + end + + def test_delete_if + set = Set.new(1..10) + ret = set.delete_if { |i| i > 10 } + assert_same(set, ret) + assert_equal(Set.new(1..10), set) + + set = Set.new(1..10) + ret = set.delete_if { |i| i % 3 == 0 } + assert_same(set, ret) + assert_equal(Set[1,2,4,5,7,8,10], set) + end + + def test_collect! + set = Set[1,2,3,'a','b','c',-1..1,2..4] + + ret = set.collect! { |i| + case i + when Numeric + i * 2 + when String + i.upcase + else + nil + end + } + + assert_same(set, ret) + assert_equal(Set[2,4,6,'A','B','C',nil], set) + end + + def test_reject! + set = Set.new(1..10) + + ret = set.reject! { |i| i > 10 } + assert_nil(ret) + assert_equal(Set.new(1..10), set) + + ret = set.reject! 
{ |i| i % 3 == 0 } + assert_same(set, ret) + assert_equal(Set[1,2,4,5,7,8,10], set) + end + + def test_merge + set = Set[1,2,3] + + ret = set.merge([2,4,6]) + assert_same(set, ret) + assert_equal(Set[1,2,3,4,6], set) + end + + def test_subtract + set = Set[1,2,3] + + ret = set.subtract([2,4,6]) + assert_same(set, ret) + assert_equal(Set[1,3], set) + end + + def test_plus + set = Set[1,2,3] + + ret = set + [2,4,6] + assert_not_same(set, ret) + assert_equal(Set[1,2,3,4,6], ret) + end + + def test_minus + set = Set[1,2,3] + + ret = set - [2,4,6] + assert_not_same(set, ret) + assert_equal(Set[1,3], ret) + end + + def test_and + set = Set[1,2,3,4] + + ret = set & [2,4,6] + assert_not_same(set, ret) + assert_equal(Set[2,4], ret) + end + + def test_xor + set = Set[1,2,3,4] + ret = set ^ [2,4,5,5] + assert_not_same(set, ret) + assert_equal(Set[1,3,5], ret) + end + + def test_eq + set1 = Set[2,3,1] + set2 = Set[1,2,3] + + assert_equal(set1, set1) + assert_equal(set1, set2) + assert_not_equal(Set[1], [1]) + + set1 = Class.new(Set)["a", "b"] + set2 = Set["a", "b", set1] + set1 = set1.add(set1.clone) + +# assert_equal(set1, set2) +# assert_equal(set2, set1) + assert_equal(set2, set2.clone) + assert_equal(set1.clone, set1) + + assert_not_equal(Set[Exception.new,nil], Set[Exception.new,Exception.new], "[ruby-dev:26127]") + end + + # def test_hash + # end + + # def test_eql? 
+ # end + + def test_classify + set = Set.new(1..10) + ret = set.classify { |i| i % 3 } + + assert_equal(3, ret.size) + assert_instance_of(Hash, ret) + ret.each_value { |value| assert_instance_of(Set, value) } + assert_equal(Set[3,6,9], ret[0]) + assert_equal(Set[1,4,7,10], ret[1]) + assert_equal(Set[2,5,8], ret[2]) + end + + def test_divide + set = Set.new(1..10) + ret = set.divide { |i| i % 3 } + + assert_equal(3, ret.size) + n = 0 + ret.each { |s| n += s.size } + assert_equal(set.size, n) + assert_equal(set, ret.flatten) + + set = Set[7,10,5,11,1,3,4,9,0] + ret = set.divide { |a,b| (a - b).abs == 1 } + + assert_equal(4, ret.size) + n = 0 + ret.each { |s| n += s.size } + assert_equal(set.size, n) + assert_equal(set, ret.flatten) + ret.each { |s| + if s.include?(0) + assert_equal(Set[0,1], s) + elsif s.include?(3) + assert_equal(Set[3,4,5], s) + elsif s.include?(7) + assert_equal(Set[7], s) + elsif s.include?(9) + assert_equal(Set[9,10,11], s) + else + raise "unexpected group: #{s.inspect}" + end + } + end + + def test_inspect + set1 = Set[1] + + assert_equal('#', set1.inspect) + + set2 = Set[Set[0], 1, 2, set1] + assert_equal(false, set2.inspect.include?('#')) + + set1.add(set2) + assert_equal(true, set1.inspect.include?('#')) + end + + # def test_pretty_print + # end + + # def test_pretty_print_cycle + # end +end + +class TC_SortedSet < Test::Unit::TestCase + def test_sortedset + s = SortedSet[4,5,3,1,2] + + assert_equal([1,2,3,4,5], s.to_a) + + prev = nil + s.each { |o| assert(prev < o) if prev; prev = o } + assert_not_nil(prev) + + s.map! 
{ |o| -2 * o } + + assert_equal([-10,-8,-6,-4,-2], s.to_a) + + prev = nil + ret = s.each { |o| assert(prev < o) if prev; prev = o } + assert_not_nil(prev) + assert_same(s, ret) + + s = SortedSet.new([2,1,3]) { |o| o * -2 } + assert_equal([-6,-4,-2], s.to_a) + + s = SortedSet.new(['one', 'two', 'three', 'four']) + a = [] + ret = s.delete_if { |o| a << o; o.start_with?('t') } + assert_same(s, ret) + assert_equal(['four', 'one'], s.to_a) + assert_equal(['four', 'one', 'three', 'two'], a) + + s = SortedSet.new(['one', 'two', 'three', 'four']) + a = [] + ret = s.reject! { |o| a << o; o.start_with?('t') } + assert_same(s, ret) + assert_equal(['four', 'one'], s.to_a) + assert_equal(['four', 'one', 'three', 'two'], a) + + s = SortedSet.new(['one', 'two', 'three', 'four']) + a = [] + ret = s.reject! { |o| a << o; false } + assert_same(nil, ret) + assert_equal(['four', 'one', 'three', 'two'], s.to_a) + assert_equal(['four', 'one', 'three', 'two'], a) + end +end + +class TC_Enumerable < Test::Unit::TestCase + def test_to_set + ary = [2,5,4,3,2,1,3] + + set = ary.to_set + assert_instance_of(Set, set) + assert_equal([1,2,3,4,5], set.sort) + + set = ary.to_set { |o| o * -2 } + assert_instance_of(Set, set) + assert_equal([-10,-8,-6,-4,-2], set.sort) + + set = ary.to_set(SortedSet) + assert_instance_of(SortedSet, set) + assert_equal([1,2,3,4,5], set.to_a) + + set = ary.to_set(SortedSet) { |o| o * -2 } + assert_instance_of(SortedSet, set) + assert_equal([-10,-8,-6,-4,-2], set.sort) + end +end + +# class TC_RestricedSet < Test::Unit::TestCase +# def test_s_new +# assert_raises(ArgumentError) { RestricedSet.new } +# +# s = RestricedSet.new([-1,2,3]) { |o| o > 0 } +# assert_equal([2,3], s.sort) +# end +# +# def test_restriction_proc +# s = RestricedSet.new([-1,2,3]) { |o| o > 0 } +# +# f = s.restriction_proc +# assert_instance_of(Proc, f) +# assert(f[1]) +# assert(!f[0]) +# end +# +# def test_replace +# s = RestricedSet.new(-3..3) { |o| o > 0 } +# assert_equal([1,2,3], s.sort) +# +# 
s.replace([-2,0,3,4,5]) +# assert_equal([3,4,5], s.sort) +# end +# +# def test_merge +# s = RestricedSet.new { |o| o > 0 } +# s.merge(-5..5) +# assert_equal([1,2,3,4,5], s.sort) +# +# s.merge([10,-10,-8,8]) +# assert_equal([1,2,3,4,5,8,10], s.sort) +# end +# end diff --git a/lib/shell.rb b/lib/shell.rb new file mode 100644 index 0000000..6a64cb2 --- /dev/null +++ b/lib/shell.rb @@ -0,0 +1,300 @@ +# +# shell.rb - +# $Release Version: 0.7 $ +# $Revision: 1.9 $ +# by Keiju ISHITSUKA(keiju@ruby-lang.org) +# +# -- +# +# +# + +require "e2mmap" + +require "thread" unless defined?(Mutex) + +require "forwardable" + +require "shell/error" +require "shell/command-processor" +require "shell/process-controller" + +class Shell + @RCS_ID='-$Id: shell.rb,v 1.9 2002/03/04 12:01:10 keiju Exp keiju $-' + + include Error + extend Exception2MessageMapper + +# @cascade = true + # debug: true -> normal debug + # debug: 1 -> eval definition debug + # debug: 2 -> detail inspect debug + @debug = false + @verbose = true + + @debug_display_process_id = false + @debug_display_thread_id = true + @debug_output_mutex = Mutex.new + + class << Shell + extend Forwardable + + attr_accessor :cascade, :debug, :verbose + +# alias cascade? cascade + alias debug? debug + alias verbose? 
verbose + @verbose = true + + def debug=(val) + @debug = val + @verbose = val if val + end + + def cd(path) + new(path) + end + + def default_system_path + if @default_system_path + @default_system_path + else + ENV["PATH"].split(":") + end + end + + def default_system_path=(path) + @default_system_path = path + end + + def default_record_separator + if @default_record_separator + @default_record_separator + else + $/ + end + end + + def default_record_separator=(rs) + @default_record_separator = rs + end + + # os resource mutex + mutex_methods = ["unlock", "lock", "locked?", "synchronize", "try_lock", "exclusive_unlock"] + for m in mutex_methods + def_delegator("@debug_output_mutex", m, "debug_output_"+m.to_s) + end + + end + + def initialize(pwd = Dir.pwd, umask = nil) + @cwd = File.expand_path(pwd) + @dir_stack = [] + @umask = umask + + @system_path = Shell.default_system_path + @record_separator = Shell.default_record_separator + + @command_processor = CommandProcessor.new(self) + @process_controller = ProcessController.new(self) + + @verbose = Shell.verbose + @debug = Shell.debug + end + + attr_reader :system_path + + def system_path=(path) + @system_path = path + rehash + end + + attr_accessor :umask, :record_separator + attr_accessor :verbose, :debug + + def debug=(val) + @debug = val + @verbose = val if val + end + + alias verbose? verbose + alias debug? debug + + attr_reader :command_processor + attr_reader :process_controller + + def expand_path(path) + File.expand_path(path, @cwd) + end + + # Most Shell commands are defined via CommandProcessor + + # + # Dir related methods + # + # Shell#cwd/dir/getwd/pwd + # Shell#chdir/cd + # Shell#pushdir/pushd + # Shell#popdir/popd + # Shell#mkdir + # Shell#rmdir + + attr_reader :cwd + alias dir cwd + alias getwd cwd + alias pwd cwd + + attr_reader :dir_stack + alias dirs dir_stack + + # If called as iterator, it restores the current directory when the + # block ends. 
+ def chdir(path = nil, verbose = @verbose) + check_point + + if iterator? + notify("chdir(with block) #{path}") if verbose + cwd_old = @cwd + begin + chdir(path, nil) + yield + ensure + chdir(cwd_old, nil) + end + else + notify("chdir #{path}") if verbose + path = "~" unless path + @cwd = expand_path(path) + notify "current dir: #{@cwd}" + rehash + Void.new(self) + end + end + alias cd chdir + + def pushdir(path = nil, verbose = @verbose) + check_point + + if iterator? + notify("pushdir(with block) #{path}") if verbose + pushdir(path, nil) + begin + yield + ensure + popdir + end + elsif path + notify("pushdir #{path}") if verbose + @dir_stack.push @cwd + chdir(path, nil) + notify "dir stack: [#{@dir_stack.join ', '}]" + self + else + notify("pushdir") if verbose + if pop = @dir_stack.pop + @dir_stack.push @cwd + chdir pop + notify "dir stack: [#{@dir_stack.join ', '}]" + self + else + Shell.Fail DirStackEmpty + end + end + Void.new(self) + end + alias pushd pushdir + + def popdir + check_point + + notify("popdir") + if pop = @dir_stack.pop + chdir pop + notify "dir stack: [#{@dir_stack.join ', '}]" + self + else + Shell.Fail DirStackEmpty + end + Void.new(self) + end + alias popd popdir + + # + # process management + # + def jobs + @process_controller.jobs + end + + def kill(sig, command) + @process_controller.kill_job(sig, command) + end + + # + # command definitions + # + def Shell.def_system_command(command, path = command) + CommandProcessor.def_system_command(command, path) + end + + def Shell.undef_system_command(command) + CommandProcessor.undef_system_command(command) + end + + def Shell.alias_command(ali, command, *opts, &block) + CommandProcessor.alias_command(ali, command, *opts, &block) + end + + def Shell.unalias_command(ali) + CommandProcessor.unalias_command(ali) + end + + def Shell.install_system_commands(pre = "sys_") + CommandProcessor.install_system_commands(pre) + end + + # + def inspect + if debug.kind_of?(Integer) && debug > 2 + super + else 
+ to_s + end + end + + def self.notify(*opts, &block) + Shell::debug_output_synchronize do + if opts[-1].kind_of?(String) + yorn = verbose? + else + yorn = opts.pop + end + return unless yorn + + if @debug_display_thread_id + if @debug_display_process_id + prefix = "shell(##{Process.pid}:#{Thread.current.to_s.sub("Thread", "Th")}): " + else + prefix = "shell(#{Thread.current.to_s.sub("Thread", "Th")}): " + end + else + prefix = "shell: " + end + _head = true + STDERR.print opts.collect{|mes| + mes = mes.dup + yield mes if iterator? + if _head + _head = false +# "shell" " + mes + prefix + mes + else + " "* prefix.size + mes + end + }.join("\n")+"\n" + end + end + + CommandProcessor.initialize + CommandProcessor.run_config +end diff --git a/lib/shell/builtin-command.rb b/lib/shell/builtin-command.rb new file mode 100644 index 0000000..270c65c --- /dev/null +++ b/lib/shell/builtin-command.rb @@ -0,0 +1,160 @@ +# +# shell/builtin-command.rb - +# $Release Version: 0.7 $ +# $Revision: 14912 $ +# by Keiju ISHITSUKA(keiju@ruby-lang.org) +# +# -- +# +# +# + +require "shell/filter" + +class Shell + class BuiltInCommand + # + def expand_path(path) + @shell.expand_path(path) + end + + # + # File related commands + # Shell#foreach + # Shell#open + # Shell#unlink + # Shell#test + # + # - + # + # CommandProcessor#foreach(path, rs) + # path: String + # rs: String - record separator + # iterator + # Same as: + # File#foreach (when path is file) + # Dir#foreach (when path is directory) + # path is relative to pwd + # + def foreach(path = nil, *rs) + path = "." 
unless path + path = expand_path(path) + + if File.directory?(path) + Dir.foreach(path){|fn| yield fn} + else + IO.foreach(path, *rs){|l| yield l} + end + end + + # + # CommandProcessor#open(path, mode) + # path: String + # mode: String + # return: File or Dir + # Same as: + # File#open (when path is file) + # Dir#open (when path is directory) + # mode has an effect only when path is a file + # + def open(path, mode = nil, perm = 0666, &b) + path = expand_path(path) + if File.directory?(path) + Dir.open(path, &b) + else + if @shell.umask + f = File.open(path, mode, perm) + File.chmod(perm & ~@shell.umask, path) + if block_given? + f.each(&b) + end + f + else + f = File.open(path, mode, perm, &b) + end + end + end + # public :open + + # + # CommandProcessor#unlink(path) + # same as: + # Dir#unlink (when path is directory) + # File#unlink (when path is file) + # + def unlink(path) + @shell.check_point + + path = expand_path(path) + if File.directory?(path) + Dir.unlink(path) + else + IO.unlink(path) + end + Void.new(@shell) + end + + # + # CommandProcessor#test(command, file1, file2) + # CommandProcessor#[command, file1, file2] + # command: char or String or Symbol + # file1: String + # file2: String(optional) + # return: Boolean + # same as: + # test() (when command is char or length 1 string or symbol) + # FileTest.command (others) + # example: + # sh[?e, "foo"] + # sh[:e, "foo"] + # sh["e", "foo"] + # sh[:exists?, "foo"] + # sh["exists?", "foo"] + # + alias top_level_test test + def test(command, file1, file2=nil) + file1 = expand_path(file1) + file2 = expand_path(file2) if file2 + command = command.id2name if command.kind_of?(Symbol) + + case command + when Integer + if file2 + top_level_test(command, file1, file2) + else + top_level_test(command, file1) + end + when String + if command.size == 1 + if file2 + top_level_test(command, file1, file2) + else + top_level_test(command, file1) + end + else + if file2 + FileTest.send(command, file1, file2) + else + 
FileTest.send(command, file1) + end + end + end + end + alias [] test + + # + # Dir related methods + # + # Shell#mkdir + # Shell#rmdir + # + #-- + # + # CommandProcessor#mkdir(*path) + # path: String + # same as Dir.mkdir() + # + def mkdir(*path) + @shell.check_point + notify("mkdir #{path.join(' ')}") + + perm = nil + if path.last.kind_of?(Integer) + perm = path.pop + end + for dir in path + d = expand_path(dir) + if perm + Dir.mkdir(d, perm) + else + Dir.mkdir(d) + end + File.chmod(d, 0666 & ~@shell.umask) if @shell.umask + end + Void.new(@shell) + end + + # + # CommandProcessor#rmdir(*path) + # path: String + # same as Dir.rmdir() + # + def rmdir(*path) + @shell.check_point + notify("rmdir #{path.join(' ')}") + + for dir in path + Dir.rmdir(expand_path(dir)) + end + Void.new(@shell) + end + + # + # CommandProcessor#system(command, *opts) + # command: String + # opts: String + # return: SystemCommand + # Same as system() function + # example: + # print sh.system("ls", "-l") + # sh.system("ls", "-l") | sh.head > STDOUT + # + def system(command, *opts) + if opts.empty? + if command =~ /\*|\?|\{|\}|\[|\]|<|>|\(|\)|~|&|\||\\|\$|;|'|`|"|\n/ + return SystemCommand.new(@shell, find_system_command("sh"), "-c", command) + else + command, *opts = command.split(/\s+/) + end + end + SystemCommand.new(@shell, find_system_command(command), *opts) + end + + # + # ProcessCommand#rehash + # clear command hash table. 
+ # + def rehash + @system_commands = {} + end + + # + # ProcessCommand#transact + # + def check_point + @shell.process_controller.wait_all_jobs_execution + end + alias finish_all_jobs check_point + + def transact(&block) + begin + @shell.instance_eval(&block) + ensure + check_point + end + end + + # + # internal commands + # + def out(dev = STDOUT, &block) + dev.print transact(&block) + end + + def echo(*strings) + Echo.new(@shell, *strings) + end + + def cat(*filenames) + Cat.new(@shell, *filenames) + end + + # def sort(*filenames) + # Sort.new(self, *filenames) + # end + + def glob(pattern) + Glob.new(@shell, pattern) + end + + def append(to, filter) + case to + when String + AppendFile.new(@shell, to, filter) + when IO + AppendIO.new(@shell, to, filter) + else + Shell.Fail Error::CantApplyMethod, "append", to.class + end + end + + def tee(file) + Tee.new(@shell, file) + end + + def concat(*jobs) + Concat.new(@shell, *jobs) + end + + # %pwd, %cwd -> @pwd + def notify(*opts, &block) + Shell.notify(*opts) {|mes| + yield mes if iterator? + + mes.gsub!("%pwd", "#{@cwd}") + mes.gsub!("%cwd", "#{@cwd}") + } + end + + # + # private functions + # + def find_system_command(command) + return command if /^\// =~ command + case path = @system_commands[command] + when String + if exists?(path) + return path + else + Shell.Fail Error::CommandNotFound, command + end + when false + Shell.Fail Error::CommandNotFound, command + end + + for p in @shell.system_path + path = join(p, command) + if FileTest.exist?(path) + @system_commands[command] = path + return path + end + end + @system_commands[command] = false + Shell.Fail Error::CommandNotFound, command + end + + # + # CommandProcessor.def_system_command(command, path) + # command: String + # path: String + # define 'command()' method as method. 
+ # + def self.def_system_command(command, path = command) + begin + eval((d = %Q[def #{command}(*opts) + SystemCommand.new(@shell, '#{path}', *opts) + end]), nil, __FILE__, __LINE__ - 1) + rescue SyntaxError + Shell.notify "warn: Can't define #{command} path: #{path}." + end + Shell.notify "Define #{command} path: #{path}.", Shell.debug? + Shell.notify("Definition of #{command}: ", d, + Shell.debug.kind_of?(Integer) && Shell.debug > 1) + end + + def self.undef_system_command(command) + command = command.id2name if command.kind_of?(Symbol) + remove_method(command) + Shell.module_eval{remove_method(command)} + Filter.module_eval{remove_method(command)} + self + end + + # define command alias + # ex) + # def_alias_command("ls_c", "ls", "-C", "-F") + # def_alias_command("ls_c", "ls"){|*opts| ["-C", "-F", *opts]} + # + @alias_map = {} + def self.alias_map + @alias_map + end + def self.alias_command(ali, command, *opts, &block) + ali = ali.id2name if ali.kind_of?(Symbol) + command = command.id2name if command.kind_of?(Symbol) + begin + if iterator? + @alias_map[ali.intern] = proc + + eval((d = %Q[def #{ali}(*opts) + @shell.__send__(:#{command}, + *(CommandProcessor.alias_map[:#{ali}].call *opts)) + end]), nil, __FILE__, __LINE__ - 1) + + else + args = opts.collect{|opt| '"' + opt + '"'}.join(",") + eval((d = %Q[def #{ali}(*opts) + @shell.__send__(:#{command}, #{args}, *opts) + end]), nil, __FILE__, __LINE__ - 1) + end + rescue SyntaxError + Shell.notify "warn: Can't alias #{ali} command: #{command}." + Shell.notify("Definition of #{ali}: ", d) + raise + end + Shell.notify "Define #{ali} command: #{command}.", Shell.debug? 
+ Shell.notify("Definition of #{ali}: ", d, + Shell.debug.kind_of?(Integer) && Shell.debug > 1) + self + end + + def self.unalias_command(ali) + ali = ali.id2name if ali.kind_of?(Symbol) + @alias_map.delete ali.intern + undef_system_command(ali) + end + + # + # CommandProcessor.def_builtin_commands(delegation_class, command_specs) + # delegation_class: Class or Module + # command_specs: [[command_name, [argument,...]],...] + # command_name: String + # arguments: String + # FILENAME?? -> expand_path(filename??) + # *FILENAME?? -> filename??.collect{|f|expand_path(f)}.join(", ") + # define command_name(argument,...) as + # delegation_class.command_name(argument,...) + # + def self.def_builtin_commands(delegation_class, command_specs) + for meth, args in command_specs + arg_str = args.collect{|arg| arg.downcase}.join(", ") + call_arg_str = args.collect{ + |arg| + case arg + when /^(FILENAME.*)$/ + format("expand_path(%s)", $1.downcase) + when /^(\*FILENAME.*)$/ + # \*FILENAME* -> filenames.collect{|fn| expand_path(fn)}.join(", ") + $1.downcase + '.collect{|fn| expand_path(fn)}' + else + arg + end + }.join(", ") + d = %Q[def #{meth}(#{arg_str}) + #{delegation_class}.#{meth}(#{call_arg_str}) + end] + Shell.notify "Define #{meth}(#{arg_str})", Shell.debug? + Shell.notify("Definition of #{meth}: ", d, + Shell.debug.kind_of?(Integer) && Shell.debug > 1) + eval d + end + end + + # + # CommandProcessor.install_system_commands(pre) + # pre: String - command name prefix + # defines every command which belongs in default_system_path via + # CommandProcessor.command(). It doesn't define already defined + # methods twice. By default, "pre_" is prefixes to each method + # name. Characters that may not be used in a method name are + # all converted to '_'. Definition errors are just ignored. 
+ # + def self.install_system_commands(pre = "sys_") + defined_meth = {} + for m in Shell.methods + defined_meth[m] = true + end + sh = Shell.new + for path in Shell.default_system_path + next unless sh.directory? path + sh.cd path + sh.foreach do + |cn| + if !defined_meth[pre + cn] && sh.file?(cn) && sh.executable?(cn) + command = (pre + cn).gsub(/\W/, "_").sub(/^([0-9])/, '_\1') + begin + def_system_command(command, sh.expand_path(cn)) + rescue + Shell.notify "warn: Can't define #{command} path: #{cn}" + end + defined_meth[command] = command + end + end + end + end + + #---------------------------------------------------------------------- + # + # class initializing methods - + # + #---------------------------------------------------------------------- + def self.add_delegate_command_to_shell(id) + id = id.intern if id.kind_of?(String) + name = id.id2name + if Shell.method_defined?(id) + Shell.notify "warn: override definnition of Shell##{name}." + Shell.notify "warn: alias Shell##{name} to Shell##{name}_org.\n" + Shell.module_eval "alias #{name}_org #{name}" + end + Shell.notify "method added: Shell##{name}.", Shell.debug? + Shell.module_eval(%Q[def #{name}(*args, &block) + begin + @command_processor.__send__(:#{name}, *args, &block) + rescue Exception + $@.delete_if{|s| /:in `__getobj__'$/ =~ s} #` + $@.delete_if{|s| /^\\(eval\\):/ =~ s} + raise + end + end], __FILE__, __LINE__) + + if Shell::Filter.method_defined?(id) + Shell.notify "warn: override definnition of Shell::Filter##{name}." + Shell.notify "warn: alias Shell##{name} to Shell::Filter##{name}_org." + Filter.module_eval "alias #{name}_org #{name}" + end + Shell.notify "method added: Shell::Filter##{name}.", Shell.debug? 
+ Filter.module_eval(%Q[def #{name}(*args, &block) + begin + self | @shell.__send__(:#{name}, *args, &block) + rescue Exception + $@.delete_if{|s| /:in `__getobj__'$/ =~ s} #` + $@.delete_if{|s| /^\\(eval\\):/ =~ s} + raise + end + end], __FILE__, __LINE__) + end + + # + # define default builtin commands + # + def self.install_builtin_commands + # method related File. + # (exclude open/foreach/unlink) + normal_delegation_file_methods = [ + ["atime", ["FILENAME"]], + ["basename", ["fn", "*opts"]], + ["chmod", ["mode", "*FILENAMES"]], + ["chown", ["owner", "group", "*FILENAME"]], + ["ctime", ["FILENAMES"]], + ["delete", ["*FILENAMES"]], + ["dirname", ["FILENAME"]], + ["ftype", ["FILENAME"]], + ["join", ["*items"]], + ["link", ["FILENAME_O", "FILENAME_N"]], + ["lstat", ["FILENAME"]], + ["mtime", ["FILENAME"]], + ["readlink", ["FILENAME"]], + ["rename", ["FILENAME_FROM", "FILENAME_TO"]], + # ["size", ["FILENAME"]], + ["split", ["pathname"]], + ["stat", ["FILENAME"]], + ["symlink", ["FILENAME_O", "FILENAME_N"]], + ["truncate", ["FILENAME", "length"]], + ["utime", ["atime", "mtime", "*FILENAMES"]]] + + def_builtin_commands(File, normal_delegation_file_methods) + alias_method :rm, :delete + + # method related FileTest + def_builtin_commands(FileTest, + FileTest.singleton_methods(false).collect{|m| [m, ["FILENAME"]]}) + + end + + end +end diff --git a/lib/shell/error.rb b/lib/shell/error.rb new file mode 100644 index 0000000..a4a6c1a --- /dev/null +++ b/lib/shell/error.rb @@ -0,0 +1,25 @@ +# +# shell/error.rb - +# $Release Version: 0.7 $ +# $Revision: 14912 $ +# by Keiju ISHITSUKA(keiju@ruby-lang.org) +# +# -- +# +# +# + +require "e2mmap" + +class Shell + module Error + extend Exception2MessageMapper + def_e2message TypeError, "wrong argument type %s (expected %s)" + + def_exception :DirStackEmpty, "Directory stack empty." + def_exception :CantDefine, "Can't define method(%s, %s)." + def_exception :CantApplyMethod, "This method(%s) does not apply to this type(%s)." 
+ def_exception :CommandNotFound, "Command not found(%s)." + end +end + diff --git a/lib/shell/filter.rb b/lib/shell/filter.rb new file mode 100644 index 0000000..63f5777 --- /dev/null +++ b/lib/shell/filter.rb @@ -0,0 +1,109 @@ +# +# shell/filter.rb - +# $Release Version: 0.7 $ +# $Revision: 14912 $ +# by Keiju ISHITSUKA(keiju@ruby-lang.org) +# +# -- +# +# +# + +class Shell + # + # Filter + # A method to require + # each() + # + class Filter + include Enumerable + + def initialize(sh) + @shell = sh # parent shell + @input = nil # input filter + end + + attr_reader :input + + def input=(filter) + @input = filter + end + + def each(rs = nil) + rs = @shell.record_separator unless rs + if @input + @input.each(rs){|l| yield l} + end + end + + def < (src) + case src + when String + cat = Cat.new(@shell, src) + cat | self + when IO + self.input = src + self + else + Shell.Fail Error::CantApplyMethod, "<", to.class + end + end + + def > (to) + case to + when String + dst = @shell.open(to, "w") + begin + each(){|l| dst << l} + ensure + dst.close + end + when IO + each(){|l| to << l} + else + Shell.Fail Error::CantApplyMethod, ">", to.class + end + self + end + + def >> (to) + begin + Shell.cd(@shell.pwd).append(to, self) + rescue CantApplyMethod + Shell.Fail Error::CantApplyMethod, ">>", to.class + end + end + + def | (filter) + filter.input = self + if active? 
+ @shell.process_controller.start_job filter + end + filter + end + + def + (filter) + Join.new(@shell, self, filter) + end + + def to_a + ary = [] + each(){|l| ary.push l} + ary + end + + def to_s + str = "" + each(){|l| str.concat l} + str + end + + def inspect + if @shell.debug.kind_of?(Integer) && @shell.debug > 2 + super + else + to_s + end + end + end +end diff --git a/lib/shell/process-controller.rb b/lib/shell/process-controller.rb new file mode 100644 index 0000000..1ae60cc --- /dev/null +++ b/lib/shell/process-controller.rb @@ -0,0 +1,319 @@ +# +# shell/process-controller.rb - +# $Release Version: 0.7 $ +# $Revision: 20880 $ +# by Keiju ISHITSUKA(keiju@ruby-lang.org) +# +# -- +# +# +# +require "forwardable" + +require "thread" +require "sync" + +class Shell + class ProcessController + + @ProcessControllers = {} + @ProcessControllersMonitor = Mutex.new + @ProcessControllersCV = ConditionVariable.new + + @BlockOutputMonitor = Mutex.new + @BlockOutputCV = ConditionVariable.new + + class< +# +# Contact: +# - Akinori MUSHA (current maintainer) +# +module Shellwords + # + # Splits a string into an array of tokens in the same way the UNIX + # Bourne shell does. + # + # argv = Shellwords.split('here are "two words"') + # argv #=> ["here", "are", "two words"] + # + # +String#shellsplit+ is a shorthand for this function. 
+ # + # argv = 'here are "two words"'.shellsplit + # argv #=> ["here", "are", "two words"] + # + def shellsplit(line) + words = [] + field = '' + line.scan(/\G\s*(?>([^\s\\\'\"]+)|'([^\']*)'|"((?:[^\"\\]|\\.)*)"|(\\.?)|(\S))(\s|\z)?/m) do + |word, sq, dq, esc, garbage, sep| + raise ArgumentError, "Unmatched double quote: #{line.inspect}" if garbage + field << (word || sq || (dq || esc).gsub(/\\(?=.)/, '')) + if sep + words << field + field = '' + end + end + words + end + + alias shellwords shellsplit + + module_function :shellsplit, :shellwords + + class << self + alias split shellsplit + end + + # + # Escapes a string so that it can be safely used in a Bourne shell + # command line. + # + # Note that a resulted string should be used unquoted and is not + # intended for use in double quotes nor in single quotes. + # + # open("| grep #{Shellwords.escape(pattern)} file") { |pipe| + # # ... + # } + # + # +String#shellescape+ is a shorthand for this function. + # + # open("| grep #{pattern.shellescape} file") { |pipe| + # # ... + # } + # + def shellescape(str) + # An empty argument will be skipped, so return empty quotes. + return "''" if str.empty? + + str = str.dup + + # Process as a single byte sequence because not all shell + # implementations are multibyte aware. + str.gsub!(/([^A-Za-z0-9_\-.,:\/@\n])/n, "\\\\\\1") + + # A LF cannot be escaped with a backslash because a backslash + LF + # combo is regarded as line continuation and simply ignored. + str.gsub!(/\n/, "'\n'") + + return str + end + + module_function :shellescape + + class << self + alias escape shellescape + end + + # + # Builds a command line string from an argument list +array+ joining + # all elements escaped for Bourne shell and separated by a space. + # + # open('|' + Shellwords.join(['grep', pattern, *files])) { |pipe| + # # ... + # } + # + # +Array#shelljoin+ is a shorthand for this function. + # + # open('|' + ['grep', pattern, *files].shelljoin) { |pipe| + # # ... 
+ # } + # + def shelljoin(array) + array.map { |arg| shellescape(arg) }.join(' ') + end + + module_function :shelljoin + + class << self + alias join shelljoin + end +end + +class String + # + # call-seq: + # str.shellsplit => array + # + # Splits +str+ into an array of tokens in the same way the UNIX + # Bourne shell does. See +Shellwords::shellsplit+ for details. + # + def shellsplit + Shellwords.split(self) + end + + # + # call-seq: + # str.shellescape => string + # + # Escapes +str+ so that it can be safely used in a Bourne shell + # command line. See +Shellwords::shellescape+ for details. + # + def shellescape + Shellwords.escape(self) + end +end + +class Array + # + # call-seq: + # array.shelljoin => string + # + # Builds a command line string from an argument list +array+ joining + # all elements escaped for Bourne shell and separated by a space. + # See +Shellwords::shelljoin+ for details. + # + def shelljoin + Shellwords.join(self) + end +end diff --git a/lib/singleton.rb b/lib/singleton.rb new file mode 100644 index 0000000..3c81b2d --- /dev/null +++ b/lib/singleton.rb @@ -0,0 +1,313 @@ +# The Singleton module implements the Singleton pattern. +# +# Usage: +# class Klass +# include Singleton +# # ... +# end +# +# * this ensures that only one instance of Klass lets call it +# ``the instance'' can be created. +# +# a,b = Klass.instance, Klass.instance +# a == b # => true +# Klass.new # NoMethodError - new is private ... +# +# * ``The instance'' is created at instantiation time, in other +# words the first call of Klass.instance(), thus +# +# class OtherKlass +# include Singleton +# # ... +# end +# ObjectSpace.each_object(OtherKlass){} # => 0. +# +# * This behavior is preserved under inheritance and cloning. 
+# +# +# +# This is achieved by marking +# * Klass.new and Klass.allocate - as private +# +# Providing (or modifying) the class methods +# * Klass.inherited(sub_klass) and Klass.clone() - +# to ensure that the Singleton pattern is properly +# inherited and cloned. +# +# * Klass.instance() - returning ``the instance''. After a +# successful self modifying (normally the first) call the +# method body is a simple: +# +# def Klass.instance() +# return @singleton__instance__ +# end +# +# * Klass._load(str) - calling Klass.instance() +# +# * Klass._instantiate?() - returning ``the instance'' or +# nil. This hook method puts a second (or nth) thread calling +# Klass.instance() on a waiting loop. The return value +# signifies the successful completion or premature termination +# of the first, or more generally, current "instantiation thread". +# +# +# The instance method of Singleton are +# * clone and dup - raising TypeErrors to prevent cloning or duping +# +# * _dump(depth) - returning the empty string. Marshalling strips +# by default all state information, e.g. instance variables and +# taint state, from ``the instance''. Providing custom _load(str) +# and _dump(depth) hooks allows the (partially) resurrections of +# a previous state of ``the instance''. + +require 'thread' + +module Singleton + # disable build-in copying methods + def clone + raise TypeError, "can't clone instance of singleton #{self.class}" + end + def dup + raise TypeError, "can't dup instance of singleton #{self.class}" + end + + # default marshalling strategy + def _dump(depth = -1) + '' + end + + module SingletonClassMethods + # properly clone the Singleton pattern - did you know + # that duping doesn't copy class methods? 
+ def clone + Singleton.__init__(super) + end + + def _load(str) + instance + end + + private + + # ensure that the Singleton pattern is properly inherited + def inherited(sub_klass) + super + Singleton.__init__(sub_klass) + end + end + + class << Singleton + def __init__(klass) + klass.instance_eval { + @singleton__instance__ = nil + @singleton__mutex__ = Mutex.new + } + def klass.instance + return @singleton__instance__ if @singleton__instance__ + @singleton__mutex__.synchronize { + return @singleton__instance__ if @singleton__instance__ + @singleton__instance__ = new() + } + @singleton__instance__ + end + klass + end + + private + + # extending an object with Singleton is a bad idea + undef_method :extend_object + + def append_features(mod) + # help out people counting on transitive mixins + unless mod.instance_of?(Class) + raise TypeError, "Inclusion of the OO-Singleton module in module #{mod}" + end + super + end + + def included(klass) + super + klass.private_class_method :new, :allocate + klass.extend SingletonClassMethods + Singleton.__init__(klass) + end + end + +end + + +if __FILE__ == $0 + +def num_of_instances(klass) + "#{ObjectSpace.each_object(klass){}} #{klass} instance(s)" +end + +# The basic and most important example. + +class SomeSingletonClass + include Singleton +end +puts "There are #{num_of_instances(SomeSingletonClass)}" + +a = SomeSingletonClass.instance +b = SomeSingletonClass.instance # a and b are same object +puts "basic test is #{a == b}" + +begin + SomeSingletonClass.new +rescue NoMethodError => mes + puts mes +end + + + +puts "\nThreaded example with exception and customized #_instantiate?() hook"; p +Thread.abort_on_exception = false + +class Ups < SomeSingletonClass + def initialize + self.class.__sleep + puts "initialize called by thread ##{Thread.current[:i]}" + end +end + +class << Ups + def _instantiate? 
+ @enter.push Thread.current[:i] + while false.equal?(@singleton__instance__) + @singleton__mutex__.unlock + sleep 0.08 + @singleton__mutex__.lock + end + @leave.push Thread.current[:i] + @singleton__instance__ + end + + def __sleep + sleep(rand(0.08)) + end + + def new + begin + __sleep + raise "boom - thread ##{Thread.current[:i]} failed to create instance" + ensure + # simple flip-flop + class << self + remove_method :new + end + end + end + + def instantiate_all + @enter = [] + @leave = [] + 1.upto(9) {|i| + Thread.new { + begin + Thread.current[:i] = i + __sleep + instance + rescue RuntimeError => mes + puts mes + end + } + } + puts "Before there were #{num_of_instances(self)}" + sleep 3 + puts "Now there is #{num_of_instances(self)}" + puts "#{@enter.join '; '} was the order of threads entering the waiting loop" + puts "#{@leave.join '; '} was the order of threads leaving the waiting loop" + end +end + + +Ups.instantiate_all +# results in message like +# Before there were 0 Ups instance(s) +# boom - thread #6 failed to create instance +# initialize called by thread #3 +# Now there is 1 Ups instance(s) +# 3; 2; 1; 8; 4; 7; 5 was the order of threads entering the waiting loop +# 3; 2; 1; 7; 4; 8; 5 was the order of threads leaving the waiting loop + + +puts "\nLets see if class level cloning really works" +Yup = Ups.clone +def Yup.new + begin + __sleep + raise "boom - thread ##{Thread.current[:i]} failed to create instance" + ensure + # simple flip-flop + class << self + remove_method :new + end + end +end +Yup.instantiate_all + + +puts "\n\n","Customized marshalling" +class A + include Singleton + attr_accessor :persist, :die + def _dump(depth) + # this strips the @die information from the instance + Marshal.dump(@persist,depth) + end +end + +def A._load(str) + instance.persist = Marshal.load(str) + instance +end + +a = A.instance +a.persist = ["persist"] +a.die = "die" +a.taint + +stored_state = Marshal.dump(a) +# change state +a.persist = nil +a.die = nil +b 
= Marshal.load(stored_state) +p a == b # => true +p a.persist # => ["persist"] +p a.die # => nil + + +puts "\n\nSingleton with overridden default #inherited() hook" +class Up +end +def Up.inherited(sub_klass) + puts "#{sub_klass} subclasses #{self}" +end + + +class Middle < Up + include Singleton +end + +class Down < Middle; end + +puts "and basic \"Down test\" is #{Down.instance == Down.instance}\n +Various exceptions" + +begin + module AModule + include Singleton + end +rescue TypeError => mes + puts mes #=> Inclusion of the OO-Singleton module in module AModule +end + +begin + 'aString'.extend Singleton +rescue NoMethodError => mes + puts mes #=> undefined method `extend_object' for Singleton:Module +end + +end diff --git a/lib/sync.rb b/lib/sync.rb new file mode 100644 index 0000000..3853ba2 --- /dev/null +++ b/lib/sync.rb @@ -0,0 +1,307 @@ +# +# sync.rb - 2 phase lock with counter +# $Release Version: 1.0$ +# $Revision: 19280 $ +# by Keiju ISHITSUKA(keiju@ishitsuka.com) +# +# -- +# Sync_m, Synchronizer_m +# Usage: +# obj.extend(Sync_m) +# or +# class Foo +# include Sync_m +# : +# end +# +# Sync_m#sync_mode +# Sync_m#sync_locked?, locked? +# Sync_m#sync_shared?, shared? +# Sync_m#sync_exclusive?, exclusive? +# Sync_m#sync_try_lock, try_lock +# Sync_m#sync_lock, lock +# Sync_m#sync_unlock, unlock +# Sync_m#sync_synchronize, synchronize +# +# Sync, Synchronizer: +# Usage: +# sync = Sync.new +# +# Sync#mode +# Sync#locked? +# Sync#shared? +# Sync#exclusive? +# Sync#try_lock(mode) -- mode = :EX, :SH, :UN +# Sync#lock(mode) -- mode = :EX, :SH, :UN +# Sync#unlock +# Sync#synchronize(mode) {...} +# +# + +unless defined? Thread + raise "Thread not available for this ruby interpreter" +end + +module Sync_m + RCS_ID='-$Header$-' + + # lock mode + UN = :UN + SH = :SH + EX = :EX + + # exceptions + class Err < StandardError + def Err.Fail(*opt) + fail self, sprintf(self::Message, *opt) + end + + class UnknownLocker < Err + Message = "Thread(%s) not locked." 
+ def UnknownLocker.Fail(th) + super(th.inspect) + end + end + + class LockModeFailer < Err + Message = "Unknown lock mode(%s)" + def LockModeFailer.Fail(mode) + if mode.id2name + mode = id2name + end + super(mode) + end + end + end + + def Sync_m.define_aliases(cl) + cl.module_eval %q{ + alias locked? sync_locked? + alias shared? sync_shared? + alias exclusive? sync_exclusive? + alias lock sync_lock + alias unlock sync_unlock + alias try_lock sync_try_lock + alias synchronize sync_synchronize + } + end + + def Sync_m.append_features(cl) + super + # do nothing for Modules + # make aliases for Classes. + define_aliases(cl) unless cl.instance_of?(Module) + self + end + + def Sync_m.extend_object(obj) + super + obj.sync_extend + end + + def sync_extend + unless (defined? locked? and + defined? shared? and + defined? exclusive? and + defined? lock and + defined? unlock and + defined? try_lock and + defined? synchronize) + Sync_m.define_aliases(class< 0 + th, count = sync_upgrade_waiting.shift + sync_sh_locker[th] = count + th.wakeup + wakeup_threads.push th + else + wait = sync_waiting + self.sync_waiting = [] + for th in wait + th.wakeup + wakeup_threads.push th + end + end + end + end + for th in wakeup_threads + th.run + end + self + end + + def sync_synchronize(mode = EX) + sync_lock(mode) + begin + yield + ensure + sync_unlock + end + end + + attr_accessor :sync_mode + + attr_accessor :sync_waiting + attr_accessor :sync_upgrade_waiting + attr_accessor :sync_sh_locker + attr_accessor :sync_ex_locker + attr_accessor :sync_ex_count + + def sync_inspect + sync_iv = instance_variables.select{|iv| /^@sync_/ =~ iv.id2name}.collect{|iv| iv.id2name + '=' + instance_eval(iv.id2name).inspect}.join(",") + print "<#{self.class}.extend Sync_m: #{inspect}, " + end + + private + + def sync_initialize + @sync_mode = UN + @sync_waiting = [] + @sync_upgrade_waiting = [] + @sync_sh_locker = Hash.new + @sync_ex_locker = nil + @sync_ex_count = 0 + + @sync_mutex = Mutex.new + end + + 
def initialize(*args) + super + sync_initialize + end + + def sync_try_lock_sub(m) + case m + when SH + case sync_mode + when UN + self.sync_mode = m + sync_sh_locker[Thread.current] = 1 + ret = true + when SH + count = 0 unless count = sync_sh_locker[Thread.current] + sync_sh_locker[Thread.current] = count + 1 + ret = true + when EX + # in EX mode, lock will upgrade to EX lock + if sync_ex_locker == Thread.current + self.sync_ex_count = sync_ex_count + 1 + ret = true + else + ret = false + end + end + when EX + if sync_mode == UN or + sync_mode == SH && sync_sh_locker.size == 1 && sync_sh_locker.include?(Thread.current) + self.sync_mode = m + self.sync_ex_locker = Thread.current + self.sync_ex_count = 1 + ret = true + elsif sync_mode == EX && sync_ex_locker == Thread.current + self.sync_ex_count = sync_ex_count + 1 + ret = true + else + ret = false + end + else + Err::LockModeFailer.Fail mode + end + return ret + end +end +Synchronizer_m = Sync_m + +class Sync + include Sync_m +end +Synchronizer = Sync diff --git a/lib/tempfile.rb b/lib/tempfile.rb new file mode 100644 index 0000000..4d3a2f0 --- /dev/null +++ b/lib/tempfile.rb @@ -0,0 +1,218 @@ +# +# tempfile - manipulates temporary files +# +# $Id: tempfile.rb 24119 2009-07-15 11:57:41Z yugui $ +# + +require 'delegate' +require 'tmpdir' +require 'thread' + +# A class for managing temporary files. This library is written to be +# thread safe. +class Tempfile < DelegateClass(File) + MAX_TRY = 10 + @@cleanlist = [] + @@lock = Mutex.new + + # Creates a temporary file of mode 0600 in the temporary directory, + # opens it with mode "w+", and returns a Tempfile object which + # represents the created temporary file. A Tempfile object can be + # treated just like a normal File object. + # + # The basename parameter is used to determine the name of a + # temporary file. If an Array is given, the first element is used + # as prefix string and the second as suffix string, respectively. 
+ # Otherwise it is treated as prefix string. + # + # If tmpdir is omitted, the temporary directory is determined by + # Dir::tmpdir provided by 'tmpdir.rb'. + # When $SAFE > 0 and the given tmpdir is tainted, it uses + # /tmp. (Note that ENV values are tainted by default) + def initialize(basename, *rest) + # I wish keyword argument settled soon. + if opts = Hash.try_convert(rest[-1]) + rest.pop + end + tmpdir = rest[0] || Dir::tmpdir + if $SAFE > 0 and tmpdir.tainted? + tmpdir = '/tmp' + end + + lock = tmpname = nil + n = failure = 0 + @@lock.synchronize { + begin + begin + tmpname = File.join(tmpdir, make_tmpname(basename, n)) + lock = tmpname + '.lock' + n += 1 + end while @@cleanlist.include?(tmpname) or + File.exist?(lock) or File.exist?(tmpname) + Dir.mkdir(lock) + rescue + failure += 1 + retry if failure < MAX_TRY + raise "cannot generate tempfile `%s'" % tmpname + end + } + + @data = [tmpname] + @clean_proc = Tempfile.callback(@data) + ObjectSpace.define_finalizer(self, @clean_proc) + + if opts.nil? + opts = [] + else + opts = [opts] + end + @tmpfile = File.open(tmpname, File::RDWR|File::CREAT|File::EXCL, 0600, *opts) + @tmpname = tmpname + @@cleanlist << @tmpname + @data[1] = @tmpfile + @data[2] = @@cleanlist + + super(@tmpfile) + + # Now we have all the File/IO methods defined, you must not + # carelessly put bare puts(), etc. after this. + + Dir.rmdir(lock) + end + + def make_tmpname(basename, n) + case basename + when Array + prefix, suffix = *basename + else + prefix, suffix = basename, '' + end + + t = Time.now.strftime("%Y%m%d") + path = "#{prefix}#{t}-#{$$}-#{rand(0x100000000).to_s(36)}-#{n}#{suffix}" + end + private :make_tmpname + + # Opens or reopens the file with mode "r+". + def open + @tmpfile.close if @tmpfile + @tmpfile = File.open(@tmpname, 'r+') + @data[1] = @tmpfile + __setobj__(@tmpfile) + end + + def _close # :nodoc: + @tmpfile.close if @tmpfile + @tmpfile = nil + @data[1] = nil if @data + end + protected :_close + + #Closes the file. 
If the optional flag is true, unlinks the file + # after closing. + # + # If you don't explicitly unlink the temporary file, the removal + # will be delayed until the object is finalized. + def close(unlink_now=false) + if unlink_now + close! + else + _close + end + end + + # Closes and unlinks the file. + def close! + _close + @clean_proc.call + ObjectSpace.undefine_finalizer(self) + @data = @tmpname = nil + end + + # Unlinks the file. On UNIX-like systems, it is often a good idea + # to unlink a temporary file immediately after creating and opening + # it, because it leaves other programs zero chance to access the + # file. + def unlink + # keep this order for thread safeness + begin + if File.exist?(@tmpname) + closed? or close + File.unlink(@tmpname) + end + @@cleanlist.delete(@tmpname) + @data = @tmpname = nil + ObjectSpace.undefine_finalizer(self) + rescue Errno::EACCES + # may not be able to unlink on Windows; just ignore + end + end + alias delete unlink + + # Returns the full path name of the temporary file. + def path + @tmpname + end + + # Returns the size of the temporary file. As a side effect, the IO + # buffer is flushed before determining the size. + def size + if @tmpfile + @tmpfile.flush + @tmpfile.stat.size + else + 0 + end + end + alias length size + + class << self + def callback(data) # :nodoc: + pid = $$ + Proc.new { + if pid == $$ + path, tmpfile, cleanlist = *data + + print "removing ", path, "..." if $DEBUG + + tmpfile.close if tmpfile + + # keep this order for thread safeness + File.unlink(path) if File.exist?(path) + cleanlist.delete(path) if cleanlist + + print "done\n" if $DEBUG + end + } + end + + # If no block is given, this is a synonym for new(). + # + # If a block is given, it will be passed tempfile as an argument, + # and the tempfile will automatically be closed when the block + # terminates. The call returns the value of the block. + def open(*args) + tempfile = new(*args) + + if block_given? 
+ begin + yield(tempfile) + ensure + tempfile.close + end + else + tempfile + end + end + end +end + +if __FILE__ == $0 +# $DEBUG = true + f = Tempfile.new("foo") + f.print("foo\n") + f.close + f.open + p f.gets # => "foo\n" + f.close! +end diff --git a/lib/test/unit.rb b/lib/test/unit.rb new file mode 100644 index 0000000..ec248c3 --- /dev/null +++ b/lib/test/unit.rb @@ -0,0 +1,66 @@ +# test/unit compatibility layer using minitest. + +require 'minitest/unit' +require 'test/unit/assertions' +require 'test/unit/testcase' + +module Test + module Unit + TEST_UNIT_IMPLEMENTATION = 'test/unit compatibility layer using minitest' + + def self.setup_argv(original_argv=ARGV) + minitest_argv = [] + files = [] + reject = [] + original_argv = original_argv.dup + while arg = original_argv.shift + case arg + when '-v' + minitest_argv << '-v' + when '-n', '--name' + minitest_argv << arg + minitest_argv << original_argv.shift + when '-x' + reject << original_argv.shift + else + files << arg + end + end + + if block_given? + files = yield files + end + + files.map! {|f| + f = f.gsub(Regexp.compile(Regexp.quote(File::ALT_SEPARATOR)), File::SEPARATOR) if File::ALT_SEPARATOR + if File.directory? f + Dir["#{f}/**/test_*.rb"] + elsif File.file? f + f + else + raise ArgumentError, "file not found: #{f}" + end + } + files.flatten! + + reject_pat = Regexp.union(reject.map {|r| /#{r}/ }) + files.reject! {|f| reject_pat =~ f } + + files.each {|f| + d = File.dirname(File.expand_path(f)) + unless $:.include? 
d + $: << d + end + begin + require f + rescue LoadError + puts "#{f}: #{$!}" + end + } + + ARGV.replace minitest_argv + end + end +end + +MiniTest::Unit.autorun diff --git a/lib/test/unit/assertions.rb b/lib/test/unit/assertions.rb new file mode 100644 index 0000000..ac3ecf9 --- /dev/null +++ b/lib/test/unit/assertions.rb @@ -0,0 +1,122 @@ +require 'minitest/unit' +require 'pp' + +module Test + module Unit + module Assertions + include MiniTest::Assertions + + def mu_pp(obj) + obj.pretty_inspect.chomp + end + + def assert_raise(*args, &b) + assert_raises(*args, &b) + end + + def assert_nothing_raised(*args) + self._assertions += 1 + if Module === args.last + msg = nil + else + msg = args.pop + end + begin + line = __LINE__; yield + rescue Exception => e + bt = e.backtrace + as = e.instance_of?(MiniTest::Assertion) + if as + ans = /\A#{Regexp.quote(__FILE__)}:#{line}:in /o + bt.reject! {|line| ans =~ line} + end + if ((args.empty? && !as) || + args.any? {|a| a.instance_of?(Module) ? e.is_a?(a) : e.class == a }) + msg = message(msg) { "Exception raised:\n<#{mu_pp(e)}>" } + raise MiniTest::Assertion, msg.call, bt + else + raise + end + end + nil + end + + def assert_nothing_thrown(msg=nil) + begin + yield + rescue ArgumentError => error + raise error if /\Auncaught throw (.+)\z/m !~ error.message + msg = message(msg) { "<#{$1}> was thrown when nothing was expected" } + flunk(msg) + end + assert(true, "Expected nothing to be thrown") + end + + def assert_equal(exp, act, msg = nil) + msg = message(msg) { + exp_str = mu_pp(exp) + act_str = mu_pp(act) + exp_comment = '' + act_comment = '' + if exp_str == act_str + if (exp.is_a?(String) && act.is_a?(String)) || + (exp.is_a?(Regexp) && act.is_a?(Regexp)) + exp_comment = " (#{exp.encoding})" + act_comment = " (#{act.encoding})" + elsif exp.is_a?(Float) && act.is_a?(Float) + exp_str = "%\#.#{Float::DIG+2}g" % exp + act_str = "%\#.#{Float::DIG+2}g" % act + elsif exp.is_a?(Time) && act.is_a?(Time) + exp_comment = " 
(nsec=#{exp.nsec})" + act_comment = " (nsec=#{act.nsec})" + end + elsif !Encoding.compatible?(exp_str, act_str) + if exp.is_a?(String) && act.is_a?(String) + exp_str = exp.dump + act_str = act.dump + exp_comment = " (#{exp.encoding})" + act_comment = " (#{act.encoding})" + else + exp_str = exp_str.dump + act_str = act_str.dump + end + end + "<#{exp_str}>#{exp_comment} expected but was\n<#{act_str}>#{act_comment}" + } + assert(exp == act, msg) + end + + def assert_not_nil(exp, msg=nil) + msg = message(msg) { "<#{mu_pp(exp)}> expected to not be nil" } + assert(!exp.nil?, msg) + end + + def assert_not_equal(exp, act, msg=nil) + msg = message(msg) { "<#{mu_pp(exp)}> expected to be != to\n<#{mu_pp(act)}>" } + assert(exp != act, msg) + end + + def assert_no_match(regexp, string, msg=nil) + assert_instance_of(Regexp, regexp, "The first argument to assert_no_match should be a Regexp.") + self._assertions -= 1 + msg = message(msg) { "<#{mu_pp(regexp)}> expected to not match\n<#{mu_pp(string)}>" } + assert(regexp !~ string, msg) + end + + def assert_not_same(expected, actual, message="") + msg = message(msg) { build_message(message, < +with id expected to not be equal\\? to + +with id . 
+EOT + assert(!actual.equal?(expected), msg) + end + + def build_message(head, template=nil, *arguments) + template &&= template.chomp + template.gsub(/\?/) { mu_pp(arguments.shift) } + end + end + end +end diff --git a/lib/test/unit/testcase.rb b/lib/test/unit/testcase.rb new file mode 100644 index 0000000..89aa0f3 --- /dev/null +++ b/lib/test/unit/testcase.rb @@ -0,0 +1,12 @@ +require 'test/unit/assertions' + +module Test + module Unit + class TestCase < MiniTest::Unit::TestCase + include Assertions + def self.test_order + :sorted + end + end + end +end diff --git a/lib/thread.rb b/lib/thread.rb new file mode 100644 index 0000000..e5585c3 --- /dev/null +++ b/lib/thread.rb @@ -0,0 +1,367 @@ +# +# thread.rb - thread support classes +# by Yukihiro Matsumoto +# +# Copyright (C) 2001 Yukihiro Matsumoto +# Copyright (C) 2000 Network Applied Communication Laboratory, Inc. +# Copyright (C) 2000 Information-technology Promotion Agency, Japan +# + +unless defined? Thread + raise "Thread not available for this ruby interpreter" +end + +unless defined? ThreadError + class ThreadError < StandardError + end +end + +if $DEBUG + Thread.abort_on_exception = true +end + +# +# ConditionVariable objects augment class Mutex. Using condition variables, +# it is possible to suspend while in the middle of a critical section until a +# resource becomes available. +# +# Example: +# +# require 'thread' +# +# mutex = Mutex.new +# resource = ConditionVariable.new +# +# a = Thread.new { +# mutex.synchronize { +# # Thread 'a' now needs the resource +# resource.wait(mutex) +# # 'a' can now have the resource +# } +# } +# +# b = Thread.new { +# mutex.synchronize { +# # Thread 'b' has finished using the resource +# resource.signal +# } +# } +# +class ConditionVariable + # + # Creates a new ConditionVariable + # + def initialize + @waiters = [] + @waiters_mutex = Mutex.new + end + + # + # Releases the lock held in +mutex+ and waits; reacquires the lock on wakeup. 
+ # + def wait(mutex) + begin + # TODO: mutex should not be used + @waiters_mutex.synchronize do + @waiters.push(Thread.current) + end + mutex.sleep + end + end + + # + # Wakes up the first thread in line waiting for this lock. + # + def signal + begin + t = @waiters_mutex.synchronize { @waiters.shift } + t.run if t + rescue ThreadError + retry + end + end + + # + # Wakes up all threads waiting for this lock. + # + def broadcast + # TODO: incomplete + waiters0 = nil + @waiters_mutex.synchronize do + waiters0 = @waiters.dup + @waiters.clear + end + for t in waiters0 + begin + t.run + rescue ThreadError + end + end + end +end + +# +# This class provides a way to synchronize communication between threads. +# +# Example: +# +# require 'thread' +# +# queue = Queue.new +# +# producer = Thread.new do +# 5.times do |i| +# sleep rand(i) # simulate expense +# queue << i +# puts "#{i} produced" +# end +# end +# +# consumer = Thread.new do +# 5.times do |i| +# value = queue.pop +# sleep rand(i/2) # simulate expense +# puts "consumed #{value}" +# end +# end +# +# consumer.join +# +class Queue + # + # Creates a new queue. + # + def initialize + @que = [] + @waiting = [] + @que.taint # enable tainted communication + @waiting.taint + self.taint + @mutex = Mutex.new + end + + # + # Pushes +obj+ to the queue. + # + def push(obj) + t = nil + @mutex.synchronize{ + @que.push obj + begin + t = @waiting.shift + t.wakeup if t + rescue ThreadError + retry + end + } + begin + t.run if t + rescue ThreadError + end + end + + # + # Alias of push + # + alias << push + + # + # Alias of push + # + alias enq push + + # + # Retrieves data from the queue. If the queue is empty, the calling thread is + # suspended until data is pushed onto the queue. If +non_block+ is true, the + # thread isn't suspended, and an exception is raised. + # + def pop(non_block=false) + while true + @mutex.synchronize{ + if @que.empty? 
+ raise ThreadError, "queue empty" if non_block + @waiting.push Thread.current + @mutex.sleep + else + return @que.shift + end + } + end + end + + # + # Alias of pop + # + alias shift pop + + # + # Alias of pop + # + alias deq pop + + # + # Returns +true+ if the queue is empty. + # + def empty? + @que.empty? + end + + # + # Removes all objects from the queue. + # + def clear + @que.clear + end + + # + # Returns the length of the queue. + # + def length + @que.length + end + + # + # Alias of length. + # + alias size length + + # + # Returns the number of threads waiting on the queue. + # + def num_waiting + @waiting.size + end +end + +# +# This class represents queues of specified size capacity. The push operation +# may be blocked if the capacity is full. +# +# See Queue for an example of how a SizedQueue works. +# +class SizedQueue < Queue + # + # Creates a fixed-length queue with a maximum size of +max+. + # + def initialize(max) + raise ArgumentError, "queue size must be positive" unless max > 0 + @max = max + @queue_wait = [] + @queue_wait.taint # enable tainted comunication + super() + end + + # + # Returns the maximum size of the queue. + # + def max + @max + end + + # + # Sets the maximum size of the queue. + # + def max=(max) + diff = nil + @mutex.synchronize { + if max <= @max + @max = max + else + diff = max - @max + @max = max + end + } + if diff + diff.times do + begin + t = @queue_wait.shift + t.run if t + rescue ThreadError + retry + end + end + end + max + end + + # + # Pushes +obj+ to the queue. If there is no space left in the queue, waits + # until space becomes available. 
+ # + def push(obj) + t = nil + @mutex.synchronize{ + while true + break if @que.length < @max + @queue_wait.push Thread.current + @mutex.sleep + end + + @que.push obj + begin + t = @waiting.shift + t.wakeup if t + rescue ThreadError + retry + end + } + + begin + t.run if t + rescue ThreadError + end + end + + # + # Alias of push + # + alias << push + + # + # Alias of push + # + alias enq push + + # + # Retrieves data from the queue and runs a waiting thread, if any. + # + def pop(*args) + retval = super + t = nil + @mutex.synchronize { + if @que.length < @max + begin + t = @queue_wait.shift + t.wakeup if t + rescue ThreadError + retry + end + end + } + begin + t.run if t + rescue ThreadError + end + retval + end + + # + # Alias of pop + # + alias shift pop + + # + # Alias of pop + # + alias deq pop + + # + # Returns the number of threads waiting on the queue: the size of the + # consumer wait list plus the size of the producer wait list. + # + def num_waiting + @waiting.size + @queue_wait.size + end +end + +# Documentation comments: +# - How do you make RDoc inherit documentation from superclass? diff --git a/lib/thwait.rb b/lib/thwait.rb new file mode 100644 index 0000000..029b259 --- /dev/null +++ b/lib/thwait.rb @@ -0,0 +1,168 @@ +# +# thwait.rb - thread synchronization class +# $Release Version: 0.9 $ +# $Revision: 1.3 $ +# by Keiju ISHITSUKA(Nihpon Rational Software Co.,Ltd.) +# +# -- +# feature: +# provides synchronization for multiple threads. +# +# class methods: +# * ThreadsWait.all_waits(thread1,...) +# waits until all of specified threads are terminated. +# if a block is supplied for the method, evaluates it for +# each thread termination. +# * th = ThreadsWait.new(thread1,...) +# creates synchronization object, specifying thread(s) to wait. +# +# methods: +# * th.threads +# list threads to be synchronized +# * th.empty? +# is there any thread to be synchronized. +# * th.finished? +# is there already terminated thread. +# * th.join(thread1,...) +# wait for specified thread(s). +# * th.join_nowait(thread1,...)
+# specifies thread(s) to wait. non-blocking. +# * th.next_wait +# waits until any of specified threads is terminated. +# * th.all_waits +# waits until all of specified threads are terminated. +# if a block is supplied for the method, evaluates it for +# each thread termination. +# + +require "thread.rb" +require "e2mmap.rb" + +# +# This class watches for termination of multiple threads. Basic functionality +# (wait until specified threads have terminated) can be accessed through the +# class method ThreadsWait::all_waits. Finer control can be gained using +# instance methods. +# +# Example: +# +# ThreadsWait.all_waits(thr1, thr2, ...) do |t| +# STDERR.puts "Thread #{t} has terminated." +# end +# +class ThreadsWait + RCS_ID='-$Id: thwait.rb,v 1.3 1998/06/26 03:19:34 keiju Exp keiju $-' + + extend Exception2MessageMapper + def_exception("ErrNoWaitingThread", "No threads for waiting.") + def_exception("ErrNoFinishedThread", "No finished threads.") + + # + # Waits until all specified threads have terminated. If a block is provided, + # it is executed for each thread termination. + # + def ThreadsWait.all_waits(*threads) # :yield: thread + tw = ThreadsWait.new(*threads) + if block_given? + tw.all_waits do |th| + yield th + end + else + tw.all_waits + end + end + + # + # Creates a ThreadsWait object, specifying the threads to wait on. + # Non-blocking. + # + def initialize(*threads) + @threads = [] + @wait_queue = Queue.new + join_nowait(*threads) unless threads.empty? + end + + # Returns the array of threads in the wait queue. + attr :threads + + # + # Returns +true+ if there are no threads to be synchronized. + # + def empty? + @threads.empty? + end + + # + # Returns +true+ if any thread has terminated. + # + def finished? + !@wait_queue.empty? + end + + # + # Adds the specified threads to the watch list, then waits until any watched + # thread has terminated and returns that thread (see #next_wait). + # + def join(*threads) + join_nowait(*threads) + next_wait + end + + # + # Specifies the threads that this object will wait for, but does not actually + # wait.
+ # + def join_nowait(*threads) + threads.flatten! + @threads.concat threads + for th in threads + Thread.start(th) do |t| + begin + t.join + ensure + @wait_queue.push t + end + end + end + end + + # + # Waits until any of the specified threads has terminated, and returns the one + # that does. + # + # If there is no thread to wait, raises +ErrNoWaitingThread+. If +nonblock+ + # is true, and there is no terminated thread, raises +ErrNoFinishedThread+. + # + def next_wait(nonblock = nil) + ThreadsWait.fail ErrNoWaitingThread if @threads.empty? + begin + @threads.delete(th = @wait_queue.pop(nonblock)) + th + rescue ThreadError + ThreadsWait.fail ErrNoFinishedThread + end + end + + # + # Waits until all of the specified threads are terminated. If a block is + # supplied for the method, it is executed for each thread termination. + # + # Raises exceptions in the same manner as +next_wait+. + # + def all_waits + until @threads.empty? + th = next_wait + yield th if block_given? + end + end +end + +ThWait = ThreadsWait + + +# Documentation comments: +# - Source of documentation is evenly split between Nutshell, existing +# comments, and my own rephrasing. +# - I'm not particularly confident that the comments are all exactly correct. +# - The history, etc., up the top appears in the RDoc output. Perhaps it would +# be better to direct that not to appear, and put something else there +# instead. diff --git a/lib/time.rb b/lib/time.rb new file mode 100644 index 0000000..3555571 --- /dev/null +++ b/lib/time.rb @@ -0,0 +1,869 @@ + +# +# == Introduction +# +# This library extends the Time class: +# * conversion between date string and time object. 
+# * date-time defined by RFC 2822 +# * HTTP-date defined by RFC 2616 +# * dateTime defined by XML Schema Part 2: Datatypes (ISO 8601) +# * various formats handled by Date._parse (string to time only) +# +# == Design Issues +# +# === Specialized interface +# +# This library provides methods dedicated to special purposes: +# * RFC 2822, RFC 2616 and XML Schema. +# * They make everyday use easier. +# +# === Doesn't depend on strftime +# +# This library doesn't use +strftime+. Especially #rfc2822 doesn't depend +# on +strftime+ because: +# +# * %a and %b are locale sensitive +# +# Since they are locale sensitive, they may be replaced with +# invalid weekday/month names in some locales. +# Since ruby-1.6 doesn't invoke setlocale by default, +# the problem doesn't arise until some external library invokes setlocale. +# Ruby/GTK is an example of such a library. +# +# * %z is not portable +# +# %z is required to generate zone in date-time of RFC 2822 +# but it is not portable. +# +# == Revision Information +# +# $Id$ +# + +require 'date/format' + +# +# Implements the extensions to the Time class that are described in the +# documentation for the time.rb library. +# +class Time + class << Time + + ZoneOffset = { + 'UTC' => 0, + # ISO 8601 + 'Z' => 0, + # RFC 822 + 'UT' => 0, 'GMT' => 0, + 'EST' => -5, 'EDT' => -4, + 'CST' => -6, 'CDT' => -5, + 'MST' => -7, 'MDT' => -6, + 'PST' => -8, 'PDT' => -7, + # Following definition of military zones is original one. + # See RFC 1123 and RFC 2822 for the error in RFC 822. + 'A' => +1, 'B' => +2, 'C' => +3, 'D' => +4, 'E' => +5, 'F' => +6, + 'G' => +7, 'H' => +8, 'I' => +9, 'K' => +10, 'L' => +11, 'M' => +12, + 'N' => -1, 'O' => -2, 'P' => -3, 'Q' => -4, 'R' => -5, 'S' => -6, + 'T' => -7, 'U' => -8, 'V' => -9, 'W' => -10, 'X' => -11, 'Y' => -12, + } + def zone_offset(zone, year=self.now.year) + off = nil + zone = zone.upcase + if /\A([+-])(\d\d):?(\d\d)\z/ =~ zone + off = ($1 == '-' ?
-1 : 1) * ($2.to_i * 60 + $3.to_i) * 60 + elsif /\A[+-]\d\d\z/ =~ zone + off = zone.to_i * 3600 + elsif ZoneOffset.include?(zone) + off = ZoneOffset[zone] * 3600 + elsif ((t = self.local(year, 1, 1)).zone.upcase == zone rescue false) + off = t.utc_offset + elsif ((t = self.local(year, 7, 1)).zone.upcase == zone rescue false) + off = t.utc_offset + end + off + end + + def zone_utc?(zone) + # * +0000 + # In RFC 2822, +0000 indicates a time zone at Universal Time. + # Europe/London is "a time zone at Universal Time" in Winter. + # Europe/Lisbon is "a time zone at Universal Time" in Winter. + # Atlantic/Reykjavik is "a time zone at Universal Time". + # Africa/Dakar is "a time zone at Universal Time". + # So +0000 is a local time such as Europe/London, etc. + # * GMT + # GMT is used as a time zone abbreviation in Europe/London, + # Africa/Dakar, etc. + # So it is a local time. + # + # * -0000, -00:00 + # In RFC 2822, -0000 means the date-time contains no information about the + # local time zone. + # In RFC 3339, -00:00 is used when the time in UTC is known, + # but the offset to local time is unknown. + # They are not appropriate for a specific time zone such as + # Europe/London because they are time zone neutral. + # So -00:00 and -0000 are treated as UTC. + if /\A(?:-00:00|-0000|-00|UTC|Z|UT)\z/i =~ zone + true + else + false + end + end + private :zone_utc?
+ + LeapYearMonthDays = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + CommonYearMonthDays = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + def month_days(y, m) + if ((y % 4 == 0) && (y % 100 != 0)) || (y % 400 == 0) + LeapYearMonthDays[m-1] + else + CommonYearMonthDays[m-1] + end + end + private :month_days + + def apply_offset(year, mon, day, hour, min, sec, off) + if off < 0 + off = -off + off, o = off.divmod(60) + if o != 0 then sec += o; o, sec = sec.divmod(60); off += o end + off, o = off.divmod(60) + if o != 0 then min += o; o, min = min.divmod(60); off += o end + off, o = off.divmod(24) + if o != 0 then hour += o; o, hour = hour.divmod(24); off += o end + if off != 0 + day += off + if month_days(year, mon) < day + mon += 1 + if 12 < mon + mon = 1 + year += 1 + end + day = 1 + end + end + elsif 0 < off + off, o = off.divmod(60) + if o != 0 then sec -= o; o, sec = sec.divmod(60); off -= o end + off, o = off.divmod(60) + if o != 0 then min -= o; o, min = min.divmod(60); off -= o end + off, o = off.divmod(24) + if o != 0 then hour -= o; o, hour = hour.divmod(24); off -= o end + if off != 0 then + day -= off + if day < 1 + mon -= 1 + if mon < 1 + year -= 1 + mon = 12 + end + day = month_days(year, mon) + end + end + end + return year, mon, day, hour, min, sec + end + private :apply_offset + + def make_time(year, mon, day, hour, min, sec, sec_fraction, zone, now) + usec = nil + usec = sec_fraction * 1000000 if sec_fraction + if now + begin + break if year; year = now.year + break if mon; mon = now.mon + break if day; day = now.day + break if hour; hour = now.hour + break if min; min = now.min + break if sec; sec = now.sec + break if sec_fraction; usec = now.tv_usec + end until true + end + + year ||= 1970 + mon ||= 1 + day ||= 1 + hour ||= 0 + min ||= 0 + sec ||= 0 + usec ||= 0 + + off = nil + off = zone_offset(zone, year) if zone + + if off + year, mon, day, hour, min, sec = + apply_offset(year, mon, day, hour, min, sec, off) + t = self.utc(year, mon, 
day, hour, min, sec, usec) + t.localtime if !zone_utc?(zone) + t + else + self.local(year, mon, day, hour, min, sec, usec) + end + end + private :make_time + + # + # Parses +date+ using Date._parse and converts it to a Time object. + # + # If a block is given, the year described in +date+ is converted by the + # block. For example: + # + # Time.parse(...) {|y| y < 100 ? (y >= 69 ? y + 1900 : y + 2000) : y} + # + # If the upper components of the given time are broken or missing, they are + # supplied with those of +now+. For the lower components, the minimum + # values (1 or 0) are assumed if broken or missing. For example: + # + # # Suppose it is "Thu Nov 29 14:33:20 GMT 2001" now and + # # your timezone is GMT: + # Time.parse("16:30") #=> Thu Nov 29 16:30:00 GMT 2001 + # Time.parse("7/23") #=> Mon Jul 23 00:00:00 GMT 2001 + # Time.parse("Aug 31") #=> Fri Aug 31 00:00:00 GMT 2001 + # + # Since there are numerous conflicts among locally defined timezone + # abbreviations all over the world, this method is not made to + # understand all of them. For example, the abbreviation "CST" is + # used variously as: + # + # -06:00 in America/Chicago, + # -05:00 in America/Havana, + # +08:00 in Asia/Harbin, + # +09:30 in Australia/Darwin, + # +10:30 in Australia/Adelaide, + # etc. + # + # Based on the fact, this method only understands the timezone + # abbreviations described in RFC 822 and the system timezone, in the + # order named. (i.e. a definition in RFC 822 overrides the system + # timezone definition.) The system timezone is taken from + # Time.local(year, 1, 1).zone and + # Time.local(year, 7, 1).zone. + # If the extracted timezone abbreviation does not match any of them, + # it is ignored and the given time is regarded as a local time. + # + # ArgumentError is raised if Date._parse cannot extract information from + # +date+ or Time class cannot represent specified date. 
+ # + # This method can be used as fail-safe for other parsing methods as: + # + # Time.rfc2822(date) rescue Time.parse(date) + # Time.httpdate(date) rescue Time.parse(date) + # Time.xmlschema(date) rescue Time.parse(date) + # + # A failure for Time.parse should be checked, though. + # + def parse(date, now=self.now) + d = Date._parse(date, false) + year = d[:year] + year = yield(year) if year && block_given? + make_time(year, d[:mon], d[:mday], d[:hour], d[:min], d[:sec], d[:sec_fraction], d[:zone], now) + end + + # + # Parses +date+ using Date._strptime and converts it to a Time object. + # + # If a block is given, the year described in +date+ is converted by the + # block. For example: + # + # Time.strptime(...) {|y| y < 100 ? (y >= 69 ? y + 1900 : y + 2000) : y} + def strptime(date, format, now=self.now) + d = Date._strptime(date, format) + raise ArgumentError, "invalid strptime format - `#{format}'" unless d + year = d[:year] + year = yield(year) if year && block_given? + make_time(year, d[:mon], d[:mday], d[:hour], d[:min], d[:sec], d[:sec_fraction], d[:zone], now) + end + + MonthValue = { + 'JAN' => 1, 'FEB' => 2, 'MAR' => 3, 'APR' => 4, 'MAY' => 5, 'JUN' => 6, + 'JUL' => 7, 'AUG' => 8, 'SEP' => 9, 'OCT' =>10, 'NOV' =>11, 'DEC' =>12 + } + + # + # Parses +date+ as date-time defined by RFC 2822 and converts it to a Time + # object. The format is identical to the date format defined by RFC 822 and + # updated by RFC 1123. + # + # ArgumentError is raised if +date+ is not compliant with RFC 2822 + # or Time class cannot represent specified date. + # + # See #rfc2822 for more information on this format. + # + def rfc2822(date) + if /\A\s* + (?:(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s*,\s*)? + (\d{1,2})\s+ + (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+ + (\d{2,})\s+ + (\d{2})\s* + :\s*(\d{2})\s* + (?::\s*(\d{2}))?\s+ + ([+-]\d{4}| + UT|GMT|EST|EDT|CST|CDT|MST|MDT|PST|PDT|[A-IK-Z])/ix =~ date + # Since RFC 2822 permit comments, the regexp has no right anchor. 
+ day = $1.to_i + mon = MonthValue[$2.upcase] + year = $3.to_i + hour = $4.to_i + min = $5.to_i + sec = $6 ? $6.to_i : 0 + zone = $7 + + # following year completion is compliant with RFC 2822. + year = if year < 50 + 2000 + year + elsif year < 1000 + 1900 + year + else + year + end + + year, mon, day, hour, min, sec = + apply_offset(year, mon, day, hour, min, sec, zone_offset(zone)) + t = self.utc(year, mon, day, hour, min, sec) + t.localtime if !zone_utc?(zone) + t + else + raise ArgumentError.new("not RFC 2822 compliant date: #{date.inspect}") + end + end + alias rfc822 rfc2822 + + # + # Parses +date+ as HTTP-date defined by RFC 2616 and converts it to a Time + # object. + # + # ArgumentError is raised if +date+ is not compliant with RFC 2616 or Time + # class cannot represent specified date. + # + # See #httpdate for more information on this format. + # + def httpdate(date) + if /\A\s* + (?:Mon|Tue|Wed|Thu|Fri|Sat|Sun),\x20 + (\d{2})\x20 + (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\x20 + (\d{4})\x20 + (\d{2}):(\d{2}):(\d{2})\x20 + GMT + \s*\z/ix =~ date + self.rfc2822(date) + elsif /\A\s* + (?:Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday),\x20 + (\d\d)-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-(\d\d)\x20 + (\d\d):(\d\d):(\d\d)\x20 + GMT + \s*\z/ix =~ date + year = $3.to_i + if year < 50 + year += 2000 + else + year += 1900 + end + self.utc(year, $2, $1.to_i, $4.to_i, $5.to_i, $6.to_i) + elsif /\A\s* + (?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\x20 + (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\x20 + (\d\d|\x20\d)\x20 + (\d\d):(\d\d):(\d\d)\x20 + (\d{4}) + \s*\z/ix =~ date + self.utc($6.to_i, MonthValue[$1.upcase], $2.to_i, + $3.to_i, $4.to_i, $5.to_i) + else + raise ArgumentError.new("not RFC 2616 compliant date: #{date.inspect}") + end + end + + # + # Parses +date+ as dateTime defined by XML Schema and converts it to a Time + # object. The format is restricted version of the format defined by ISO + # 8601. 
+ # + # ArgumentError is raised if +date+ is not compliant with the format or Time + # class cannot represent specified date. + # + # See #xmlschema for more information on this format. + # + def xmlschema(date) + if /\A\s* + (-?\d+)-(\d\d)-(\d\d) + T + (\d\d):(\d\d):(\d\d) + (\.\d+)? + (Z|[+-]\d\d:\d\d)? + \s*\z/ix =~ date + year = $1.to_i + mon = $2.to_i + day = $3.to_i + hour = $4.to_i + min = $5.to_i + sec = $6.to_i + usec = 0 + if $7 + usec = Rational($7) * 1000000 + end + if $8 + zone = $8 + year, mon, day, hour, min, sec = + apply_offset(year, mon, day, hour, min, sec, zone_offset(zone)) + self.utc(year, mon, day, hour, min, sec, usec) + else + self.local(year, mon, day, hour, min, sec, usec) + end + else + raise ArgumentError.new("invalid date: #{date.inspect}") + end + end + alias iso8601 xmlschema + end # class << self + + # + # Returns a string which represents the time as date-time defined by RFC 2822: + # + # day-of-week, DD month-name CCYY hh:mm:ss zone + # + # where zone is [+-]hhmm. + # + # If +self+ is a UTC time, -0000 is used as zone. + # + def rfc2822 + sprintf('%s, %02d %s %d %02d:%02d:%02d ', + RFC2822_DAY_NAME[wday], + day, RFC2822_MONTH_NAME[mon-1], year, + hour, min, sec) + + if utc? + '-0000' + else + off = utc_offset + sign = off < 0 ? '-' : '+' + sprintf('%s%02d%02d', sign, *(off.abs / 60).divmod(60)) + end + end + alias rfc822 rfc2822 + + RFC2822_DAY_NAME = [ + 'Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat' + ] + RFC2822_MONTH_NAME = [ + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' + ] + + # + # Returns a string which represents the time as rfc1123-date of HTTP-date + # defined by RFC 2616: + # + # day-of-week, DD month-name CCYY hh:mm:ss GMT + # + # Note that the result is always UTC (GMT). 
+ # + def httpdate + t = dup.utc + sprintf('%s, %02d %s %d %02d:%02d:%02d GMT', + RFC2822_DAY_NAME[t.wday], + t.day, RFC2822_MONTH_NAME[t.mon-1], t.year, + t.hour, t.min, t.sec) + end + + # + # Returns a string which represents the time as dateTime defined by XML + # Schema: + # + # CCYY-MM-DDThh:mm:ssTZD + # CCYY-MM-DDThh:mm:ss.sssTZD + # + # where TZD is Z or [+-]hh:mm. + # + # If self is a UTC time, Z is used as TZD. [+-]hh:mm is used otherwise. + # + # +fraction_digits+ specifies the number of digits of fractional seconds. + # Its default value is 0. Digits beyond the ninth are padded with zeros. + # + def xmlschema(fraction_digits=0) + sprintf('%d-%02d-%02dT%02d:%02d:%02d', + year, mon, day, hour, min, sec) + + if fraction_digits == 0 + '' + elsif fraction_digits <= 9 + '.' + sprintf('%09d', nsec)[0, fraction_digits] + else + '.' + sprintf('%09d', nsec) + '0' * (fraction_digits - 9) + end + + if utc? + 'Z' + else + off = utc_offset + sign = off < 0 ? '-' : '+' + sprintf('%s%02d:%02d', sign, *(off.abs / 60).divmod(60)) + end + end + alias iso8601 xmlschema +end + +if __FILE__ == $0 + require 'test/unit' + + class TimeExtentionTest < Test::Unit::TestCase # :nodoc: + def test_rfc822 + assert_equal(Time.utc(1976, 8, 26, 14, 30) + 4 * 3600, + Time.rfc2822("26 Aug 76 14:30 EDT")) + assert_equal(Time.utc(1976, 8, 27, 9, 32) + 7 * 3600, + Time.rfc2822("27 Aug 76 09:32 PDT")) + end + + def test_rfc2822 + assert_equal(Time.utc(1997, 11, 21, 9, 55, 6) + 6 * 3600, + Time.rfc2822("Fri, 21 Nov 1997 09:55:06 -0600")) + assert_equal(Time.utc(2003, 7, 1, 10, 52, 37) - 2 * 3600, + Time.rfc2822("Tue, 1 Jul 2003 10:52:37 +0200")) + assert_equal(Time.utc(1997, 11, 21, 10, 1, 10) + 6 * 3600, + Time.rfc2822("Fri, 21 Nov 1997 10:01:10 -0600")) + assert_equal(Time.utc(1997, 11, 21, 11, 0, 0) + 6 * 3600, + Time.rfc2822("Fri, 21 Nov 1997 11:00:00 -0600")) + assert_equal(Time.utc(1997, 11, 24, 14, 22, 1) + 8 * 3600, + Time.rfc2822("Mon, 24 Nov 1997 14:22:01 -0800")) + begin + Time.at(-1) + rescue ArgumentError + # ignore + else +
assert_equal(Time.utc(1969, 2, 13, 23, 32, 54) + 3 * 3600 + 30 * 60, + Time.rfc2822("Thu, 13 Feb 1969 23:32:54 -0330")) + assert_equal(Time.utc(1969, 2, 13, 23, 32, 0) + 3 * 3600 + 30 * 60, + Time.rfc2822(" Thu, + 13 + Feb + 1969 + 23:32 + -0330 (Newfoundland Time)")) + end + assert_equal(Time.utc(1997, 11, 21, 9, 55, 6), + Time.rfc2822("21 Nov 97 09:55:06 GMT")) + assert_equal(Time.utc(1997, 11, 21, 9, 55, 6) + 6 * 3600, + Time.rfc2822("Fri, 21 Nov 1997 09 : 55 : 06 -0600")) + assert_raise(ArgumentError) { + # inner comment is not supported. + Time.rfc2822("Fri, 21 Nov 1997 09(comment): 55 : 06 -0600") + } + end + + def test_rfc2616 + t = Time.utc(1994, 11, 6, 8, 49, 37) + assert_equal(t, Time.httpdate("Sun, 06 Nov 1994 08:49:37 GMT")) + assert_equal(t, Time.httpdate("Sunday, 06-Nov-94 08:49:37 GMT")) + assert_equal(t, Time.httpdate("Sun Nov 6 08:49:37 1994")) + assert_equal(Time.utc(1995, 11, 15, 6, 25, 24), + Time.httpdate("Wed, 15 Nov 1995 06:25:24 GMT")) + assert_equal(Time.utc(1995, 11, 15, 4, 58, 8), + Time.httpdate("Wed, 15 Nov 1995 04:58:08 GMT")) + assert_equal(Time.utc(1994, 11, 15, 8, 12, 31), + Time.httpdate("Tue, 15 Nov 1994 08:12:31 GMT")) + assert_equal(Time.utc(1994, 12, 1, 16, 0, 0), + Time.httpdate("Thu, 01 Dec 1994 16:00:00 GMT")) + assert_equal(Time.utc(1994, 10, 29, 19, 43, 31), + Time.httpdate("Sat, 29 Oct 1994 19:43:31 GMT")) + assert_equal(Time.utc(1994, 11, 15, 12, 45, 26), + Time.httpdate("Tue, 15 Nov 1994 12:45:26 GMT")) + assert_equal(Time.utc(1999, 12, 31, 23, 59, 59), + Time.httpdate("Fri, 31 Dec 1999 23:59:59 GMT")) + + assert_equal(Time.utc(2007, 12, 23, 11, 22, 33), + Time.httpdate('Sunday, 23-Dec-07 11:22:33 GMT')) + end + + def test_rfc3339 + t = Time.utc(1985, 4, 12, 23, 20, 50, 520000) + s = "1985-04-12T23:20:50.52Z" + assert_equal(t, Time.iso8601(s)) + assert_equal(s, t.iso8601(2)) + + t = Time.utc(1996, 12, 20, 0, 39, 57) + s = "1996-12-19T16:39:57-08:00" + assert_equal(t, Time.iso8601(s)) + # There is no way to generate time 
string with arbitrary timezone. + s = "1996-12-20T00:39:57Z" + assert_equal(t, Time.iso8601(s)) + assert_equal(s, t.iso8601) + + t = Time.utc(1990, 12, 31, 23, 59, 60) + s = "1990-12-31T23:59:60Z" + assert_equal(t, Time.iso8601(s)) + # leap second is representable only if timezone file has it. + s = "1990-12-31T15:59:60-08:00" + assert_equal(t, Time.iso8601(s)) + + begin + Time.at(-1) + rescue ArgumentError + # ignore + else + t = Time.utc(1937, 1, 1, 11, 40, 27, 870000) + s = "1937-01-01T12:00:27.87+00:20" + assert_equal(t, Time.iso8601(s)) + end + end + + # http://www.w3.org/TR/xmlschema-2/ + def test_xmlschema + assert_equal(Time.utc(1999, 5, 31, 13, 20, 0) + 5 * 3600, + Time.xmlschema("1999-05-31T13:20:00-05:00")) + assert_equal(Time.local(2000, 1, 20, 12, 0, 0), + Time.xmlschema("2000-01-20T12:00:00")) + assert_equal(Time.utc(2000, 1, 20, 12, 0, 0), + Time.xmlschema("2000-01-20T12:00:00Z")) + assert_equal(Time.utc(2000, 1, 20, 12, 0, 0) - 12 * 3600, + Time.xmlschema("2000-01-20T12:00:00+12:00")) + assert_equal(Time.utc(2000, 1, 20, 12, 0, 0) + 13 * 3600, + Time.xmlschema("2000-01-20T12:00:00-13:00")) + assert_equal(Time.utc(2000, 3, 4, 23, 0, 0) - 3 * 3600, + Time.xmlschema("2000-03-04T23:00:00+03:00")) + assert_equal(Time.utc(2000, 3, 4, 20, 0, 0), + Time.xmlschema("2000-03-04T20:00:00Z")) + assert_equal(Time.local(2000, 1, 15, 0, 0, 0), + Time.xmlschema("2000-01-15T00:00:00")) + assert_equal(Time.local(2000, 2, 15, 0, 0, 0), + Time.xmlschema("2000-02-15T00:00:00")) + assert_equal(Time.local(2000, 1, 15, 12, 0, 0), + Time.xmlschema("2000-01-15T12:00:00")) + assert_equal(Time.utc(2000, 1, 16, 12, 0, 0), + Time.xmlschema("2000-01-16T12:00:00Z")) + assert_equal(Time.local(2000, 1, 1, 12, 0, 0), + Time.xmlschema("2000-01-01T12:00:00")) + assert_equal(Time.utc(1999, 12, 31, 23, 0, 0), + Time.xmlschema("1999-12-31T23:00:00Z")) + assert_equal(Time.local(2000, 1, 16, 12, 0, 0), + Time.xmlschema("2000-01-16T12:00:00")) + assert_equal(Time.local(2000, 1, 16, 0, 0, 0), 
+ Time.xmlschema("2000-01-16T00:00:00")) + assert_equal(Time.utc(2000, 1, 12, 12, 13, 14), + Time.xmlschema("2000-01-12T12:13:14Z")) + assert_equal(Time.utc(2001, 4, 17, 19, 23, 17, 300000), + Time.xmlschema("2001-04-17T19:23:17.3Z")) + assert_raise(ArgumentError) { Time.xmlschema("2000-01-01T00:00:00.+00:00") } + end + + def test_encode_xmlschema + t = Time.utc(2001, 4, 17, 19, 23, 17, 300000) + assert_equal("2001-04-17T19:23:17Z", t.xmlschema) + assert_equal("2001-04-17T19:23:17.3Z", t.xmlschema(1)) + assert_equal("2001-04-17T19:23:17.300000Z", t.xmlschema(6)) + assert_equal("2001-04-17T19:23:17.3000000Z", t.xmlschema(7)) + + t = Time.utc(2001, 4, 17, 19, 23, 17, 123456) + assert_equal("2001-04-17T19:23:17.1234560Z", t.xmlschema(7)) + assert_equal("2001-04-17T19:23:17.123456Z", t.xmlschema(6)) + assert_equal("2001-04-17T19:23:17.12345Z", t.xmlschema(5)) + assert_equal("2001-04-17T19:23:17.1Z", t.xmlschema(1)) + + begin + Time.at(-1) + rescue ArgumentError + # ignore + else + t = Time.utc(1960, 12, 31, 23, 0, 0, 123456) + assert_equal("1960-12-31T23:00:00.123456Z", t.xmlschema(6)) + end + + assert_equal(249, Time.xmlschema("2008-06-05T23:49:23.000249+09:00").usec) + end + + def test_completion + now = Time.local(2001,11,29,21,26,35) + assert_equal(Time.local( 2001,11,29,21,12), + Time.parse("2001/11/29 21:12", now)) + assert_equal(Time.local( 2001,11,29), + Time.parse("2001/11/29", now)) + assert_equal(Time.local( 2001,11,29), + Time.parse( "11/29", now)) + #assert_equal(Time.local(2001,11,1), Time.parse("Nov", now)) + assert_equal(Time.local( 2001,11,29,10,22), + Time.parse( "10:22", now)) + end + + def test_invalid + # They were actually used in some web sites. 
+ assert_raise(ArgumentError) { Time.httpdate("1 Dec 2001 10:23:57 GMT") } + assert_raise(ArgumentError) { Time.httpdate("Sat, 1 Dec 2001 10:25:42 GMT") } + assert_raise(ArgumentError) { Time.httpdate("Sat, 1-Dec-2001 10:53:55 GMT") } + assert_raise(ArgumentError) { Time.httpdate("Saturday, 01-Dec-2001 10:15:34 GMT") } + assert_raise(ArgumentError) { Time.httpdate("Saturday, 01-Dec-101 11:10:07 GMT") } + assert_raise(ArgumentError) { Time.httpdate("Fri, 30 Nov 2001 21:30:00 JST") } + + # They were actually used in some mails. + assert_raise(ArgumentError) { Time.rfc2822("01-5-20") } + assert_raise(ArgumentError) { Time.rfc2822("7/21/00") } + assert_raise(ArgumentError) { Time.rfc2822("2001-8-28") } + assert_raise(ArgumentError) { Time.rfc2822("00-5-6 1:13:06") } + assert_raise(ArgumentError) { Time.rfc2822("2001-9-27 9:36:49") } + assert_raise(ArgumentError) { Time.rfc2822("2000-12-13 11:01:11") } + assert_raise(ArgumentError) { Time.rfc2822("2001/10/17 04:29:55") } + assert_raise(ArgumentError) { Time.rfc2822("9/4/2001 9:23:19 PM") } + assert_raise(ArgumentError) { Time.rfc2822("01 Nov 2001 09:04:31") } + assert_raise(ArgumentError) { Time.rfc2822("13 Feb 2001 16:4 GMT") } + assert_raise(ArgumentError) { Time.rfc2822("01 Oct 00 5:41:19 PM") } + assert_raise(ArgumentError) { Time.rfc2822("2 Jul 00 00:51:37 JST") } + assert_raise(ArgumentError) { Time.rfc2822("01 11 2001 06:55:57 -0500") } + assert_raise(ArgumentError) { Time.rfc2822("18 \343\366\356\341\370 2000") } + assert_raise(ArgumentError) { Time.rfc2822("Fri, Oct 2001 18:53:32") } + assert_raise(ArgumentError) { Time.rfc2822("Fri, 2 Nov 2001 03:47:54") } + assert_raise(ArgumentError) { Time.rfc2822("Fri, 27 Jul 2001 11.14.14 +0200") } + assert_raise(ArgumentError) { Time.rfc2822("Thu, 2 Nov 2000 04:13:53 -600") } + assert_raise(ArgumentError) { Time.rfc2822("Wed, 5 Apr 2000 22:57:09 JST") } + assert_raise(ArgumentError) { Time.rfc2822("Mon, 11 Sep 2000 19:47:33 00000") } + assert_raise(ArgumentError) { 
Time.rfc2822("Fri, 28 Apr 2000 20:40:47 +-900") } + assert_raise(ArgumentError) { Time.rfc2822("Fri, 19 Jan 2001 8:15:36 AM -0500") } + assert_raise(ArgumentError) { Time.rfc2822("Thursday, Sep 27 2001 7:42:35 AM EST") } + assert_raise(ArgumentError) { Time.rfc2822("3/11/2001 1:31:57 PM Pacific Daylight Time") } + assert_raise(ArgumentError) { Time.rfc2822("Mi, 28 Mrz 2001 11:51:36") } + assert_raise(ArgumentError) { Time.rfc2822("P, 30 sept 2001 23:03:14") } + assert_raise(ArgumentError) { Time.rfc2822("fr, 11 aug 2000 18:39:22") } + assert_raise(ArgumentError) { Time.rfc2822("Fr, 21 Sep 2001 17:44:03 -1000") } + assert_raise(ArgumentError) { Time.rfc2822("Mo, 18 Jun 2001 19:21:40 -1000") } + assert_raise(ArgumentError) { Time.rfc2822("l\366, 12 aug 2000 18:53:20") } + assert_raise(ArgumentError) { Time.rfc2822("l\366, 26 maj 2001 00:15:58") } + assert_raise(ArgumentError) { Time.rfc2822("Dom, 30 Sep 2001 17:36:30") } + assert_raise(ArgumentError) { Time.rfc2822("%&, 31 %2/ 2000 15:44:47 -0500") } + assert_raise(ArgumentError) { Time.rfc2822("dom, 26 ago 2001 03:57:07 -0300") } + assert_raise(ArgumentError) { Time.rfc2822("ter, 04 set 2001 16:27:58 -0300") } + assert_raise(ArgumentError) { Time.rfc2822("Wen, 3 oct 2001 23:17:49 -0400") } + assert_raise(ArgumentError) { Time.rfc2822("Wen, 3 oct 2001 23:17:49 -0400") } + assert_raise(ArgumentError) { Time.rfc2822("ele, 11 h: 2000 12:42:15 -0500") } + assert_raise(ArgumentError) { Time.rfc2822("Tue, 14 Aug 2001 3:55:3 +0200") } + assert_raise(ArgumentError) { Time.rfc2822("Fri, 25 Aug 2000 9:3:48 +0800") } + assert_raise(ArgumentError) { Time.rfc2822("Fri, 1 Dec 2000 0:57:50 EST") } + assert_raise(ArgumentError) { Time.rfc2822("Mon, 7 May 2001 9:39:51 +0200") } + assert_raise(ArgumentError) { Time.rfc2822("Wed, 1 Aug 2001 16:9:15 +0200") } + assert_raise(ArgumentError) { Time.rfc2822("Wed, 23 Aug 2000 9:17:36 +0800") } + assert_raise(ArgumentError) { Time.rfc2822("Fri, 11 Aug 2000 10:4:42 +0800") } + 
assert_raise(ArgumentError) { Time.rfc2822("Sat, 15 Sep 2001 13:22:2 +0300") } + assert_raise(ArgumentError) { Time.rfc2822("Wed,16 \276\305\324\302 2001 20:06:25 +0800") } + assert_raise(ArgumentError) { Time.rfc2822("Wed,7 \312\256\322\273\324\302 2001 23:47:22 +0800") } + assert_raise(ArgumentError) { Time.rfc2822("=?iso-8859-1?Q?(=C5=DA),?= 10 2 2001 23:32:26 +0900 (JST)") } + assert_raise(ArgumentError) { Time.rfc2822("\307\341\314\343\332\311, 30 \344\346\335\343\310\321 2001 10:01:06") } + assert_raise(ArgumentError) { Time.rfc2822("=?iso-8859-1?Q?(=BF=E5),?= 12 =?iso-8859-1?Q?9=B7=EE?= 2001 14:52:41\n+0900 (JST)") } + end + + def test_zone_0000 + assert_equal(true, Time.parse("2000-01-01T00:00:00Z").utc?) + assert_equal(true, Time.parse("2000-01-01T00:00:00-00:00").utc?) + assert_equal(false, Time.parse("2000-01-01T00:00:00+00:00").utc?) + assert_equal(false, Time.parse("Sat, 01 Jan 2000 00:00:00 GMT").utc?) + assert_equal(true, Time.parse("Sat, 01 Jan 2000 00:00:00 -0000").utc?) + assert_equal(false, Time.parse("Sat, 01 Jan 2000 00:00:00 +0000").utc?) + assert_equal(false, Time.rfc2822("Sat, 01 Jan 2000 00:00:00 GMT").utc?) + assert_equal(true, Time.rfc2822("Sat, 01 Jan 2000 00:00:00 -0000").utc?) + assert_equal(false, Time.rfc2822("Sat, 01 Jan 2000 00:00:00 +0000").utc?) + assert_equal(true, Time.rfc2822("Sat, 01 Jan 2000 00:00:00 UTC").utc?) + end + + def test_rfc2822_utc_roundtrip_winter + t1 = Time.local(2008,12,1) + t2 = Time.rfc2822(t1.rfc2822) + assert_equal(t1.utc?, t2.utc?, "[ruby-dev:37126]") + end + + def test_rfc2822_utc_roundtrip_summer + t1 = Time.local(2008,8,1) + t2 = Time.rfc2822(t1.rfc2822) + assert_equal(t1.utc?, t2.utc?) 
+ end + + def test_parse_leap_second + t = Time.utc(1998,12,31,23,59,59) + assert_equal(t, Time.parse("Thu Dec 31 23:59:59 UTC 1998")) + assert_equal(t, Time.parse("Fri Dec 31 23:59:59 -0000 1998"));t.localtime + assert_equal(t, Time.parse("Fri Jan 1 08:59:59 +0900 1999")) + assert_equal(t, Time.parse("Fri Jan 1 00:59:59 +0100 1999")) + assert_equal(t, Time.parse("Fri Dec 31 23:59:59 +0000 1998")) + assert_equal(t, Time.parse("Fri Dec 31 22:59:59 -0100 1998"));t.utc + t += 1 + assert_equal(t, Time.parse("Thu Dec 31 23:59:60 UTC 1998")) + assert_equal(t, Time.parse("Fri Dec 31 23:59:60 -0000 1998"));t.localtime + assert_equal(t, Time.parse("Fri Jan 1 08:59:60 +0900 1999")) + assert_equal(t, Time.parse("Fri Jan 1 00:59:60 +0100 1999")) + assert_equal(t, Time.parse("Fri Dec 31 23:59:60 +0000 1998")) + assert_equal(t, Time.parse("Fri Dec 31 22:59:60 -0100 1998"));t.utc + t += 1 if t.sec == 60 + assert_equal(t, Time.parse("Thu Jan 1 00:00:00 UTC 1999")) + assert_equal(t, Time.parse("Fri Jan 1 00:00:00 -0000 1999"));t.localtime + assert_equal(t, Time.parse("Fri Jan 1 09:00:00 +0900 1999")) + assert_equal(t, Time.parse("Fri Jan 1 01:00:00 +0100 1999")) + assert_equal(t, Time.parse("Fri Jan 1 00:00:00 +0000 1999")) + assert_equal(t, Time.parse("Fri Dec 31 23:00:00 -0100 1998")) + end + + def test_rfc2822_leap_second + t = Time.utc(1998,12,31,23,59,59) + assert_equal(t, Time.rfc2822("Thu, 31 Dec 1998 23:59:59 UTC")) + assert_equal(t, Time.rfc2822("Fri, 31 Dec 1998 23:59:59 -0000"));t.localtime + assert_equal(t, Time.rfc2822("Fri, 1 Jan 1999 08:59:59 +0900")) + assert_equal(t, Time.rfc2822("Fri, 1 Jan 1999 00:59:59 +0100")) + assert_equal(t, Time.rfc2822("Fri, 31 Dec 1998 23:59:59 +0000")) + assert_equal(t, Time.rfc2822("Fri, 31 Dec 1998 22:59:59 -0100"));t.utc + t += 1 + assert_equal(t, Time.rfc2822("Thu, 31 Dec 1998 23:59:60 UTC")) + assert_equal(t, Time.rfc2822("Fri, 31 Dec 1998 23:59:60 -0000"));t.localtime + assert_equal(t, Time.rfc2822("Fri, 1 Jan 1999 08:59:60 
+0900")) + assert_equal(t, Time.rfc2822("Fri, 1 Jan 1999 00:59:60 +0100")) + assert_equal(t, Time.rfc2822("Fri, 31 Dec 1998 23:59:60 +0000")) + assert_equal(t, Time.rfc2822("Fri, 31 Dec 1998 22:59:60 -0100"));t.utc + t += 1 if t.sec == 60 + assert_equal(t, Time.rfc2822("Thu, 1 Jan 1999 00:00:00 UTC")) + assert_equal(t, Time.rfc2822("Fri, 1 Jan 1999 00:00:00 -0000"));t.localtime + assert_equal(t, Time.rfc2822("Fri, 1 Jan 1999 09:00:00 +0900")) + assert_equal(t, Time.rfc2822("Fri, 1 Jan 1999 01:00:00 +0100")) + assert_equal(t, Time.rfc2822("Fri, 1 Jan 1999 00:00:00 +0000")) + assert_equal(t, Time.rfc2822("Fri, 31 Dec 1998 23:00:00 -0100")) + end + + def test_xmlschema_leap_second + t = Time.utc(1998,12,31,23,59,59) + assert_equal(t, Time.xmlschema("1998-12-31T23:59:59Z")) + assert_equal(t, Time.xmlschema("1998-12-31T23:59:59-00:00"));t.localtime + assert_equal(t, Time.xmlschema("1999-01-01T08:59:59+09:00")) + assert_equal(t, Time.xmlschema("1999-01-01T00:59:59+01:00")) + assert_equal(t, Time.xmlschema("1998-12-31T23:59:59+00:00")) + assert_equal(t, Time.xmlschema("1998-12-31T22:59:59-01:00"));t.utc + t += 1 + assert_equal(t, Time.xmlschema("1998-12-31T23:59:60Z")) + assert_equal(t, Time.xmlschema("1998-12-31T23:59:60-00:00"));t.localtime + assert_equal(t, Time.xmlschema("1999-01-01T08:59:60+09:00")) + assert_equal(t, Time.xmlschema("1999-01-01T00:59:60+01:00")) + assert_equal(t, Time.xmlschema("1998-12-31T23:59:60+00:00")) + assert_equal(t, Time.xmlschema("1998-12-31T22:59:60-01:00"));t.utc + t += 1 if t.sec == 60 + assert_equal(t, Time.xmlschema("1999-01-01T00:00:00Z")) + assert_equal(t, Time.xmlschema("1999-01-01T00:00:00-00:00"));t.localtime + assert_equal(t, Time.xmlschema("1999-01-01T09:00:00+09:00")) + assert_equal(t, Time.xmlschema("1999-01-01T01:00:00+01:00")) + assert_equal(t, Time.xmlschema("1999-01-01T00:00:00+00:00")) + assert_equal(t, Time.xmlschema("1998-12-31T23:00:00-01:00")) + end + + def test_xmlschema_fraction + assert_equal(500000, 
Time.xmlschema("2000-01-01T00:00:00.5+00:00").tv_usec) + end + + def test_ruby_talk_152866 + t = Time::xmlschema('2005-08-30T22:48:00-07:00') + assert_equal(31, t.day) + assert_equal(8, t.mon) + end + + def test_parse_fraction + assert_equal(500000, Time.parse("2000-01-01T00:00:00.5+00:00").tv_usec) + end + + def test_strptime + assert_equal(Time.utc(2005, 8, 28, 06, 54, 20), Time.strptime("28/Aug/2005:06:54:20 +0000", "%d/%b/%Y:%T %z")) + end + + def test_nsec + assert_equal(123456789, Time.xmlschema("2000-01-01T00:00:00.123456789+00:00").tv_nsec) + assert_equal(123456789, Time.parse("2000-01-01T00:00:00.123456789+00:00").tv_nsec) + end + end +end diff --git a/lib/timeout.rb b/lib/timeout.rb new file mode 100644 index 0000000..19ccb96 --- /dev/null +++ b/lib/timeout.rb @@ -0,0 +1,108 @@ +# = timeout.rb +# +# execution timeout +# +# = Synopsis +# +# require 'timeout' +# status = Timeout::timeout(5) { +# # Something that should be interrupted if it takes too much time... +# } +# +# = Description +# +# A way of performing a potentially long-running operation in a thread, and terminating +# its execution if it hasn't finished within a fixed amount of time. +# +# Previous versions of timeout didn't use a module for namespacing. This version +# provides both Timeout.timeout, and a backwards-compatible #timeout. +# +# = Copyright +# +# Copyright:: (C) 2000 Network Applied Communication Laboratory, Inc. +# Copyright:: (C) 2000 Information-technology Promotion Agency, Japan + +module Timeout + # Raised by Timeout#timeout when the block times out. + class Error < RuntimeError + end + class ExitException < ::Exception # :nodoc: + end + + THIS_FILE = /\A#{Regexp.quote(__FILE__)}:/o + CALLER_OFFSET = ((c = caller[0]) && THIS_FILE =~ c) ? 1 : 0 + + # Executes the method's block. If the block execution terminates before + # +sec+ seconds have passed, it returns the result value of the block. 
+ # If not, it terminates the execution and raises +exception+ (which defaults + # to Timeout::Error). + # + # Note that this is both a method of module Timeout, so you can 'include Timeout' + # into your classes so they have a #timeout method, as well as a module method, + # so you can call it directly as Timeout.timeout(). + def timeout(sec, klass = nil) #:yield: +sec+ + return yield(sec) if sec == nil or sec.zero? + exception = klass || Class.new(ExitException) + begin + x = Thread.current + y = Thread.start { + sleep sec + x.raise exception, "execution expired" if x.alive? + } + return yield(sec) + rescue exception => e + rej = /\A#{Regexp.quote(__FILE__)}:#{__LINE__-4}\z/o + (bt = e.backtrace).reject! {|m| rej =~ m} + level = -caller(CALLER_OFFSET).size + while THIS_FILE =~ bt[level] + bt.delete_at(level) + level += 1 + end + raise if klass # if exception class is specified, it + # would be expected outside. + raise Error, e.message, e.backtrace + ensure + if y and y.alive? + y.kill + y.join # make sure y is dead. + end + end + end + + module_function :timeout +end + +# Identical to: +# +# Timeout::timeout(n, e, &block). +# +# Defined for backwards compatibility with earlier versions of timeout.rb, see +# Timeout#timeout. +def timeout(n, e = nil, &block) + Timeout::timeout(n, e, &block) +end + +# Another name for Timeout::Error, defined for backwards compatibility with +# earlier versions of timeout.rb. 
+TimeoutError = Timeout::Error + +if __FILE__ == $0 + p timeout(5) { + 45 + } + p timeout(5, TimeoutError) { + 45 + } + p timeout(nil) { + 54 + } + p timeout(0) { + 54 + } + p timeout(5) { + loop { + p 10 + sleep 1 + } + } +end diff --git a/lib/tmpdir.rb b/lib/tmpdir.rb new file mode 100644 index 0000000..8cecb8f --- /dev/null +++ b/lib/tmpdir.rb @@ -0,0 +1,138 @@ +# +# tmpdir - retrieve temporary directory path +# +# $Id: tmpdir.rb 23781 2009-06-21 09:14:14Z yugui $ +# + +require 'fileutils' + +class Dir + + @@systmpdir = '/tmp' + + begin + require 'Win32API' + CSIDL_LOCAL_APPDATA = 0x001c + max_pathlen = 260 + windir = "\0"*(max_pathlen+1) + begin + getdir = Win32API.new('shell32', 'SHGetFolderPath', 'LLLLP', 'L') + raise RuntimeError if getdir.call(0, CSIDL_LOCAL_APPDATA, 0, 0, windir) != 0 + windir = File.expand_path(windir.rstrip) + rescue RuntimeError + begin + getdir = Win32API.new('kernel32', 'GetSystemWindowsDirectory', 'PL', 'L') + rescue RuntimeError + getdir = Win32API.new('kernel32', 'GetWindowsDirectory', 'PL', 'L') + end + len = getdir.call(windir, windir.size) + windir = File.expand_path(windir[0, len]) + end + windir.force_encoding(Dir.pwd.encoding) + temp = File.join(windir.untaint, 'temp') + @@systmpdir = temp if File.directory?(temp) and File.writable?(temp) + rescue LoadError + end + + ## + # Returns the operating system's temporary file path. + + def Dir::tmpdir + tmp = '.' + if $SAFE > 0 + tmp = @@systmpdir + else + for dir in [ENV['TMPDIR'], ENV['TMP'], ENV['TEMP'], + ENV['USERPROFILE'], @@systmpdir, '/tmp'] + if dir and File.directory?(dir) and File.writable?(dir) + tmp = dir + break + end + end + File.expand_path(tmp) + end + end + + # Dir.mktmpdir creates a temporary directory. + # + # The directory is created with 0700 permission. + # + # The prefix and suffix of the name of the directory is specified by + # the optional first argument, prefix_suffix. + # - If it is not specified or nil, "d" is used as the prefix and no suffix is used. 
+ # - If it is a string, it is used as the prefix and no suffix is used. + # - If it is an array, first element is used as the prefix and second element is used as a suffix. + # + # Dir.mktmpdir {|dir| dir is ".../d..." } + # Dir.mktmpdir("foo") {|dir| dir is ".../foo..." } + # Dir.mktmpdir(["foo", "bar"]) {|dir| dir is ".../foo...bar" } + # + # The directory is created under Dir.tmpdir or + # the optional second argument tmpdir if non-nil value is given. + # + # Dir.mktmpdir {|dir| dir is "#{Dir.tmpdir}/d..." } + # Dir.mktmpdir(nil, "/var/tmp") {|dir| dir is "/var/tmp/d..." } + # + # If a block is given, + # it is yielded with the path of the directory. + # The directory and its contents are removed + # using FileUtils.remove_entry_secure before Dir.mktmpdir returns. + # The value of the block is returned. + # + # Dir.mktmpdir {|dir| + # # use the directory... + # open("#{dir}/foo", "w") { ... } + # } + # + # If a block is not given, + # The path of the directory is returned. + # In this case, Dir.mktmpdir doesn't remove the directory. + # + # dir = Dir.mktmpdir + # begin + # # use the directory... + # open("#{dir}/foo", "w") { ... } + # ensure + # # remove the directory. + # FileUtils.remove_entry_secure dir + # end + # + def Dir.mktmpdir(prefix_suffix=nil, tmpdir=nil) + case prefix_suffix + when nil + prefix = "d" + suffix = "" + when String + prefix = prefix_suffix + suffix = "" + when Array + prefix = prefix_suffix[0] + suffix = prefix_suffix[1] + else + raise ArgumentError, "unexpected prefix_suffix: #{prefix_suffix.inspect}" + end + tmpdir ||= Dir.tmpdir + t = Time.now.strftime("%Y%m%d") + n = nil + begin + path = "#{tmpdir}/#{prefix}#{t}-#{$$}-#{rand(0x100000000).to_s(36)}" + path << "-#{n}" if n + path << suffix + Dir.mkdir(path, 0700) + rescue Errno::EEXIST + n ||= 0 + n += 1 + retry + end + + if block_given? 
+ begin + yield path + ensure + FileUtils.remove_entry_secure path + end + else + path + end + end +end diff --git a/lib/tracer.rb b/lib/tracer.rb new file mode 100644 index 0000000..9d197a6 --- /dev/null +++ b/lib/tracer.rb @@ -0,0 +1,166 @@ +# +# tracer.rb - +# $Release Version: 0.2$ +# $Revision: 1.8 $ +# by Keiju ISHITSUKA(Nippon Rational Inc.) +# +# -- +# +# +# + +# +# tracer main class +# +class Tracer + @RCS_ID='-$Id: tracer.rb,v 1.8 1998/05/19 03:42:49 keiju Exp keiju $-' + + @stdout = STDOUT + @verbose = false + class << self + attr_accessor :verbose + alias verbose? verbose + attr_accessor :stdout + end + + EVENT_SYMBOL = { + "line" => "-", + "call" => ">", + "return" => "<", + "class" => "C", + "end" => "E", + "c-call" => ">", + "c-return" => "<", + } + + def initialize + @threads = Hash.new + if defined? Thread.main + @threads[Thread.main.object_id] = 0 + else + @threads[Thread.current.object_id] = 0 + end + + @get_line_procs = {} + + @filters = [] + end + + def stdout + Tracer.stdout + end + + def on + if block_given? + on + begin + yield + ensure + off + end + else + set_trace_func method(:trace_func).to_proc + stdout.print "Trace on\n" if Tracer.verbose? + end + end + + def off + set_trace_func nil + stdout.print "Trace off\n" if Tracer.verbose? 
+ end + + def add_filter(p = proc) + @filters.push p + end + + def set_get_line_procs(file, p = proc) + @get_line_procs[file] = p + end + + def get_line(file, line) + if p = @get_line_procs[file] + return p.call(line) + end + + unless list = SCRIPT_LINES__[file] + begin + f = open(file) + begin + SCRIPT_LINES__[file] = list = f.readlines + ensure + f.close + end + rescue + SCRIPT_LINES__[file] = list = [] + end + end + + if l = list[line - 1] + l + else + "-\n" + end + end + + def get_thread_no + if no = @threads[Thread.current.object_id] + no + else + @threads[Thread.current.object_id] = @threads.size + end + end + + def trace_func(event, file, line, id, binding, klass, *) + return if file == __FILE__ + + for p in @filters + return unless p.call event, file, line, id, binding, klass + end + + # saved_crit = Thread.critical + # Thread.critical = true + stdout.printf("#%d:%s:%d:%s:%s: %s", + get_thread_no, + file, + line, + klass || '', + EVENT_SYMBOL[event], + line == 0 ? "?\n" : get_line(file, line)) + # Thread.critical = saved_crit + end + + Single = new + def Tracer.on + if block_given? + Single.on{yield} + else + Single.on + end + end + + def Tracer.off + Single.off + end + + def Tracer.set_get_line_procs(file_name, p = proc) + Single.set_get_line_procs(file_name, p) + end + + def Tracer.add_filter(p = proc) + Single.add_filter(p) + end + +end + +SCRIPT_LINES__ = {} unless defined? SCRIPT_LINES__ + +if $0 == __FILE__ + # direct call + + $0 = ARGV[0] + ARGV.shift + Tracer.on + require $0 +elsif caller.size == 1 + Tracer.on +end diff --git a/lib/tsort.rb b/lib/tsort.rb new file mode 100644 index 0000000..9fc4fea --- /dev/null +++ b/lib/tsort.rb @@ -0,0 +1,290 @@ +#!/usr/bin/env ruby +#-- +# tsort.rb - provides a module for topological sorting and strongly connected components. +#++ +# + +# +# TSort implements topological sorting using Tarjan's algorithm for +# strongly connected components. 
+# +# TSort is designed to be able to be used with any object which can be +# interpreted as a directed graph. +# +# TSort requires two methods to interpret an object as a graph, +# tsort_each_node and tsort_each_child. +# +# * tsort_each_node is used to iterate over all nodes of a graph. +# * tsort_each_child is used to iterate over the child nodes of a given node. +# +# The equality of nodes is defined by eql? and hash since +# TSort uses Hash internally. +# +# == A Simple Example +# +# The following example demonstrates how to mix the TSort module into an +# existing class (in this case, Hash). Here, we're treating each key in +# the hash as a node in the graph, and so we simply alias the required +# #tsort_each_node method to Hash's #each_key method. For each key in the +# hash, the associated value is an array of the node's child nodes. This +# choice in turn leads to our implementation of the required #tsort_each_child +# method, which fetches the array of child nodes and then iterates over that +# array using the user-supplied block. 
+# +# require 'tsort' +# +# class Hash +# include TSort +# alias tsort_each_node each_key +# def tsort_each_child(node, &block) +# fetch(node).each(&block) +# end +# end +# +# {1=>[2, 3], 2=>[3], 3=>[], 4=>[]}.tsort +# #=> [3, 2, 1, 4] +# +# {1=>[2], 2=>[3, 4], 3=>[2], 4=>[]}.strongly_connected_components +# #=> [[4], [2, 3], [1]] +# +# == A More Realistic Example +# +# A very simple `make'-like tool can be implemented as follows: +# +# require 'tsort' +# +# class Make +# def initialize +# @dep = {} +# @dep.default = [] +# end +# +# def rule(outputs, inputs=[], &block) +# triple = [outputs, inputs, block] +# outputs.each {|f| @dep[f] = [triple]} +# @dep[triple] = inputs +# end +# +# def build(target) +# each_strongly_connected_component_from(target) {|ns| +# if ns.length != 1 +# fs = ns.delete_if {|n| Array === n} +# raise TSort::Cyclic.new("cyclic dependencies: #{fs.join ', '}") +# end +# n = ns.first +# if Array === n +# outputs, inputs, block = n +# inputs_time = inputs.map {|f| File.mtime f}.max +# begin +# outputs_time = outputs.map {|f| File.mtime f}.min +# rescue Errno::ENOENT +# outputs_time = nil +# end +# if outputs_time == nil || +# inputs_time != nil && outputs_time <= inputs_time +# sleep 1 if inputs_time != nil && inputs_time.to_i == Time.now.to_i +# block.call +# end +# end +# } +# end +# +# def tsort_each_child(node, &block) +# @dep[node].each(&block) +# end +# include TSort +# end +# +# def command(arg) +# print arg, "\n" +# system arg +# end +# +# m = Make.new +# m.rule(%w[t1]) { command 'date > t1' } +# m.rule(%w[t2]) { command 'date > t2' } +# m.rule(%w[t3]) { command 'date > t3' } +# m.rule(%w[t4], %w[t1 t3]) { command 'cat t1 t3 > t4' } +# m.rule(%w[t5], %w[t4 t2]) { command 'cat t4 t2 > t5' } +# m.build('t5') +# +# == Bugs +# +# * 'tsort.rb' is the wrong name because this library uses +# Tarjan's algorithm for strongly connected components. +# 'strongly_connected_components.rb' would be correct, but it is too long. +# +# == References +# +# R. E. 
Tarjan, "Depth First Search and Linear Graph Algorithms", +# SIAM Journal on Computing, Vol. 1, No. 2, pp. 146-160, June 1972. +# + +module TSort + class Cyclic < StandardError + end + + # + # Returns a topologically sorted array of nodes. + # The array is sorted from children to parents, i.e. + # the first element has no child and the last node has no parent. + # + # If there is a cycle, TSort::Cyclic is raised. + # + def tsort + result = [] + tsort_each {|element| result << element} + result + end + + # + # The iterator version of the #tsort method. + # obj.tsort_each is similar to obj.tsort.each, but + # modification of _obj_ during the iteration may lead to unexpected results. + # + # #tsort_each returns +nil+. + # If there is a cycle, TSort::Cyclic is raised. + # + def tsort_each # :yields: node + each_strongly_connected_component {|component| + if component.size == 1 + yield component.first + else + raise Cyclic.new("topological sort failed: #{component.inspect}") + end + } + end + + # + # Returns strongly connected components as an array of arrays of nodes. + # The array is sorted from children to parents. + # Each elements of the array represents a strongly connected component. + # + def strongly_connected_components + result = [] + each_strongly_connected_component {|component| result << component} + result + end + + # + # The iterator version of the #strongly_connected_components method. + # obj.each_strongly_connected_component is similar to + # obj.strongly_connected_components.each, but + # modification of _obj_ during the iteration may lead to unexpected results. + # + # + # #each_strongly_connected_component returns +nil+. + # + def each_strongly_connected_component # :yields: nodes + id_map = {} + stack = [] + tsort_each_node {|node| + unless id_map.include? 
node + each_strongly_connected_component_from(node, id_map, stack) {|c| + yield c + } + end + } + nil + end + + # + # Iterates over the strongly connected components in the subgraph reachable from + # _node_. + # + # Return value is unspecified. + # + # #each_strongly_connected_component_from doesn't call #tsort_each_node. + # + def each_strongly_connected_component_from(node, id_map={}, stack=[]) # :yields: nodes + minimum_id = node_id = id_map[node] = id_map.size + stack_length = stack.length + stack << node + + tsort_each_child(node) {|child| + if id_map.include? child + child_id = id_map[child] + minimum_id = child_id if child_id && child_id < minimum_id + else + sub_minimum_id = + each_strongly_connected_component_from(child, id_map, stack) {|c| + yield c + } + minimum_id = sub_minimum_id if sub_minimum_id < minimum_id + end + } + + if node_id == minimum_id + component = stack.slice!(stack_length .. -1) + component.each {|n| id_map[n] = nil} + yield component + end + + minimum_id + end + + # + # Should be implemented by an extended class. + # + # #tsort_each_node is used to iterate over all nodes of a graph. + # + def tsort_each_node # :yields: node + raise NotImplementedError.new + end + + # + # Should be implemented by an extended class. + # + # #tsort_each_child is used to iterate over the child nodes of _node_. 
+ # + def tsort_each_child(node) # :yields: child + raise NotImplementedError.new + end +end + +if __FILE__ == $0 + require 'test/unit' + + class TSortHash < Hash # :nodoc: + include TSort + alias tsort_each_node each_key + def tsort_each_child(node, &block) + fetch(node).each(&block) + end + end + + class TSortArray < Array # :nodoc: + include TSort + alias tsort_each_node each_index + def tsort_each_child(node, &block) + fetch(node).each(&block) + end + end + + class TSortTest < Test::Unit::TestCase # :nodoc: + def test_dag + h = TSortHash[{1=>[2, 3], 2=>[3], 3=>[]}] + assert_equal([3, 2, 1], h.tsort) + assert_equal([[3], [2], [1]], h.strongly_connected_components) + end + + def test_cycle + h = TSortHash[{1=>[2], 2=>[3, 4], 3=>[2], 4=>[]}] + assert_equal([[4], [2, 3], [1]], + h.strongly_connected_components.map {|nodes| nodes.sort}) + assert_raise(TSort::Cyclic) { h.tsort } + end + + def test_array + a = TSortArray[[1], [0], [0], [2]] + assert_equal([[0, 1], [2], [3]], + a.strongly_connected_components.map {|nodes| nodes.sort}) + + a = TSortArray[[], [0]] + assert_equal([[0], [1]], + a.strongly_connected_components.map {|nodes| nodes.sort}) + end + end + +end + diff --git a/lib/ubygems.rb b/lib/ubygems.rb new file mode 100644 index 0000000..fec880f --- /dev/null +++ b/lib/ubygems.rb @@ -0,0 +1,10 @@ +# This file allows for the running of rubygems with a nice +# command line look-and-feel: ruby -rubygems foo.rb +#-- +# Copyright 2006 by Chad Fowler, Rich Kilmer, Jim Weirich and others. +# All rights reserved. +# See LICENSE.txt for permissions. +#++ + + +require 'rubygems' diff --git a/lib/un.rb b/lib/un.rb new file mode 100644 index 0000000..42afeac --- /dev/null +++ b/lib/un.rb @@ -0,0 +1,304 @@ +# +# = un.rb +# +# Copyright (c) 2003 WATANABE Hirofumi +# +# This program is free software. +# You can distribute/modify this program under the same terms of Ruby. 
+# +# == Utilities to replace common UNIX commands in Makefiles etc +# +# == SYNOPSIS +# +# ruby -run -e cp -- [OPTION] SOURCE DEST +# ruby -run -e ln -- [OPTION] TARGET LINK_NAME +# ruby -run -e mv -- [OPTION] SOURCE DEST +# ruby -run -e rm -- [OPTION] FILE +# ruby -run -e mkdir -- [OPTION] DIRS +# ruby -run -e rmdir -- [OPTION] DIRS +# ruby -run -e install -- [OPTION] SOURCE DEST +# ruby -run -e chmod -- [OPTION] OCTAL-MODE FILE +# ruby -run -e touch -- [OPTION] FILE +# ruby -run -e wait_writable -- [OPTION] FILE +# ruby -run -e mkmf -- [OPTION] EXTNAME [OPTION] +# ruby -run -e help [COMMAND] + +require "fileutils" +require "optparse" + +module FileUtils +# @fileutils_label = "" + @fileutils_output = $stdout +end + +def setup(options = "", *long_options) + opt_hash = {} + argv = [] + OptionParser.new do |o| + options.scan(/.:?/) do |s| + opt_name = s.delete(":").intern + o.on("-" + s.tr(":", " ")) do |val| + opt_hash[opt_name] = val + end + end + long_options.each do |s| + opt_name = s[/\A(?:--)?([^\s=]+)/, 1].intern + o.on(s.sub(/\A(?!--)/, '--')) do |val| + opt_hash[opt_name] = val + end + end + o.on("-v") do opt_hash[:verbose] = true end + o.order!(ARGV) do |x| + if /[*?\[{]/ =~ x + argv.concat(Dir[x]) + else + argv << x + end + end + end + yield argv, opt_hash +end + +## +# Copy SOURCE to DEST, or multiple SOURCE(s) to DIRECTORY +# +# ruby -run -e cp -- [OPTION] SOURCE DEST +# +# -p preserve file attributes if possible +# -r copy recursively +# -v verbose +# + +def cp + setup("pr") do |argv, options| + cmd = "cp" + cmd += "_r" if options.delete :r + options[:preserve] = true if options.delete :p + dest = argv.pop + argv = argv[0] if argv.size == 1 + FileUtils.send cmd, argv, dest, options + end +end + +## +# Create a link to the specified TARGET with LINK_NAME. 
+# +# ruby -run -e ln -- [OPTION] TARGET LINK_NAME +# +# -s make symbolic links instead of hard links +# -f remove existing destination files +# -v verbose +# + +def ln + setup("sf") do |argv, options| + cmd = "ln" + cmd += "_s" if options.delete :s + options[:force] = true if options.delete :f + dest = argv.pop + argv = argv[0] if argv.size == 1 + FileUtils.send cmd, argv, dest, options + end +end + +## +# Rename SOURCE to DEST, or move SOURCE(s) to DIRECTORY. +# +# ruby -run -e mv -- [OPTION] SOURCE DEST +# +# -v verbose +# + +def mv + setup do |argv, options| + dest = argv.pop + argv = argv[0] if argv.size == 1 + FileUtils.mv argv, dest, options + end +end + +## +# Remove the FILE +# +# ruby -run -e rm -- [OPTION] FILE +# +# -f ignore nonexistent files +# -r remove the contents of directories recursively +# -v verbose +# + +def rm + setup("fr") do |argv, options| + cmd = "rm" + cmd += "_r" if options.delete :r + options[:force] = true if options.delete :f + FileUtils.send cmd, argv, options + end +end + +## +# Create the DIR, if they do not already exist. +# +# ruby -run -e mkdir -- [OPTION] DIR +# +# -p no error if existing, make parent directories as needed +# -v verbose +# + +def mkdir + setup("p") do |argv, options| + cmd = "mkdir" + cmd += "_p" if options.delete :p + FileUtils.send cmd, argv, options + end +end + +## +# Remove the DIR. +# +# ruby -run -e rmdir -- [OPTION] DIR +# +# -p remove DIRECTORY and its ancestors. +# -v verbose +# + +def rmdir + setup("p") do |argv, options| + options[:parents] = true if options.delete :p + FileUtils.rmdir argv, options + end +end + +## +# Copy SOURCE to DEST. +# +# ruby -run -e install -- [OPTION] SOURCE DEST +# +# -p apply access/modification times of SOURCE files to +# corresponding destination files +# -m set permission mode (as in chmod), instead of 0755 +# -v verbose +# + +def install + setup("pm:") do |argv, options| + options[:mode] = (mode = options.delete :m) ? 
mode.oct : 0755 + options[:preserve] = true if options.delete :p + dest = argv.pop + argv = argv[0] if argv.size == 1 + FileUtils.install argv, dest, options + end +end + +## +# Change the mode of each FILE to OCTAL-MODE. +# +# ruby -run -e chmod -- [OPTION] OCTAL-MODE FILE +# +# -v verbose +# + +def chmod + setup do |argv, options| + mode = argv.shift.oct + FileUtils.chmod mode, argv, options + end +end + +## +# Update the access and modification times of each FILE to the current time. +# +# ruby -run -e touch -- [OPTION] FILE +# +# -v verbose +# + +def touch + setup do |argv, options| + FileUtils.touch argv, options + end +end + +## +# Wait until the file becomes writable. +# +# ruby -run -e wait_writable -- [OPTION] FILE +# +# -n RETRY count to retry +# -w SEC each wait time in seconds +# -v verbose +# + +def wait_writable + setup("n:w:v") do |argv, options| + verbose = options[:verbose] + n = options[:n] and n = Integer(n) + wait = (wait = options[:w]) ? Float(wait) : 0.2 + argv.each do |file| + begin + open(file, "r+b") + rescue Errno::ENOENT + break + rescue Errno::EACCES => e + raise if n and (n -= 1) <= 0 + puts e + STDOUT.flush + sleep wait + retry + end + end + end +end + +## +# Create makefile using mkmf. 
+# +# ruby -run -e mkmf -- [OPTION] EXTNAME [OPTION] +# +# -d ARGS run dir_config +# -h ARGS run have_header +# -l ARGS run have_library +# -f ARGS run have_func +# -v ARGS run have_var +# -t ARGS run have_type +# -m ARGS run have_macro +# -c ARGS run have_const +# --vendor install to vendor_ruby +# + +def mkmf + setup("d:h:l:f:v:t:m:c:", "vendor") do |argv, options| + require 'mkmf' + opt = options[:d] and opt.split(/:/).each {|n| dir_config(*n.split(/,/))} + opt = options[:h] and opt.split(/:/).each {|n| have_header(*n.split(/,/))} + opt = options[:l] and opt.split(/:/).each {|n| have_library(*n.split(/,/))} + opt = options[:f] and opt.split(/:/).each {|n| have_func(*n.split(/,/))} + opt = options[:v] and opt.split(/:/).each {|n| have_var(*n.split(/,/))} + opt = options[:t] and opt.split(/:/).each {|n| have_type(*n.split(/,/))} + opt = options[:m] and opt.split(/:/).each {|n| have_macro(*n.split(/,/))} + opt = options[:c] and opt.split(/:/).each {|n| have_const(*n.split(/,/))} + $configure_args["--vendor"] = true if options[:vendor] + create_makefile(*argv) + end +end + +## +# Display help message. +# +# ruby -run -e help [COMMAND] +# + +def help + setup do |argv,| + all = argv.empty? + open(__FILE__) do |me| + while me.gets("##\n") + if help = me.gets("\n\n") + if all or argv.delete help[/-e \w+/].sub(/-e /, "") + print help.gsub(/^# ?/, "") + end + end + end + end + end +end diff --git a/lib/uri.rb b/lib/uri.rb new file mode 100644 index 0000000..e354612 --- /dev/null +++ b/lib/uri.rb @@ -0,0 +1,29 @@ +# +# URI support for Ruby +# +# Author:: Akira Yamada +# Documentation:: Akira Yamada , Dmitry V. Sabanin +# License:: +# Copyright (c) 2001 akira yamada +# You can redistribute it and/or modify it under the same term as Ruby. 
+# Revision:: $Id: uri.rb 13772 2007-10-25 00:53:34Z akira $ +# +# See URI for documentation +# + +module URI + # :stopdoc: + VERSION_CODE = '000911'.freeze + VERSION = VERSION_CODE.scan(/../).collect{|n| n.to_i}.join('.').freeze + # :startdoc: + +end + +require 'uri/common' +require 'uri/generic' +require 'uri/ftp' +require 'uri/http' +require 'uri/https' +require 'uri/ldap' +require 'uri/ldaps' +require 'uri/mailto' diff --git a/lib/uri/common.rb b/lib/uri/common.rb new file mode 100644 index 0000000..5853e9c --- /dev/null +++ b/lib/uri/common.rb @@ -0,0 +1,727 @@ +# = uri/common.rb +# +# Author:: Akira Yamada +# Revision:: $Id: common.rb 22760 2009-03-04 09:21:12Z yugui $ +# License:: +# You can redistribute it and/or modify it under the same term as Ruby. +# + +module URI + module REGEXP + # + # Patterns used to parse URI's + # + module PATTERN + # :stopdoc: + + # RFC 2396 (URI Generic Syntax) + # RFC 2732 (IPv6 Literal Addresses in URL's) + # RFC 2373 (IPv6 Addressing Architecture) + + # alpha = lowalpha | upalpha + ALPHA = "a-zA-Z" + # alphanum = alpha | digit + ALNUM = "#{ALPHA}\\d" + + # hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | + # "a" | "b" | "c" | "d" | "e" | "f" + HEX = "a-fA-F\\d" + # escaped = "%" hex hex + ESCAPED = "%[#{HEX}]{2}" + # mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | + # "(" | ")" + # unreserved = alphanum | mark + UNRESERVED = "-_.!~*'()#{ALNUM}" + # reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | + # "$" | "," + # reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | + # "$" | "," | "[" | "]" (RFC 2732) + RESERVED = ";/?:@&=+$,\\[\\]" + + # domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum + DOMLABEL = "(?:[#{ALNUM}](?:[-#{ALNUM}]*[#{ALNUM}])?)" + # toplabel = alpha | alpha *( alphanum | "-" ) alphanum + TOPLABEL = "(?:[#{ALPHA}](?:[-#{ALNUM}]*[#{ALNUM}])?)" + # hostname = *( domainlabel "." ) toplabel [ "." ] + HOSTNAME = "(?:#{DOMLABEL}\\.)*#{TOPLABEL}\\.?" 
+ + # :startdoc: + end # PATTERN + + # :startdoc: + end # REGEXP + + class Parser + include REGEXP + + # + # == Synopsis + # + # URI::Parser.new([opts]) + # + # == Args + # + # The constructor accepts a hash as options for parser. + # Keys of options are pattern names of URI components + # and values of options are pattern strings. + # The constructor generates a set of regexps for parsing URIs. + # + # You can use the following keys: + # + # * :ESCAPED (URI::PATTERN::ESCAPED by default) + # * :UNRESERVED (URI::PATTERN::UNRESERVED by default) + # * :DOMLABEL (URI::PATTERN::DOMLABEL by default) + # * :TOPLABEL (URI::PATTERN::TOPLABEL by default) + # * :HOSTNAME (URI::PATTERN::HOSTNAME by default) + # + # == Examples + # + # p = URI::Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})") + # u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP URL:http://example.jp/%uABCD> + # URI.parse(u.to_s) #=> raises URI::InvalidURIError + # + # s = "http://example.com/ABCD" + # u1 = p.parse(s) #=> #<URI::HTTP URL:http://example.com/ABCD> + # u2 = URI.parse(s) #=> #<URI::HTTP URL:http://example.com/ABCD> + # u1 == u2 #=> true + # u1.eql?(u2) #=> false + # + def initialize(opts = {}) + @pattern = initialize_pattern(opts) + @pattern.each_value {|v| v.freeze} + @pattern.freeze + + @regexp = initialize_regexp(@pattern) + @regexp.each_value {|v| v.freeze} + @regexp.freeze + end + attr_reader :pattern, :regexp + + def split(uri) + case uri + when '' + # null uri + + when @regexp[:ABS_URI] + scheme, opaque, userinfo, host, port, + registry, path, query, fragment = $~[1..-1] + + # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + + # absoluteURI = scheme ":" ( hier_part | opaque_part ) + # hier_part = ( net_path | abs_path ) [ "?" 
# Parses +uri+ and returns a URI object built from its components.
#
# The string is decomposed with #split.  If the scheme, compared in upper
# case, is a key of URI.scheme_list, the class registered there is
# instantiated; otherwise Generic is used.  This parser is handed to the
# new object as its last constructor argument.
def parse(uri)
  scheme, userinfo, host, port,
    registry, path, opaque, query, fragment = self.split(uri)

  key = scheme && scheme.upcase
  klass =
    if key && URI.scheme_list.include?(key)
      URI.scheme_list[key]
    else
      Generic
    end

  klass.new(scheme, userinfo, host, port,
            registry, path, opaque, query,
            fragment, self)
end
# Percent-encodes every part of +str+ matched by +unsafe+.
#
# +str+::    String to escape.
# +unsafe+:: Regexp selecting what to replace (defaults to the parser's
#            :UNSAFE regexp).  Anything that is not a Regexp is treated
#            as a set of characters and turned into a character class.
#
# Each byte of a match becomes "%XX" (upper-case hex).  The result is
# tagged as US-ASCII.
def escape(str, unsafe = @regexp[:UNSAFE])
  matcher =
    if unsafe.kind_of?(Regexp)
      unsafe
    else
      # perhaps unsafe is String object
      Regexp.new("[#{Regexp.quote(unsafe)}]", false)
    end

  encoded = str.gsub(matcher) do |match|
    match.each_byte.map { |byte| sprintf('%%%02X', byte) }.join
  end
  encoded.force_encoding(Encoding::US_ASCII)
end
] + unless hostname + ret[:HOSTNAME] = hostname = "(?:#{domlabel}\\.)*#{toplabel}\\.?" + end + + # RFC 2373, APPENDIX B: + # IPv6address = hexpart [ ":" IPv4address ] + # IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT + # hexpart = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ] + # hexseq = hex4 *( ":" hex4) + # hex4 = 1*4HEXDIG + # + # XXX: This definition has a flaw. "::" + IPv4address must be + # allowed too. Here is a replacement. + # + # IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT + ret[:IPV4ADDR] = ipv4addr = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}" + # hex4 = 1*4HEXDIG + hex4 = "[#{PATTERN::HEX}]{1,4}" + # lastpart = hex4 | IPv4address + lastpart = "(?:#{hex4}|#{ipv4addr})" + # hexseq1 = *( hex4 ":" ) hex4 + hexseq1 = "(?:#{hex4}:)*#{hex4}" + # hexseq2 = *( hex4 ":" ) lastpart + hexseq2 = "(?:#{hex4}:)*#{lastpart}" + # IPv6address = hexseq2 | [ hexseq1 ] "::" [ hexseq2 ] + ret[:IPV6ADDR] = ipv6addr = "(?:#{hexseq2}|(?:#{hexseq1})?::(?:#{hexseq2})?)" + + # IPv6prefix = ( hexseq1 | [ hexseq1 ] "::" [ hexseq1 ] ) "/" 1*2DIGIT + # unused + + # ipv6reference = "[" IPv6address "]" (RFC 2732) + ret[:IPV6REF] = ipv6ref = "\\[#{ipv6addr}\\]" + + # host = hostname | IPv4address + # host = hostname | IPv4address | IPv6reference (RFC 2732) + ret[:HOST] = host = "(?:#{hostname}|#{ipv4addr}|#{ipv6ref})" + # port = *digit + port = '\d*' + # hostport = host [ ":" port ] + ret[:HOSTPORT] = hostport = "#{host}(?::#{port})?" 
+ + # userinfo = *( unreserved | escaped | + # ";" | ":" | "&" | "=" | "+" | "$" | "," ) + ret[:USERINFO] = userinfo = "(?:[#{unreserved};:&=+$,]|#{escaped})*" + + # pchar = unreserved | escaped | + # ":" | "@" | "&" | "=" | "+" | "$" | "," + pchar = "(?:[#{unreserved}:@&=+$,]|#{escaped})" + # param = *pchar + param = "#{pchar}*" + # segment = *pchar *( ";" param ) + segment = "#{pchar}*(?:;#{param})*" + # path_segments = segment *( "/" segment ) + ret[:PATH_SEGMENTS] = path_segments = "#{segment}(?:/#{segment})*" + + # server = [ [ userinfo "@" ] hostport ] + server = "(?:#{userinfo}@)?#{hostport}" + # reg_name = 1*( unreserved | escaped | "$" | "," | + # ";" | ":" | "@" | "&" | "=" | "+" ) + ret[:REG_NAME] = reg_name = "(?:[#{unreserved}$,;:@&=+]|#{escaped})+" + # authority = server | reg_name + authority = "(?:#{server}|#{reg_name})" + + # rel_segment = 1*( unreserved | escaped | + # ";" | "@" | "&" | "=" | "+" | "$" | "," ) + ret[:REL_SEGMENT] = rel_segment = "(?:[#{unreserved};@&=+$,]|#{escaped})+" + + # scheme = alpha *( alpha | digit | "+" | "-" | "." ) + ret[:SCHEME] = scheme = "[#{PATTERN::ALPHA}][-+.#{PATTERN::ALPHA}\\d]*" + + # abs_path = "/" path_segments + ret[:ABS_PATH] = abs_path = "/#{path_segments}" + # rel_path = rel_segment [ abs_path ] + ret[:REL_PATH] = rel_path = "#{rel_segment}(?:#{abs_path})?" + # net_path = "//" authority [ abs_path ] + ret[:NET_PATH] = net_path = "//#{authority}(?:#{abs_path})?" + + # hier_part = ( net_path | abs_path ) [ "?" query ] + ret[:HIER_PART] = hier_part = "(?:#{net_path}|#{abs_path})(?:\\?(?:#{query}))?" + # opaque_part = uric_no_slash *uric + ret[:OPAQUE_PART] = opaque_part = "#{uric_no_slash}#{uric}*" + + # absoluteURI = scheme ":" ( hier_part | opaque_part ) + ret[:ABS_URI] = abs_uri = "#{scheme}:(?:#{hier_part}|#{opaque_part})" + # relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] + ret[:REL_URI] = rel_uri = "(?:#{net_path}|#{abs_path}|#{rel_path})(?:\\?#{query})?" 
# Compiles the pattern strings in +pattern+ into the Regexp objects the
# parser works with.
#
# +pattern+:: Hash of pattern strings keyed by component name, as built by
#             initialize_pattern.
#
# Returns a Hash of Regexp objects keyed by symbol: whole-URI matchers
# for split/extract, the escape/unescape matchers, and one anchored
# matcher per component for the validators in Generic.
def initialize_regexp(pattern)
  ret = {}

  # for URI::split
  ret[:ABS_URI] = Regexp.new('^' + pattern[:X_ABS_URI] + '$', Regexp::EXTENDED)
  ret[:REL_URI] = Regexp.new('^' + pattern[:X_REL_URI] + '$', Regexp::EXTENDED)

  # for URI::extract
  ret[:URI_REF]     = Regexp.new(pattern[:URI_REF])
  ret[:ABS_URI_REF] = Regexp.new(pattern[:X_ABS_URI], Regexp::EXTENDED)
  ret[:REL_URI_REF] = Regexp.new(pattern[:X_REL_URI], Regexp::EXTENDED)

  # for URI::escape/unescape
  ret[:ESCAPED] = Regexp.new(pattern[:ESCAPED])
  ret[:UNSAFE]  = Regexp.new("[^#{pattern[:UNRESERVED]}#{pattern[:RESERVED]}]")

  # for Generic#initialize
  ret[:SCHEME]   = Regexp.new("^#{pattern[:SCHEME]}$")
  ret[:USERINFO] = Regexp.new("^#{pattern[:USERINFO]}$")
  ret[:HOST]     = Regexp.new("^#{pattern[:HOST]}$")
  # initialize_pattern keeps its port pattern in a local variable and does
  # not store it under :PORT.  Interpolating the missing entry would give
  # /^$/, which rejects every non-empty port string, so fall back to the
  # RFC 2396 definition (port = *digit) when the entry is absent.
  port = pattern[:PORT] || '\d*'
  ret[:PORT]     = Regexp.new("^#{port}$")
  ret[:OPAQUE]   = Regexp.new("^#{pattern[:OPAQUE_PART]}$")
  ret[:REGISTRY] = Regexp.new("^#{pattern[:REG_NAME]}$")
  ret[:ABS_PATH] = Regexp.new("^#{pattern[:ABS_PATH]}$")
  ret[:REL_PATH] = Regexp.new("^#{pattern[:REL_PATH]}$")
  ret[:QUERY]    = Regexp.new("^#{pattern[:QUERY]}$")
  ret[:FRAGMENT] = Regexp.new("^#{pattern[:FRAGMENT]}$")

  ret
end
#
# == Synopsis
#
#   URI::split(uri)
#
# == Args
#
# +uri+::
#   String with URI.
#
# == Description
#
# Splits the string into the following parts and returns them as an
# array, in this order:
#
#   * Scheme
#   * Userinfo
#   * Host
#   * Port
#   * Registry
#   * Path
#   * Opaque
#   * Query
#   * Fragment
#
# The work is delegated to DEFAULT_PARSER.
#
# == Usage
#
#   require 'uri'
#
#   p URI.split("http://www.ruby-lang.org/")
#   # => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil]
#
def self.split(uri)
  DEFAULT_PARSER.split(uri)
end
#
# == Synopsis
#
#   URI::extract(str[, schemes][,&blk])
#
# == Args
#
# +str+::
#   String to extract URIs from.
# +schemes+::
#   Limit URI matching to specific schemes.
#
# == Description
#
# Extracts URIs from a string.  If a block is given, it is called with
# each matched URI and nil is returned; otherwise an array with the
# matches is returned.  The work is delegated to DEFAULT_PARSER.
#
# == Usage
#
#   require "uri"
#
#   URI.extract("text here http://foo.example.org/bla and here mailto:test@example.com and here also.")
#   # => ["http://foo.example.org/bla", "mailto:test@example.com"]
#
def self.extract(str, schemes = nil, &block)
  DEFAULT_PARSER.extract(str, schemes, &block)
end
# Builds an FTP URI from individual values.
#
# +user+, +password+:: Login information, passed on as userinfo.
# +host+, +port+::     Server location.
# +path+::             URL path; no escaping is applied here.
# +typecode+::         nil, "" (treated as nil) or a member of TYPECODE.
# +arg_check+::        Whether the components are validated (default true).
#
# Raises ArgumentError when +typecode+ is not a member of TYPECODE.
def self.new2(user, password, host, port, path,
              typecode = nil, arg_check = true)
  # nil is the default and must not be sent #size; only an empty string
  # needs normalising to "no typecode".
  typecode = nil if typecode && typecode.size == 0
  if typecode && !TYPECODE.include?(typecode)
    raise ArgumentError,
      "bad typecode is specified: #{typecode}"
  end

  # do escape

  # The constructor takes (..., fragment, parser, arg_check).  The parser
  # has to be given explicitly, otherwise arg_check lands in the parser
  # slot and the check flag keeps its default.
  self.new('ftp',
           [user, password],
           host, port, nil,
           typecode ? path + TYPECODE_PREFIX + typecode : path,
           nil, nil, nil,
           DEFAULT_PARSER, arg_check)
end
#
# The components accepted are +userinfo+, +host+, +port+, +path+ and
# +typecode+.
#
# The components should be provided either as an Array, or as a Hash
# with keys formed by preceding the component names with a colon.
#
# If an Array is used, the components must be passed in the order
# [userinfo, host, port, path, typecode]
#
# If the path supplied is absolute, it will be escaped in order to
# make it absolute in the URI.  A missing (nil) path is treated as an
# empty one.  The argument itself is not modified.  Examples:
#
#     require 'uri'
#
#     uri = URI::FTP.build(['user:password', 'ftp.example.com', nil,
#       '/path/file.zip', 'i'])
#     puts uri.to_s  ->  ftp://user:password@ftp.example.com/%2Fpath/file.zip;type=i
#
#     uri2 = URI::FTP.build({:host => 'ftp.example.com',
#       :path => 'ruby/src'})
#     puts uri2.to_s  ->  ftp://ftp.example.com/ruby/src
#
def self.build(args)

  # Work on a shallow copy: rewriting the path in the caller's own
  # Array/Hash would escape it a second time if the same object were
  # passed to build again.
  args = args.dup

  # Fix the incoming path to be generic URL syntax
  # FTP path  ->  URL path
  # foo/bar       /foo/bar
  # /foo/bar      /%2Ffoo/bar
  #
  if args.kind_of?(Array)
    args[3] = '/' + args[3].to_s.sub(/^\//, '%2F')
  else
    args[:path] = '/' + args[:path].to_s.sub(/^\//, '%2F')
  end

  tmp = Util::make_components_hash(self, args)

  if tmp[:typecode]
    # A bare one-letter typecode gets the ";type=" prefix before it is
    # appended to the path.
    if tmp[:typecode].size == 1
      tmp[:typecode] = TYPECODE_PREFIX + tmp[:typecode]
    end
    tmp[:path] << tmp[:typecode]
  end

  return super(tmp)
end
#
def initialize(*arg)
  super(*arg)
  @typecode = nil
  # The generic constructor leaves @path nil when an opaque part is
  # given, so guard the lookup.
  tmp = @path && @path.index(TYPECODE_PREFIX)
  if tmp
    typecode = @path[tmp + TYPECODE_PREFIX.size..-1]
    # [0, tmp] rather than [0..tmp - 1]: when the suffix starts at index 0
    # the latter becomes [0..-1] and would keep the whole string.
    self.set_path(@path[0, tmp])

    # arg_check is the 11th positional argument of the generic
    # constructor.  Looking at the last argument instead would mistake a
    # parser or a fragment for the flag.
    if arg[10]
      self.typecode = typecode
    else
      self.set_typecode(typecode)
    end
  end
end
attr_reader :typecode
#
# == Synopsis
#
# See #new
#
# == Description
#
# First tries to create a new instance using build.  If that raises
# URI::InvalidComponentError, every String component is escaped with
# DEFAULT_PARSER.escape and build is tried once more.  Components that
# are not Strings (nil, an integer port, ...) are passed through
# unchanged.
#
def self.build2(args)
  begin
    return self.build(args)
  rescue InvalidComponentError
    # This is a class method, so there is no per-instance @parser here;
    # use the same default parser that build hands to the constructor.
    if args.kind_of?(Array)
      return self.build(args.collect{|x|
        if x.kind_of?(String)
          DEFAULT_PARSER.escape(x)
        else
          x
        end
      })
    elsif args.kind_of?(Hash)
      tmp = {}
      args.each do |key, value|
        tmp[key] = if value.kind_of?(String)
            DEFAULT_PARSER.escape(value)
          else
            value
          end
      end
      return self.build(tmp)
    end
  end
end
#
def self.build(args)
  if args.kind_of?(Array) &&
      args.size == ::URI::Generic::COMPONENT.size
    # Copy, because the parser and the check flag are appended below and
    # must not end up in the caller's array.
    tmp = args.dup
  elsif args.kind_of?(Hash)
    tmp = ::URI::Generic::COMPONENT.collect do |c|
      if args.include?(c)
        args[c]
      else
        nil
      end
    end
  else
    # self is already the class here; self.class would be Class, which
    # has no component method.
    raise ArgumentError,
      "expected Array of or Hash of components of #{self} (#{self.component.join(', ')})"
  end

  # Constructor tail: parser, then arg_check = true (validate components).
  tmp << DEFAULT_PARSER
  tmp << true
  return self.new(*tmp)
end
#
def initialize(scheme,
               userinfo, host, port, registry,
               path, opaque,
               query,
               fragment,
               parser = DEFAULT_PARSER,
               arg_check = false)
  # Start from a blank state so the validators below see only what has
  # been assigned so far.
  @scheme = nil
  @user = nil
  @password = nil
  @host = nil
  @port = nil
  @path = nil
  @query = nil
  @opaque = nil
  @registry = nil
  @fragment = nil
  @parser = parser

  if arg_check
    # Validating setters.  The check_* methods consult the instance
    # variables assigned by the earlier lines (for example check_opaque
    # rejects a value once @path is set), so the order is significant.
    self.scheme = scheme
    self.userinfo = userinfo
    self.host = host
    self.port = port
    self.path = path
    self.query = query
    self.opaque = opaque
    self.registry = registry
    self.fragment = fragment
  else
    # Raw setters: the values are stored without validation.
    self.set_scheme(scheme)
    self.set_userinfo(userinfo)
    self.set_host(host)
    self.set_port(port)
    self.set_path(path)
    self.set_query(query)
    self.set_opaque(opaque)
    self.set_registry(registry)
    self.set_fragment(fragment)
  end
  # A registry part is only allowed for classes whose USE_REGISTRY is true.
  if @registry && !self.class.use_registry
    raise InvalidURIError,
      "the scheme #{@scheme} does not accept registry part: #{@registry} (or bad hostname?)"
  end

  @scheme.freeze if @scheme
  self.set_path('') if !@path && !@opaque # (see RFC2396 Section 5.2)
  # Fill in the class's default port when none was given.
  self.set_port(self.default_port) if self.default_port && !@port
end
attr_reader :scheme
attr_reader :host
attr_reader :port
attr_reader :registry
attr_reader :path
attr_reader :query
attr_reader :opaque
attr_reader :fragment
attr_reader :parser
# Validates a user component.
#
# Raises InvalidURIError when a registry or opaque part is already set.
# A nil/false +v+ is handed back unchanged; any other value must match
# the parser's :USERINFO regexp or InvalidComponentError is raised.
# Returns true for an accepted value.
def check_user(v)
  raise InvalidURIError, "can not set user with registry or opaque" if @registry || @opaque

  return v unless v

  unless @parser.regexp[:USERINFO] =~ v
    raise InvalidComponentError,
      "bad component(expected userinfo component or user component): #{v}"
  end

  true
end
private :check_user
# Validates a port component.
#
# A nil/false +v+ is handed back unchanged.  Otherwise raises
# InvalidURIError when a registry or opaque part is already set, and
# InvalidComponentError when +v+ is neither an Integer nor something
# matching the parser's :PORT regexp.  Returns true for an accepted value.
def check_port(v)
  return v unless v

  if @registry || @opaque
    raise InvalidURIError,
      "can not set port with registry or opaque"
  # Integer rather than Fixnum: Fixnum is an Integer subclass on old
  # rubies and no longer exists on current ones, so this test works on
  # both.
  elsif !v.kind_of?(Integer) && @parser.regexp[:PORT] !~ v
    raise InvalidComponentError,
      "bad component(expected port component): #{v}"
  end

  return true
end
private :check_port
# Validates a path component.
#
# Raises InvalidURIError when a path is given while an opaque part is
# set (absoluteURI = scheme ":" ( hier_part | opaque_part )).
# A nil or empty path is always accepted.  With a scheme the path must
# match :ABS_PATH; without one it may match :ABS_PATH or :REL_PATH.
# Raises InvalidComponentError otherwise.  Returns true when accepted.
def check_path(v)
  raise InvalidURIError, "path conflicts with opaque" if v && @opaque

  if v && v != ''
    patterns = @parser.regexp
    if @scheme
      unless patterns[:ABS_PATH] =~ v
        raise InvalidComponentError,
          "bad component(expected absolute path component): #{v}"
      end
    elsif !(patterns[:ABS_PATH] =~ v || patterns[:REL_PATH] =~ v)
      raise InvalidComponentError,
        "bad component(expected relative path component): #{v}"
    end
  end

  true
end
private :check_path
# Validates an opaque component.
#
# A nil/false +v+ is handed back unchanged.  An opaque part cannot
# coexist with the hierarchical parts
# (absoluteURI = scheme ":" ( hier_part | opaque_part )), so
# InvalidURIError is raised when host, port, user or path is set.
# Raises InvalidComponentError when +v+ does not match the parser's
# :OPAQUE regexp.  Returns true for an accepted value.
def check_opaque(v)
  return v unless v

  if [@host, @port, @user, @path].any? # userinfo = @user + ':' + @password
    raise InvalidURIError,
      "can not set opaque with host, port, userinfo or path"
  end

  unless @parser.regexp[:OPAQUE] =~ v
    raise InvalidComponentError,
      "bad component(expected opaque component): #{v}"
  end

  true
end
private :check_opaque
# Resolves the relative path +rel+ against the base path +base+ and
# returns the merged path as a String.  Both arguments are split into
# segment arrays with split_path and the work is done on those arrays.
def merge_path(base, rel)

  # RFC2396, Section 5.2, 5)
  # RFC2396, Section 5.2, 6)
  base_path = split_path(base)
  rel_path  = split_path(rel)

  # RFC2396, Section 5.2, 6), a)
  # Remove each ".." of the base together with the segment before it.
  # A trailing ".." first gets an empty segment appended after it.
  # NOTE(review): when ".." is the very first segment, i - 1 is -1 and
  # slice! counts from the end of the array -- confirm this is intended.
  base_path << '' if base_path.last == '..'
  while i = base_path.index('..')
    base_path.slice!(i - 1, 2)
  end

  # An empty first segment means rel began with "/": drop the whole base.
  if (first = rel_path.first) and first.empty?
    base_path.clear
    rel_path.shift
  end

  # RFC2396, Section 5.2, 6), c)
  # RFC2396, Section 5.2, 6), d)
  # A final "." or ".." gets an empty segment appended after it, then
  # every "." segment is dropped.
  rel_path.push('') if rel_path.last == '.' || rel_path.last == '..'
  rel_path.delete('.')

  # RFC2396, Section 5.2, 6), e)
  # Cancel each ".." against the ordinary segment collected just before
  # it; a ".." with nothing to cancel (or following another "..") is kept.
  tmp = []
  rel_path.each do |x|
    if x == '..' &&
        !(tmp.empty? || tmp.last == '..')
      tmp.pop
    else
      tmp << x
    end
  end

  add_trailer_slash = !tmp.empty?
  if base_path.empty?
    base_path = [''] # keep '/' for root directory
  elsif add_trailer_slash
    # Drop the last base segment before the relative segments are applied.
    base_path.pop
  end
  while x = tmp.shift
    if x == '..'
      # RFC2396, Section 4
      # a .. or . in an absolute path has no special meaning
      # (the size guard keeps at least one segment in base_path)
      base_path.pop if base_path.size > 1
    else
      # if x == '..'
      #   valid absolute (but abnormal) path "/../..."
      # else
      #   valid absolute path
      # end
      # First ordinary segment: append it and everything left, then stop.
      base_path << x
      tmp.each {|t| base_path << t}
      add_trailer_slash = false
      break
    end
  end
  # Only reached with the flag still set when tmp held nothing but "..".
  base_path.push('') if add_trailer_slash

  return base_path.join('/')
end
private :merge_path
+ # + # == Usage + # + # require 'uri' + # + # uri = URI.parse("http://my.example.com") + # p uri.merge("/main.rbx?page=1") + # # => # + # + def merge(oth) + begin + base, rel = merge0(oth) + rescue + raise $!.class, $!.message + end + + if base == rel + return base + end + + authority = rel.userinfo || rel.host || rel.port + + # RFC2396, Section 5.2, 2) + if (rel.path.nil? || rel.path.empty?) && !authority && !rel.query + base.set_fragment(rel.fragment) if rel.fragment + return base + end + + base.set_query(nil) + base.set_fragment(nil) + + # RFC2396, Section 5.2, 4) + if !authority + base.set_path(merge_path(base.path, rel.path)) if base.path && rel.path + else + # RFC2396, Section 5.2, 4) + base.set_path(rel.path) if rel.path + end + + # RFC2396, Section 5.2, 7) + base.set_userinfo(rel.userinfo) if rel.userinfo + base.set_host(rel.host) if rel.host + base.set_port(rel.port) if rel.port + base.set_query(rel.query) if rel.query + base.set_fragment(rel.fragment) if rel.fragment + + return base + end # merge + alias + merge + + # return base and rel. + # you can modify `base', but can not `rel'. + def merge0(oth) + case oth + when Generic + when String + oth = @parser.parse(oth) + else + raise ArgumentError, + "bad argument(expected URI object or URI string)" + end + + if self.relative? && oth.relative? + raise BadURIError, + "both URI are relative" + end + + if self.absolute? && oth.absolute? + #raise BadURIError, + # "both URI are absolute" + # hmm... should return oth for usability? + return oth, oth + end + + if self.absolute? + return self.dup, oth + else + return oth, oth + end + end + private :merge0 + + def route_from_path(src, dst) + # RFC2396, Section 4.2 + return '' if src == dst + + src_path = split_path(src) + dst_path = split_path(dst) + + # hmm... dst has abnormal absolute path, + # like "/./", "/../", "/x/../", ... 
+ if dst_path.include?('..') || + dst_path.include?('.') + return dst.dup + end + + src_path.pop + + # discard same parts + while dst_path.first == src_path.first + break if dst_path.empty? + + src_path.shift + dst_path.shift + end + + tmp = dst_path.join('/') + + # calculate + if src_path.empty? + if tmp.empty? + return './' + elsif dst_path.first.include?(':') # (see RFC2396 Section 5) + return './' + tmp + else + return tmp + end + end + + return '../' * src_path.size + tmp + end + private :route_from_path + + def route_from0(oth) + case oth + when Generic + when String + oth = @parser.parse(oth) + else + raise ArgumentError, + "bad argument(expected URI object or URI string)" + end + + if self.relative? + raise BadURIError, + "relative URI: #{self}" + end + if oth.relative? + raise BadURIError, + "relative URI: #{oth}" + end + + if self.scheme != oth.scheme + return self, self.dup + end + rel = URI::Generic.new(nil, # it is relative URI + self.userinfo, self.host, self.port, + self.registry, self.path, self.opaque, + self.query, self.fragment, @parser) + + if rel.userinfo != oth.userinfo || + rel.host.to_s.downcase != oth.host.to_s.downcase || + rel.port != oth.port + if self.userinfo.nil? && self.host.nil? + return self, self.dup + end + rel.set_port(nil) if rel.port == oth.default_port + return rel, rel + end + rel.set_userinfo(nil) + rel.set_host(nil) + rel.set_port(nil) + + if rel.path && rel.path == oth.path + rel.set_path('') + rel.set_query(nil) if rel.query == oth.query + return rel, rel + elsif rel.opaque && rel.opaque == oth.opaque + rel.set_opaque('') + rel.set_query(nil) if rel.query == oth.query + return rel, rel + end + + # you can modify `rel', but can not `oth'. 
+ return oth, rel + end + private :route_from0 + # + # == Args + # + # +oth+:: + # URI or String + # + # == Description + # + # Calculates relative path from oth to self + # + # == Usage + # + # require 'uri' + # + # uri = URI.parse('http://my.example.com/main.rbx?page=1') + # p uri.route_from('http://my.example.com') + # #=> # + # + def route_from(oth) + # you can modify `rel', but can not `oth'. + begin + oth, rel = route_from0(oth) + rescue + raise $!.class, $!.message + end + if oth == rel + return rel + end + + rel.set_path(route_from_path(oth.path, self.path)) + if rel.path == './' && self.query + # "./?foo" -> "?foo" + rel.set_path('') + end + + return rel + end + + alias - route_from + + # + # == Args + # + # +oth+:: + # URI or String + # + # == Description + # + # Calculates relative path to oth from self + # + # == Usage + # + # require 'uri' + # + # uri = URI.parse('http://my.example.com') + # p uri.route_to('http://my.example.com/main.rbx?page=1') + # #=> # + # + def route_to(oth) + case oth + when Generic + when String + oth = @parser.parse(oth) + else + raise ArgumentError, + "bad argument(expected URI object or URI string)" + end + + oth.route_from(self) + end + + # + # Returns normalized URI + # + def normalize + uri = dup + uri.normalize! + uri + end + + # + # Destructive version of #normalize + # + def normalize! + if path && path == '' + set_path('/') + end + if host && host != host.downcase + set_host(self.host.downcase) + end + end + + def path_query + str = @path + if @query + str += '?' 
+ @query + end + str + end + private :path_query + + # + # Constructs String from URI + # + def to_s + str = '' + if @scheme + str << @scheme + str << ':' + end + + if @opaque + str << @opaque + + else + if @registry + str << @registry + else + if @host + str << '//' + end + if self.userinfo + str << self.userinfo + str << '@' + end + if @host + str << @host + end + if @port && @port != self.default_port + str << ':' + str << @port.to_s + end + end + + str << path_query + end + + if @fragment + str << '#' + str << @fragment + end + + str + end + + # + # Compares to URI's + # + def ==(oth) + if self.class == oth.class + self.normalize.component_ary == oth.normalize.component_ary + else + false + end + end + + def hash + self.component_ary.hash + end + + def eql?(oth) + @parser == oth.parser && + self.component_ary.eql?(oth.component_ary) + end + +=begin + +--- URI::Generic#===(oth) + +=end +# def ===(oth) +# raise NotImplementedError +# end + +=begin +=end + def component_ary + component.collect do |x| + self.send(x) + end + end + protected :component_ary + + # == Args + # + # +components+:: + # Multiple Symbol arguments defined in URI::HTTP + # + # == Description + # + # Selects specified components from URI + # + # == Usage + # + # require 'uri' + # + # uri = URI.parse('http://myuser:mypass@my.example.com/test.rbx') + # p uri.select(:userinfo, :host, :path) + # # => ["myuser:mypass", "my.example.com", "/test.rbx"] + # + def select(*components) + components.collect do |c| + if component.include?(c) + self.send(c) + else + raise ArgumentError, + "expected of components of #{self.class} (#{self.class.component.join(', ')})" + end + end + end + + @@to_s = Kernel.instance_method(:to_s) + def inspect + @@to_s.bind(self).call.sub!(/>\z/) {" URL:#{self}>"} + end + + def coerce(oth) + case oth + when String + oth = @parser.parse(oth) + else + super + end + + return oth, self + end + end +end diff --git a/lib/uri/http.rb b/lib/uri/http.rb new file mode 100644 index 
0000000..e1aa06d --- /dev/null +++ b/lib/uri/http.rb @@ -0,0 +1,100 @@ +# +# = uri/http.rb +# +# Author:: Akira Yamada +# License:: You can redistribute it and/or modify it under the same term as Ruby. +# Revision:: $Id: http.rb 11708 2007-02-12 23:01:19Z shyouhei $ +# + +require 'uri/generic' + +module URI + + # + # The syntax of HTTP URIs is defined in RFC1738 section 3.3. + # + # Note that the Ruby URI library allows HTTP URLs containing usernames and + # passwords. This is not legal as per the RFC, but used to be + # supported in Internet Explorer 5 and 6, before the MS04-004 security + # update. See . + # + class HTTP < Generic + DEFAULT_PORT = 80 + + COMPONENT = [ + :scheme, + :userinfo, :host, :port, + :path, + :query, + :fragment + ].freeze + + # + # == Description + # + # Create a new URI::HTTP object from components, with syntax checking. + # + # The components accepted are userinfo, host, port, path, query and + # fragment. + # + # The components should be provided either as an Array, or as a Hash + # with keys formed by preceding the component names with a colon. + # + # If an Array is used, the components must be passed in the order + # [userinfo, host, port, path, query, fragment]. + # + # Example: + # + # newuri = URI::HTTP.build({:host => 'www.example.com', + # :path> => '/foo/bar'}) + # + # newuri = URI::HTTP.build([nil, "www.example.com", nil, "/path", + # "query", 'fragment']) + # + # Currently, if passed userinfo components this method generates + # invalid HTTP URIs as per RFC 1738. + # + def self.build(args) + tmp = Util::make_components_hash(self, args) + return super(tmp) + end + + # + # == Description + # + # Create a new URI::HTTP object from generic URI components as per + # RFC 2396. No HTTP-specific syntax checking (as per RFC 1738) is + # performed. + # + # Arguments are +scheme+, +userinfo+, +host+, +port+, +registry+, +path+, + # +opaque+, +query+ and +fragment+, in that order. 
+ # + # Example: + # + # uri = URI::HTTP.new(['http', nil, "www.example.com", nil, "/path", + # "query", 'fragment']) + # + def initialize(*arg) + super(*arg) + end + + # + # == Description + # + # Returns the full path for an HTTP request, as required by Net::HTTP::Get. + # + # If the URI contains a query, the full path is URI#path + '?' + URI#query. + # Otherwise, the path is simply URI#path. + # + def request_uri + r = path_query + if r[0] != ?/ + r = '/' + r + end + + r + end + end + + @@schemes['HTTP'] = HTTP +end diff --git a/lib/uri/https.rb b/lib/uri/https.rb new file mode 100644 index 0000000..c8e9e35 --- /dev/null +++ b/lib/uri/https.rb @@ -0,0 +1,20 @@ +# +# = uri/https.rb +# +# Author:: Akira Yamada +# License:: You can redistribute it and/or modify it under the same term as Ruby. +# Revision:: $Id: https.rb 11708 2007-02-12 23:01:19Z shyouhei $ +# + +require 'uri/http' + +module URI + + # The default port for HTTPS URIs is 443, and the scheme is 'https:' rather + # than 'http:'. Other than that, HTTPS URIs are identical to HTTP URIs; + # see URI::HTTP. + class HTTPS < HTTP + DEFAULT_PORT = 443 + end + @@schemes['HTTPS'] = HTTPS +end diff --git a/lib/uri/ldap.rb b/lib/uri/ldap.rb new file mode 100644 index 0000000..7c14ff8 --- /dev/null +++ b/lib/uri/ldap.rb @@ -0,0 +1,190 @@ +# +# = uri/ldap.rb +# +# Author:: +# Takaaki Tateishi +# Akira Yamada +# License:: +# URI::LDAP is copyrighted free software by Takaaki Tateishi and Akira Yamada. +# You can redistribute it and/or modify it under the same term as Ruby. 
+# Revision:: $Id: ldap.rb 11708 2007-02-12 23:01:19Z shyouhei $ +# + +require 'uri/generic' + +module URI + + # + # LDAP URI SCHEMA (described in RFC2255) + # ldap:///[?[?[?[?]]]] + # + class LDAP < Generic + + DEFAULT_PORT = 389 + + COMPONENT = [ + :scheme, + :host, :port, + :dn, + :attributes, + :scope, + :filter, + :extensions, + ].freeze + + SCOPE = [ + SCOPE_ONE = 'one', + SCOPE_SUB = 'sub', + SCOPE_BASE = 'base', + ].freeze + + def self.build(args) + tmp = Util::make_components_hash(self, args) + + if tmp[:dn] + tmp[:path] = tmp[:dn] + end + + query = [] + [:extensions, :filter, :scope, :attributes].collect do |x| + next if !tmp[x] && query.size == 0 + query.unshift(tmp[x]) + end + + tmp[:query] = query.join('?') + + return super(tmp) + end + + def initialize(*arg) + super(*arg) + + if @fragment + raise InvalidURIError, 'bad LDAP URL' + end + + parse_dn + parse_query + end + + def parse_dn + @dn = @path[1..-1] + end + private :parse_dn + + def parse_query + @attributes = nil + @scope = nil + @filter = nil + @extensions = nil + + if @query + attrs, scope, filter, extensions = @query.split('?') + + @attributes = attrs if attrs && attrs.size > 0 + @scope = scope if scope && scope.size > 0 + @filter = filter if filter && filter.size > 0 + @extensions = extensions if extensions && extensions.size > 0 + end + end + private :parse_query + + def build_path_query + @path = '/' + @dn + + query = [] + [@extensions, @filter, @scope, @attributes].each do |x| + next if !x && query.size == 0 + query.unshift(x) + end + @query = query.join('?') + end + private :build_path_query + + def dn + @dn + end + + def set_dn(val) + @dn = val + build_path_query + @dn + end + protected :set_dn + + def dn=(val) + set_dn(val) + val + end + + def attributes + @attributes + end + + def set_attributes(val) + @attributes = val + build_path_query + @attributes + end + protected :set_attributes + + def attributes=(val) + set_attributes(val) + val + end + + def scope + @scope + end + + def 
set_scope(val) + @scope = val + build_path_query + @scope + end + protected :set_scope + + def scope=(val) + set_scope(val) + val + end + + def filter + @filter + end + + def set_filter(val) + @filter = val + build_path_query + @filter + end + protected :set_filter + + def filter=(val) + set_filter(val) + val + end + + def extensions + @extensions + end + + def set_extensions(val) + @extensions = val + build_path_query + @extensions + end + protected :set_extensions + + def extensions=(val) + set_extensions(val) + val + end + + def hierarchical? + false + end + end + + @@schemes['LDAP'] = LDAP +end diff --git a/lib/uri/ldaps.rb b/lib/uri/ldaps.rb new file mode 100644 index 0000000..6da3331 --- /dev/null +++ b/lib/uri/ldaps.rb @@ -0,0 +1,12 @@ +require 'uri/ldap' + +module URI + + # The default port for LDAPS URIs is 636, and the scheme is 'ldaps:' rather + # than 'ldap:'. Other than that, LDAPS URIs are identical to LDAP URIs; + # see URI::LDAP. + class LDAPS < LDAP + DEFAULT_PORT = 636 + end + @@schemes['LDAPS'] = LDAPS +end diff --git a/lib/uri/mailto.rb b/lib/uri/mailto.rb new file mode 100644 index 0000000..ed3d77e --- /dev/null +++ b/lib/uri/mailto.rb @@ -0,0 +1,266 @@ +# +# = uri/mailto.rb +# +# Author:: Akira Yamada +# License:: You can redistribute it and/or modify it under the same term as Ruby. +# Revision:: $Id: mailto.rb 19495 2008-09-23 18:16:08Z drbrain $ +# + +require 'uri/generic' + +module URI + + # + # RFC2368, The mailto URL scheme + # + class MailTo < Generic + include REGEXP + + DEFAULT_PORT = nil + + COMPONENT = [ :scheme, :to, :headers ].freeze + + # :stopdoc: + # "hname" and "hvalue" are encodings of an RFC 822 header name and + # value, respectively. As with "to", all URL reserved characters must + # be encoded. + # + # "#mailbox" is as specified in RFC 822 [RFC822]. This means that it + # consists of zero or more comma-separated mail addresses, possibly + # including "phrase" and "comment" components. 
Note that all URL + # reserved characters in "to" must be encoded: in particular, + # parentheses, commas, and the percent sign ("%"), which commonly occur + # in the "mailbox" syntax. + # + # Within mailto URLs, the characters "?", "=", "&" are reserved. + + # hname = *urlc + # hvalue = *urlc + # header = hname "=" hvalue + HEADER_PATTERN = "(?:[^?=&]*=[^?=&]*)".freeze + HEADER_REGEXP = Regexp.new(HEADER_PATTERN, 'N').freeze + # headers = "?" header *( "&" header ) + # to = #mailbox + # mailtoURL = "mailto:" [ to ] [ headers ] + MAILBOX_PATTERN = "(?:#{PATTERN::ESCAPED}|[^(),%?=&])".freeze + MAILTO_REGEXP = Regexp.new(" # :nodoc: + \\A + (#{MAILBOX_PATTERN}*?) (?# 1: to) + (?: + \\? + (#{HEADER_PATTERN}(?:\\&#{HEADER_PATTERN})*) (?# 2: headers) + )? + (?: + \\# + (#{PATTERN::FRAGMENT}) (?# 3: fragment) + )? + \\z + ", Regexp::EXTENDED).freeze + # :startdoc: + + # + # == Description + # + # Creates a new URI::MailTo object from components, with syntax checking. + # + # Components can be provided as an Array or Hash. If an Array is used, + # the components must be supplied as [to, headers]. + # + # If a Hash is used, the keys are the component names preceded by colons. 
+    #
+    # The headers can be supplied as a pre-encoded string, such as
+    # "subject=subscribe&cc=address", or as an Array of Arrays like
+    # [['subject', 'subscribe'], ['cc', 'address']]
+    #
+    # Examples:
+    #
+    #    require 'uri'
+    #
+    #    m1 = URI::MailTo.build(['joe@example.com', 'subject=Ruby'])
+    #    puts m1.to_s  ->  mailto:joe@example.com?subject=Ruby
+    #
+    #    m2 = URI::MailTo.build(['john@example.com', [['Subject', 'Ruby'], ['Cc', 'jack@example.com']]])
+    #    puts m2.to_s  ->  mailto:john@example.com?Subject=Ruby&Cc=jack@example.com
+    #
+    #    m3 = URI::MailTo.build({:to => 'listman@example.com', :headers => [['subject', 'subscribe']]})
+    #    puts m3.to_s  ->  mailto:listman@example.com?subject=subscribe
+    #
+    # A Hash of header-name => value pairs is accepted for the headers as
+    # well.  The +to+ String supplied by the caller is copied, not modified.
+    #
+    def self.build(args)
+      tmp = Util::make_components_hash(self, args)
+
+      # Build the opaque part in a fresh String: it is appended to below, and
+      # appending to tmp[:to] itself would alter the caller's object.
+      if tmp[:to].kind_of?(String)
+        tmp[:opaque] = tmp[:to].dup
+      else
+        tmp[:opaque] = tmp[:to] || ''
+      end
+
+      if tmp[:headers]
+        tmp[:opaque] << '?'
+
+        if tmp[:headers].kind_of?(Array)
+          tmp[:opaque] << tmp[:headers].collect { |x|
+            if x.kind_of?(Array)
+              # Array#to_s is an alias of #inspect on Ruby 1.9, so it would
+              # yield 'subject=["subscribe"]'; join the value part(s) instead.
+              x[0] + '=' + x[1..-1].join
+            else
+              x.to_s
+            end
+          }.join('&')
+
+        elsif tmp[:headers].kind_of?(Hash)
+          tmp[:opaque] << tmp[:headers].collect { |h,v|
+            h + '=' + v
+          }.join('&')
+
+        else
+          tmp[:opaque] << tmp[:headers].to_s
+        end
+      end
+
+      return super(tmp)
+    end
+
+    #
+    # == Description
+    #
+    # Creates a new URI::MailTo object from generic URL components with
+    # no syntax checking.
+    #
+    # This method is usually called from URI::parse, which checks
+    # the validity of each component.
+    #
+    def initialize(*arg)
+      super(*arg)
+
+      @to = nil
+      @headers = []
+
+      # MAILTO_REGEXP captures the recipient part as $1 and the raw
+      # "name=value&..." header string as $2.
+      if MAILTO_REGEXP =~ @opaque
+         # A truthy last argument selects the validating setters; otherwise
+         # the values are stored unchecked.
+         # NOTE(review): presumably this is the arg_check flag of
+         # Generic#initialize -- confirm against uri/generic.rb.
+         if arg[-1]
+          self.to = $1
+          self.headers = $2
+        else
+          set_to($1)
+          set_headers($2)
+        end
+
+      else
+        raise InvalidComponentError,
+          "unrecognised opaque part for mailtoURL: #{@opaque}"
+      end
+    end
+
+    # The primary e-mail address of the URL, as a String
+    attr_reader :to
+
+    # E-mail headers set by the URL, as an Array of Arrays
+    attr_reader :headers
+
+    # Validates a candidate +to+ value.  nil and empty values are accepted;
+    # anything else must match both the parser's OPAQUE regexp and a run of
+    # MAILBOX_PATTERN, otherwise InvalidComponentError is raised.
+    # Returns true when the value is acceptable.
+    def check_to(v)
+      return true unless v
+      return true if v.size == 0
+
+      if @parser.regexp[:OPAQUE] !~ v || /\A#{MAILBOX_PATTERN}*\z/o !~ v
+        raise InvalidComponentError,
+          "bad component(expected opaque component): #{v}"
+      end
+
+      return true
+    end
+    private :check_to
+
+    # Stores +v+ as the recipient without any validation.
+    def set_to(v)
+      @to = v
+    end
+    protected :set_to
+
+    # Validating setter for the recipient: runs check_to, stores the value
+    # and returns +v+.
+    def to=(v)
+      check_to(v)
+      set_to(v)
+      v
+    end
+
+    # Validates a raw header string ("name=value&name=value").  nil and empty
+    # values are accepted; anything else must match both the parser's OPAQUE
+    # regexp and the HEADER_PATTERN list form, otherwise
+    # InvalidComponentError is raised.  Returns true when acceptable.
+    def check_headers(v)
+      return true unless v
+      return true if v.size == 0
+
+      if @parser.regexp[:OPAQUE] !~ v ||
+          /\A(#{HEADER_PATTERN}(?:\&#{HEADER_PATTERN})*)\z/o !~ v
+        raise InvalidComponentError,
+          "bad component(expected opaque component): #{v}"
+      end
+
+      return true
+    end
+    private :check_headers
+
+    # Replaces @headers with the [name, value] pairs found in the raw header
+    # string +v+ (no validation).  Each HEADER_REGEXP match is split on its
+    # first '=' only, so the value itself may contain '='.  A nil +v+ leaves
+    # @headers empty.
+    def set_headers(v)
+      @headers = []
+      if v
+        v.scan(HEADER_REGEXP) do |x|
+          @headers << x.split(/=/o, 2)
+        end
+      end
+    end
+    protected :set_headers
+
+    # Validating setter for the headers: runs check_headers, stores the
+    # parsed pairs and returns +v+.
+    def headers=(v)
+      check_headers(v)
+      set_headers(v)
+      v
+    end
+
+    # Constructs the String form "scheme:to?name=value&...#fragment".
+    # The recipient, the header list and the fragment each contribute an
+    # empty string when they are absent.
+    def to_s
+      @scheme + ':' +
+        if @to
+          @to
+        else
+          ''
+        end +
+        if @headers.size > 0
+          '?' + @headers.collect{|x| x.join('=')}.join('&')
+        else
+          ''
+        end +
+        if @fragment
+          '#' + @fragment
+        else
+          ''
+        end
+    end
+
+    # Returns the RFC822 e-mail text equivalent of the URL, as a String.
+ # + # Example: + # + # require 'uri' + # + # uri = URI.parse("mailto:ruby-list@ruby-lang.org?Subject=subscribe&cc=myaddr") + # uri.to_mailtext + # # => "To: ruby-list@ruby-lang.org\nSubject: subscribe\nCc: myaddr\n\n\n" + # + def to_mailtext + to = @parser.unescape(@to) + head = '' + body = '' + @headers.each do |x| + case x[0] + when 'body' + body = @parser.unescape(x[1]) + when 'to' + to << ', ' + @parser.unescape(x[1]) + else + head << @parser.unescape(x[0]).capitalize + ': ' + + @parser.unescape(x[1]) + "\n" + end + end + + return "To: #{to} +#{head} +#{body} +" + end + alias to_rfc822text to_mailtext + end + + @@schemes['MAILTO'] = MailTo +end diff --git a/lib/weakref.rb b/lib/weakref.rb new file mode 100644 index 0000000..ba39242 --- /dev/null +++ b/lib/weakref.rb @@ -0,0 +1,80 @@ +# Weak Reference class that does not bother GCing. +# +# Usage: +# foo = Object.new +# foo = Object.new +# p foo.to_s # original's class +# foo = WeakRef.new(foo) +# p foo.to_s # should be same class +# ObjectSpace.garbage_collect +# p foo.to_s # should raise exception (recycled) + +require "delegate" +require 'thread' + +class WeakRef < Delegator + + class RefError < StandardError + end + + @@id_map = {} # obj -> [ref,...] + @@id_rev_map = {} # ref -> obj + @@mutex = Mutex.new + @@final = lambda {|id| + @@mutex.synchronize { + rids = @@id_map[id] + if rids + for rid in rids + @@id_rev_map.delete(rid) + end + @@id_map.delete(id) + end + rid = @@id_rev_map[id] + if rid + @@id_rev_map.delete(id) + @@id_map[rid].delete(id) + @@id_map.delete(rid) if @@id_map[rid].empty? 
+ end + } + } + + def initialize(orig) + @__id = orig.object_id + ObjectSpace.define_finalizer orig, @@final + ObjectSpace.define_finalizer self, @@final + @@mutex.synchronize { + @@id_map[@__id] = [] unless @@id_map[@__id] + } + @@id_map[@__id].push self.object_id + @@id_rev_map[self.object_id] = @__id + super + end + + def __getobj__ + unless @@id_rev_map[self.object_id] == @__id + Kernel::raise RefError, "Invalid Reference - probably recycled", Kernel::caller(2) + end + begin + ObjectSpace._id2ref(@__id) + rescue RangeError + Kernel::raise RefError, "Invalid Reference - probably recycled", Kernel::caller(2) + end + end + def __setobj__(obj) + end + + def weakref_alive? + @@id_rev_map[self.object_id] == @__id + end +end + +if __FILE__ == $0 +# require 'thread' + foo = Object.new + p foo.to_s # original's class + foo = WeakRef.new(foo) + p foo.to_s # should be same class + ObjectSpace.garbage_collect + ObjectSpace.garbage_collect + p foo.to_s # should raise exception (recycled) +end diff --git a/lib/webrick.rb b/lib/webrick.rb new file mode 100644 index 0000000..8fca81b --- /dev/null +++ b/lib/webrick.rb @@ -0,0 +1,29 @@ +# +# WEBrick -- WEB server toolkit. +# +# Author: IPR -- Internet Programming with Ruby -- writers +# Copyright (c) 2000 TAKAHASHI Masayoshi, GOTOU YUUZOU +# Copyright (c) 2002 Internet Programming with Ruby writers. All rights +# reserved. 
+# +# $IPR: webrick.rb,v 1.12 2002/10/01 17:16:31 gotoyuzo Exp $ + +require 'webrick/compat.rb' + +require 'webrick/version.rb' +require 'webrick/config.rb' +require 'webrick/log.rb' +require 'webrick/server.rb' +require 'webrick/utils.rb' +require 'webrick/accesslog' + +require 'webrick/htmlutils.rb' +require 'webrick/httputils.rb' +require 'webrick/cookie.rb' +require 'webrick/httpversion.rb' +require 'webrick/httpstatus.rb' +require 'webrick/httprequest.rb' +require 'webrick/httpresponse.rb' +require 'webrick/httpserver.rb' +require 'webrick/httpservlet.rb' +require 'webrick/httpauth.rb' diff --git a/lib/yaml.rb b/lib/yaml.rb new file mode 100644 index 0000000..7843f7f --- /dev/null +++ b/lib/yaml.rb @@ -0,0 +1,440 @@ +# -*- mode: ruby; ruby-indent-level: 4; tab-width: 4 -*- vim: sw=4 ts=4 +# $Id: yaml.rb 19495 2008-09-23 18:16:08Z drbrain $ +# +# = yaml.rb: top-level module with methods for loading and parsing YAML documents +# +# Author:: why the lucky stiff +# + +require 'stringio' +require 'yaml/error' +require 'yaml/syck' +require 'yaml/tag' +require 'yaml/stream' +require 'yaml/constants' + +# == YAML +# +# YAML(tm) (rhymes with 'camel') is a +# straightforward machine parsable data serialization format designed for +# human readability and interaction with scripting languages such as Perl +# and Python. YAML is optimized for data serialization, formatted +# dumping, configuration files, log files, Internet messaging and +# filtering. This specification describes the YAML information model and +# serialization format. Together with the Unicode standard for characters, it +# provides all the information necessary to understand YAML Version 1.0 +# and construct computer programs to process it. +# +# See http://yaml.org/ for more information. For a quick tutorial, please +# visit YAML In Five Minutes (http://yaml.kwiki.org/?YamlInFiveMinutes). +# +# == About This Library +# +# The YAML 1.0 specification outlines four stages of YAML loading and dumping. 
+# This library honors all four of those stages, although data is really only +# available to you in three stages. +# +# The four stages are: native, representation, serialization, and presentation. +# +# The native stage refers to data which has been loaded completely into Ruby's +# own types. (See +YAML::load+.) +# +# The representation stage means data which has been composed into +# +YAML::BaseNode+ objects. In this stage, the document is available as a +# tree of node objects. You can perform YPath queries and transformations +# at this level. (See +YAML::parse+.) +# +# The serialization stage happens inside the parser. The YAML parser used in +# Ruby is called Syck. Serialized nodes are available in the extension as +# SyckNode structs. +# +# The presentation stage is the YAML document itself. This is accessible +# to you as a string. (See +YAML::dump+.) +# +# For more information about the various information models, see Chapter +# 3 of the YAML 1.0 Specification (http://yaml.org/spec/#id2491269). +# +# The YAML module provides quick access to the most common loading (YAML::load) +# and dumping (YAML::dump) tasks. This module also provides an API for registering +# global types (YAML::add_domain_type). +# +# == Example +# +# A simple round-trip (load and dump) of an object. +# +# require "yaml" +# +# test_obj = ["dogs", "cats", "badgers"] +# +# yaml_obj = YAML::dump( test_obj ) +# # -> --- +# - dogs +# - cats +# - badgers +# ruby_obj = YAML::load( yaml_obj ) +# # => ["dogs", "cats", "badgers"] +# ruby_obj == test_obj +# # => true +# +# To register your custom types with the global resolver, use +add_domain_type+. 
+# +# YAML::add_domain_type( "your-site.com,2004", "widget" ) do |type, val| +# Widget.new( val ) +# end +# +module YAML + + Resolver = YAML::Syck::Resolver + DefaultResolver = YAML::Syck::DefaultResolver + DefaultResolver.use_types_at( @@tagged_classes ) + GenericResolver = YAML::Syck::GenericResolver + Parser = YAML::Syck::Parser + Emitter = YAML::Syck::Emitter + + # Returns a new default parser + def YAML.parser; Parser.new.set_resolver( YAML.resolver ); end + # Returns a new generic parser + def YAML.generic_parser; Parser.new.set_resolver( GenericResolver ); end + # Returns the default resolver + def YAML.resolver; DefaultResolver; end + # Returns a new default emitter + def YAML.emitter; Emitter.new.set_resolver( YAML.resolver ); end + + # + # Converts _obj_ to YAML and writes the YAML result to _io_. + # + # File.open( 'animals.yaml', 'w' ) do |out| + # YAML.dump( ['badger', 'elephant', 'tiger'], out ) + # end + # + # If no _io_ is provided, a string containing the dumped YAML + # is returned. + # + # YAML.dump( :locked ) + # #=> "--- :locked" + # + def YAML.dump( obj, io = nil ) + obj.to_yaml( io || io2 = StringIO.new ) + io || ( io2.rewind; io2.read ) + end + + # + # Load a document from the current _io_ stream. + # + # File.open( 'animals.yaml' ) { |yf| YAML::load( yf ) } + # #=> ['badger', 'elephant', 'tiger'] + # + # Can also load from a string. + # + # YAML.load( "--- :locked" ) + # #=> :locked + # + def YAML.load( io ) + yp = parser.load( io ) + end + + # + # Load a document from the file located at _filepath_. + # + # YAML.load_file( 'animals.yaml' ) + # #=> ['badger', 'elephant', 'tiger'] + # + def YAML.load_file( filepath ) + File.open( filepath ) do |f| + load( f ) + end + end + + # + # Parse the first document from the current _io_ stream + # + # File.open( 'animals.yaml' ) { |yf| YAML::load( yf ) } + # #=> #, + # #, + # #]> + # + # Can also load from a string. 
+    #
+    #   YAML.parse( "--- :locked" )
+    #      #=> root node of the parsed document (node inspect output omitted)
+    #
+    def YAML.parse( io )
+        generic_parser.load( io )
+    end
+
+    #
+    # Parse a document from the file located at _filepath_.
+    #
+    #   YAML.parse_file( 'animals.yaml' )
+    #      #=> root node of the parsed tree: a sequence node holding the
+    #          three scalar nodes (node inspect output omitted)
+    #
+    def YAML.parse_file( filepath )
+        File.open( filepath ) do |f|
+            parse( f )
+        end
+    end
+
+    #
+    # Calls _block_ with each consecutive document in the YAML
+    # stream contained in _io_.
+    #
+    #   File.open( 'many-docs.yaml' ) do |yf|
+    #     YAML.each_document( yf ) do |ydoc|
+    #       ## ydoc contains the single object
+    #       ## from the YAML document
+    #     end
+    #   end
+    #
+    def YAML.each_document( io, &block )
+        parser.load_documents( io, &block )
+    end
+
+    #
+    # Calls _block_ with each consecutive document in the YAML
+    # stream contained in _io_.  Delegates to YAML.each_document.
+    #
+    #   File.open( 'many-docs.yaml' ) do |yf|
+    #     YAML.load_documents( yf ) do |ydoc|
+    #       ## ydoc contains the single object
+    #       ## from the YAML document
+    #     end
+    #   end
+    #
+    def YAML.load_documents( io, &doc_proc )
+        YAML.each_document( io, &doc_proc )
+    end
+
+    #
+    # Calls _block_ with a tree of +YAML::BaseNodes+, one tree for
+    # each consecutive document in the YAML stream contained in _io_.
+    #
+    #   File.open( 'many-docs.yaml' ) do |yf|
+    #     YAML.each_node( yf ) do |ydoc|
+    #       ## ydoc contains a tree of nodes
+    #       ## from the YAML document
+    #     end
+    #   end
+    #
+    def YAML.each_node( io, &doc_proc )
+        generic_parser.load_documents( io, &doc_proc )
+    end
+
+    #
+    # Calls _block_ with a tree of +YAML::BaseNodes+, one tree for
+    # each consecutive document in the YAML stream contained in _io_.
+    # Delegates to YAML.each_node.
+    #
+    #   File.open( 'many-docs.yaml' ) do |yf|
+    #     YAML.parse_documents( yf ) do |ydoc|
+    #       ## ydoc contains a tree of nodes
+    #       ## from the YAML document
+    #     end
+    #   end
+    #
+    def YAML.parse_documents( io, &doc_proc )
+        YAML.each_node( io, &doc_proc )
+    end
+
+    #
+    # Loads all documents from the current _io_ stream,
+    # returning a +YAML::Stream+ object containing all
+    # loaded documents.
+ # + def YAML.load_stream( io ) + d = nil + parser.load_documents( io ) do |doc| + d = YAML::Stream.new if not d + d.add( doc ) + end + return d + end + + # + # Returns a YAML stream containing each of the items in +objs+, + # each having their own document. + # + # YAML.dump_stream( 0, [], {} ) + # #=> --- 0 + # --- [] + # --- {} + # + def YAML.dump_stream( *objs ) + d = YAML::Stream.new + objs.each do |doc| + d.add( doc ) + end + d.emit + end + + # + # Add a global handler for a YAML domain type. + # + def YAML.add_domain_type( domain, type_tag, &transfer_proc ) + resolver.add_type( "tag:#{ domain }:#{ type_tag }", transfer_proc ) + end + + # + # Add a transfer method for a builtin type + # + def YAML.add_builtin_type( type_tag, &transfer_proc ) + resolver.add_type( "tag:yaml.org,2002:#{ type_tag }", transfer_proc ) + end + + # + # Add a transfer method for a builtin type + # + def YAML.add_ruby_type( type_tag, &transfer_proc ) + resolver.add_type( "tag:ruby.yaml.org,2002:#{ type_tag }", transfer_proc ) + end + + # + # Add a private document type + # + def YAML.add_private_type( type_re, &transfer_proc ) + resolver.add_type( "x-private:" + type_re, transfer_proc ) + end + + # + # Detect typing of a string + # + def YAML.detect_implicit( val ) + resolver.detect_implicit( val ) + end + + # + # Convert a type_id to a taguri + # + def YAML.tagurize( val ) + resolver.tagurize( val ) + end + + # + # Apply a transfer method to a Ruby object + # + def YAML.transfer( type_id, obj ) + resolver.transfer( YAML.tagurize( type_id ), obj ) + end + + # + # Apply any implicit a node may qualify for + # + def YAML.try_implicit( obj ) + YAML.transfer( YAML.detect_implicit( obj ), obj ) + end + + # + # Method to extract colon-seperated type and class, returning + # the type and the constant of the class + # + def YAML.read_type_class( type, obj_class ) + scheme, domain, type, tclass = type.split( ':', 4 ) + tclass.split( "::" ).each { |c| obj_class = obj_class.const_get( c ) } if 
tclass + return [ type, obj_class ] + end + + # + # Allocate blank object + # + def YAML.object_maker( obj_class, val ) + if Hash === val + o = obj_class.allocate + val.each_pair { |k,v| + o.instance_variable_set("@#{k}", v) + } + o + else + raise YAML::Error, "Invalid object explicitly tagged !ruby/Object: " + val.inspect + end + end + + # + # Allocate an Emitter if needed + # + def YAML.quick_emit( oid, opts = {}, &e ) + out = + if opts.is_a? YAML::Emitter + opts + else + emitter.reset( opts ) + end + oid = + case oid when Fixnum, NilClass; oid + else oid = "#{oid.object_id}-#{oid.hash}" + end + out.emit( oid, &e ) + end + +end + +require 'yaml/rubytypes' +require 'yaml/types' + +module Kernel + # + # ryan:: You know how Kernel.p is a really convenient way to dump ruby + # structures? The only downside is that it's not as legible as + # YAML. + # + # _why:: (listening) + # + # ryan:: I know you don't want to urinate all over your users' namespaces. + # But, on the other hand, convenience of dumping for debugging is, + # IMO, a big YAML use case. + # + # _why:: Go nuts! Have a pony parade! + # + # ryan:: Either way, I certainly will have a pony parade. + # + + # Prints any supplied _objects_ out in YAML. Intended as + # a variation on +Kernel::p+. 
+ # + # S = Struct.new(:name, :state) + # s = S['dave', 'TX'] + # y s + # + # _produces:_ + # + # --- !ruby/struct:S + # name: dave + # state: TX + # + def y( object, *objects ) + objects.unshift object + puts( if objects.length == 1 + YAML::dump( *objects ) + else + YAML::dump_stream( *objects ) + end ) + end + private :y +end + + diff --git a/lib/yaml/baseemitter.rb b/lib/yaml/baseemitter.rb new file mode 100644 index 0000000..4bdc796 --- /dev/null +++ b/lib/yaml/baseemitter.rb @@ -0,0 +1,242 @@ +# +# BaseEmitter +# + +require 'yaml/constants' +require 'yaml/encoding' +require 'yaml/error' + +module YAML + module BaseEmitter + def options( opt = nil ) + if opt + @options[opt] || YAML::DEFAULTS[opt] + else + @options + end + end + + def options=( opt ) + @options = opt + end + + # + # Emit binary data + # + def binary_base64( value ) + self << "!binary " + self.node_text( [value].pack("m"), '|' ) + end + + # + # Emit plain, normal flowing text + # + def node_text( value, block = nil ) + @seq_map = false + valx = value.dup + unless block + block = + if options(:UseBlock) + '|' + elsif not options(:UseFold) and valx =~ /\n[ \t]/ and not valx =~ /#{YAML::ESCAPE_CHAR}/ + '|' + else + '>' + end + indt = $&.to_i if block =~ /\d+/ + if valx =~ /(\A\n*[ \t#]|^---\s+)/ + indt = options(:Indent) unless indt.to_i > 0 + block += indt.to_s + end + + block += + if valx =~ /\n\Z\n/ + "+" + elsif valx =~ /\Z\n/ + "" + else + "-" + end + end + block += "\n" + if block[0] == ?" 
+ esc_skip = ( "\t\n" unless valx =~ /^[ \t]/ ) || "" + valx = fold( YAML::escape( valx, esc_skip ) + "\"" ).chomp + self << '"' + indent_text( valx, indt, false ) + else + if block[0] == ?> + valx = fold( valx ) + end + #p [block, indt] + self << block + indent_text( valx, indt ) + end + end + + # + # Emit a simple, unqouted string + # + def simple( value ) + @seq_map = false + self << value.to_s + end + + # + # Emit double-quoted string + # + def double( value ) + "\"#{YAML.escape( value )}\"" + end + + # + # Emit single-quoted string + # + def single( value ) + "'#{value}'" + end + + # + # Write a text block with the current indent + # + def indent_text( text, mod, first_line = true ) + return "" if text.to_s.empty? + spacing = indent( mod ) + text = text.gsub( /\A([^\n])/, "#{ spacing }\\1" ) if first_line + return text.gsub( /\n^([^\n])/, "\n#{spacing}\\1" ) + end + + # + # Write a current indent + # + def indent( mod = nil ) + #p [ self.id, level, mod, :INDENT ] + if level <= 0 + mod ||= 0 + else + mod ||= options(:Indent) + mod += ( level - 1 ) * options(:Indent) + end + return " " * mod + end + + # + # Add indent to the buffer + # + def indent! + self << indent + end + + # + # Folding paragraphs within a column + # + def fold( value ) + value.gsub( /(^[ \t]+.*$)|(\S.{0,#{options(:BestWidth) - 1}})(?:[ \t]+|(\n+(?=[ \t]|\Z))|$)/ ) do + $1 || $2 + ( $3 || "\n" ) + end + end + + # + # Quick mapping + # + def map( type, &e ) + val = Mapping.new + e.call( val ) + self << "#{type} " if type.length.nonzero? + + # + # Empty hashes + # + if val.length.zero? + self << "{}" + @seq_map = false + else + # FIXME + # if @buffer.length == 1 and options(:UseHeader) == false and type.length.zero? + # @headless = 1 + # end + + defkey = @options.delete( :DefaultKey ) + if defkey + seq_map_shortcut + self << "= : " + defkey.to_yaml( :Emitter => self ) + end + + # + # Emit the key and value + # + val.each { |v| + seq_map_shortcut + if v[0].is_complex_yaml? + self << "? 
" + end + v[0].to_yaml( :Emitter => self ) + if v[0].is_complex_yaml? + self << "\n" + indent! + end + self << ": " + v[1].to_yaml( :Emitter => self ) + } + end + end + + def seq_map_shortcut + # FIXME: seq_map needs to work with the new anchoring system + # if @seq_map + # @anchor_extras[@buffer.length - 1] = "\n" + indent + # @seq_map = false + # else + self << "\n" + indent! + # end + end + + # + # Quick sequence + # + def seq( type, &e ) + @seq_map = false + val = Sequence.new + e.call( val ) + self << "#{type} " if type.length.nonzero? + + # + # Empty arrays + # + if val.length.zero? + self << "[]" + else + # FIXME + # if @buffer.length == 1 and options(:UseHeader) == false and type.length.zero? + # @headless = 1 + # end + + # + # Emit the key and value + # + val.each { |v| + self << "\n" + indent! + self << "- " + @seq_map = true if v.class == Hash + v.to_yaml( :Emitter => self ) + } + end + end + end + + # + # Emitter helper classes + # + class Mapping < Array + def add( k, v ) + push [k, v] + end + end + + class Sequence < Array + def add( v ) + push v + end + end +end diff --git a/lib/yaml/basenode.rb b/lib/yaml/basenode.rb new file mode 100644 index 0000000..5439903 --- /dev/null +++ b/lib/yaml/basenode.rb @@ -0,0 +1,216 @@ +# +# YAML::BaseNode class +# +require 'yaml/ypath' + +module YAML + + # + # YAML Generic Model container + # + module BaseNode + + # + # Search for YPath entry and return + # qualified nodes. + # + def select( ypath_str ) + matches = match_path( ypath_str ) + + # + # Create a new generic view of the elements selected + # + if matches + result = [] + matches.each { |m| + result.push m.last + } + YAML.transfer( 'seq', result ) + end + end + + # + # Search for YPath entry and return + # transformed nodes. 
+ # + def select!( ypath_str ) + matches = match_path( ypath_str ) + + # + # Create a new generic view of the elements selected + # + if matches + result = [] + matches.each { |m| + result.push m.last.transform + } + result + end + end + + # + # Search for YPath entry and return a list of + # qualified paths. + # + def search( ypath_str ) + matches = match_path( ypath_str ) + + if matches + matches.collect { |m| + path = [] + m.each_index { |i| + path.push m[i] if ( i % 2 ).zero? + } + "/" + path.compact.join( "/" ) + } + end + end + + def at( seg ) + if Hash === @value + self[seg] + elsif Array === @value and seg =~ /\A\d+\Z/ and @value[seg.to_i] + @value[seg.to_i] + end + end + + # + # YPath search returning a complete depth array + # + def match_path( ypath_str ) + depth = 0 + matches = [] + YPath.each_path( ypath_str ) do |ypath| + seg = match_segment( ypath, 0 ) + matches += seg if seg + end + matches.uniq + end + + # + # Search a node for a single YPath segment + # + def match_segment( ypath, depth ) + deep_nodes = [] + seg = ypath.segments[ depth ] + if seg == "/" + unless String === @value + idx = -1 + @value.collect { |v| + idx += 1 + if Hash === @value + match_init = [v[0].transform, v[1]] + match_deep = v[1].match_segment( ypath, depth ) + else + match_init = [idx, v] + match_deep = v.match_segment( ypath, depth ) + end + if match_deep + match_deep.each { |m| + deep_nodes.push( match_init + m ) + } + end + } + end + depth += 1 + seg = ypath.segments[ depth ] + end + match_nodes = + case seg + when "." + [[nil, self]] + when ".." + [["..", nil]] + when "*" + if @value.is_a? 
Enumerable + idx = -1 + @value.collect { |h| + idx += 1 + if Hash === @value + [h[0].transform, h[1]] + else + [idx, h] + end + } + end + else + if seg =~ /^"(.*)"$/ + seg = $1 + elsif seg =~ /^'(.*)'$/ + seg = $1 + end + if ( v = at( seg ) ) + [[ seg, v ]] + end + end + return deep_nodes unless match_nodes + pred = ypath.predicates[ depth ] + if pred + case pred + when /^\.=/ + pred = $' # ' + match_nodes.reject! { |n| + n.last.value != pred + } + else + match_nodes.reject! { |n| + n.last.at( pred ).nil? + } + end + end + return match_nodes + deep_nodes unless ypath.segments.length > depth + 1 + + #puts "DEPTH: #{depth + 1}" + deep_nodes = [] + match_nodes.each { |n| + if n[1].is_a? BaseNode + match_deep = n[1].match_segment( ypath, depth + 1 ) + if match_deep + match_deep.each { |m| + deep_nodes.push( n + m ) + } + end + else + deep_nodes = [] + end + } + deep_nodes = nil if deep_nodes.length == 0 + deep_nodes + end + + # + # We want the node to act like as Hash + # if it is. + # + def []( *key ) + if Hash === @value + v = @value.detect { |k,| k.transform == key.first } + v[1] if v + elsif Array === @value + @value.[]( *key ) + end + end + + def children + if Hash === @value + @value.values.collect { |c| c[1] } + elsif Array === @value + @value + end + end + + def children_with_index + if Hash === @value + @value.keys.collect { |i| [self[i], i] } + elsif Array === @value + i = -1; @value.collect { |v| i += 1; [v, i] } + end + end + + def emit + transform.to_yaml + end + end + +end + diff --git a/lib/yaml/constants.rb b/lib/yaml/constants.rb new file mode 100644 index 0000000..fb833d3 --- /dev/null +++ b/lib/yaml/constants.rb @@ -0,0 +1,45 @@ +# +# Constants used throughout the library +# +module YAML + + # + # Constants + # + VERSION = '0.60' + SUPPORTED_YAML_VERSIONS = ['1.0'] + + # + # Parser tokens + # + WORD_CHAR = 'A-Za-z0-9' + PRINTABLE_CHAR = '-_A-Za-z0-9!?/()$\'". 
' + NOT_PLAIN_CHAR = '\x7f\x0-\x1f\x80-\x9f' + ESCAPE_CHAR = '[\\x00-\\x09\\x0b-\\x1f]' + INDICATOR_CHAR = '*&!|\\\\^@%{}[]=' + SPACE_INDICATORS = '-#:,?' + RESTRICTED_INDICATORS = '#:,}]' + DNS_COMP_RE = "\\w(?:[-\\w]*\\w)?" + DNS_NAME_RE = "(?:(?:#{DNS_COMP_RE}\\.)+#{DNS_COMP_RE}|#{DNS_COMP_RE})" + ESCAPES = %w{\x00 \x01 \x02 \x03 \x04 \x05 \x06 \a + \x08 \t \n \v \f \r \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 + \x18 \x19 \x1a \e \x1c \x1d \x1e \x1f + } + UNESCAPES = { + 'a' => "\x07", 'b' => "\x08", 't' => "\x09", + 'n' => "\x0a", 'v' => "\x0b", 'f' => "\x0c", + 'r' => "\x0d", 'e' => "\x1b", '\\' => '\\', + } + + # + # Default settings + # + DEFAULTS = { + :Indent => 2, :UseHeader => false, :UseVersion => false, :Version => '1.0', + :SortKeys => false, :AnchorFormat => 'id%03d', :ExplicitTypes => false, + :WidthType => 'absolute', :BestWidth => 80, + :UseBlock => false, :UseFold => false, :Encoding => :None + } + +end diff --git a/lib/yaml/dbm.rb b/lib/yaml/dbm.rb new file mode 100644 index 0000000..87d6009 --- /dev/null +++ b/lib/yaml/dbm.rb @@ -0,0 +1,111 @@ +require 'yaml' +require 'dbm' +# +# YAML + DBM = YDBM +# - Same interface as DBM class +# +module YAML + +class DBM < ::DBM + VERSION = "0.1" + def []( key ) + fetch( key ) + end + def []=( key, val ) + store( key, val ) + end + def fetch( keystr, ifnone = nil ) + begin + val = super( keystr ) + return YAML::load( val ) if String === val + rescue IndexError + end + if block_given? 
+ yield keystr + else + ifnone + end + end + def index( keystr ) + super( keystr.to_yaml ) + end + def values_at( *keys ) + keys.collect { |k| fetch( k ) } + end + def delete( key ) + v = super( key ) + if String === v + v = YAML::load( v ) + end + v + end + def delete_if + del_keys = keys.dup + del_keys.delete_if { |k| yield( k, fetch( k ) ) == false } + del_keys.each { |k| delete( k ) } + self + end + def reject + hsh = self.to_hash + hsh.reject { |k,v| yield k, v } + end + def each_pair + keys.each { |k| yield k, fetch( k ) } + self + end + def each_value + super { |v| yield YAML::load( v ) } + self + end + def values + super.collect { |v| YAML::load( v ) } + end + def has_value?( val ) + each_value { |v| return true if v == val } + return false + end + def invert + h = {} + keys.each { |k| h[ self.fetch( k ) ] = k } + h + end + def replace( hsh ) + clear + update( hsh ) + end + def shift + a = super + a[1] = YAML::load( a[1] ) if a + a + end + def select( *keys ) + if block_given? + self.keys.collect { |k| v = self[k]; [k, v] if yield k, v }.compact + else + values_at( *keys ) + end + end + def store( key, val ) + super( key, val.to_yaml ) + val + end + def update( hsh ) + hsh.keys.each do |k| + self.store( k, hsh.fetch( k ) ) + end + self + end + def to_a + a = [] + keys.each { |k| a.push [ k, self.fetch( k ) ] } + a + end + def to_hash + h = {} + keys.each { |k| h[ k ] = self.fetch( k ) } + h + end + alias :each :each_pair +end + +end diff --git a/lib/yaml/encoding.rb b/lib/yaml/encoding.rb new file mode 100644 index 0000000..57dc553 --- /dev/null +++ b/lib/yaml/encoding.rb @@ -0,0 +1,33 @@ +# +# Handle Unicode-to-Internal conversion +# + +module YAML + + # + # Escape the string, condensing common escapes + # + def YAML.escape( value, skip = "" ) + value.gsub( /\\/, "\\\\\\" ). + gsub( /"/, "\\\"" ). 
+ gsub( /([\x00-\x1f])/ ) do + skip[$&] || ESCAPES[ $&.unpack("C")[0] ] + end + end + + # + # Unescape the condensed escapes + # + def YAML.unescape( value ) + value.gsub( /\\(?:([nevfbart\\])|0?x([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/ ) { + if $3 + ["#$3".hex ].pack('U*') + elsif $2 + [$2].pack( "H2" ) + else + UNESCAPES[$1] + end + } + end + +end diff --git a/lib/yaml/error.rb b/lib/yaml/error.rb new file mode 100644 index 0000000..15865a9 --- /dev/null +++ b/lib/yaml/error.rb @@ -0,0 +1,34 @@ +# +# Error messages and exception classes +# + +module YAML + + # + # Error messages + # + + ERROR_NO_HEADER_NODE = "With UseHeader=false, the node Array or Hash must have elements" + ERROR_NEED_HEADER = "With UseHeader=false, the node must be an Array or Hash" + ERROR_BAD_EXPLICIT = "Unsupported explicit transfer: '%s'" + ERROR_MANY_EXPLICIT = "More than one explicit transfer" + ERROR_MANY_IMPLICIT = "More than one implicit request" + ERROR_NO_ANCHOR = "No anchor for alias '%s'" + ERROR_BAD_ANCHOR = "Invalid anchor: %s" + ERROR_MANY_ANCHOR = "More than one anchor" + ERROR_ANCHOR_ALIAS = "Can't define both an anchor and an alias" + ERROR_BAD_ALIAS = "Invalid alias: %s" + ERROR_MANY_ALIAS = "More than one alias" + ERROR_ZERO_INDENT = "Can't use zero as an indentation width" + ERROR_UNSUPPORTED_VERSION = "This release of YAML.rb does not support YAML version %s" + ERROR_UNSUPPORTED_ENCODING = "Attempt to use unsupported encoding: %s" + + # + # YAML Error classes + # + + class Error < StandardError; end + class ParseError < Error; end + class TypeError < StandardError; end + +end diff --git a/lib/yaml/loader.rb b/lib/yaml/loader.rb new file mode 100644 index 0000000..eb0709e --- /dev/null +++ b/lib/yaml/loader.rb @@ -0,0 +1,14 @@ +# +# YAML::Loader class +# .. type handling .. 
+# +module YAML + class Loader + TRANSFER_DOMAINS = { + 'yaml.org,2002' => {}, + 'ruby.yaml.org,2002' => {} + } + PRIVATE_TYPES = {} + IMPLICIT_TYPES = [ 'null', 'bool', 'time', 'int', 'float' ] + end +end diff --git a/lib/yaml/rubytypes.rb b/lib/yaml/rubytypes.rb new file mode 100644 index 0000000..ae65b35 --- /dev/null +++ b/lib/yaml/rubytypes.rb @@ -0,0 +1,446 @@ +# -*- mode: ruby; ruby-indent-level: 4; tab-width: 4 -*- vim: sw=4 ts=4 +require 'date' + +class Class + def to_yaml( opts = {} ) + raise TypeError, "can't dump anonymous class %s" % self.class + end +end + +class Object + yaml_as "tag:ruby.yaml.org,2002:object" + def to_yaml_style; end + def to_yaml_properties; instance_variables.sort; end + def to_yaml( opts = {} ) + YAML::quick_emit( self, opts ) do |out| + out.map( taguri, to_yaml_style ) do |map| + to_yaml_properties.each do |m| + map.add( m[1..-1], instance_variable_get( m ) ) + end + end + end + end +end + +class Hash + yaml_as "tag:ruby.yaml.org,2002:hash" + yaml_as "tag:yaml.org,2002:map" + def yaml_initialize( tag, val ) + if Array === val + update Hash.[]( *val ) # Convert the map to a sequence + elsif Hash === val + update val + else + raise YAML::TypeError, "Invalid map explicitly tagged #{ tag }: " + val.inspect + end + end + def to_yaml( opts = {} ) + YAML::quick_emit( self, opts ) do |out| + out.map( taguri, to_yaml_style ) do |map| + each do |k, v| + map.add( k, v ) + end + end + end + end +end + +class Struct + yaml_as "tag:ruby.yaml.org,2002:struct" + def self.yaml_tag_class_name; self.name.gsub( "Struct::", "" ); end + def self.yaml_tag_read_class( name ); "Struct::#{ name }"; end + def self.yaml_new( klass, tag, val ) + if Hash === val + struct_type = nil + + # + # Use existing Struct if it exists + # + props = {} + val.delete_if { |k,v| props[k] = v if k =~ /^@/ } + begin + struct_name, struct_type = YAML.read_type_class( tag, Struct ) + rescue NameError + end + if not struct_type + struct_def = [ tag.split( ':', 4 ).last ] + 
struct_type = Struct.new( *struct_def.concat( val.keys.collect { |k| k.intern } ) ) + end + + # + # Set the Struct properties + # + st = YAML::object_maker( struct_type, {} ) + st.members.each do |m| + st.send( "#{m}=", val[m] ) + end + props.each do |k,v| + st.instance_variable_set(k, v) + end + st + else + raise YAML::TypeError, "Invalid Ruby Struct: " + val.inspect + end + end + def to_yaml( opts = {} ) + YAML::quick_emit( self, opts ) do |out| + # + # Basic struct is passed as a YAML map + # + out.map( taguri, to_yaml_style ) do |map| + self.members.each do |m| + map.add( m, self[m] ) + end + self.to_yaml_properties.each do |m| + map.add( m, instance_variable_get( m ) ) + end + end + end + end +end + +class Array + yaml_as "tag:ruby.yaml.org,2002:array" + yaml_as "tag:yaml.org,2002:seq" + def yaml_initialize( tag, val ); concat( val.to_a ); end + def to_yaml( opts = {} ) + YAML::quick_emit( self, opts ) do |out| + out.seq( taguri, to_yaml_style ) do |seq| + each do |x| + seq.add( x ) + end + end + end + end +end + +class Exception + yaml_as "tag:ruby.yaml.org,2002:exception" + def Exception.yaml_new( klass, tag, val ) + o = YAML.object_maker( klass, { 'mesg' => val.delete( 'message' ) } ) + val.each_pair do |k,v| + o.instance_variable_set("@#{k}", v) + end + o + end + def to_yaml( opts = {} ) + YAML::quick_emit( self, opts ) do |out| + out.map( taguri, to_yaml_style ) do |map| + map.add( 'message', message ) + to_yaml_properties.each do |m| + map.add( m[1..-1], instance_variable_get( m ) ) + end + end + end + end +end + +class String + yaml_as "tag:ruby.yaml.org,2002:string" + yaml_as "tag:yaml.org,2002:binary" + yaml_as "tag:yaml.org,2002:str" + def is_complex_yaml? + to_yaml_style or not to_yaml_properties.empty? or self =~ /\n.+/ + end + def is_binary_data? + ( self.count( "^ -~", "^\r\n" ).fdiv(self.size) > 0.3 || self.index( "\x00" ) ) unless empty? 
+ end + def String.yaml_new( klass, tag, val ) + val = val.unpack("m")[0] if tag == "tag:yaml.org,2002:binary" + val = { 'str' => val } if String === val + if Hash === val + s = klass.allocate + # Thank you, NaHi + String.instance_method(:initialize). + bind(s). + call( val.delete( 'str' ) ) + val.each { |k,v| s.instance_variable_set( k, v ) } + s + else + raise YAML::TypeError, "Invalid String: " + val.inspect + end + end + def to_yaml( opts = {} ) + YAML::quick_emit( is_complex_yaml? ? self : nil, opts ) do |out| + if is_binary_data? + out.scalar( "tag:yaml.org,2002:binary", [self].pack("m"), :literal ) + elsif to_yaml_properties.empty? + out.scalar( taguri, self, self =~ /^:/ ? :quote2 : to_yaml_style ) + else + out.map( taguri, to_yaml_style ) do |map| + map.add( 'str', "#{self}" ) + to_yaml_properties.each do |m| + map.add( m, instance_variable_get( m ) ) + end + end + end + end + end +end + +class Symbol + yaml_as "tag:ruby.yaml.org,2002:symbol" + yaml_as "tag:ruby.yaml.org,2002:sym" + def Symbol.yaml_new( klass, tag, val ) + if String === val + val = YAML::load( val ) if val =~ /\A(["']).*\1\z/ + val.intern + else + raise YAML::TypeError, "Invalid Symbol: " + val.inspect + end + end + def to_yaml( opts = {} ) + YAML::quick_emit( nil, opts ) do |out| + out.scalar( "tag:yaml.org,2002:str", self.inspect, :plain ) + end + end +end + +class Range + yaml_as "tag:ruby.yaml.org,2002:range" + def Range.yaml_new( klass, tag, val ) + inr = %r'(\w+|[+-]?\d+(?:\.\d+)?(?:e[+-]\d+)?|"(?:[^\\"]|\\.)*")' + opts = {} + if String === val and val =~ /^#{inr}(\.{2,3})#{inr}$/o + r1, rdots, r2 = $1, $2, $3 + opts = { + 'begin' => YAML.load( "--- #{r1}" ), + 'end' => YAML.load( "--- #{r2}" ), + 'excl' => rdots.length == 3 + } + val = {} + elsif Hash === val + opts['begin'] = val.delete('begin') + opts['end'] = val.delete('end') + opts['excl'] = val.delete('excl') + end + if Hash === opts + r = YAML::object_maker( klass, {} ) + # Thank you, NaHi + 
Range.instance_method(:initialize). + bind(r). + call( opts['begin'], opts['end'], opts['excl'] ) + val.each { |k,v| r.instance_variable_set( k, v ) } + r + else + raise YAML::TypeError, "Invalid Range: " + val.inspect + end + end + def to_yaml( opts = {} ) + YAML::quick_emit( self, opts ) do |out| + # if self.begin.is_complex_yaml? or self.begin.respond_to? :to_str or + # self.end.is_complex_yaml? or self.end.respond_to? :to_str or + # not to_yaml_properties.empty? + out.map( taguri, to_yaml_style ) do |map| + map.add( 'begin', self.begin ) + map.add( 'end', self.end ) + map.add( 'excl', self.exclude_end? ) + to_yaml_properties.each do |m| + map.add( m, instance_variable_get( m ) ) + end + end + # else + # out.scalar( taguri ) do |sc| + # sc.embed( self.begin ) + # sc.concat( self.exclude_end? ? "..." : ".." ) + # sc.embed( self.end ) + # end + # end + end + end +end + +class Regexp + yaml_as "tag:ruby.yaml.org,2002:regexp" + def Regexp.yaml_new( klass, tag, val ) + if String === val and val =~ /^\/(.*)\/([mix]*)$/ + val = { 'regexp' => $1, 'mods' => $2 } + end + if Hash === val + mods = nil + unless val['mods'].to_s.empty? + mods = 0x00 + mods |= Regexp::EXTENDED if val['mods'].include?( 'x' ) + mods |= Regexp::IGNORECASE if val['mods'].include?( 'i' ) + mods |= Regexp::MULTILINE if val['mods'].include?( 'm' ) + end + val.delete( 'mods' ) + r = YAML::object_maker( klass, {} ) + Regexp.instance_method(:initialize). + bind(r). + call( val.delete( 'regexp' ), mods ) + val.each { |k,v| r.instance_variable_set( k, v ) } + r + else + raise YAML::TypeError, "Invalid Regular expression: " + val.inspect + end + end + def to_yaml( opts = {} ) + YAML::quick_emit( nil, opts ) do |out| + if to_yaml_properties.empty? 
+ out.scalar( taguri, self.inspect, :plain ) + else + out.map( taguri, to_yaml_style ) do |map| + src = self.inspect + if src =~ /\A\/(.*)\/([a-z]*)\Z/ + map.add( 'regexp', $1 ) + map.add( 'mods', $2 ) + else + raise YAML::TypeError, "Invalid Regular expression: " + src + end + to_yaml_properties.each do |m| + map.add( m, instance_variable_get( m ) ) + end + end + end + end + end +end + +class Time + yaml_as "tag:ruby.yaml.org,2002:time" + yaml_as "tag:yaml.org,2002:timestamp" + def Time.yaml_new( klass, tag, val ) + if Hash === val + t = val.delete( 'at' ) + val.each { |k,v| t.instance_variable_set( k, v ) } + t + else + raise YAML::TypeError, "Invalid Time: " + val.inspect + end + end + def to_yaml( opts = {} ) + YAML::quick_emit( self, opts ) do |out| + tz = "Z" + # from the tidy Tobias Peters Thanks! + unless self.utc? + utc_same_instant = self.dup.utc + utc_same_writing = Time.utc(year,month,day,hour,min,sec,usec) + difference_to_utc = utc_same_writing - utc_same_instant + if (difference_to_utc < 0) + difference_sign = '-' + absolute_difference = -difference_to_utc + else + difference_sign = '+' + absolute_difference = difference_to_utc + end + difference_minutes = (absolute_difference/60).round + tz = "%s%02d:%02d" % [ difference_sign, difference_minutes / 60, difference_minutes % 60] + end + standard = self.strftime( "%Y-%m-%d %H:%M:%S" ) + standard += ".%06d" % [usec] if usec.nonzero? + standard += " %s" % [tz] + if to_yaml_properties.empty? 
+ out.scalar( taguri, standard, :plain ) + else + out.map( taguri, to_yaml_style ) do |map| + map.add( 'at', standard ) + to_yaml_properties.each do |m| + map.add( m, instance_variable_get( m ) ) + end + end + end + end + end +end + +class Date + yaml_as "tag:yaml.org,2002:timestamp#ymd" + def to_yaml( opts = {} ) + YAML::quick_emit( self, opts ) do |out| + out.scalar( "tag:yaml.org,2002:timestamp", self.to_s, :plain ) + end + end +end + +class Integer + yaml_as "tag:yaml.org,2002:int" + def to_yaml( opts = {} ) + YAML::quick_emit( nil, opts ) do |out| + out.scalar( "tag:yaml.org,2002:int", self.to_s, :plain ) + end + end +end + +class Float + yaml_as "tag:yaml.org,2002:float" + def to_yaml( opts = {} ) + YAML::quick_emit( nil, opts ) do |out| + str = self.to_s + if str == "Infinity" + str = ".Inf" + elsif str == "-Infinity" + str = "-.Inf" + elsif str == "NaN" + str = ".NaN" + end + out.scalar( "tag:yaml.org,2002:float", str, :plain ) + end + end +end + +class Rational + yaml_as "tag:ruby.yaml.org,2002:object:Rational" + def Rational.yaml_new( klass, tag, val ) + if val.is_a? String + Rational( val ) + else + Rational( val['numerator'], val['denominator'] ) + end + end + def to_yaml( opts = {} ) + YAML::quick_emit( self, opts ) do |out| + out.map( taguri, nil ) do |map| + map.add( 'denominator', denominator ) + map.add( 'numerator', numerator ) + end + end + end +end + +class Complex + yaml_as "tag:ruby.yaml.org,2002:object:Complex" + def Complex.yaml_new( klass, tag, val ) + if val.is_a? 
String + Complex( val ) + else + Complex( val['real'], val['image'] ) + end + end + def to_yaml( opts = {} ) + YAML::quick_emit( self, opts ) do |out| + out.map( taguri, nil ) do |map| + map.add( 'image', imaginary ) + map.add( 'real', real ) + end + end + end +end + +class TrueClass + yaml_as "tag:yaml.org,2002:bool#yes" + def to_yaml( opts = {} ) + YAML::quick_emit( nil, opts ) do |out| + out.scalar( taguri, "true", :plain ) + end + end +end + +class FalseClass + yaml_as "tag:yaml.org,2002:bool#no" + def to_yaml( opts = {} ) + YAML::quick_emit( nil, opts ) do |out| + out.scalar( taguri, "false", :plain ) + end + end +end + +class NilClass + yaml_as "tag:yaml.org,2002:null" + def to_yaml( opts = {} ) + YAML::quick_emit( nil, opts ) do |out| + out.scalar( taguri, "", :plain ) + end + end +end + diff --git a/lib/yaml/store.rb b/lib/yaml/store.rb new file mode 100644 index 0000000..e3a8e9f --- /dev/null +++ b/lib/yaml/store.rb @@ -0,0 +1,43 @@ +# +# YAML::Store +# +require 'yaml' +require 'pstore' + +class YAML::Store < PStore + def initialize( *o ) + @opt = YAML::DEFAULTS.dup + if String === o.first + super(o.shift) + end + if o.last.is_a? Hash + @opt.update(o.pop) + end + end + + def dump(table) + @table.to_yaml(@opt) + end + + def load(content) + table = YAML::load(content) + if table == false + {} + else + table + end + end + + def marshal_dump_supports_canonical_option? 
+ false + end + + EMPTY_MARSHAL_DATA = {}.to_yaml + EMPTY_MARSHAL_CHECKSUM = Digest::MD5.digest(EMPTY_MARSHAL_DATA) + def empty_marshal_data + EMPTY_MARSHAL_DATA + end + def empty_marshal_checksum + EMPTY_MARSHAL_CHECKSUM + end +end diff --git a/lib/yaml/stream.rb b/lib/yaml/stream.rb new file mode 100644 index 0000000..060fbc4 --- /dev/null +++ b/lib/yaml/stream.rb @@ -0,0 +1,40 @@ +module YAML + + # + # YAML::Stream -- for emitting many documents + # + class Stream + + attr_accessor :documents, :options + + def initialize( opts = {} ) + @options = opts + @documents = [] + end + + def []( i ) + @documents[ i ] + end + + def add( doc ) + @documents << doc + end + + def edit( doc_num, doc ) + @documents[ doc_num ] = doc + end + + def emit( io = nil ) + # opts = @options.dup + # opts[:UseHeader] = true if @documents.length > 1 + out = YAML.emitter + out.reset( io || io2 = StringIO.new ) + @documents.each { |v| + v.to_yaml( out ) + } + io || ( io2.rewind; io2.read ) + end + + end + +end diff --git a/lib/yaml/stringio.rb b/lib/yaml/stringio.rb new file mode 100644 index 0000000..8ad949f --- /dev/null +++ b/lib/yaml/stringio.rb @@ -0,0 +1,83 @@ +# +# Limited StringIO if no core lib is available +# +begin +require 'stringio' +rescue LoadError + # StringIO based on code by MoonWolf + class StringIO + def initialize(string="") + @string=string + @pos=0 + @eof=(string.size==0) + end + def pos + @pos + end + def eof + @eof + end + alias eof? 
eof + def readline(rs=$/) + if @eof + raise EOFError + else + if p = @string[@pos..-1]=~rs + line = @string[@pos,p+1] + else + line = @string[@pos..-1] + end + @pos+=line.size + @eof =true if @pos==@string.size + $_ = line + end + end + def rewind + seek(0,0) + end + def seek(offset,whence) + case whence + when 0 + @pos=offset + when 1 + @pos+=offset + when 2 + @pos=@string.size+offset + end + @eof=(@pos>=@string.size) + 0 + end + end + + # + # Class method for creating streams + # + def YAML.make_stream( io ) + if String === io + io = StringIO.new( io ) + elsif not IO === io + raise YAML::Error, "YAML stream must be an IO or String object." + end + if YAML::unicode + def io.readline + YAML.utf_to_internal( readline( @ln_sep ), @utf_encoding ) + end + def io.check_unicode + @utf_encoding = YAML.sniff_encoding( read( 4 ) ) + @ln_sep = YAML.enc_separator( @utf_encoding ) + seek( -4, IO::SEEK_CUR ) + end + def io.utf_encoding + @utf_encoding + end + io.check_unicode + else + def io.utf_encoding + :None + end + end + io + end + +end + diff --git a/lib/yaml/syck.rb b/lib/yaml/syck.rb new file mode 100644 index 0000000..faf57e8 --- /dev/null +++ b/lib/yaml/syck.rb @@ -0,0 +1,19 @@ +# +# YAML::Syck module +# .. glues syck and yaml.rb together .. +# +require 'syck' +require 'yaml/basenode' + +module YAML + module Syck + + # + # Mixin BaseNode functionality + # + class Node + include YAML::BaseNode + end + + end +end diff --git a/lib/yaml/tag.rb b/lib/yaml/tag.rb new file mode 100644 index 0000000..a91f2bd --- /dev/null +++ b/lib/yaml/tag.rb @@ -0,0 +1,91 @@ +# -*- mode: ruby; ruby-indent-level: 4; tab-width: 4 -*- vim: sw=4 ts=4 +# $Id: tag.rb 11708 2007-02-12 23:01:19Z shyouhei $ +# +# = yaml/tag.rb: methods for associating a taguri to a class. +# +# Author:: why the lucky stiff +# +module YAML + # A dictionary of taguris which map to + # Ruby classes. + @@tagged_classes = {} + + # + # Associates a taguri _tag_ with a Ruby class _cls_. 
The taguri is used to give types + # to classes when loading YAML. Taguris are of the form: + # + # tag:authorityName,date:specific + # + # The +authorityName+ is a domain name or email address. The +date+ is the date the type + # was issued in YYYY or YYYY-MM or YYYY-MM-DD format. The +specific+ is a name for + # the type being added. + # + # For example, built-in YAML types have 'yaml.org' as the +authorityName+ and '2002' as the + # +date+. The +specific+ is simply the name of the type: + # + # tag:yaml.org,2002:int + # tag:yaml.org,2002:float + # tag:yaml.org,2002:timestamp + # + # The domain must be owned by you on the +date+ declared. If you don't own any domains on the + # date you declare the type, you can simply use an e-mail address. + # + # tag:why@ruby-lang.org,2004:notes/personal + # + def YAML.tag_class( tag, cls ) + if @@tagged_classes.has_key? tag + warn "class #{ @@tagged_classes[tag] } held ownership of the #{ tag } tag" + end + @@tagged_classes[tag] = cls + end + + # Returns the complete dictionary of taguris, paired with classes. The key for + # the dictionary is the full taguri. The value for each key is the class constant + # associated to that taguri. + # + # YAML.tagged_classes["tag:yaml.org,2002:int"] => Integer + # + def YAML.tagged_classes + @@tagged_classes + end +end + +class Module + # :stopdoc: + + # Adds a taguri _tag_ to a class, used when dumping or loading the class + # in YAML. See YAML::tag_class for detailed information on typing and + # taguris. + def yaml_as( tag, sc = true ) + verbose, $VERBOSE = $VERBOSE, nil + class_eval <<-"end;", __FILE__, __LINE__+1 + attr_writer :taguri + def taguri + if respond_to? :to_yaml_type + YAML::tagurize( to_yaml_type[1..-1] ) + else + return @taguri if defined?(@taguri) and @taguri + tag = #{ tag.dump } + if self.class.yaml_tag_subclasses? 
and self.class != YAML::tagged_classes[tag] + tag = "\#{ tag }:\#{ self.class.yaml_tag_class_name }" + end + tag + end + end + def self.yaml_tag_subclasses?; #{ sc ? 'true' : 'false' }; end + end; + YAML::tag_class tag, self + ensure + $VERBOSE = verbose + end + # Transforms the subclass name into a name suitable for display + # in a subclassed tag. + def yaml_tag_class_name + self.name + end + # Transforms the subclass name found in the tag into a Ruby + # constant name. + def yaml_tag_read_class( name ) + name + end +end diff --git a/lib/yaml/types.rb b/lib/yaml/types.rb new file mode 100644 index 0000000..3871c62 --- /dev/null +++ b/lib/yaml/types.rb @@ -0,0 +1,192 @@ +# -*- mode: ruby; ruby-indent-level: 4 -*- vim: sw=4 +# +# Classes required by the full core typeset +# + +module YAML + + # + # Default private type + # + class PrivateType + def self.tag_subclasses?; false; end + verbose, $VERBOSE = $VERBOSE, nil + def initialize( type, val ) + @type_id = type; @value = val + @value.taguri = "x-private:#{ @type_id }" + end + def to_yaml( opts = {} ) + @value.to_yaml( opts ) + end + ensure + $VERBOSE = verbose + end + + # + # Default domain type + # + class DomainType + def self.tag_subclasses?; false; end + verbose, $VERBOSE = $VERBOSE, nil + def initialize( domain, type, val ) + @domain = domain; @type_id = type; @value = val + @value.taguri = "tag:#{ @domain }:#{ @type_id }" + end + def to_yaml( opts = {} ) + @value.to_yaml( opts ) + end + ensure + $VERBOSE = verbose + end + + # + # Unresolved objects + # + class Object + def self.tag_subclasses?; false; end + def to_yaml( opts = {} ) + YAML::quick_emit( self, opts ) do |out| + out.map( "tag:ruby.yaml.org,2002:object:#{ @class }", to_yaml_style ) do |map| + @ivars.each do |k,v| + map.add( k, v ) + end + end + end + end + end + + # + # YAML Hash class to support comments and defaults + # + class SpecialHash < ::Hash + attr_accessor :default + def inspect + self.default.to_s + end + def to_s + self.default.to_s 
+ end + def update( h ) + if YAML::SpecialHash === h + @default = h.default if h.default + end + super( h ) + end + def to_yaml( opts = {} ) + opts[:DefaultKey] = self.default + super( opts ) + end + end + + # + # Builtin collection: !omap + # + class Omap < ::Array + yaml_as "tag:yaml.org,2002:omap" + def yaml_initialize( tag, val ) + if Array === val + val.each do |v| + if Hash === v + concat( v.to_a ) # Convert the map to a sequence + else + raise YAML::Error, "Invalid !omap entry: " + val.inspect + end + end + else + raise YAML::Error, "Invalid !omap: " + val.inspect + end + self + end + def self.[]( *vals ) + o = Omap.new + 0.step( vals.length - 1, 2 ) do |i| + o[vals[i]] = vals[i+1] + end + o + end + def []( k ) + self.assoc( k ).to_a[1] + end + def []=( k, *rest ) + val, set = rest.reverse + if ( tmp = self.assoc( k ) ) and not set + tmp[1] = val + else + self << [ k, val ] + end + val + end + def has_key?( k ) + self.assoc( k ) ? true : false + end + def is_complex_yaml? + true + end + def to_yaml( opts = {} ) + YAML::quick_emit( self, opts ) do |out| + out.seq( taguri, to_yaml_style ) do |seq| + self.each do |v| + seq.add( Hash[ *v ] ) + end + end + end + end + end + + # + # Builtin collection: !pairs + # + class Pairs < ::Array + yaml_as "tag:yaml.org,2002:pairs" + def yaml_initialize( tag, val ) + if Array === val + val.each do |v| + if Hash === v + concat( v.to_a ) # Convert the map to a sequence + else + raise YAML::Error, "Invalid !pairs entry: " + val.inspect + end + end + else + raise YAML::Error, "Invalid !pairs: " + val.inspect + end + self + end + def self.[]( *vals ) + p = Pairs.new + 0.step( vals.length - 1, 2 ) { |i| + p[vals[i]] = vals[i+1] + } + p + end + def []( k ) + self.assoc( k ).to_a + end + def []=( k, val ) + self << [ k, val ] + val + end + def has_key?( k ) + self.assoc( k ) ? true : false + end + def is_complex_yaml? 
+ true + end + def to_yaml( opts = {} ) + YAML::quick_emit( self, opts ) do |out| + out.seq( taguri, to_yaml_style ) do |seq| + self.each do |v| + seq.add( Hash[ *v ] ) + end + end + end + end + end + + # + # Builtin collection: !set + # + class Set < ::Hash + yaml_as "tag:yaml.org,2002:set" + end +end diff --git a/lib/yaml/yamlnode.rb b/lib/yaml/yamlnode.rb new file mode 100644 index 0000000..8afa142 --- /dev/null +++ b/lib/yaml/yamlnode.rb @@ -0,0 +1,54 @@ +# +# YAML::YamlNode class +# +require 'yaml/basenode' + +module YAML + + # + # YAML Generic Model container + # + class YamlNode + include BaseNode + attr_accessor :kind, :type_id, :value, :anchor + def initialize(t, v) + @type_id = t + if Hash === v + @kind = 'map' + @value = {} + v.each {|key,val| + @value[key.transform] = [key, val] + } + elsif Array === v + @kind = 'seq' + @value = v + elsif String === v + @kind = 'scalar' + @value = v + end + end + + # + # Transform this node fully into a native type + # + def transform + t = nil + if @value.is_a? Hash + t = {} + @value.each { |k,v| + t[ k ] = v[1].transform + } + elsif @value.is_a? Array + t = [] + @value.each { |v| + t.push v.transform + } + else + t = @value + end + YAML.transfer_method( @type_id, t ) + end + + end + +end diff --git a/lib/yaml/ypath.rb b/lib/yaml/ypath.rb new file mode 100644 index 0000000..81348ca --- /dev/null +++ b/lib/yaml/ypath.rb @@ -0,0 +1,52 @@ +# +# YAML::YPath +# + +module YAML + + class YPath + attr_accessor :segments, :predicates, :flags + def initialize( str ) + @segments = [] + @predicates = [] + @flags = nil + while str =~ /^\/?(\/|[^\/\[]+)(?:\[([^\]]+)\])?/ + @segments.push $1 + @predicates.push $2 + str = $' + end + unless str.to_s.empty? + @segments += str.split( "/" ) + end + if @segments.length == 0 + @segments.push "." 
+ end + end + def YPath.each_path( str ) + # + # Find choices + # + paths = [] + str = "(#{ str })" + while str.sub!( /\(([^()]+)\)/, "\n#{ paths.length }\n" ) + paths.push $1.split( '|' ) + end + + # + # Construct all possible paths + # + all = [ str ] + ( paths.length - 1 ).downto( 0 ) do |i| + all = all.collect do |a| + paths[i].collect do |p| + a.gsub( /\n#{ i }\n/, p ) + end + end.flatten.uniq + end + all.collect do |path| + yield YPath.new( path ) + end + end + end + +end diff --git a/load.c b/load.c new file mode 100644 index 0000000..a588a5c --- /dev/null +++ b/load.c @@ -0,0 +1,725 @@ +/* + * load methods from eval.c + */ + +#include "ruby/ruby.h" +#include "ruby/util.h" +#include "dln.h" +#include "eval_intern.h" + +VALUE ruby_dln_librefs; + +#define IS_RBEXT(e) (strcmp(e, ".rb") == 0) +#define IS_SOEXT(e) (strcmp(e, ".so") == 0 || strcmp(e, ".o") == 0) +#ifdef DLEXT2 +#define IS_DLEXT(e) (strcmp(e, DLEXT) == 0 || strcmp(e, DLEXT2) == 0) +#else +#define IS_DLEXT(e) (strcmp(e, DLEXT) == 0) +#endif + + +static const char *const loadable_ext[] = { + ".rb", DLEXT, +#ifdef DLEXT2 + DLEXT2, +#endif + 0 +}; + +VALUE +rb_get_load_path(void) +{ + VALUE load_path = GET_VM()->load_path; + return load_path; +} + +VALUE +rb_get_expanded_load_path(void) +{ + VALUE load_path = rb_get_load_path(); + VALUE ary = rb_ary_new2(RARRAY_LEN(load_path)); + long i; + + for (i = 0; i < RARRAY_LEN(load_path); ++i) { + VALUE path = rb_file_expand_path(RARRAY_PTR(load_path)[i], Qnil); + rb_str_freeze(path); + rb_ary_push(ary, path); + } + rb_obj_freeze(ary); + return ary; +} + +static VALUE +load_path_getter(ID id, rb_vm_t *vm) +{ + return vm->load_path; +} + +static VALUE +get_loaded_features(void) +{ + return GET_VM()->loaded_features; +} + +static st_table * +get_loading_table(void) +{ + return GET_VM()->loading_table; +} + +static VALUE +loaded_feature_path(const char *name, long vlen, const char *feature, long len, + int type, VALUE load_path) +{ + long i; + + for (i = 0; i < 
RARRAY_LEN(load_path); ++i) { + VALUE p = RARRAY_PTR(load_path)[i]; + const char *s = StringValuePtr(p); + long n = RSTRING_LEN(p); + + if (vlen < n + len + 1) continue; + if (n && (strncmp(name, s, n) || name[n] != '/')) continue; + if (strncmp(name + n + 1, feature, len)) continue; + if (name[n+len+1] && name[n+len+1] != '.') continue; + switch (type) { + case 's': + if (IS_DLEXT(&name[n+len+1])) return p; + break; + case 'r': + if (IS_RBEXT(&name[n+len+1])) return p; + break; + default: + return p; + } + } + return 0; +} + +struct loaded_feature_searching { + const char *name; + long len; + int type; + VALUE load_path; + const char *result; +}; + +static int +loaded_feature_path_i(st_data_t v, st_data_t b, st_data_t f) +{ + const char *s = (const char *)v; + struct loaded_feature_searching *fp = (struct loaded_feature_searching *)f; + VALUE p = loaded_feature_path(s, strlen(s), fp->name, fp->len, + fp->type, fp->load_path); + if (!p) return ST_CONTINUE; + fp->result = s; + return ST_STOP; +} + +static int +rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const char **fn) +{ + VALUE v, features, p, load_path = 0; + const char *f, *e; + long i, len, elen, n; + st_table *loading_tbl; + st_data_t data; + int type; + + if (fn) *fn = 0; + if (ext) { + elen = strlen(ext); + len = strlen(feature) - elen; + type = rb ? 
'r' : 's'; + } + else { + len = strlen(feature); + elen = 0; + type = 0; + } + features = get_loaded_features(); + for (i = 0; i < RARRAY_LEN(features); ++i) { + v = RARRAY_PTR(features)[i]; + f = StringValuePtr(v); + if ((n = RSTRING_LEN(v)) < len) continue; + if (strncmp(f, feature, len) != 0) { + if (expanded) continue; + if (!load_path) load_path = rb_get_expanded_load_path(); + if (!(p = loaded_feature_path(f, n, feature, len, type, load_path))) + continue; + expanded = 1; + f += RSTRING_LEN(p) + 1; + } + if (!*(e = f + len)) { + if (ext) continue; + return 'u'; + } + if (*e != '.') continue; + if ((!rb || !ext) && (IS_SOEXT(e) || IS_DLEXT(e))) { + return 's'; + } + if ((rb || !ext) && (IS_RBEXT(e))) { + return 'r'; + } + } + loading_tbl = get_loading_table(); + if (loading_tbl) { + f = 0; + if (!expanded) { + struct loaded_feature_searching fs; + fs.name = feature; + fs.len = len; + fs.type = type; + fs.load_path = load_path ? load_path : rb_get_load_path(); + fs.result = 0; + st_foreach(loading_tbl, loaded_feature_path_i, (st_data_t)&fs); + if ((f = fs.result) != 0) { + if (fn) *fn = f; + goto loading; + } + } + if (st_get_key(loading_tbl, (st_data_t)feature, &data)) { + if (fn) *fn = (const char*)data; + loading: + if (!ext) return 'u'; + return !IS_RBEXT(ext) ? 's' : 'r'; + } + else { + VALUE bufstr; + char *buf; + + if (ext && *ext) return 0; + bufstr = rb_str_tmp_new(len + DLEXT_MAXLEN); + buf = RSTRING_PTR(bufstr); + MEMCPY(buf, feature, char, len); + for (i = 0; (e = loadable_ext[i]) != 0; i++) { + strlcpy(buf + len, e, DLEXT_MAXLEN + 1); + if (st_get_key(loading_tbl, (st_data_t)buf, &data)) { + rb_str_resize(bufstr, 0); + if (fn) *fn = (const char*)data; + return i ? 
's' : 'r'; + } + } + rb_str_resize(bufstr, 0); + } + } + return 0; +} + +int +rb_provided(const char *feature) +{ + return rb_feature_provided(feature, 0); +} + +int +rb_feature_provided(const char *feature, const char **loading) +{ + const char *ext = strrchr(feature, '.'); + volatile VALUE fullpath = 0; + + if (*feature == '.' && + (feature[1] == '/' || strncmp(feature+1, "./", 2) == 0)) { + fullpath = rb_file_expand_path(rb_str_new2(feature), Qnil); + feature = RSTRING_PTR(fullpath); + } + if (ext && !strchr(ext, '/')) { + if (IS_RBEXT(ext)) { + if (rb_feature_p(feature, ext, Qtrue, Qfalse, loading)) return Qtrue; + return Qfalse; + } + else if (IS_SOEXT(ext) || IS_DLEXT(ext)) { + if (rb_feature_p(feature, ext, Qfalse, Qfalse, loading)) return Qtrue; + return Qfalse; + } + } + if (rb_feature_p(feature, feature + strlen(feature), Qtrue, Qfalse, loading)) + return Qtrue; + return Qfalse; +} + +static void +rb_provide_feature(VALUE feature) +{ + rb_ary_push(get_loaded_features(), feature); +} + +void +rb_provide(const char *feature) +{ + rb_provide_feature(rb_str_new2(feature)); +} + +NORETURN(static void load_failed(VALUE)); + +void +rb_load(VALUE fname, int wrap) +{ + VALUE tmp; + int state; + rb_thread_t *th = GET_THREAD(); + volatile VALUE wrapper = th->top_wrapper; + volatile VALUE self = th->top_self; + volatile int loaded = Qfalse; + volatile int mild_compile_error; +#ifndef __GNUC__ + rb_thread_t *volatile th0 = th; +#endif + + FilePathValue(fname); + fname = rb_str_new4(fname); + tmp = rb_find_file(fname); + if (!tmp) { + load_failed(fname); + } + RB_GC_GUARD(fname) = rb_str_new4(tmp); + + th->errinfo = Qnil; /* ensure */ + + if (!wrap) { + rb_secure(4); /* should alter global state */ + th->top_wrapper = 0; + } + else { + /* load in anonymous module as toplevel */ + th->top_self = rb_obj_clone(rb_vm_top_self()); + th->top_wrapper = rb_module_new(); + rb_extend_object(th->top_self, th->top_wrapper); + } + + mild_compile_error = th->mild_compile_error; + 
PUSH_TAG(); + state = EXEC_TAG(); + if (state == 0) { + NODE *node; + VALUE iseq; + + th->mild_compile_error++; + node = (NODE *)rb_load_file(RSTRING_PTR(fname)); + loaded = Qtrue; + iseq = rb_iseq_new_top(node, rb_str_new2(""), fname, Qfalse); + th->mild_compile_error--; + rb_iseq_eval(iseq); + } + POP_TAG(); + +#ifndef __GNUC__ + th = th0; + fname = RB_GC_GUARD(fname); +#endif + th->mild_compile_error = mild_compile_error; + th->top_self = self; + th->top_wrapper = wrapper; + + if (!loaded) { + rb_exc_raise(GET_THREAD()->errinfo); + } + if (state) { + rb_vm_jump_tag_but_local_jump(state, Qundef); + } + + if (!NIL_P(GET_THREAD()->errinfo)) { + /* exception during load */ + rb_exc_raise(th->errinfo); + } +} + +void +rb_load_protect(VALUE fname, int wrap, int *state) +{ + int status; + + PUSH_TAG(); + if ((status = EXEC_TAG()) == 0) { + rb_load(fname, wrap); + } + POP_TAG(); + if (state) + *state = status; +} + +/* + * call-seq: + * load(filename, wrap=false) => true + * + * Loads and executes the Ruby + * program in the file _filename_. If the filename does not + * resolve to an absolute path, the file is searched for in the library + * directories listed in $:. If the optional _wrap_ + * parameter is +true+, the loaded script will be executed + * under an anonymous module, protecting the calling program's global + * namespace. In no circumstance will any local variables in the loaded + * file be propagated to the loading environment. + */ + +static VALUE +rb_f_load(int argc, VALUE *argv) +{ + VALUE fname, wrap; + + rb_scan_args(argc, argv, "11", &fname, &wrap); + rb_load(fname, RTEST(wrap)); + return Qtrue; +} + +static char * +load_lock(const char *ftptr) +{ + st_data_t data; + st_table *loading_tbl = get_loading_table(); + + if (!loading_tbl || !st_lookup(loading_tbl, (st_data_t)ftptr, &data)) { + /* loading ruby library should be serialized. 
*/ + if (!loading_tbl) { + GET_VM()->loading_table = loading_tbl = st_init_strtable(); + } + /* partial state */ + ftptr = ruby_strdup(ftptr); + data = (st_data_t)rb_barrier_new(); + st_insert(loading_tbl, (st_data_t)ftptr, data); + return (char *)ftptr; + } + return RTEST(rb_barrier_wait((VALUE)data)) ? (char *)ftptr : 0; +} + +static void +load_unlock(const char *ftptr, int done) +{ + if (ftptr) { + st_data_t key = (st_data_t)ftptr; + st_data_t data; + st_table *loading_tbl = get_loading_table(); + + if (st_delete(loading_tbl, &key, &data)) { + VALUE barrier = (VALUE)data; + xfree((char *)key); + if (done) + rb_barrier_destroy(barrier); + else + rb_barrier_release(barrier); + } + } +} + + +/* + * call-seq: + * require(string) => true or false + * + * Ruby tries to load the library named _string_, returning + * +true+ if successful. If the filename does not resolve to + * an absolute path, it will be searched for in the directories listed + * in $:. If the file has the extension ``.rb'', it is + * loaded as a source file; if the extension is ``.so'', ``.o'', or + * ``.dll'', or whatever the default shared library extension is on + * the current platform, Ruby loads the shared library as a Ruby + * extension. Otherwise, Ruby tries adding ``.rb'', ``.so'', and so on + * to the name. The name of the loaded feature is added to the array in + * $". A feature will not be loaded if its name already + * appears in $". The file name is converted to an absolute + * path, so ``require 'a'; require './a''' will not load + * a.rb twice. 
+ * + * require "my-library.rb" + * require "db-driver" + */ + +VALUE +rb_f_require(VALUE obj, VALUE fname) +{ + return rb_require_safe(fname, rb_safe_level()); +} + +static int +search_required(VALUE fname, volatile VALUE *path) +{ + VALUE tmp; + char *ext, *ftptr; + int type, ft = 0; + const char *loading; + + *path = 0; + ext = strrchr(ftptr = RSTRING_PTR(fname), '.'); + if (ext && !strchr(ext, '/')) { + if (IS_RBEXT(ext)) { + if (rb_feature_p(ftptr, ext, Qtrue, Qfalse, &loading)) { + if (loading) *path = rb_str_new2(loading); + return 'r'; + } + if ((tmp = rb_find_file(fname)) != 0) { + ext = strrchr(ftptr = RSTRING_PTR(tmp), '.'); + if (!rb_feature_p(ftptr, ext, Qtrue, Qtrue, &loading) || loading) + *path = tmp; + return 'r'; + } + return 0; + } + else if (IS_SOEXT(ext)) { + if (rb_feature_p(ftptr, ext, Qfalse, Qfalse, &loading)) { + if (loading) *path = rb_str_new2(loading); + return 's'; + } + tmp = rb_str_new(RSTRING_PTR(fname), ext - RSTRING_PTR(fname)); +#ifdef DLEXT2 + OBJ_FREEZE(tmp); + if (rb_find_file_ext(&tmp, loadable_ext + 1)) { + ext = strrchr(ftptr = RSTRING_PTR(tmp), '.'); + if (!rb_feature_p(ftptr, ext, Qfalse, Qtrue, &loading) || loading) + *path = tmp; + return 's'; + } +#else + rb_str_cat2(tmp, DLEXT); + OBJ_FREEZE(tmp); + if ((tmp = rb_find_file(tmp)) != 0) { + ext = strrchr(ftptr = RSTRING_PTR(tmp), '.'); + if (!rb_feature_p(ftptr, ext, Qfalse, Qtrue, &loading) || loading) + *path = tmp; + return 's'; + } +#endif + } + else if (IS_DLEXT(ext)) { + if (rb_feature_p(ftptr, ext, Qfalse, Qfalse, &loading)) { + if (loading) *path = rb_str_new2(loading); + return 's'; + } + if ((tmp = rb_find_file(fname)) != 0) { + ext = strrchr(ftptr = RSTRING_PTR(tmp), '.'); + if (!rb_feature_p(ftptr, ext, Qfalse, Qtrue, &loading) || loading) + *path = tmp; + return 's'; + } + } + } + else if ((ft = rb_feature_p(ftptr, 0, Qfalse, Qfalse, &loading)) == 'r') { + if (loading) *path = rb_str_new2(loading); + return 'r'; + } + tmp = fname; + type = 
rb_find_file_ext(&tmp, loadable_ext); + tmp = rb_file_expand_path(tmp, Qnil); + switch (type) { + case 0: + if (ft) + break; + ftptr = RSTRING_PTR(tmp); + return rb_feature_p(ftptr, 0, Qfalse, Qtrue, 0); + + default: + if (ft) + break; + case 1: + ext = strrchr(ftptr = RSTRING_PTR(tmp), '.'); + if (rb_feature_p(ftptr, ext, !--type, Qtrue, &loading) && !loading) + break; + *path = tmp; + } + return type ? 's' : 'r'; +} + +static void +load_failed(VALUE fname) +{ + rb_raise(rb_eLoadError, "no such file to load -- %s", + RSTRING_PTR(fname)); +} + +static VALUE +load_ext(VALUE path) +{ + SCOPE_SET(NOEX_PUBLIC); + return (VALUE)dln_load(RSTRING_PTR(path)); +} + +VALUE +rb_require_safe(VALUE fname, int safe) +{ + VALUE result = Qnil; + rb_thread_t *th = GET_THREAD(); + volatile VALUE errinfo = th->errinfo; + int state; + struct { + int safe; + } volatile saved; + char *volatile ftptr = 0; + + PUSH_TAG(); + saved.safe = rb_safe_level(); + if ((state = EXEC_TAG()) == 0) { + VALUE path; + long handle; + int found; + + rb_set_safe_level_force(safe); + FilePathValue(fname); + RB_GC_GUARD(fname) = rb_str_new4(fname); + rb_set_safe_level_force(0); + found = search_required(fname, &path); + if (found) { + if (!path || !(ftptr = load_lock(RSTRING_PTR(path)))) { + result = Qfalse; + } + else { + if (safe > 0 && OBJ_TAINTED(path)) { + rb_raise(rb_eSecurityError, "cannot load from insecure path - %s", + RSTRING_PTR(path)); + } + switch (found) { + case 'r': + rb_load(path, 0); + break; + + case 's': + handle = (long)rb_vm_call_cfunc(rb_vm_top_self(), load_ext, + path, 0, path); + rb_ary_push(ruby_dln_librefs, LONG2NUM(handle)); + break; + } + rb_provide_feature(path); + result = Qtrue; + } + } + } + POP_TAG(); + load_unlock(ftptr, !state); + + rb_set_safe_level_force(saved.safe); + if (state) { + JUMP_TAG(state); + } + + if (NIL_P(result)) { + load_failed(fname); + } + + th->errinfo = errinfo; + + return result; +} + +VALUE +rb_require(const char *fname) +{ + VALUE fn = 
rb_str_new2(fname); + OBJ_FREEZE(fn); + return rb_require_safe(fn, rb_safe_level()); +} + +static VALUE +init_ext_call(VALUE arg) +{ + SCOPE_SET(NOEX_PUBLIC); + (*(void (*)(void))arg)(); + return Qnil; +} + +void +ruby_init_ext(const char *name, void (*init)(void)) +{ + if (load_lock(name)) { + rb_vm_call_cfunc(rb_vm_top_self(), init_ext_call, (VALUE)init, + 0, rb_str_new2(name)); + rb_provide(name); + load_unlock(name, 1); + } +} + +/* + * call-seq: + * mod.autoload(module, filename) => nil + * + * Registers _filename_ to be loaded (using Kernel::require) + * the first time that _module_ (which may be a String or + * a symbol) is accessed in the namespace of _mod_. + * + * module A + * end + * A.autoload(:B, "b") + * A::B.doit # autoloads "b" + */ + +static VALUE +rb_mod_autoload(VALUE mod, VALUE sym, VALUE file) +{ + ID id = rb_to_id(sym); + + Check_SafeStr(file); + rb_autoload(mod, id, RSTRING_PTR(file)); + return Qnil; +} + +/* + * MISSING: documentation + */ + +static VALUE +rb_mod_autoload_p(VALUE mod, VALUE sym) +{ + return rb_autoload_p(mod, rb_to_id(sym)); +} + +/* + * call-seq: + * autoload(module, filename) => nil + * + * Registers _filename_ to be loaded (using Kernel::require) + * the first time that _module_ (which may be a String or + * a symbol) is accessed. + * + * autoload(:MyModule, "/usr/local/lib/modules/my_module.rb") + */ + +static VALUE +rb_f_autoload(VALUE obj, VALUE sym, VALUE file) +{ + VALUE klass = rb_vm_cbase(); + if (NIL_P(klass)) { + rb_raise(rb_eTypeError, "Can not set autoload on singleton class"); + } + return rb_mod_autoload(klass, sym, file); +} + +/* + * MISSING: documentation + */ + +static VALUE +rb_f_autoload_p(VALUE obj, VALUE sym) +{ + /* use rb_vm_cbase() as same as rb_f_autoload. 
*/ + VALUE klass = rb_vm_cbase(); + if (NIL_P(klass)) { + return Qnil; + } + return rb_mod_autoload_p(klass, sym); +} + +void +Init_load() +{ +#undef rb_intern +#define rb_intern(str) rb_intern2(str, strlen(str)) + rb_vm_t *vm = GET_VM(); + static const char var_load_path[] = "$:"; + ID id_load_path = rb_intern2(var_load_path, sizeof(var_load_path)-1); + + rb_define_hooked_variable(var_load_path, (VALUE*)vm, load_path_getter, rb_gvar_readonly_setter); + rb_alias_variable(rb_intern("$-I"), id_load_path); + rb_alias_variable(rb_intern("$LOAD_PATH"), id_load_path); + vm->load_path = rb_ary_new(); + + rb_define_virtual_variable("$\"", get_loaded_features, 0); + rb_define_virtual_variable("$LOADED_FEATURES", get_loaded_features, 0); + vm->loaded_features = rb_ary_new(); + + rb_define_global_function("load", rb_f_load, -1); + rb_define_global_function("require", rb_f_require, 1); + rb_define_method(rb_cModule, "autoload", rb_mod_autoload, 2); + rb_define_method(rb_cModule, "autoload?", rb_mod_autoload_p, 1); + rb_define_global_function("autoload", rb_f_autoload, 2); + rb_define_global_function("autoload?", rb_f_autoload_p, 1); + + ruby_dln_librefs = rb_ary_new(); + rb_gc_register_mark_object(ruby_dln_librefs); +} diff --git a/main.c b/main.c new file mode 100644 index 0000000..3730327 --- /dev/null +++ b/main.c @@ -0,0 +1,37 @@ +/********************************************************************** + + main.c - + + $Author: akr $ + created at: Fri Aug 19 13:19:58 JST 1994 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#undef RUBY_EXPORT +#include "ruby.h" +#include "debug.h" +#ifdef HAVE_LOCALE_H +#include +#endif + +RUBY_GLOBAL_SETUP + +int +main(int argc, char **argv) +{ +#ifdef RUBY_DEBUG_ENV + ruby_set_debug_option(getenv("RUBY_DEBUG")); +#endif +#ifdef HAVE_LOCALE_H + setlocale(LC_CTYPE, ""); +#endif + + ruby_sysinit(&argc, &argv); + { + RUBY_INIT_STACK; + ruby_init(); + return 
ruby_run_node(ruby_options(argc, argv)); + } +} diff --git a/marshal.c b/marshal.c new file mode 100644 index 0000000..cd17a2f --- /dev/null +++ b/marshal.c @@ -0,0 +1,1830 @@ +/********************************************************************** + + marshal.c - + + $Author: yugui $ + created at: Thu Apr 27 16:30:01 JST 1995 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/io.h" +#include "ruby/st.h" +#include "ruby/util.h" +#include "ruby/encoding.h" + +#include +#ifdef HAVE_FLOAT_H +#include +#endif +#ifdef HAVE_IEEEFP_H +#include +#endif + +#define BITSPERSHORT (2*CHAR_BIT) +#define SHORTMASK ((1<newclass); + rb_gc_mark(p->oldclass); + return ST_CONTINUE; +} + +static void +mark_marshal_compat_t(void *tbl) +{ + if (!tbl) return; + st_foreach(tbl, mark_marshal_compat_i, 0); +} + +void +rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE)) +{ + marshal_compat_t *compat; + rb_alloc_func_t allocator = rb_get_alloc_func(newclass); + + if (!allocator) { + rb_raise(rb_eTypeError, "no allocator"); + } + + compat = ALLOC(marshal_compat_t); + compat->newclass = Qnil; + compat->oldclass = Qnil; + compat->newclass = newclass; + compat->oldclass = oldclass; + compat->dumper = dumper; + compat->loader = loader; + + st_insert(compat_allocator_tbl, (st_data_t)allocator, (st_data_t)compat); +} + +struct dump_arg { + VALUE obj; + VALUE str, dest; + st_table *symbols; + st_table *data; + int taint; + int untrust; + st_table *compat_tbl; + VALUE wrapper; + st_table *encodings; +}; + +struct dump_call_arg { + VALUE obj; + struct dump_arg *arg; + int limit; +}; + +static void +check_dump_arg(struct dump_arg *arg, ID sym) +{ + if (!DATA_PTR(arg->wrapper)) { + rb_raise(rb_eRuntimeError, "Marshal.dump reentered at %s", + rb_id2name(sym)); + } +} + +static void +mark_dump_arg(void *ptr) +{ + struct dump_arg *p = 
ptr; + if (!ptr) + return; + rb_mark_set(p->data); + rb_mark_hash(p->compat_tbl); +} + +static const char * +must_not_be_anonymous(const char *type, VALUE path) +{ + char *n = RSTRING_PTR(path); + + if (!rb_enc_asciicompat(rb_enc_get(path))) { + /* cannot occur? */ + rb_raise(rb_eTypeError, "can't dump non-ascii %s name", type); + } + if (n[0] == '#') { + rb_raise(rb_eTypeError, "can't dump anonymous %s %.*s", type, + (int)RSTRING_LEN(path), n); + } + return n; +} + +static VALUE +class2path(VALUE klass) +{ + VALUE path = rb_class_path(klass); + const char *n; + + n = must_not_be_anonymous((TYPE(klass) == T_CLASS ? "class" : "module"), path); + if (rb_path_to_class(path) != rb_class_real(klass)) { + rb_raise(rb_eTypeError, "%s can't be referred to", n); + } + return path; +} + +static void w_long(long, struct dump_arg*); +static void w_encoding(VALUE obj, long num, struct dump_call_arg *arg); + +static void +w_nbyte(const char *s, int n, struct dump_arg *arg) +{ + VALUE buf = arg->str; + rb_str_buf_cat(buf, s, n); + if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) { + if (arg->taint) OBJ_TAINT(buf); + if (arg->untrust) OBJ_UNTRUST(buf); + rb_io_write(arg->dest, buf); + rb_str_resize(buf, 0); + } +} + +static void +w_byte(char c, struct dump_arg *arg) +{ + w_nbyte(&c, 1, arg); +} + +static void +w_bytes(const char *s, int n, struct dump_arg *arg) +{ + w_long(n, arg); + w_nbyte(s, n, arg); +} + +static void +w_short(int x, struct dump_arg *arg) +{ + w_byte((char)((x >> 0) & 0xff), arg); + w_byte((char)((x >> 8) & 0xff), arg); +} + +static void +w_long(long x, struct dump_arg *arg) +{ + char buf[sizeof(long)+1]; + int i, len = 0; + +#if SIZEOF_LONG > 4 + if (!(RSHIFT(x, 31) == 0 || RSHIFT(x, 31) == -1)) { + /* big long does not fit in 4 bytes */ + rb_raise(rb_eTypeError, "long too big to dump"); + } +#endif + + if (x == 0) { + w_byte(0, arg); + return; + } + if (0 < x && x < 123) { + w_byte((char)(x + 5), arg); + return; + } + if (-124 < x && x < 0) { + w_byte((char)((x - 
5)&0xff), arg); + return; + } + for (i=1;i 32 +#define MANT_BITS 32 +#elif DBL_MANT_DIG > 24 +#define MANT_BITS 24 +#elif DBL_MANT_DIG > 16 +#define MANT_BITS 16 +#else +#define MANT_BITS 8 +#endif + +static int +save_mantissa(double d, char *buf) +{ + int e, i = 0; + unsigned long m; + double n; + + d = modf(ldexp(frexp(fabs(d), &e), DECIMAL_MANT), &d); + if (d > 0) { + buf[i++] = 0; + do { + d = modf(ldexp(d, MANT_BITS), &n); + m = (unsigned long)n; +#if MANT_BITS > 24 + buf[i++] = m >> 24; +#endif +#if MANT_BITS > 16 + buf[i++] = m >> 16; +#endif +#if MANT_BITS > 8 + buf[i++] = m >> 8; +#endif + buf[i++] = m; + } while (d > 0); + while (!buf[i - 1]) --i; + } + return i; +} + +static double +load_mantissa(double d, const char *buf, int len) +{ + if (--len > 0 && !*buf++) { /* binary mantissa mark */ + int e, s = d < 0, dig = 0; + unsigned long m; + + modf(ldexp(frexp(fabs(d), &e), DECIMAL_MANT), &d); + do { + m = 0; + switch (len) { + default: m = *buf++ & 0xff; +#if MANT_BITS > 24 + case 3: m = (m << 8) | (*buf++ & 0xff); +#endif +#if MANT_BITS > 16 + case 2: m = (m << 8) | (*buf++ & 0xff); +#endif +#if MANT_BITS > 8 + case 1: m = (m << 8) | (*buf++ & 0xff); +#endif + } + dig -= len < MANT_BITS / 8 ? 
8 * (unsigned)len : MANT_BITS; + d += ldexp((double)m, dig); + } while ((len -= MANT_BITS / 8) > 0); + d = ldexp(d, e - DECIMAL_MANT); + if (s) d = -d; + } + return d; +} +#else +#define load_mantissa(d, buf, len) (d) +#define save_mantissa(d, buf) 0 +#endif + +#ifdef DBL_DIG +#define FLOAT_DIG (DBL_DIG+2) +#else +#define FLOAT_DIG 17 +#endif + +static void +w_float(double d, struct dump_arg *arg) +{ + char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10]; + + if (isinf(d)) { + if (d < 0) strcpy(buf, "-inf"); + else strcpy(buf, "inf"); + } + else if (isnan(d)) { + strcpy(buf, "nan"); + } + else if (d == 0.0) { + if (1.0/d < 0) strcpy(buf, "-0"); + else strcpy(buf, "0"); + } + else { + int len; + + /* xxx: should not use system's sprintf(3) */ + snprintf(buf, sizeof(buf), "%.*g", FLOAT_DIG, d); + len = strlen(buf); + w_bytes(buf, len + save_mantissa(d, buf + len), arg); + return; + } + w_bytes(buf, strlen(buf), arg); +} + +static void +w_symbol(ID id, struct dump_arg *arg) +{ + VALUE sym; + st_data_t num; + int encidx = -1; + + if (st_lookup(arg->symbols, id, &num)) { + w_byte(TYPE_SYMLINK, arg); + w_long((long)num, arg); + } + else { + sym = rb_id2str(id); + if (!sym) { + rb_raise(rb_eTypeError, "can't dump anonymous ID %ld", id); + } + encidx = rb_enc_get_index(sym); + if (encidx == rb_usascii_encindex()) { + encidx = -1; + } + else if (rb_enc_str_coderange(sym) != ENC_CODERANGE_7BIT) { + w_byte(TYPE_IVAR, arg); + } + w_byte(TYPE_SYMBOL, arg); + w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg); + st_add_direct(arg->symbols, id, arg->symbols->num_entries); + if (encidx != -1) { + struct dump_call_arg c_arg; + c_arg.limit = 1; + c_arg.arg = arg; + w_encoding(sym, 0, &c_arg); + } + } +} + +static void +w_unique(VALUE s, struct dump_arg *arg) +{ + must_not_be_anonymous("class", s); + w_symbol(rb_intern_str(s), arg); +} + +static void w_object(VALUE,struct dump_arg*,int); + +static int +hash_each(VALUE key, VALUE value, struct dump_call_arg *arg) +{ + w_object(key, 
arg->arg, arg->limit); + w_object(value, arg->arg, arg->limit); + return ST_CONTINUE; +} + +static void +w_extended(VALUE klass, struct dump_arg *arg, int check) +{ + if (check && FL_TEST(klass, FL_SINGLETON)) { + if (RCLASS_M_TBL(klass)->num_entries || + (RCLASS_IV_TBL(klass) && RCLASS_IV_TBL(klass)->num_entries > 1)) { + rb_raise(rb_eTypeError, "singleton can't be dumped"); + } + klass = RCLASS_SUPER(klass); + } + while (BUILTIN_TYPE(klass) == T_ICLASS) { + VALUE path = rb_class_name(RBASIC(klass)->klass); + w_byte(TYPE_EXTENDED, arg); + w_unique(path, arg); + klass = RCLASS_SUPER(klass); + } +} + +static void +w_class(char type, VALUE obj, struct dump_arg *arg, int check) +{ + VALUE path; + st_data_t real_obj; + VALUE klass; + + if (st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) { + obj = (VALUE)real_obj; + } + klass = CLASS_OF(obj); + w_extended(klass, arg, check); + w_byte(type, arg); + path = class2path(rb_class_real(klass)); + w_unique(path, arg); +} + +static void +w_uclass(VALUE obj, VALUE super, struct dump_arg *arg) +{ + VALUE klass = CLASS_OF(obj); + + w_extended(klass, arg, Qtrue); + klass = rb_class_real(klass); + if (klass != super) { + w_byte(TYPE_UCLASS, arg); + w_unique(class2path(klass), arg); + } +} + +static int +w_obj_each(ID id, VALUE value, struct dump_call_arg *arg) +{ + if (id == rb_id_encoding()) return ST_CONTINUE; + w_symbol(id, arg->arg); + w_object(value, arg->arg, arg->limit); + return ST_CONTINUE; +} + +static void +w_encoding(VALUE obj, long num, struct dump_call_arg *arg) +{ + int encidx = rb_enc_get_index(obj); + rb_encoding *enc = 0; + st_data_t name; + + if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) { + w_long(num, arg->arg); + return; + } + w_long(num + 1, arg->arg); + w_symbol(rb_id_encoding(), arg->arg); + do { + if (!arg->arg->encodings) + arg->arg->encodings = st_init_strcasetable(); + else if (st_lookup(arg->arg->encodings, (st_data_t)rb_enc_name(enc), &name)) + break; + name = 
(st_data_t)rb_str_new2(rb_enc_name(enc)); + st_insert(arg->arg->encodings, (st_data_t)rb_enc_name(enc), name); + } while (0); + w_object(name, arg->arg, arg->limit + 1); +} + +static void +w_ivar(VALUE obj, st_table *tbl, struct dump_call_arg *arg) +{ + long num = tbl ? tbl->num_entries : 0; + + w_encoding(obj, num, arg); + if (tbl) { + st_foreach_safe(tbl, w_obj_each, (st_data_t)arg); + } +} + +static void +w_objivar(VALUE obj, struct dump_call_arg *arg) +{ + VALUE *ptr; + long i, len, num; + + len = ROBJECT_NUMIV(obj); + ptr = ROBJECT_IVPTR(obj); + num = 0; + for (i = 0; i < len; i++) + if (ptr[i] != Qundef) + num += 1; + + w_encoding(obj, num, arg); + if (num != 0) { + rb_ivar_foreach(obj, w_obj_each, (st_data_t)arg); + } +} + +static void +w_object(VALUE obj, struct dump_arg *arg, int limit) +{ + struct dump_call_arg c_arg; + st_table *ivtbl = 0; + st_data_t num; + int hasiv = 0; +#define has_ivars(obj, ivtbl) ((ivtbl = rb_generic_ivar_table(obj)) != 0 || \ + (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj))) + + if (limit == 0) { + rb_raise(rb_eArgError, "exceed depth limit"); + } + + limit--; + c_arg.limit = limit; + c_arg.arg = arg; + + if (st_lookup(arg->data, obj, &num)) { + w_byte(TYPE_LINK, arg); + w_long((long)num, arg); + return; + } + + if ((hasiv = has_ivars(obj, ivtbl)) != 0) { + w_byte(TYPE_IVAR, arg); + } + if (obj == Qnil) { + w_byte(TYPE_NIL, arg); + } + else if (obj == Qtrue) { + w_byte(TYPE_TRUE, arg); + } + else if (obj == Qfalse) { + w_byte(TYPE_FALSE, arg); + } + else if (FIXNUM_P(obj)) { +#if SIZEOF_LONG <= 4 + w_byte(TYPE_FIXNUM, arg); + w_long(FIX2INT(obj), arg); +#else + if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) { + w_byte(TYPE_FIXNUM, arg); + w_long(FIX2LONG(obj), arg); + } + else { + w_object(rb_int2big(FIX2LONG(obj)), arg, limit); + } +#endif + } + else if (SYMBOL_P(obj)) { + w_symbol(SYM2ID(obj), arg); + } + else { + if (OBJ_TAINTED(obj)) arg->taint = Qtrue; + if (OBJ_UNTRUSTED(obj)) arg->untrust = Qtrue; + 
+ if (rb_respond_to(obj, s_mdump)) { + volatile VALUE v; + + st_add_direct(arg->data, obj, arg->data->num_entries); + + v = rb_funcall(obj, s_mdump, 0, 0); + check_dump_arg(arg, s_mdump); + w_class(TYPE_USRMARSHAL, obj, arg, Qfalse); + w_object(v, arg, limit); + if (hasiv) w_ivar(obj, 0, &c_arg); + return; + } + if (rb_respond_to(obj, s_dump)) { + VALUE v; + st_table *ivtbl2 = 0; + int hasiv2; + + v = rb_funcall(obj, s_dump, 1, INT2NUM(limit)); + check_dump_arg(arg, s_dump); + if (TYPE(v) != T_STRING) { + rb_raise(rb_eTypeError, "_dump() must return string"); + } + if ((hasiv2 = has_ivars(v, ivtbl2)) != 0 && !hasiv) { + w_byte(TYPE_IVAR, arg); + } + w_class(TYPE_USERDEF, obj, arg, Qfalse); + w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg); + if (hasiv2) { + w_ivar(v, ivtbl2, &c_arg); + } + else if (hasiv) { + w_ivar(obj, ivtbl, &c_arg); + } + st_add_direct(arg->data, obj, arg->data->num_entries); + return; + } + + st_add_direct(arg->data, obj, arg->data->num_entries); + + { + st_data_t compat_data; + rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass); + if (st_lookup(compat_allocator_tbl, + (st_data_t)allocator, + &compat_data)) { + marshal_compat_t *compat = (marshal_compat_t*)compat_data; + VALUE real_obj = obj; + obj = compat->dumper(real_obj); + st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj); + } + } + + switch (BUILTIN_TYPE(obj)) { + case T_CLASS: + if (FL_TEST(obj, FL_SINGLETON)) { + rb_raise(rb_eTypeError, "singleton class can't be dumped"); + } + w_byte(TYPE_CLASS, arg); + { + volatile VALUE path = class2path(obj); + w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg); + } + break; + + case T_MODULE: + w_byte(TYPE_MODULE, arg); + { + VALUE path = class2path(obj); + w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg); + } + break; + + case T_FLOAT: + w_byte(TYPE_FLOAT, arg); + w_float(RFLOAT_VALUE(obj), arg); + break; + + case T_BIGNUM: + w_byte(TYPE_BIGNUM, arg); + { + char sign = RBIGNUM_SIGN(obj) ? 
'+' : '-'; + long len = RBIGNUM_LEN(obj); + BDIGIT *d = RBIGNUM_DIGITS(obj); + + w_byte(sign, arg); + w_long(SHORTLEN(len), arg); /* w_short? */ + while (len--) { +#if SIZEOF_BDIGITS > SIZEOF_SHORT + BDIGIT num = *d; + int i; + + for (i=0; iifnone)) { + w_byte(TYPE_HASH, arg); + } + else if (FL_TEST(obj, FL_USER2)) { + /* FL_USER2 means HASH_PROC_DEFAULT (see hash.c) */ + rb_raise(rb_eTypeError, "can't dump hash with default proc"); + } + else { + w_byte(TYPE_HASH_DEF, arg); + } + w_long(RHASH_SIZE(obj), arg); + rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg); + if (!NIL_P(RHASH(obj)->ifnone)) { + w_object(RHASH(obj)->ifnone, arg, limit); + } + break; + + case T_STRUCT: + w_class(TYPE_STRUCT, obj, arg, Qtrue); + { + long len = RSTRUCT_LEN(obj); + VALUE mem; + long i; + + w_long(len, arg); + mem = rb_struct_members(obj); + for (i=0; iobj, arg->arg, arg->limit); + if (arg->arg->dest) { + rb_io_write(arg->arg->dest, arg->arg->str); + rb_str_resize(arg->arg->str, 0); + } + return 0; +} + +static VALUE +dump_ensure(struct dump_arg *arg) +{ + if (!DATA_PTR(arg->wrapper)) return 0; + st_free_table(arg->symbols); + st_free_table(arg->data); + st_free_table(arg->compat_tbl); + if (arg->encodings) st_free_table(arg->encodings); + DATA_PTR(arg->wrapper) = 0; + arg->wrapper = 0; + if (arg->taint) { + OBJ_TAINT(arg->str); + } + if (arg->untrust) { + OBJ_UNTRUST(arg->str); + } + return 0; +} + +/* + * call-seq: + * dump( obj [, anIO] , limit=--1 ) => anIO + * + * Serializes obj and all descendent objects. If anIO is + * specified, the serialized data will be written to it, otherwise the + * data will be returned as a String. If limit is specified, the + * traversal of subobjects will be limited to that depth. If limit is + * negative, no checking of depth will be performed. 
+ * + * class Klass + * def initialize(str) + * @str = str + * end + * def sayHello + * @str + * end + * end + * + * (produces no output) + * + * o = Klass.new("hello\n") + * data = Marshal.dump(o) + * obj = Marshal.load(data) + * obj.sayHello #=> "hello\n" + */ +static VALUE +marshal_dump(int argc, VALUE *argv) +{ + VALUE obj, port, a1, a2; + int limit = -1; + struct dump_arg arg; + struct dump_call_arg c_arg; + + port = Qnil; + rb_scan_args(argc, argv, "12", &obj, &a1, &a2); + if (argc == 3) { + if (!NIL_P(a2)) limit = NUM2INT(a2); + if (NIL_P(a1)) goto type_error; + port = a1; + } + else if (argc == 2) { + if (FIXNUM_P(a1)) limit = FIX2INT(a1); + else if (NIL_P(a1)) goto type_error; + else port = a1; + } + arg.dest = 0; + arg.symbols = st_init_numtable(); + arg.data = st_init_numtable(); + arg.taint = Qfalse; + arg.untrust = Qfalse; + arg.compat_tbl = st_init_numtable(); + arg.encodings = 0; + arg.str = rb_str_buf_new(0); + RBASIC(arg.str)->klass = 0; + arg.wrapper = Data_Wrap_Struct(rb_cData, mark_dump_arg, 0, &arg); + if (!NIL_P(port)) { + if (!rb_respond_to(port, s_write)) { + type_error: + rb_raise(rb_eTypeError, "instance of IO needed"); + } + arg.dest = port; + if (rb_respond_to(port, s_binmode)) { + rb_funcall2(port, s_binmode, 0, 0); + check_dump_arg(&arg, s_binmode); + } + } + else { + port = arg.str; + } + + c_arg.obj = obj; + c_arg.arg = &arg; + c_arg.limit = limit; + + w_byte(MARSHAL_MAJOR, &arg); + w_byte(MARSHAL_MINOR, &arg); + + rb_ensure(dump, (VALUE)&c_arg, dump_ensure, (VALUE)&arg); + RBASIC(arg.str)->klass = rb_cString; + + return port; +} + +struct load_arg { + VALUE src; + long offset; + st_table *symbols; + st_table *data; + VALUE proc; + int taint; + int untrust; + st_table *compat_tbl; + VALUE wrapper; +}; + +static void +check_load_arg(struct load_arg *arg, ID sym) +{ + if (!DATA_PTR(arg->wrapper)) { + rb_raise(rb_eRuntimeError, "Marshal.load reentered at %s", + rb_id2name(sym)); + } +} + +static void +mark_load_arg(void *ptr) +{ + 
struct load_arg *p = ptr; + if (!ptr) + return; + rb_mark_tbl(p->data); + rb_mark_hash(p->compat_tbl); +} + +static VALUE r_entry(VALUE v, struct load_arg *arg); +static VALUE r_object(struct load_arg *arg); +static ID r_symbol(struct load_arg *arg); +static VALUE path2class(VALUE path); + +static int +r_byte(struct load_arg *arg) +{ + int c; + + if (TYPE(arg->src) == T_STRING) { + if (RSTRING_LEN(arg->src) > arg->offset) { + c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++]; + } + else { + rb_raise(rb_eArgError, "marshal data too short"); + } + } + else { + VALUE src = arg->src; + VALUE v = rb_funcall2(src, s_getbyte, 0, 0); + check_load_arg(arg, s_getbyte); + if (NIL_P(v)) rb_eof_error(); + c = (unsigned char)NUM2CHR(v); + } + return c; +} + +static void +long_toobig(int size) +{ + rb_raise(rb_eTypeError, "long too big for this architecture (size " + STRINGIZE(SIZEOF_LONG)", given %d)", size); +} + +#undef SIGN_EXTEND_CHAR +#if __STDC__ +# define SIGN_EXTEND_CHAR(c) ((signed char)(c)) +#else /* not __STDC__ */ +/* As in Harbison and Steele. 
*/ +# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128) +#endif + +static long +r_long(struct load_arg *arg) +{ + register long x; + int c = SIGN_EXTEND_CHAR(r_byte(arg)); + long i; + + if (c == 0) return 0; + if (c > 0) { + if (4 < c && c < 128) { + return c - 5; + } + if (c > sizeof(long)) long_toobig(c); + x = 0; + for (i=0;i sizeof(long)) long_toobig(c); + x = -1; + for (i=0;isrc) == T_STRING) { + if (RSTRING_LEN(arg->src) - arg->offset >= len) { + str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len); + arg->offset += len; + } + else { + too_short: + rb_raise(rb_eArgError, "marshal data too short"); + } + } + else { + VALUE src = arg->src; + VALUE n = LONG2NUM(len); + str = rb_funcall2(src, s_read, 1, &n); + check_load_arg(arg, s_read); + if (NIL_P(str)) goto too_short; + StringValue(str); + if (RSTRING_LEN(str) != len) goto too_short; + if (OBJ_TAINTED(str)) arg->taint = Qtrue; + if (OBJ_UNTRUSTED(str)) arg->untrust = Qtrue; + } + return str; +} + +static int +id2encidx(ID id, VALUE val) +{ + if (id == rb_id_encoding()) { + return rb_enc_find_index(StringValueCStr(val)); + } + else if (id == rb_intern("E")) { + if (val == Qfalse) return rb_usascii_encindex(); + else if (val == Qtrue) return rb_utf8_encindex(); + /* bogus ignore */ + } + return -1; +} + +static ID +r_symlink(struct load_arg *arg) +{ + ID id; + long num = r_long(arg); + + if (st_lookup(arg->symbols, num, &id)) { + return id; + } + rb_raise(rb_eArgError, "bad symbol"); +} + +static ID +r_symreal(struct load_arg *arg, int ivar) +{ + volatile VALUE s = r_bytes(arg); + ID id; + int idx = -1; + st_index_t n = arg->symbols->num_entries; + + st_insert(arg->symbols, (st_data_t)n, (st_data_t)0); + if (ivar) { + long num = r_long(arg); + while (num-- > 0) { + id = r_symbol(arg); + idx = id2encidx(id, r_object(arg)); + } + } + if (idx < 0) idx = rb_usascii_encindex(); + rb_enc_associate_index(s, idx); + id = rb_intern_str(s); + st_insert(arg->symbols, (st_data_t)n, (st_data_t)id); + + 
return id; +} + +static ID +r_symbol(struct load_arg *arg) +{ + int type, ivar = 0; + + again: + switch ((type = r_byte(arg))) { + case TYPE_IVAR: + ivar = 1; + goto again; + case TYPE_SYMBOL: + return r_symreal(arg, ivar); + case TYPE_SYMLINK: + if (ivar) { + rb_raise(rb_eArgError, "dump format error (symlink with encoding)"); + } + return r_symlink(arg); + default: + rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type); + break; + } +} + +static VALUE +r_unique(struct load_arg *arg) +{ + return rb_id2str(r_symbol(arg)); +} + +static VALUE +r_string(struct load_arg *arg) +{ + return r_bytes(arg); +} + +static VALUE +r_entry(VALUE v, struct load_arg *arg) +{ + st_data_t real_obj = (VALUE)Qundef; + if (st_lookup(arg->compat_tbl, v, &real_obj)) { + st_insert(arg->data, arg->data->num_entries, (st_data_t)real_obj); + } + else { + st_insert(arg->data, arg->data->num_entries, (st_data_t)v); + } + if (arg->taint) { + OBJ_TAINT(v); + if ((VALUE)real_obj != Qundef) + OBJ_TAINT((VALUE)real_obj); + } + if (arg->untrust) { + OBJ_UNTRUST(v); + if ((VALUE)real_obj != Qundef) + OBJ_UNTRUST((VALUE)real_obj); + } + return v; +} + +static VALUE +r_leave(VALUE v, struct load_arg *arg) +{ + st_data_t data; + if (st_lookup(arg->compat_tbl, v, &data)) { + VALUE real_obj = (VALUE)data; + rb_alloc_func_t allocator = rb_get_alloc_func(CLASS_OF(real_obj)); + st_data_t key = v; + if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) { + marshal_compat_t *compat = (marshal_compat_t*)data; + compat->loader(real_obj, v); + } + st_delete(arg->compat_tbl, &key, 0); + v = real_obj; + } + if (arg->proc) { + v = rb_funcall(arg->proc, s_call, 1, v); + check_load_arg(arg, s_call); + } + return v; +} + +static void +r_ivar(VALUE obj, struct load_arg *arg) +{ + long len; + + len = r_long(arg); + if (len > 0) { + while (len--) { + ID id = r_symbol(arg); + VALUE val = r_object(arg); + int idx = id2encidx(id, val); + if (idx >= 0) { + rb_enc_associate_index(obj, idx); + } + else 
{ + rb_ivar_set(obj, id, val); + } + } + } +} + +static VALUE +path2class(VALUE path) +{ + VALUE v = rb_path_to_class(path); + + if (TYPE(v) != T_CLASS) { + rb_raise(rb_eArgError, "%.*s does not refer to class", + (int)RSTRING_LEN(path), RSTRING_PTR(path)); + } + return v; +} + +static VALUE +path2module(VALUE path) +{ + VALUE v = rb_path_to_class(path); + + if (TYPE(v) != T_MODULE) { + rb_raise(rb_eArgError, "%.*s does not refer to module", + (int)RSTRING_LEN(path), RSTRING_PTR(path)); + } + return v; +} + +static VALUE +obj_alloc_by_path(VALUE path, struct load_arg *arg) +{ + VALUE klass; + st_data_t data; + rb_alloc_func_t allocator; + + klass = path2class(path); + + allocator = rb_get_alloc_func(klass); + if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) { + marshal_compat_t *compat = (marshal_compat_t*)data; + VALUE real_obj = rb_obj_alloc(klass); + VALUE obj = rb_obj_alloc(compat->oldclass); + st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj); + return obj; + } + + return rb_obj_alloc(klass); +} + +static VALUE +r_object0(struct load_arg *arg, int *ivp, VALUE extmod) +{ + VALUE v = Qnil; + int type = r_byte(arg); + long id; + st_data_t link; + + switch (type) { + case TYPE_LINK: + id = r_long(arg); + if (!st_lookup(arg->data, (st_data_t)id, &link)) { + rb_raise(rb_eArgError, "dump format error (unlinked)"); + } + v = (VALUE)link; + if (arg->proc) { + v = rb_funcall(arg->proc, s_call, 1, v); + check_load_arg(arg, s_call); + } + break; + + case TYPE_IVAR: + { + int ivar = Qtrue; + + v = r_object0(arg, &ivar, extmod); + if (ivar) r_ivar(v, arg); + } + break; + + case TYPE_EXTENDED: + { + VALUE m = path2module(r_unique(arg)); + + if (NIL_P(extmod)) extmod = rb_ary_new2(0); + rb_ary_push(extmod, m); + + v = r_object0(arg, 0, extmod); + while (RARRAY_LEN(extmod) > 0) { + m = rb_ary_pop(extmod); + rb_extend_object(v, m); + } + } + break; + + case TYPE_UCLASS: + { + VALUE c = path2class(r_unique(arg)); + + if (FL_TEST(c, FL_SINGLETON)) { 
+ rb_raise(rb_eTypeError, "singleton can't be loaded"); + } + v = r_object0(arg, 0, extmod); + if (rb_special_const_p(v) || TYPE(v) == T_OBJECT || TYPE(v) == T_CLASS) { + format_error: + rb_raise(rb_eArgError, "dump format error (user class)"); + } + if (TYPE(v) == T_MODULE || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) { + VALUE tmp = rb_obj_alloc(c); + + if (TYPE(v) != TYPE(tmp)) goto format_error; + } + RBASIC(v)->klass = c; + } + break; + + case TYPE_NIL: + v = Qnil; + v = r_leave(v, arg); + break; + + case TYPE_TRUE: + v = Qtrue; + v = r_leave(v, arg); + break; + + case TYPE_FALSE: + v = Qfalse; + v = r_leave(v, arg); + break; + + case TYPE_FIXNUM: + { + long i = r_long(arg); + v = LONG2FIX(i); + } + v = r_leave(v, arg); + break; + + case TYPE_FLOAT: + { + double d, t = 0.0; + VALUE str = r_bytes(arg); + const char *ptr = RSTRING_PTR(str); + + if (strcmp(ptr, "nan") == 0) { + d = t / t; + } + else if (strcmp(ptr, "inf") == 0) { + d = 1.0 / t; + } + else if (strcmp(ptr, "-inf") == 0) { + d = -1.0 / t; + } + else { + char *e; + d = strtod(ptr, &e); + d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr)); + } + v = DBL2NUM(d); + v = r_entry(v, arg); + v = r_leave(v, arg); + } + break; + + case TYPE_BIGNUM: + { + long len; + BDIGIT *digits; + volatile VALUE data; + + NEWOBJ(big, struct RBignum); + OBJSETUP(big, rb_cBignum, T_BIGNUM); + RBIGNUM_SET_SIGN(big, (r_byte(arg) == '+')); + len = r_long(arg); + data = r_bytes0(len * 2, arg); +#if SIZEOF_BDIGITS == SIZEOF_SHORT + rb_big_resize((VALUE)big, len); +#else + rb_big_resize((VALUE)big, (len + 1) * 2 / sizeof(BDIGIT)); +#endif + digits = RBIGNUM_DIGITS(big); + MEMCPY(digits, RSTRING_PTR(data), char, len * 2); +#if SIZEOF_BDIGITS > SIZEOF_SHORT + MEMZERO((char *)digits + len * 2, char, + RBIGNUM_LEN(big) * sizeof(BDIGIT) - len * 2); +#endif + len = RBIGNUM_LEN(big); + while (len > 0) { + unsigned char *p = (unsigned char *)digits; + BDIGIT num = 0; +#if SIZEOF_BDIGITS > SIZEOF_SHORT + int shift = 0; + int i; 
+ + for (i=0; iifnone = r_object(arg); + } + v = r_leave(v, arg); + } + break; + + case TYPE_STRUCT: + { + VALUE klass, mem; + VALUE values; + volatile long i; /* gcc 2.7.2.3 -O2 bug?? */ + long len; + ID slot; + + klass = path2class(r_unique(arg)); + len = r_long(arg); + + v = rb_obj_alloc(klass); + if (TYPE(v) != T_STRUCT) { + rb_raise(rb_eTypeError, "class %s not a struct", rb_class2name(klass)); + } + mem = rb_struct_s_members(klass); + if (RARRAY_LEN(mem) != len) { + rb_raise(rb_eTypeError, "struct %s not compatible (struct size differs)", + rb_class2name(klass)); + } + + v = r_entry(v, arg); + values = rb_ary_new2(len); + for (i=0; i 0) { + VALUE m = rb_ary_pop(extmod); + rb_extend_object(v, m); + } + } + if (!rb_respond_to(v, s_mload)) { + rb_raise(rb_eTypeError, "instance of %s needs to have method `marshal_load'", + rb_class2name(klass)); + } + v = r_entry(v, arg); + data = r_object(arg); + rb_funcall(v, s_mload, 1, data); + check_load_arg(arg, s_mload); + v = r_leave(v, arg); + } + break; + + case TYPE_OBJECT: + { + v = obj_alloc_by_path(r_unique(arg), arg); + if (TYPE(v) != T_OBJECT) { + rb_raise(rb_eArgError, "dump format error"); + } + v = r_entry(v, arg); + r_ivar(v, arg); + v = r_leave(v, arg); + } + break; + + case TYPE_DATA: + { + VALUE klass = path2class(r_unique(arg)); + if (rb_respond_to(klass, s_alloc)) { + static int warn = Qtrue; + if (warn) { + rb_warn("define `allocate' instead of `_alloc'"); + warn = Qfalse; + } + v = rb_funcall(klass, s_alloc, 0); + check_load_arg(arg, s_alloc); + } + else { + v = rb_obj_alloc(klass); + } + if (TYPE(v) != T_DATA) { + rb_raise(rb_eArgError, "dump format error"); + } + v = r_entry(v, arg); + if (!rb_respond_to(v, s_load_data)) { + rb_raise(rb_eTypeError, + "class %s needs to have instance method `_load_data'", + rb_class2name(klass)); + } + rb_funcall(v, s_load_data, 1, r_object0(arg, 0, extmod)); + check_load_arg(arg, s_load_data); + v = r_leave(v, arg); + } + break; + + case TYPE_MODULE_OLD: + { + 
volatile VALUE str = r_bytes(arg); + + v = rb_path_to_class(str); + v = r_entry(v, arg); + v = r_leave(v, arg); + } + break; + + case TYPE_CLASS: + { + volatile VALUE str = r_bytes(arg); + + v = path2class(str); + v = r_entry(v, arg); + v = r_leave(v, arg); + } + break; + + case TYPE_MODULE: + { + volatile VALUE str = r_bytes(arg); + + v = path2module(str); + v = r_entry(v, arg); + v = r_leave(v, arg); + } + break; + + case TYPE_SYMBOL: + if (ivp) { + v = ID2SYM(r_symreal(arg, *ivp)); + *ivp = Qfalse; + } + else { + v = ID2SYM(r_symreal(arg, 0)); + } + v = r_leave(v, arg); + break; + + case TYPE_SYMLINK: + v = ID2SYM(r_symlink(arg)); + break; + + default: + rb_raise(rb_eArgError, "dump format error(0x%x)", type); + break; + } + return v; +} + +static VALUE +r_object(struct load_arg *arg) +{ + return r_object0(arg, 0, Qnil); +} + +static VALUE +load(struct load_arg *arg) +{ + return r_object(arg); +} + +static VALUE +load_ensure(struct load_arg *arg) +{ + if (!DATA_PTR(arg->wrapper)) return 0; + st_free_table(arg->symbols); + st_free_table(arg->data); + st_free_table(arg->compat_tbl); + DATA_PTR(arg->wrapper) = 0; + arg->wrapper = 0; + return 0; +} + +/* + * call-seq: + * load( source [, proc] ) => obj + * restore( source [, proc] ) => obj + * + * Returns the result of converting the serialized data in source into a + * Ruby object (possibly with associated subordinate objects). source + * may be either an instance of IO or an object that responds to + * to_str. If proc is specified, it will be passed each object as it + * is deserialized. 
+ */ +static VALUE +marshal_load(int argc, VALUE *argv) +{ + VALUE port, proc; + int major, minor; + VALUE v; + struct load_arg arg; + + rb_scan_args(argc, argv, "11", &port, &proc); + v = rb_check_string_type(port); + if (!NIL_P(v)) { + arg.taint = OBJ_TAINTED(port); /* original taintedness */ + port = v; + } + else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) { + if (rb_respond_to(port, s_binmode)) { + rb_funcall2(port, s_binmode, 0, 0); + } + arg.taint = Qtrue; + } + else { + rb_raise(rb_eTypeError, "instance of IO needed"); + } + arg.untrust = OBJ_UNTRUSTED(port); + arg.src = port; + arg.offset = 0; + arg.symbols = st_init_numtable(); + arg.data = st_init_numtable(); + arg.compat_tbl = st_init_numtable(); + arg.proc = 0; + arg.wrapper = Data_Wrap_Struct(rb_cData, mark_load_arg, 0, &arg); + + major = r_byte(&arg); + minor = r_byte(&arg); + if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) { + rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\ +\tformat version %d.%d required; %d.%d given", + MARSHAL_MAJOR, MARSHAL_MINOR, major, minor); + } + if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) { + rb_warn("incompatible marshal file format (can be read)\n\ +\tformat version %d.%d required; %d.%d given", + MARSHAL_MAJOR, MARSHAL_MINOR, major, minor); + } + + if (!NIL_P(proc)) arg.proc = proc; + v = rb_ensure(load, (VALUE)&arg, load_ensure, (VALUE)&arg); + + return v; +} + +/* + * The marshaling library converts collections of Ruby objects into a + * byte stream, allowing them to be stored outside the currently + * active script. This data may subsequently be read and the original + * objects reconstituted. + * Marshaled data has major and minor version numbers stored along + * with the object information. In normal use, marshaling can only + * load data written with the same major version number and an equal + * or lower minor version number. 
If Ruby's ``verbose'' flag is set + * (normally using -d, -v, -w, or --verbose) the major and minor + * numbers must match exactly. Marshal versioning is independent of + * Ruby's version numbers. You can extract the version by reading the + * first two bytes of marshaled data. + * + * str = Marshal.dump("thing") + * RUBY_VERSION #=> "1.9.0" + * str[0].ord #=> 4 + * str[1].ord #=> 8 + * + * Some objects cannot be dumped: if the objects to be dumped include + * bindings, procedure or method objects, instances of class IO, or + * singleton objects, a TypeError will be raised. + * If your class has special serialization needs (for example, if you + * want to serialize in some specific format), or if it contains + * objects that would otherwise not be serializable, you can implement + * your own serialization strategy by defining two methods, _dump and + * _load: + * The instance method _dump should return a String object containing + * all the information necessary to reconstitute objects of this class + * and all referenced objects up to a maximum depth given as an integer + * parameter (a value of -1 implies that you should disable depth checking). + * The class method _load should take a String and return an object of this class. 
+ */ +void +Init_marshal(void) +{ +#undef rb_intern +#define rb_intern(str) rb_intern_const(str) + + VALUE rb_mMarshal = rb_define_module("Marshal"); + + s_dump = rb_intern("_dump"); + s_load = rb_intern("_load"); + s_mdump = rb_intern("marshal_dump"); + s_mload = rb_intern("marshal_load"); + s_dump_data = rb_intern("_dump_data"); + s_load_data = rb_intern("_load_data"); + s_alloc = rb_intern("_alloc"); + s_call = rb_intern("call"); + s_getbyte = rb_intern("getbyte"); + s_read = rb_intern("read"); + s_write = rb_intern("write"); + s_binmode = rb_intern("binmode"); + + rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1); + rb_define_module_function(rb_mMarshal, "load", marshal_load, -1); + rb_define_module_function(rb_mMarshal, "restore", marshal_load, -1); + + rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR)); + rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR)); + + compat_allocator_tbl = st_init_numtable(); + compat_allocator_tbl_wrapper = + Data_Wrap_Struct(rb_cData, mark_marshal_compat_t, 0, compat_allocator_tbl); + rb_gc_register_mark_object(compat_allocator_tbl_wrapper); +} + +VALUE +rb_marshal_dump(VALUE obj, VALUE port) +{ + int argc = 1; + VALUE argv[2]; + + argv[0] = obj; + argv[1] = port; + if (!NIL_P(port)) argc = 2; + return marshal_dump(argc, argv); +} + +VALUE +rb_marshal_load(VALUE port) +{ + return marshal_load(1, &port); +} diff --git a/math.c b/math.c new file mode 100644 index 0000000..561879c --- /dev/null +++ b/math.c @@ -0,0 +1,746 @@ +/********************************************************************** + + math.c - + + $Author: yugui $ + created at: Tue Jan 25 14:12:56 JST 1994 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby/ruby.h" +#include +#include + +VALUE rb_mMath; + +extern VALUE rb_to_float(VALUE val); +#define Need_Float(x) (x) = rb_to_float(x) +#define Need_Float2(x,y) do {\ + 
Need_Float(x);\ + Need_Float(y);\ +} while (0) + +static void +domain_check(double x, double y, const char *msg) +{ + while(1) { + if (errno) { + rb_sys_fail(msg); + } + if (isnan(y)) { + if (isnan(x)) break; +#if defined(EDOM) + errno = EDOM; +#elif defined(ERANGE) + errno = ERANGE; +#endif + continue; + } + break; + } +} + +static void +infinity_check(VALUE arg, double res, const char *msg) +{ + while(1) { + if (errno) { + rb_sys_fail(msg); + } + if (isinf(res) && !isinf(RFLOAT_VALUE(arg))) { +#if defined(EDOM) + errno = EDOM; +#elif defined(ERANGE) + errno = ERANGE; +#endif + continue; + } + break; + } +} + +/* + * call-seq: + * Math.atan2(y, x) => float + * + * Computes the arc tangent given y and x. Returns + * -PI..PI. + * + */ + +static VALUE +math_atan2(VALUE obj, VALUE y, VALUE x) +{ + Need_Float2(y, x); + return DBL2NUM(atan2(RFLOAT_VALUE(y), RFLOAT_VALUE(x))); +} + + +/* + * call-seq: + * Math.cos(x) => float + * + * Computes the cosine of x (expressed in radians). Returns + * -1..1. + */ + +static VALUE +math_cos(VALUE obj, VALUE x) +{ + Need_Float(x); + return DBL2NUM(cos(RFLOAT_VALUE(x))); +} + +/* + * call-seq: + * Math.sin(x) => float + * + * Computes the sine of x (expressed in radians). Returns + * -1..1. + */ + +static VALUE +math_sin(VALUE obj, VALUE x) +{ + Need_Float(x); + + return DBL2NUM(sin(RFLOAT_VALUE(x))); +} + + +/* + * call-seq: + * Math.tan(x) => float + * + * Returns the tangent of x (expressed in radians). + */ + +static VALUE +math_tan(VALUE obj, VALUE x) +{ + Need_Float(x); + + return DBL2NUM(tan(RFLOAT_VALUE(x))); +} + +/* + * call-seq: + * Math.acos(x) => float + * + * Computes the arc cosine of x. Returns 0..PI. + */ + +static VALUE +math_acos(VALUE obj, VALUE x) +{ + double d0, d; + + Need_Float(x); + errno = 0; + d0 = RFLOAT_VALUE(x); + d = acos(d0); + domain_check(d0, d, "acos"); + return DBL2NUM(d); +} + +/* + * call-seq: + * Math.asin(x) => float + * + * Computes the arc sine of x. Returns -{PI/2} .. {PI/2}. 
+ */ + +static VALUE +math_asin(VALUE obj, VALUE x) +{ + double d0, d; + + Need_Float(x); + errno = 0; + d0 = RFLOAT_VALUE(x); + d = asin(d0); + domain_check(d0, d, "asin"); + return DBL2NUM(d); +} + +/* + * call-seq: + * Math.atan(x) => float + * + * Computes the arc tangent of x. Returns -{PI/2} .. {PI/2}. + */ + +static VALUE +math_atan(VALUE obj, VALUE x) +{ + Need_Float(x); + return DBL2NUM(atan(RFLOAT_VALUE(x))); +} + +#ifndef HAVE_COSH +double +cosh(double x) +{ + return (exp(x) + exp(-x)) / 2; +} +#endif + +/* + * call-seq: + * Math.cosh(x) => float + * + * Computes the hyperbolic cosine of x (expressed in radians). + */ + +static VALUE +math_cosh(VALUE obj, VALUE x) +{ + Need_Float(x); + + return DBL2NUM(cosh(RFLOAT_VALUE(x))); +} + +#ifndef HAVE_SINH +double +sinh(double x) +{ + return (exp(x) - exp(-x)) / 2; +} +#endif + +/* + * call-seq: + * Math.sinh(x) => float + * + * Computes the hyperbolic sine of x (expressed in + * radians). + */ + +static VALUE +math_sinh(VALUE obj, VALUE x) +{ + Need_Float(x); + return DBL2NUM(sinh(RFLOAT_VALUE(x))); +} + +#ifndef HAVE_TANH +double +tanh(double x) +{ + return sinh(x) / cosh(x); +} +#endif + +/* + * call-seq: + * Math.tanh() => float + * + * Computes the hyperbolic tangent of x (expressed in + * radians). + */ + +static VALUE +math_tanh(VALUE obj, VALUE x) +{ + Need_Float(x); + return DBL2NUM(tanh(RFLOAT_VALUE(x))); +} + +/* + * call-seq: + * Math.acosh(x) => float + * + * Computes the inverse hyperbolic cosine of x. + */ + +static VALUE +math_acosh(VALUE obj, VALUE x) +{ + double d0, d; + + Need_Float(x); + errno = 0; + d0 = RFLOAT_VALUE(x); + d = acosh(d0); + domain_check(d0, d, "acosh"); + return DBL2NUM(d); +} + +/* + * call-seq: + * Math.asinh(x) => float + * + * Computes the inverse hyperbolic sine of x. 
+ */ + +static VALUE +math_asinh(VALUE obj, VALUE x) +{ + Need_Float(x); + return DBL2NUM(asinh(RFLOAT_VALUE(x))); +} + +/* + * call-seq: + * Math.atanh(x) => float + * + * Computes the inverse hyperbolic tangent of x. + */ + +static VALUE +math_atanh(VALUE obj, VALUE x) +{ + double d0, d; + + Need_Float(x); + errno = 0; + d0 = RFLOAT_VALUE(x); + d = atanh(d0); + domain_check(d0, d, "atanh"); + infinity_check(x, d, "atanh"); + return DBL2NUM(d); +} + +/* + * call-seq: + * Math.exp(x) => float + * + * Returns e**x. + */ + +static VALUE +math_exp(VALUE obj, VALUE x) +{ + Need_Float(x); + return DBL2NUM(exp(RFLOAT_VALUE(x))); +} + +#if defined __CYGWIN__ +# include +# if CYGWIN_VERSION_DLL_MAJOR < 1005 +# define nan(x) nan() +# endif +# define log(x) ((x) < 0.0 ? nan("") : log(x)) +# define log10(x) ((x) < 0.0 ? nan("") : log10(x)) +#endif + +/* + * call-seq: + * Math.log(numeric) => float + * Math.log(num,base) => float + * + * Returns the natural logarithm of numeric. + * If additional second argument is given, it will be the base + * of logarithm. + */ + +static VALUE +math_log(int argc, VALUE *argv) +{ + VALUE x, base; + double d0, d; + + rb_scan_args(argc, argv, "11", &x, &base); + Need_Float(x); + errno = 0; + d0 = RFLOAT_VALUE(x); + d = log(d0); + if (argc == 2) { + Need_Float(base); + d /= log(RFLOAT_VALUE(base)); + } + domain_check(d0, d, "log"); + infinity_check(x, d, "log"); + return DBL2NUM(d); +} + +#ifndef log2 +#ifndef HAVE_LOG2 +double +log2(double x) +{ + return log10(x)/log10(2.0); +} +#else +extern double log2(double); +#endif +#endif + +/* + * call-seq: + * Math.log2(numeric) => float + * + * Returns the base 2 logarithm of numeric. 
+ */ + +static VALUE +math_log2(VALUE obj, VALUE x) +{ + double d0, d; + + Need_Float(x); + errno = 0; + d0 = RFLOAT_VALUE(x); + d = log2(d0); + domain_check(d0, d, "log2"); + infinity_check(x, d, "log2"); + return DBL2NUM(d); +} + +/* + * call-seq: + * Math.log10(numeric) => float + * + * Returns the base 10 logarithm of numeric. + */ + +static VALUE +math_log10(VALUE obj, VALUE x) +{ + double d0, d; + + Need_Float(x); + errno = 0; + d0 = RFLOAT_VALUE(x); + d = log10(d0); + domain_check(d0, d, "log10"); + infinity_check(x, d, "log10"); + return DBL2NUM(d); +} + +/* + * call-seq: + * Math.sqrt(numeric) => float + * + * Returns the non-negative square root of numeric. + * + * 0.upto(10) {|x| + * p [x, Math.sqrt(x), Math.sqrt(x)**2] + * } + * #=> + * [0, 0.0, 0.0] + * [1, 1.0, 1.0] + * [2, 1.4142135623731, 2.0] + * [3, 1.73205080756888, 3.0] + * [4, 2.0, 4.0] + * [5, 2.23606797749979, 5.0] + * [6, 2.44948974278318, 6.0] + * [7, 2.64575131106459, 7.0] + * [8, 2.82842712474619, 8.0] + * [9, 3.0, 9.0] + * [10, 3.16227766016838, 10.0] + * + */ + +static VALUE +math_sqrt(VALUE obj, VALUE x) +{ + double d0, d; + + Need_Float(x); + errno = 0; + d0 = RFLOAT_VALUE(x); + d = sqrt(d0); + domain_check(d0, d, "sqrt"); + return DBL2NUM(d); +} + +/* + * call-seq: + * Math.cbrt(numeric) => float + * + * Returns the cube root of numeric. 
+ * + * -9.upto(9) {|x| + * p [x, Math.cbrt(x), Math.cbrt(x)**3] + * } + * #=> + * [-9, -2.0800838230519, -9.0] + * [-8, -2.0, -8.0] + * [-7, -1.91293118277239, -7.0] + * [-6, -1.81712059283214, -6.0] + * [-5, -1.7099759466767, -5.0] + * [-4, -1.5874010519682, -4.0] + * [-3, -1.44224957030741, -3.0] + * [-2, -1.25992104989487, -2.0] + * [-1, -1.0, -1.0] + * [0, 0.0, 0.0] + * [1, 1.0, 1.0] + * [2, 1.25992104989487, 2.0] + * [3, 1.44224957030741, 3.0] + * [4, 1.5874010519682, 4.0] + * [5, 1.7099759466767, 5.0] + * [6, 1.81712059283214, 6.0] + * [7, 1.91293118277239, 7.0] + * [8, 2.0, 8.0] + * [9, 2.0800838230519, 9.0] + * + */ + +static VALUE +math_cbrt(VALUE obj, VALUE x) +{ + Need_Float(x); + return DBL2NUM(cbrt(RFLOAT_VALUE(x))); +} + +/* + * call-seq: + * Math.frexp(numeric) => [ fraction, exponent ] + * + * Returns a two-element array containing the normalized fraction (a + * Float) and exponent (a Fixnum) of + * numeric. + * + * fraction, exponent = Math.frexp(1234) #=> [0.6025390625, 11] + * fraction * 2**exponent #=> 1234.0 + */ + +static VALUE +math_frexp(VALUE obj, VALUE x) +{ + double d; + int exp; + + Need_Float(x); + + d = frexp(RFLOAT_VALUE(x), &exp); + return rb_assoc_new(DBL2NUM(d), INT2NUM(exp)); +} + +/* + * call-seq: + * Math.ldexp(flt, int) -> float + * + * Returns the value of flt*(2**int). + * + * fraction, exponent = Math.frexp(1234) + * Math.ldexp(fraction, exponent) #=> 1234.0 + */ + +static VALUE +math_ldexp(VALUE obj, VALUE x, VALUE n) +{ + Need_Float(x); + return DBL2NUM(ldexp(RFLOAT_VALUE(x), NUM2INT(n))); +} + +/* + * call-seq: + * Math.hypot(x, y) => float + * + * Returns sqrt(x**2 + y**2), the hypotenuse of a right-angled triangle + * with sides x and y. + * + * Math.hypot(3, 4) #=> 5.0 + */ + +static VALUE +math_hypot(VALUE obj, VALUE x, VALUE y) +{ + Need_Float2(x, y); + return DBL2NUM(hypot(RFLOAT_VALUE(x), RFLOAT_VALUE(y))); +} + +/* + * call-seq: + * Math.erf(x) => float + * + * Calculates the error function of x. 
+ */ + +static VALUE +math_erf(VALUE obj, VALUE x) +{ + Need_Float(x); + return DBL2NUM(erf(RFLOAT_VALUE(x))); +} + +/* + * call-seq: + * Math.erfc(x) => float + * + * Calculates the complementary error function of x. + */ + +static VALUE +math_erfc(VALUE obj, VALUE x) +{ + Need_Float(x); + return DBL2NUM(erfc(RFLOAT_VALUE(x))); +} + +/* + * call-seq: + * Math.gamma(x) => float + * + * Calculates the gamma function of x. + * + * Note that gamma(n) is same as fact(n-1) for integer n >= 0. + * However gamma(n) returns float and possibly has error in calculation. + * + * def fact(n) (1..n).inject(1) {|r,i| r*i } end + * 0.upto(25) {|i| p [i, Math.gamma(i+1), fact(i)] } + * #=> + * [0, 1.0, 1] + * [1, 1.0, 1] + * [2, 2.0, 2] + * [3, 6.0, 6] + * [4, 24.0, 24] + * [5, 120.0, 120] + * [6, 720.0, 720] + * [7, 5040.0, 5040] + * [8, 40320.0, 40320] + * [9, 362880.0, 362880] + * [10, 3628800.0, 3628800] + * [11, 39916800.0, 39916800] + * [12, 479001599.999999, 479001600] + * [13, 6227020800.00001, 6227020800] + * [14, 87178291199.9998, 87178291200] + * [15, 1307674368000.0, 1307674368000] + * [16, 20922789888000.0, 20922789888000] + * [17, 3.55687428096001e+14, 355687428096000] + * [18, 6.40237370572799e+15, 6402373705728000] + * [19, 1.21645100408832e+17, 121645100408832000] + * [20, 2.43290200817664e+18, 2432902008176640000] + * [21, 5.10909421717094e+19, 51090942171709440000] + * [22, 1.12400072777761e+21, 1124000727777607680000] + * [23, 2.58520167388851e+22, 25852016738884976640000] + * [24, 6.20448401733239e+23, 620448401733239439360000] + * [25, 1.5511210043331e+25, 15511210043330985984000000] + * + */ + +static VALUE +math_gamma(VALUE obj, VALUE x) +{ + double d0, d; + Need_Float(x); + errno = 0; + d0 = RFLOAT_VALUE(x); + d = tgamma(d0); + domain_check(d0, d, "gamma"); + return DBL2NUM(d); +} + +/* + * call-seq: + * Math.lgamma(x) => [float, -1 or 1] + * + * Calculates the logarithmic gamma of x and + * the sign of gamma of x. 
+ * + * Math.lgamma(x) is same as + * [Math.log(Math.gamma(x).abs), Math.gamma(x) < 0 ? -1 : 1] + * but avoid overflow by Math.gamma(x) for large x. + */ + +static VALUE +math_lgamma(VALUE obj, VALUE x) +{ + double d0, d; + int sign; + VALUE v; + Need_Float(x); + errno = 0; + d0 = RFLOAT_VALUE(x); + d = lgamma_r(d0, &sign); + domain_check(d0, d, "lgamma"); + v = DBL2NUM(d); + return rb_assoc_new(v, INT2FIX(sign)); +} + + +#define exp1(n) \ +VALUE \ +rb_math_##n(VALUE x)\ +{\ + return math_##n(rb_mMath, x);\ +} + +#define exp2(n) \ +VALUE \ +rb_math_##n(VALUE x, VALUE y)\ +{\ + return math_##n(rb_mMath, x, y);\ +} + +exp2(atan2) +exp1(cos) +exp1(cosh) +exp1(exp) +exp2(hypot) + +VALUE +rb_math_log(int argc, VALUE *argv) +{ + return math_log(argc, argv); +} + +exp1(sin) +exp1(sinh) +exp1(sqrt) + + +/* + * The Math module contains module functions for basic + * trigonometric and transcendental functions. See class + * Float for a list of constants that + * define Ruby's floating point accuracy. 
+ */ + + +void +Init_Math(void) +{ + rb_mMath = rb_define_module("Math"); + +#ifdef M_PI + rb_define_const(rb_mMath, "PI", DBL2NUM(M_PI)); +#else + rb_define_const(rb_mMath, "PI", DBL2NUM(atan(1.0)*4.0)); +#endif + +#ifdef M_E + rb_define_const(rb_mMath, "E", DBL2NUM(M_E)); +#else + rb_define_const(rb_mMath, "E", DBL2NUM(exp(1.0))); +#endif + + rb_define_module_function(rb_mMath, "atan2", math_atan2, 2); + rb_define_module_function(rb_mMath, "cos", math_cos, 1); + rb_define_module_function(rb_mMath, "sin", math_sin, 1); + rb_define_module_function(rb_mMath, "tan", math_tan, 1); + + rb_define_module_function(rb_mMath, "acos", math_acos, 1); + rb_define_module_function(rb_mMath, "asin", math_asin, 1); + rb_define_module_function(rb_mMath, "atan", math_atan, 1); + + rb_define_module_function(rb_mMath, "cosh", math_cosh, 1); + rb_define_module_function(rb_mMath, "sinh", math_sinh, 1); + rb_define_module_function(rb_mMath, "tanh", math_tanh, 1); + + rb_define_module_function(rb_mMath, "acosh", math_acosh, 1); + rb_define_module_function(rb_mMath, "asinh", math_asinh, 1); + rb_define_module_function(rb_mMath, "atanh", math_atanh, 1); + + rb_define_module_function(rb_mMath, "exp", math_exp, 1); + rb_define_module_function(rb_mMath, "log", math_log, -1); + rb_define_module_function(rb_mMath, "log2", math_log2, 1); + rb_define_module_function(rb_mMath, "log10", math_log10, 1); + rb_define_module_function(rb_mMath, "sqrt", math_sqrt, 1); + rb_define_module_function(rb_mMath, "cbrt", math_cbrt, 1); + + rb_define_module_function(rb_mMath, "frexp", math_frexp, 1); + rb_define_module_function(rb_mMath, "ldexp", math_ldexp, 2); + + rb_define_module_function(rb_mMath, "hypot", math_hypot, 2); + + rb_define_module_function(rb_mMath, "erf", math_erf, 1); + rb_define_module_function(rb_mMath, "erfc", math_erfc, 1); + + rb_define_module_function(rb_mMath, "gamma", math_gamma, 1); + rb_define_module_function(rb_mMath, "lgamma", math_lgamma, 1); +} diff --git a/missing/acosh.c 
b/missing/acosh.c new file mode 100644 index 0000000..47d2d31 --- /dev/null +++ b/missing/acosh.c @@ -0,0 +1,93 @@ +/********************************************************************** + + acosh.c - + + $Author: usa $ + created at: Fri Apr 12 00:34:17 JST 2002 + + public domain rewrite of acosh(3), asinh(3) and atanh(3) + +**********************************************************************/ + +#include +#include +#include +#include "ruby.h" + +/* DBL_MANT_DIG must be less than 4 times of bits of int */ +#ifndef DBL_MANT_DIG +#define DBL_MANT_DIG 53 /* in this case, at least 12 digit precision */ +#endif +#define BIG_CRITERIA_BIT (1< 0 +#define BIG_CRITERIA (1.0*BIG_CRITERIA_BIT) +#else +#define BIG_CRITERIA (1.0*(1< 0 +#define SMALL_CRITERIA (1.0/SMALL_CRITERIA_BIT) +#else +#define SMALL_CRITERIA (1.0*(1< BIG_CRITERIA) + x += x; + else + x += sqrt((x + 1) * (x - 1)); + return log(x); +} +#endif + +#ifndef HAVE_ASINH +double +asinh(double x) +{ + int neg = x < 0; + double z = fabs(x); + + if (z < SMALL_CRITERIA) return x; + if (z < (1.0/(1< BIG_CRITERIA) { + z = log(z + z); + } + else { + z = log(z + sqrt(z * z + 1)); + } + if (neg) z = -z; + return z; +} +#endif + +#ifndef HAVE_ATANH +double +atanh(double x) +{ + int neg = x < 0; + double z = fabs(x); + + if (z < SMALL_CRITERIA) return x; + z = log(z > 1 ? -1 : (1 + z) / (1 - z)) / 2; + if (neg) z = -z; + if (isinf(z)) +#if defined(ERANGE) + errno = ERANGE; +#elif defined(EDOM) + errno = EDOM; +#else + ; +#endif + return z; +} +#endif diff --git a/missing/alloca.c b/missing/alloca.c new file mode 100644 index 0000000..067f76b --- /dev/null +++ b/missing/alloca.c @@ -0,0 +1,194 @@ +/* alloca -- (mostly) portable public-domain implementation -- D A Gwyn + + last edit: 86/05/30 rms + include config.h, since on VMS it renames some symbols. + Use xmalloc instead of malloc. 
+ + This implementation of the PWB library alloca() function, + which is used to allocate space off the run-time stack so + that it is automatically reclaimed upon procedure exit, + was inspired by discussions with J. Q. Johnson of Cornell. + + It should work under any C implementation that uses an + actual procedure stack (as opposed to a linked list of + frames). There are some preprocessor constants that can + be defined when compiling for your specific system, for + improved efficiency; however, the defaults should be okay. + + The general concept of this implementation is to keep + track of all alloca()-allocated blocks, and reclaim any + that are found to be deeper in the stack than the current + invocation. This heuristic does not reclaim storage as + soon as it becomes invalid, but it will do so eventually. + + As a special case, alloca(0) reclaims storage without + allocating any. It is a good idea to use alloca(0) in + your main control loop, etc. to force garbage collection. +*/ +#ifndef lint +static char SCCSid[] = "@(#)alloca.c 1.1"; /* for the "what" utility */ +#endif + +#include "ruby/config.h" +#ifdef emacs +#ifdef static +/* actually, only want this if static is defined as "" + -- this is for usg, in which emacs must undefine static + in order to make unexec workable + */ +#ifndef STACK_DIRECTION +you +lose +-- must know STACK_DIRECTION at compile-time +#endif /* STACK_DIRECTION undefined */ +#endif /* static */ +#endif /* emacs */ + +#ifdef X3J11 +typedef void *pointer; /* generic pointer type */ +#else +typedef char *pointer; /* generic pointer type */ +#endif /* X3J11 */ + +#define NULL 0 /* null pointer constant */ + +#ifdef RUBY_LIB +#define xmalloc ruby_xmalloc +#define xfree ruby_xfree +#endif + +extern void xfree(); +extern pointer xmalloc(); + +/* + Define STACK_DIRECTION if you know the direction of stack + growth for your system; otherwise it will be automatically + deduced at run-time. 
+ + STACK_DIRECTION > 0 => grows toward higher addresses + STACK_DIRECTION < 0 => grows toward lower addresses + STACK_DIRECTION = 0 => direction of growth unknown +*/ + +#ifndef STACK_DIRECTION +#define STACK_DIRECTION 0 /* direction unknown */ +#endif + +#if STACK_DIRECTION != 0 + +#define STACK_DIR STACK_DIRECTION /* known at compile-time */ + +#else /* STACK_DIRECTION == 0; need run-time code */ + +static int stack_dir; /* 1 or -1 once known */ +#define STACK_DIR stack_dir + +static void +find_stack_direction (/* void */) +{ + static char *addr = NULL; /* address of first + `dummy', once known */ + auto char dummy; /* to get stack address */ + + if (addr == NULL) + { /* initial entry */ + addr = &dummy; + + find_stack_direction (); /* recurse once */ + } + else /* second entry */ + if (&dummy > addr) + stack_dir = 1; /* stack grew upward */ + else + stack_dir = -1; /* stack grew downward */ +} + +#endif /* STACK_DIRECTION == 0 */ + +/* + An "alloca header" is used to: + (a) chain together all alloca()ed blocks; + (b) keep track of stack depth. + + It is very important that sizeof(header) agree with malloc() + alignment chunk size. The following default should work okay. +*/ + +#ifndef ALIGN_SIZE +#define ALIGN_SIZE sizeof(double) +#endif + +typedef union hdr +{ + char align[ALIGN_SIZE]; /* to force sizeof(header) */ + struct + { + union hdr *next; /* for chaining headers */ + char *deep; /* for stack depth measure */ + } h; +} header; + +/* + alloca( size ) returns a pointer to at least `size' bytes of + storage which will be automatically reclaimed upon exit from + the procedure that called alloca(). Originally, this space + was supposed to be taken from the current stack frame of the + caller, but that method cannot be made to work for some + implementations of C, for example under Gould's UTX/32. 
+*/ + +static header *last_alloca_header = NULL; /* -> last alloca header */ + +pointer +alloca (size) /* returns pointer to storage */ + unsigned size; /* # bytes to allocate */ +{ + auto char probe; /* probes stack depth: */ + register char *depth = &probe; + +#if STACK_DIRECTION == 0 + if (STACK_DIR == 0) /* unknown growth direction */ + find_stack_direction (); +#endif + + /* Reclaim garbage, defined as all alloca()ed storage that + was allocated from deeper in the stack than currently. */ + { + register header *hp; /* traverses linked list */ + + for (hp = last_alloca_header; hp != NULL;) + if (STACK_DIR > 0 && hp->h.deep > depth + || STACK_DIR < 0 && hp->h.deep < depth) + { + register header *np = hp->h.next; + + xfree ((pointer) hp); /* collect garbage */ + + hp = np; /* -> next header */ + } + else + break; /* rest are not deeper */ + + last_alloca_header = hp; /* -> last valid storage */ + } + + if (size == 0) + return NULL; /* no allocation required */ + + /* Allocate combined header + user data storage. */ + + { + register pointer new = xmalloc (sizeof (header) + size); + /* address of header */ + + ((header *)new)->h.next = last_alloca_header; + ((header *)new)->h.deep = depth; + + last_alloca_header = (header *)new; + + /* User storage begins just after header. */ + + return (pointer)((char *)new + sizeof(header)); + } +} + diff --git a/missing/cbrt.c b/missing/cbrt.c new file mode 100644 index 0000000..54db270 --- /dev/null +++ b/missing/cbrt.c @@ -0,0 +1,10 @@ +#include + +double cbrt(double x) +{ + if (x < 0) + return -pow(-x, 1/3.0); + else + return pow(x, 1/3.0); +} + diff --git a/missing/crypt.c b/missing/crypt.c new file mode 100644 index 0000000..32736bc --- /dev/null +++ b/missing/crypt.c @@ -0,0 +1,992 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Tom Truscott. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)crypt.c 8.1 (Berkeley) 6/4/93"; +#endif /* LIBC_SCCS and not lint */ + +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#ifdef HAVE_PWD_H +#include +#endif +#include +#ifndef _PASSWORD_EFMT1 +#define _PASSWORD_EFMT1 '_' +#endif + +/* + * UNIX password, and DES, encryption. + * By Tom Truscott, trt@rti.rti.org, + * from algorithms by Robert W. Baldwin and James Gillogly. 
+ * + * References: + * "Mathematical Cryptology for Computer Scientists and Mathematicians," + * by Wayne Patterson, 1987, ISBN 0-8476-7438-X. + * + * "Password Security: A Case History," R. Morris and Ken Thompson, + * Communications of the ACM, vol. 22, pp. 594-597, Nov. 1979. + * + * "DES will be Totally Insecure within Ten Years," M.E. Hellman, + * IEEE Spectrum, vol. 16, pp. 32-39, July 1979. + */ + +/* ===== Configuration ==================== */ + +/* + * define "MUST_ALIGN" if your compiler cannot load/store + * long integers at arbitrary (e.g. odd) memory locations. + * (Either that or never pass unaligned addresses to des_cipher!) + */ +#if !defined(vax) +#define MUST_ALIGN +#endif + +#ifdef CHAR_BITS +#if CHAR_BITS != 8 + #error C_block structure assumes 8 bit characters +#endif +#endif + +/* + * define "LONG_IS_32_BITS" only if sizeof(long)==4. + * This avoids use of bit fields (your compiler may be sloppy with them). + */ +#if !defined(cray) +#define LONG_IS_32_BITS +#endif + +/* + * define "B64" to be the declaration for a 64 bit integer. + * XXX this feature is currently unused, see "endian" comment below. + */ +#if defined(cray) +#define B64 long +#endif +#if defined(convex) +#define B64 long long +#endif + +/* + * define "LARGEDATA" to get faster permutations, by using about 72 kilobytes + * of lookup tables. This speeds up des_setkey() and des_cipher(), but has + * little effect on crypt(). + */ +#if defined(notdef) +#define LARGEDATA +#endif + +int des_setkey(), des_cipher(); + +/* compile with "-DSTATIC=int" when profiling */ +#ifndef STATIC +#define STATIC static +#endif +STATIC void init_des(), init_perm(), permute(); +#ifdef DEBUG +STATIC void prtab(); +#endif + +/* ==================================== */ + +/* + * Cipher-block representation (Bob Baldwin): + * + * DES operates on groups of 64 bits, numbered 1..64 (sigh). One + * representation is to store one bit per byte in an array of bytes. 
Bit N of + * the NBS spec is stored as the LSB of the Nth byte (index N-1) in the array. + * Another representation stores the 64 bits in 8 bytes, with bits 1..8 in the + * first byte, 9..16 in the second, and so on. The DES spec apparently has + * bit 1 in the MSB of the first byte, but that is particularly noxious so we + * bit-reverse each byte so that bit 1 is the LSB of the first byte, bit 8 is + * the MSB of the first byte. Specifically, the 64-bit input data and key are + * converted to LSB format, and the output 64-bit block is converted back into + * MSB format. + * + * DES operates internally on groups of 32 bits which are expanded to 48 bits + * by permutation E and shrunk back to 32 bits by the S boxes. To speed up + * the computation, the expansion is applied only once, the expanded + * representation is maintained during the encryption, and a compression + * permutation is applied only at the end. To speed up the S-box lookups, + * the 48 bits are maintained as eight 6 bit groups, one per byte, which + * directly feed the eight S-boxes. Within each byte, the 6 bits are the + * most significant ones. The low two bits of each byte are zero. (Thus, + * bit 1 of the 48 bit E expansion is stored as the "4"-valued bit of the + * first byte in the eight byte representation, bit 2 of the 48 bit value is + * the "8"-valued bit, and so on.) In fact, a combined "SPE"-box lookup is + * used, in which the output is the 64 bit result of an S-box lookup which + * has been permuted by P and expanded by E, and is ready for use in the next + * iteration. Two 32-bit wide tables, SPE[0] and SPE[1], are used for this + * lookup. Since each byte in the 48 bit path is a multiple of four, indexed + * lookup of SPE[0] and SPE[1] is simple and fast. The key schedule and + * "salt" are also converted to this 8*(6+2) format. The SPE table size is + * 8*64*8 = 4K bytes. 
+ * + * To speed up bit-parallel operations (such as XOR), the 8 byte + * representation is "union"ed with 32 bit values "i0" and "i1", and, on + * machines which support it, a 64 bit value "b64". This data structure, + * "C_block", has two problems. First, alignment restrictions must be + * honored. Second, the byte-order (e.g. little-endian or big-endian) of + * the architecture becomes visible. + * + * The byte-order problem is unfortunate, since on the one hand it is good + * to have a machine-independent C_block representation (bits 1..8 in the + * first byte, etc.), and on the other hand it is good for the LSB of the + * first byte to be the LSB of i0. We cannot have both these things, so we + * currently use the "little-endian" representation and avoid any multi-byte + * operations that depend on byte order. This largely precludes use of the + * 64-bit datatype since the relative order of i0 and i1 are unknown. It + * also inhibits grouping the SPE table to look up 12 bits at a time. (The + * 12 bits can be stored in a 16-bit field with 3 low-order zeroes and 1 + * high-order zero, providing fast indexing into a 64-bit wide SPE.) On the + * other hand, 64-bit datatypes are currently rare, and a 12-bit SPE lookup + * requires a 128 kilobyte table, so perhaps this is not a big loss. + * + * Permutation representation (Jim Gillogly): + * + * A transformation is defined by its effect on each of the 8 bytes of the + * 64-bit input. For each byte we give a 64-bit output that has the bits in + * the input distributed appropriately. The transformation is then the OR + * of the 8 sets of 64-bits. This uses 8*256*8 = 16K bytes of storage for + * each transformation. Unless LARGEDATA is defined, however, a more compact + * table is used which looks up 16 4-bit "chunks" rather than 8 8-bit chunks. + * The smaller table uses 16*16*8 = 2K bytes for each transformation. 
This + * is slower but tolerable, particularly for password encryption in which + * the SPE transformation is iterated many times. The small tables total 9K + * bytes, the large tables total 72K bytes. + * + * The transformations used are: + * IE3264: MSB->LSB conversion, initial permutation, and expansion. + * This is done by collecting the 32 even-numbered bits and applying + * a 32->64 bit transformation, and then collecting the 32 odd-numbered + * bits and applying the same transformation. Since there are only + * 32 input bits, the IE3264 transformation table is half the size of + * the usual table. + * CF6464: Compression, final permutation, and LSB->MSB conversion. + * This is done by two trivial 48->32 bit compressions to obtain + * a 64-bit block (the bit numbering is given in the "CIFP" table) + * followed by a 64->64 bit "cleanup" transformation. (It would + * be possible to group the bits in the 64-bit block so that 2 + * identical 32->32 bit transformations could be used instead, + * saving a factor of 4 in space and possibly 2 in time, but + * byte-ordering and other complications rear their ugly head. + * Similar opportunities/problems arise in the key schedule + * transforms.) + * PC1ROT: MSB->LSB, PC1 permutation, rotate, and PC2 permutation. + * This admittedly baroque 64->64 bit transformation is used to + * produce the first code (in 8*(6+2) format) of the key schedule. + * PC2ROT[0]: Inverse PC2 permutation, rotate, and PC2 permutation. + * It would be possible to define 15 more transformations, each + * with a different rotation, to generate the entire key schedule. + * To save space, however, we instead permute each code into the + * next by using a transformation that "undoes" the PC2 permutation, + * rotates the code, and then applies PC2. Unfortunately, PC2 + * transforms 56 bits into 48 bits, dropping 8 bits, so PC2 is not + * invertible. 
We get around that problem by using a modified PC2 + * which retains the 8 otherwise-lost bits in the unused low-order + * bits of each byte. The low-order bits are cleared when the + * codes are stored into the key schedule. + * PC2ROT[1]: Same as PC2ROT[0], but with two rotations. + * This is faster than applying PC2ROT[0] twice, + * + * The Bell Labs "salt" (Bob Baldwin): + * + * The salting is a simple permutation applied to the 48-bit result of E. + * Specifically, if bit i (1 <= i <= 24) of the salt is set then bits i and + * i+24 of the result are swapped. The salt is thus a 24 bit number, with + * 16777216 possible values. (The original salt was 12 bits and could not + * swap bits 13..24 with 36..48.) + * + * It is possible, but ugly, to warp the SPE table to account for the salt + * permutation. Fortunately, the conditional bit swapping requires only + * about four machine instructions and can be done on-the-fly with about an + * 8% performance penalty. + */ + +typedef union { + unsigned char b[8]; + struct { +#if defined(LONG_IS_32_BITS) + /* long is often faster than a 32-bit bit field */ + long i0; + long i1; +#else + long i0: 32; + long i1: 32; +#endif + } b32; +#if defined(B64) + B64 b64; +#endif +} C_block; + +/* + * Convert twenty-four-bit long in host-order + * to six bits (and 2 low-order zeroes) per char little-endian format. + */ +#define TO_SIX_BIT(rslt, src) { \ + C_block cvt; \ + cvt.b[0] = src; src >>= 6; \ + cvt.b[1] = src; src >>= 6; \ + cvt.b[2] = src; src >>= 6; \ + cvt.b[3] = src; \ + rslt = (cvt.b32.i0 & 0x3f3f3f3fL) << 2; \ + } + +/* + * These macros may someday permit efficient use of 64-bit integers. 
+ */ +#define ZERO(d,d0,d1) d0 = 0, d1 = 0 +#define LOAD(d,d0,d1,bl) d0 = (bl).b32.i0, d1 = (bl).b32.i1 +#define LOADREG(d,d0,d1,s,s0,s1) d0 = s0, d1 = s1 +#define OR(d,d0,d1,bl) d0 |= (bl).b32.i0, d1 |= (bl).b32.i1 +#define STORE(s,s0,s1,bl) (bl).b32.i0 = s0, (bl).b32.i1 = s1 +#define DCL_BLOCK(d,d0,d1) long d0, d1 + +#if defined(LARGEDATA) + /* Waste memory like crazy. Also, do permutations in line */ +#define LGCHUNKBITS 3 +#define CHUNKBITS (1<>4]; OR(D,D0,D1,*tp); p += (1< 0); + STORE(D,D0,D1,*out); +} +#endif /* LARGEDATA */ + + +/* ===== (mostly) Standard DES Tables ==================== */ + +static unsigned char IP[] = { /* initial permutation */ + 58, 50, 42, 34, 26, 18, 10, 2, + 60, 52, 44, 36, 28, 20, 12, 4, + 62, 54, 46, 38, 30, 22, 14, 6, + 64, 56, 48, 40, 32, 24, 16, 8, + 57, 49, 41, 33, 25, 17, 9, 1, + 59, 51, 43, 35, 27, 19, 11, 3, + 61, 53, 45, 37, 29, 21, 13, 5, + 63, 55, 47, 39, 31, 23, 15, 7, +}; + +/* The final permutation is the inverse of IP - no table is necessary */ + +static unsigned char ExpandTr[] = { /* expansion operation */ + 32, 1, 2, 3, 4, 5, + 4, 5, 6, 7, 8, 9, + 8, 9, 10, 11, 12, 13, + 12, 13, 14, 15, 16, 17, + 16, 17, 18, 19, 20, 21, + 20, 21, 22, 23, 24, 25, + 24, 25, 26, 27, 28, 29, + 28, 29, 30, 31, 32, 1, +}; + +static unsigned char PC1[] = { /* permuted choice table 1 */ + 57, 49, 41, 33, 25, 17, 9, + 1, 58, 50, 42, 34, 26, 18, + 10, 2, 59, 51, 43, 35, 27, + 19, 11, 3, 60, 52, 44, 36, + + 63, 55, 47, 39, 31, 23, 15, + 7, 62, 54, 46, 38, 30, 22, + 14, 6, 61, 53, 45, 37, 29, + 21, 13, 5, 28, 20, 12, 4, +}; + +static unsigned char Rotates[] = { /* PC1 rotation schedule */ + 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, +}; + +/* note: each "row" of PC2 is left-padded with bits that make it invertible */ +static unsigned char PC2[] = { /* permuted choice table 2 */ + 9, 18, 14, 17, 11, 24, 1, 5, + 22, 25, 3, 28, 15, 6, 21, 10, + 35, 38, 23, 19, 12, 4, 26, 8, + 43, 54, 16, 7, 27, 20, 13, 2, + + 0, 0, 41, 52, 31, 37, 47, 55, + 
0, 0, 30, 40, 51, 45, 33, 48, + 0, 0, 44, 49, 39, 56, 34, 53, + 0, 0, 46, 42, 50, 36, 29, 32, +}; + +static unsigned char S[8][64] = { /* 48->32 bit substitution tables */ + { + /* S[1] */ + 14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7, + 0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8, + 4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0, + 15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13, + }, + { + /* S[2] */ + 15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10, + 3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5, + 0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15, + 13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9, + }, + { + /* S[3] */ + 10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8, + 13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1, + 13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7, + 1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12, + }, + { + /* S[4] */ + 7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15, + 13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9, + 10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4, + 3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14, + }, + { + /* S[5] */ + 2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9, + 14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6, + 4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14, + 11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3, + }, + { + /* S[6] */ + 12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11, + 10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8, + 9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6, + 4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13, + }, + { + /* S[7] */ + 4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1, + 13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6, + 1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2, + 6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12, + }, + { + /* S[8] */ + 13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7, + 1, 15, 13, 8, 10, 3, 7, 4, 12, 
5, 6, 11, 0, 14, 9, 2, + 7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8, + 2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11, + }, +}; + +static unsigned char P32Tr[] = { /* 32-bit permutation function */ + 16, 7, 20, 21, + 29, 12, 28, 17, + 1, 15, 23, 26, + 5, 18, 31, 10, + 2, 8, 24, 14, + 32, 27, 3, 9, + 19, 13, 30, 6, + 22, 11, 4, 25, +}; + +static unsigned char CIFP[] = { /* compressed/interleaved permutation */ + 1, 2, 3, 4, 17, 18, 19, 20, + 5, 6, 7, 8, 21, 22, 23, 24, + 9, 10, 11, 12, 25, 26, 27, 28, + 13, 14, 15, 16, 29, 30, 31, 32, + + 33, 34, 35, 36, 49, 50, 51, 52, + 37, 38, 39, 40, 53, 54, 55, 56, + 41, 42, 43, 44, 57, 58, 59, 60, + 45, 46, 47, 48, 61, 62, 63, 64, +}; + +static unsigned char itoa64[] = /* 0..63 => ascii-64 */ + "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + + +/* ===== Tables that are initialized at run time ==================== */ + + +static unsigned char a64toi[128]; /* ascii-64 => 0..63 */ + +/* Initial key schedule permutation */ +static C_block PC1ROT[64/CHUNKBITS][1< final permutation table */ +static C_block CF6464[64/CHUNKBITS][1<= 0; ) { + if ((t = (unsigned char)setting[i]) == '\0') + t = '.'; + encp[i] = t; + num_iter = (num_iter<<6) | a64toi[t]; + } + setting += 4; + encp += 4; + salt_size = 4; + break; + default: + num_iter = 25; + salt_size = 2; + } + + salt = 0; + for (i = salt_size; --i >= 0; ) { + if ((t = (unsigned char)setting[i]) == '\0') + t = '.'; + encp[i] = t; + salt = (salt<<6) | a64toi[t]; + } + encp += salt_size; + if (des_cipher((char *)&constdatablock, (char *)&rsltblock, + salt, num_iter)) + return (NULL); + + /* + * Encode the 64 cipher bits as 11 ascii characters. 
+ */ + i = ((long)((rsltblock.b[0]<<8) | rsltblock.b[1])<<8) | rsltblock.b[2]; + encp[3] = itoa64[i&0x3f]; i >>= 6; + encp[2] = itoa64[i&0x3f]; i >>= 6; + encp[1] = itoa64[i&0x3f]; i >>= 6; + encp[0] = itoa64[i]; encp += 4; + i = ((long)((rsltblock.b[3]<<8) | rsltblock.b[4])<<8) | rsltblock.b[5]; + encp[3] = itoa64[i&0x3f]; i >>= 6; + encp[2] = itoa64[i&0x3f]; i >>= 6; + encp[1] = itoa64[i&0x3f]; i >>= 6; + encp[0] = itoa64[i]; encp += 4; + i = ((long)((rsltblock.b[6])<<8) | rsltblock.b[7])<<2; + encp[2] = itoa64[i&0x3f]; i >>= 6; + encp[1] = itoa64[i&0x3f]; i >>= 6; + encp[0] = itoa64[i]; + + encp[3] = 0; + + return (cryptresult); +} + + +/* + * The Key Schedule, filled in by des_setkey() or setkey(). + */ +#define KS_SIZE 16 +static C_block KS[KS_SIZE]; + +/* + * Set up the key schedule from the key. + */ +int +des_setkey(key) + register const char *key; +{ + register DCL_BLOCK(K, K0, K1); + register C_block *ptabp; + register int i; + static int des_ready = 0; + + if (!des_ready) { + init_des(); + des_ready = 1; + } + + PERM6464(K,K0,K1,(unsigned char *)key,(C_block *)PC1ROT); + key = (char *)&KS[0]; + STORE(K&~0x03030303L, K0&~0x03030303L, K1, *(C_block *)key); + for (i = 1; i < 16; i++) { + key += sizeof(C_block); + STORE(K,K0,K1,*(C_block *)key); + ptabp = (C_block *)PC2ROT[Rotates[i]-1]; + PERM6464(K,K0,K1,(unsigned char *)key,ptabp); + STORE(K&~0x03030303L, K0&~0x03030303L, K1, *(C_block *)key); + } + return (0); +} + +/* + * Encrypt (or decrypt if num_iter < 0) the 8 chars at "in" with abs(num_iter) + * iterations of DES, using the the given 24-bit salt and the pre-computed key + * schedule, and store the resulting 8 chars at "out" (in == out is permitted). + * + * NOTE: the performance of this routine is critically dependent on your + * compiler and machine architecture. 
+ */ +int +des_cipher(in, out, salt, num_iter) + const char *in; + char *out; + long salt; + int num_iter; +{ + /* variables that we want in registers, most important first */ +#if defined(pdp11) + register int j; +#endif + register long L0, L1, R0, R1, k; + register C_block *kp; + register int ks_inc, loop_count; + C_block B; + + L0 = salt; + TO_SIX_BIT(salt, L0); /* convert to 4*(6+2) format */ + +#if defined(vax) || defined(pdp11) + salt = ~salt; /* "x &~ y" is faster than "x & y". */ +#define SALT (~salt) +#else +#define SALT salt +#endif + +#if defined(MUST_ALIGN) + B.b[0] = in[0]; B.b[1] = in[1]; B.b[2] = in[2]; B.b[3] = in[3]; + B.b[4] = in[4]; B.b[5] = in[5]; B.b[6] = in[6]; B.b[7] = in[7]; + LOAD(L,L0,L1,B); +#else + LOAD(L,L0,L1,*(C_block *)in); +#endif + LOADREG(R,R0,R1,L,L0,L1); + L0 &= 0x55555555L; + L1 &= 0x55555555L; + L0 = (L0 << 1) | L1; /* L0 is the even-numbered input bits */ + R0 &= 0xaaaaaaaaL; + R1 = (R1 >> 1) & 0x55555555L; + L1 = R0 | R1; /* L1 is the odd-numbered input bits */ + STORE(L,L0,L1,B); + PERM3264(L,L0,L1,B.b, (C_block *)IE3264); /* even bits */ + PERM3264(R,R0,R1,B.b+4,(C_block *)IE3264); /* odd bits */ + + if (num_iter >= 0) + { /* encryption */ + kp = &KS[0]; + ks_inc = sizeof(*kp); + } + else + { /* decryption */ + num_iter = -num_iter; + kp = &KS[KS_SIZE-1]; + ks_inc = -sizeof(*kp); + } + + while (--num_iter >= 0) { + loop_count = 8; + do { + +#define SPTAB(t, i) (*(long *)((unsigned char *)t + i*(sizeof(long)/4))) +#if defined(gould) + /* use this if B.b[i] is evaluated just once ... */ +#define DOXOR(x,y,i) x^=SPTAB(SPE[0][i],B.b[i]); y^=SPTAB(SPE[1][i],B.b[i]); +#else +#if defined(pdp11) + /* use this if your "long" int indexing is slow */ +#define DOXOR(x,y,i) j=B.b[i]; x^=SPTAB(SPE[0][i],j); y^=SPTAB(SPE[1][i],j); +#else + /* use this if "k" is allocated to a register ... 
*/ +#define DOXOR(x,y,i) k=B.b[i]; x^=SPTAB(SPE[0][i],k); y^=SPTAB(SPE[1][i],k); +#endif +#endif + +#define CRUNCH(p0, p1, q0, q1) \ + k = (q0 ^ q1) & SALT; \ + B.b32.i0 = k ^ q0 ^ kp->b32.i0; \ + B.b32.i1 = k ^ q1 ^ kp->b32.i1; \ + kp = (C_block *)((char *)kp+ks_inc); \ + \ + DOXOR(p0, p1, 0); \ + DOXOR(p0, p1, 1); \ + DOXOR(p0, p1, 2); \ + DOXOR(p0, p1, 3); \ + DOXOR(p0, p1, 4); \ + DOXOR(p0, p1, 5); \ + DOXOR(p0, p1, 6); \ + DOXOR(p0, p1, 7); + + CRUNCH(L0, L1, R0, R1); + CRUNCH(R0, R1, L0, L1); + } while (--loop_count != 0); + kp = (C_block *)((char *)kp-(ks_inc*KS_SIZE)); + + + /* swap L and R */ + L0 ^= R0; L1 ^= R1; + R0 ^= L0; R1 ^= L1; + L0 ^= R0; L1 ^= R1; + } + + /* store the encrypted (or decrypted) result */ + L0 = ((L0 >> 3) & 0x0f0f0f0fL) | ((L1 << 1) & 0xf0f0f0f0L); + L1 = ((R0 >> 3) & 0x0f0f0f0fL) | ((R1 << 1) & 0xf0f0f0f0L); + STORE(L,L0,L1,B); + PERM6464(L,L0,L1,B.b, (C_block *)CF6464); +#if defined(MUST_ALIGN) + STORE(L,L0,L1,B); + out[0] = B.b[0]; out[1] = B.b[1]; out[2] = B.b[2]; out[3] = B.b[3]; + out[4] = B.b[4]; out[5] = B.b[5]; out[6] = B.b[6]; out[7] = B.b[7]; +#else + STORE(L,L0,L1,*(C_block *)out); +#endif + return (0); +} + + +/* + * Initialize various tables. This need only be done once. It could even be + * done at compile time, if the compiler were capable of that sort of thing. + */ +STATIC void +init_des() +{ + register int i, j; + register long k; + register int tableno; + static unsigned char perm[64], tmp32[32]; /* "static" for speed */ + + /* + * table that converts chars "./0-9A-Za-z"to integers 0-63. + */ + for (i = 0; i < 64; i++) + a64toi[itoa64[i]] = i; + + /* + * PC1ROT - bit reverse, then PC1, then Rotate, then PC2. 
+ */ + for (i = 0; i < 64; i++) + perm[i] = 0; + for (i = 0; i < 64; i++) { + if ((k = PC2[i]) == 0) + continue; + k += Rotates[0]-1; + if ((k%28) < Rotates[0]) k -= 28; + k = PC1[k]; + if (k > 0) { + k--; + k = (k|07) - (k&07); + k++; + } + perm[i] = k; + } +#ifdef DEBUG + prtab("pc1tab", perm, 8); +#endif + init_perm(PC1ROT, perm, 8, 8); + + /* + * PC2ROT - PC2 inverse, then Rotate (once or twice), then PC2. + */ + for (j = 0; j < 2; j++) { + unsigned char pc2inv[64]; + for (i = 0; i < 64; i++) + perm[i] = pc2inv[i] = 0; + for (i = 0; i < 64; i++) { + if ((k = PC2[i]) == 0) + continue; + pc2inv[k-1] = i+1; + } + for (i = 0; i < 64; i++) { + if ((k = PC2[i]) == 0) + continue; + k += j; + if ((k%28) <= j) k -= 28; + perm[i] = pc2inv[k]; + } +#ifdef DEBUG + prtab("pc2tab", perm, 8); +#endif + init_perm(PC2ROT[j], perm, 8, 8); + } + + /* + * Bit reverse, then initial permutation, then expansion. + */ + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; j++) { + k = (j < 2)? 0: IP[ExpandTr[i*6+j-2]-1]; + if (k > 32) + k -= 32; + else if (k > 0) + k--; + if (k > 0) { + k--; + k = (k|07) - (k&07); + k++; + } + perm[i*8+j] = k; + } + } +#ifdef DEBUG + prtab("ietab", perm, 8); +#endif + init_perm(IE3264, perm, 4, 8); + + /* + * Compression, then final permutation, then bit reverse. 
+ */ + for (i = 0; i < 64; i++) { + k = IP[CIFP[i]-1]; + if (k > 0) { + k--; + k = (k|07) - (k&07); + k++; + } + perm[k-1] = i+1; + } +#ifdef DEBUG + prtab("cftab", perm, 8); +#endif + init_perm(CF6464, perm, 8, 8); + + /* + * SPE table + */ + for (i = 0; i < 48; i++) + perm[i] = P32Tr[ExpandTr[i]-1]; + for (tableno = 0; tableno < 8; tableno++) { + for (j = 0; j < 64; j++) { + k = (((j >> 0) &01) << 5)| + (((j >> 1) &01) << 3)| + (((j >> 2) &01) << 2)| + (((j >> 3) &01) << 1)| + (((j >> 4) &01) << 0)| + (((j >> 5) &01) << 4); + k = S[tableno][k]; + k = (((k >> 3)&01) << 0)| + (((k >> 2)&01) << 1)| + (((k >> 1)&01) << 2)| + (((k >> 0)&01) << 3); + for (i = 0; i < 32; i++) + tmp32[i] = 0; + for (i = 0; i < 4; i++) + tmp32[4 * tableno + i] = (k >> i) & 01; + k = 0; + for (i = 24; --i >= 0; ) + k = (k<<1) | tmp32[perm[i]-1]; + TO_SIX_BIT(SPE[0][tableno][j], k); + k = 0; + for (i = 24; --i >= 0; ) + k = (k<<1) | tmp32[perm[i+24]-1]; + TO_SIX_BIT(SPE[1][tableno][j], k); + } + } +} + +/* + * Initialize "perm" to represent transformation "p", which rearranges + * (perhaps with expansion and/or contraction) one packed array of bits + * (of size "chars_in" characters) into another array (of size "chars_out" + * characters). + * + * "perm" must be all-zeroes on entry to this routine. 
+ */ +STATIC void +init_perm(perm, p, chars_in, chars_out) + C_block perm[64/CHUNKBITS][1<>LGCHUNKBITS; /* which chunk this bit comes from */ + l = 1<<(l&(CHUNKBITS-1)); /* mask for this bit */ + for (j = 0; j < (1<>3] |= 1<<(k&07); + } + } +} + +/* + * "setkey" routine (for backwards compatibility) + */ +int +setkey(key) + register const char *key; +{ + register int i, j, k; + C_block keyblock; + + for (i = 0; i < 8; i++) { + k = 0; + for (j = 0; j < 8; j++) { + k <<= 1; + k |= (unsigned char)*key++; + } + keyblock.b[i] = k; + } + return (des_setkey((char *)keyblock.b)); +} + +/* + * "encrypt" routine (for backwards compatibility) + */ +int +encrypt(block, flag) + register char *block; + int flag; +{ + register int i, j, k; + C_block cblock; + + for (i = 0; i < 8; i++) { + k = 0; + for (j = 0; j < 8; j++) { + k <<= 1; + k |= (unsigned char)*block++; + } + cblock.b[i] = k; + } + if (des_cipher((char *)&cblock, (char *)&cblock, 0L, (flag ? -1: 1))) + return (1); + for (i = 7; i >= 0; i--) { + k = cblock.b[i]; + for (j = 7; j >= 0; j--) { + *--block = k&01; + k >>= 1; + } + } + return (0); +} + +#ifdef DEBUG +STATIC void +prtab(s, t, num_rows) + char *s; + unsigned char *t; + int num_rows; +{ + register int i, j; + + (void)printf("%s:\n", s); + for (i = 0; i < num_rows; i++) { + for (j = 0; j < 8; j++) { + (void)printf("%3d", t[i*8+j]); + } + (void)printf("\n"); + } + (void)printf("\n"); +} +#endif diff --git a/missing/dup2.c b/missing/dup2.c new file mode 100644 index 0000000..7554084 --- /dev/null +++ b/missing/dup2.c @@ -0,0 +1,60 @@ +/* + * Public domain dup2() lookalike + * by Curtis Jackson @ AT&T Technologies, Burlington, NC + * electronic address: burl!rcj + * + * dup2 performs the following functions: + * + * Check to make sure that fd1 is a valid open file descriptor. + * Check to see if fd2 is already open; if so, close it. + * Duplicate fd1 onto fd2; checking to make sure fd2 is a valid fd. + * Return fd2 if all went well; return BADEXIT otherwise. 
+ */ + +#include "ruby/config.h" + +#if defined(HAVE_FCNTL) +# include +#endif + +#if !defined(HAVE_FCNTL) || !defined(F_DUPFD) +# include +#endif + +#define BADEXIT -1 + +int +dup2(int fd1, int fd2) +{ +#if defined(HAVE_FCNTL) && defined(F_DUPFD) + if (fd1 != fd2) { +#ifdef F_GETFL + if (fcntl(fd1, F_GETFL) < 0) + return BADEXIT; + if (fcntl(fd2, F_GETFL) >= 0) + close(fd2); +#else + close(fd2); +#endif + if (fcntl(fd1, F_DUPFD, fd2) < 0) + return BADEXIT; + } + return fd2; +#else + extern int errno; + int i, fd, fds[256]; + + if (fd1 == fd2) return 0; + close(fd2); + for (i=0; i<256; i++) { + fd = fds[i] = dup(fd1); + if (fd == fd2) break; + } + while (i) { + close(fds[i--]); + } + if (fd == fd2) return 0; + errno = EMFILE; + return BADEXIT; +#endif +} diff --git a/missing/erf.c b/missing/erf.c new file mode 100644 index 0000000..fe65b9a --- /dev/null +++ b/missing/erf.c @@ -0,0 +1,88 @@ +/* erf.c - public domain implementation of error function erf(3m) + +reference - Haruhiko Okumura: C-gengo niyoru saishin algorithm jiten + (New Algorithm handbook in C language) (Gijyutsu hyouron + sha, Tokyo, 1991) p.227 [in Japanese] */ +#include +#include + +#ifdef _WIN32 +# include +# if !defined __MINGW32__ || defined __NO_ISOCEXT +# ifndef isnan +# define isnan(x) _isnan(x) +# endif +# ifndef isinf +# define isinf(x) (!_finite(x) && !_isnan(x)) +# endif +# ifndef finite +# define finite(x) _finite(x) +# endif +# endif +#endif + +static double q_gamma(double, double, double); + +/* Incomplete gamma function + 1 / Gamma(a) * Int_0^x exp(-t) t^(a-1) dt */ +static double p_gamma(double a, double x, double loggamma_a) +{ + int k; + double result, term, previous; + + if (x >= 1 + a) return 1 - q_gamma(a, x, loggamma_a); + if (x == 0) return 0; + result = term = exp(a * log(x) - x - loggamma_a) / a; + for (k = 1; k < 1000; k++) { + term *= x / (a + k); + previous = result; result += term; + if (result == previous) return result; + } + fprintf(stderr, "erf.c:%d:p_gamma() could 
not converge.", __LINE__); + return result; +} + +/* Incomplete gamma function + 1 / Gamma(a) * Int_x^inf exp(-t) t^(a-1) dt */ +static double q_gamma(double a, double x, double loggamma_a) +{ + int k; + double result, w, temp, previous; + double la = 1, lb = 1 + x - a; /* Laguerre polynomial */ + + if (x < 1 + a) return 1 - p_gamma(a, x, loggamma_a); + w = exp(a * log(x) - x - loggamma_a); + result = w / lb; + for (k = 2; k < 1000; k++) { + temp = ((k - 1 - a) * (lb - la) + (k + x) * lb) / k; + la = lb; lb = temp; + w *= (k - 1 - a) / k; + temp = w / (la * lb); + previous = result; result += temp; + if (result == previous) return result; + } + fprintf(stderr, "erf.c:%d:q_gamma() could not converge.", __LINE__); + return result; +} + +#define LOG_PI_OVER_2 0.572364942924700087071713675675 /* log_e(PI)/2 */ + +double erf(double x) +{ + if (!finite(x)) { + if (isnan(x)) return x; /* erf(NaN) = NaN */ + return (x>0 ? 1.0 : -1.0); /* erf(+-inf) = +-1.0 */ + } + if (x >= 0) return p_gamma(0.5, x * x, LOG_PI_OVER_2); + else return - p_gamma(0.5, x * x, LOG_PI_OVER_2); +} + +double erfc(double x) +{ + if (!finite(x)) { + if (isnan(x)) return x; /* erfc(NaN) = NaN */ + return (x>0 ? 0.0 : 2.0); /* erfc(+-inf) = 0.0, 2.0 */ + } + if (x >= 0) return q_gamma(0.5, x * x, LOG_PI_OVER_2); + else return 1 + p_gamma(0.5, x * x, LOG_PI_OVER_2); +} diff --git a/missing/file.h b/missing/file.h new file mode 100644 index 0000000..2d491d0 --- /dev/null +++ b/missing/file.h @@ -0,0 +1,22 @@ +/* This is file FILE.H */ + +#ifndef _FILE_H_ +#define _FILE_H_ + +#include + +#ifndef L_SET +# define L_SET 0 /* seek from beginning. */ +# define L_CURR 1 /* seek from current position. */ +# define L_INCR 1 /* ditto. */ +# define L_XTND 2 /* seek from end. */ +#endif + +#ifndef R_OK +# define R_OK 4 /* test whether readable. */ +# define W_OK 2 /* test whether writable. */ +# define X_OK 1 /* test whether execubale. */ +# define F_OK 0 /* test whether exist. 
*/ +#endif + +#endif diff --git a/missing/fileblocks.c b/missing/fileblocks.c new file mode 100644 index 0000000..ccb8d66 --- /dev/null +++ b/missing/fileblocks.c @@ -0,0 +1 @@ +/* dummy for autoconf */ diff --git a/missing/finite.c b/missing/finite.c new file mode 100644 index 0000000..8d0b7af --- /dev/null +++ b/missing/finite.c @@ -0,0 +1,7 @@ +/* public domain rewrite of finite(3) */ + +int +finite(double n) +{ + return !isnan(n) && !isinf(n); +} diff --git a/missing/flock.c b/missing/flock.c new file mode 100644 index 0000000..b02f8bf --- /dev/null +++ b/missing/flock.c @@ -0,0 +1,129 @@ +#include "ruby/config.h" + +#if defined _WIN32 +#elif defined HAVE_FCNTL && defined HAVE_FCNTL_H + +/* These are the flock() constants. Since this sytems doesn't have + flock(), the values of the constants are probably not available. +*/ +# ifndef LOCK_SH +# define LOCK_SH 1 +# endif +# ifndef LOCK_EX +# define LOCK_EX 2 +# endif +# ifndef LOCK_NB +# define LOCK_NB 4 +# endif +# ifndef LOCK_UN +# define LOCK_UN 8 +# endif + +#include +#include +#include + +int +flock(int fd, int operation) +{ + struct flock lock; + + switch (operation & ~LOCK_NB) { + case LOCK_SH: + lock.l_type = F_RDLCK; + break; + case LOCK_EX: + lock.l_type = F_WRLCK; + break; + case LOCK_UN: + lock.l_type = F_UNLCK; + break; + default: + errno = EINVAL; + return -1; + } + lock.l_whence = SEEK_SET; + lock.l_start = lock.l_len = 0L; + + return fcntl(fd, (operation & LOCK_NB) ? F_SETLK : F_SETLKW, &lock); +} + +#elif defined(HAVE_LOCKF) + +#include +#include + +/* Emulate flock() with lockf() or fcntl(). This is just to increase + portability of scripts. The calls might not be completely + interchangeable. What's really needed is a good file + locking module. 
+*/ + +# ifndef F_ULOCK +# define F_ULOCK 0 /* Unlock a previously locked region */ +# endif +# ifndef F_LOCK +# define F_LOCK 1 /* Lock a region for exclusive use */ +# endif +# ifndef F_TLOCK +# define F_TLOCK 2 /* Test and lock a region for exclusive use */ +# endif +# ifndef F_TEST +# define F_TEST 3 /* Test a region for other processes locks */ +# endif + +/* These are the flock() constants. Since this sytems doesn't have + flock(), the values of the constants are probably not available. +*/ +# ifndef LOCK_SH +# define LOCK_SH 1 +# endif +# ifndef LOCK_EX +# define LOCK_EX 2 +# endif +# ifndef LOCK_NB +# define LOCK_NB 4 +# endif +# ifndef LOCK_UN +# define LOCK_UN 8 +# endif + +int +flock(int fd, int operation) +{ + switch (operation) { + + /* LOCK_SH - get a shared lock */ + case LOCK_SH: + rb_notimplement(); + return -1; + /* LOCK_EX - get an exclusive lock */ + case LOCK_EX: + return lockf (fd, F_LOCK, 0); + + /* LOCK_SH|LOCK_NB - get a non-blocking shared lock */ + case LOCK_SH|LOCK_NB: + rb_notimplement(); + return -1; + /* LOCK_EX|LOCK_NB - get a non-blocking exclusive lock */ + case LOCK_EX|LOCK_NB: + return lockf (fd, F_TLOCK, 0); + + /* LOCK_UN - unlock */ + case LOCK_UN: + return lockf (fd, F_ULOCK, 0); + + /* Default - can't decipher operation */ + default: + errno = EINVAL; + return -1; + } +} +#else +int +flock(int fd, int operation) +{ + rb_notimplement(); + return -1; +} +#endif diff --git a/missing/hypot.c b/missing/hypot.c new file mode 100644 index 0000000..5a66355 --- /dev/null +++ b/missing/hypot.c @@ -0,0 +1,16 @@ +/* public domain rewrite of hypot */ + +#include + +double hypot(double x, double y) +{ + if (x < 0) x = -x; + if (y < 0) y = -y; + if (x < y) { + double tmp = x; + x = y; y = tmp; + } + if (y == 0.0) return x; + y /= x; + return x * sqrt(1.0+y*y); +} diff --git a/missing/isinf.c b/missing/isinf.c new file mode 100644 index 0000000..4aef51b --- /dev/null +++ b/missing/isinf.c @@ -0,0 +1,69 @@ +/* public domain rewrite of 
isinf(3) */ + +#ifdef __osf__ + +#define _IEEE 1 +#include + +int +isinf(double n) +{ + if (IsNANorINF(n) && IsINF(n)) { + return 1; + } + else { + return 0; + } +} + +#else + +#include "ruby/config.h" + +#if defined(HAVE_FINITE) && defined(HAVE_ISNAN) + +#include +#ifdef HAVE_IEEEFP_H +#include +#endif + +/* + * isinf may be provided only as a macro. + * ex. HP-UX, Solaris 10 + * http://www.gnu.org/software/automake/manual/autoconf/Function-Portability.html + */ +#ifndef isinf +int +isinf(double n) +{ + return (!finite(n) && !isnan(n)); +} +#endif + +#else + +#ifdef HAVE_STRING_H +# include +#else +# include +#endif + +static double zero(void) { return 0.0; } +static double one (void) { return 1.0; } +static double inf (void) { return one() / zero(); } + +int +isinf(double n) +{ + static double pinf = 0.0; + static double ninf = 0.0; + + if (pinf == 0.0) { + pinf = inf(); + ninf = -pinf; + } + return memcmp(&n, &pinf, sizeof n) == 0 + || memcmp(&n, &ninf, sizeof n) == 0; +} +#endif +#endif diff --git a/missing/isnan.c b/missing/isnan.c new file mode 100644 index 0000000..a873397 --- /dev/null +++ b/missing/isnan.c @@ -0,0 +1,15 @@ +/* public domain rewrite of isnan(3) */ + +static int double_ne(double n1, double n2); + +int +isnan(double n) +{ + return double_ne(n, n); +} + +static int +double_ne(double n1, double n2) +{ + return n1 != n2; +} diff --git a/missing/langinfo.c b/missing/langinfo.c new file mode 100644 index 0000000..a76000b --- /dev/null +++ b/missing/langinfo.c @@ -0,0 +1,146 @@ +/* -*- c-file-style: "gnu" -*- */ +/* + * This is a quick-and-dirty emulator of the nl_langinfo(CODESET) + * function defined in the Single Unix Specification for those systems + * (FreeBSD, etc.) that don't have one yet. It behaves as if it had + * been called after setlocale(LC_CTYPE, ""), that is it looks at + * the locale environment variables. 
+ * + * http://www.opengroup.org/onlinepubs/7908799/xsh/langinfo.h.html + * + * Please extend it as needed and suggest improvements to the author. + * This emulator will hopefully become redundant soon as + * nl_langinfo(CODESET) becomes more widely implemented. + * + * Since the proposed Li18nux encoding name registry is still not mature, + * the output follows the MIME registry where possible: + * + * http://www.iana.org/assignments/character-sets + * + * A possible autoconf test for the availability of nl_langinfo(CODESET) + * can be found in + * + * http://www.cl.cam.ac.uk/~mgk25/unicode.html#activate + * + * Markus.Kuhn@cl.cam.ac.uk -- 2002-03-11 + * Permission to use, copy, modify, and distribute this software + * for any purpose and without fee is hereby granted. The author + * disclaims all warranties with regard to this software. + * + * Latest version: + * + * http://www.cl.cam.ac.uk/~mgk25/ucs/langinfo.c + */ + +#include +#include +#if defined _WIN32 || defined __CYGWIN__ +#include +#if defined _WIN32 +#define strncasecmp strnicmp +#endif +#endif +#ifdef HAVE_LANGINFO_H +#include "langinfo.h" +#endif + +#define C_CODESET "US-ASCII" /* Return this as the encoding of the + * C/POSIX locale. Could as well one day + * become "UTF-8". 
*/ + +#if defined _WIN32 || defined __CYGWIN__ +#define JA_CODESET "Windows-31J" +#else +#define JA_CODESET "EUC-JP" +#endif + +#define digit(x) ((x) >= '0' && (x) <= '9') +#define strstart(s, n) (strncasecmp(s, n, strlen(n)) == 0) + +static char buf[16]; + +const char * +nl_langinfo_codeset(void) +{ + const char *l, *p; + + if (((l = getenv("LC_ALL")) && *l) || + ((l = getenv("LC_CTYPE")) && *l) || + ((l = getenv("LANG")) && *l)) { + /* check standardized locales */ + if (!strcmp(l, "C") || !strcmp(l, "POSIX")) + return C_CODESET; + /* check for encoding name fragment */ + p = strchr(l, '.'); + if (!p++) p = l; + if (strstart(p, "UTF")) + return "UTF-8"; + if (strstart(p, "8859-")) { + if (digit(p[5])) { + p += 5; + memcpy(buf, "ISO-8859-\0\0", 12); + buf[9] = *p++; + if (digit(*p)) buf[10] = *p++; + return buf; + } + } + if (strstart(p, "KOI8-R")) return "KOI8-R"; + if (strstart(p, "KOI8-U")) return "KOI8-U"; + if (strstart(p, "620")) return "TIS-620"; + if (strstart(p, "2312")) return "GB2312"; + if (strstart(p, "HKSCS")) return "Big5HKSCS"; /* no MIME charset */ + if (strstart(p, "BIG5")) return "Big5"; + if (strstart(p, "GBK")) return "GBK"; /* no MIME charset */ + if (strstart(p, "18030")) return "GB18030"; /* no MIME charset */ + if (strstart(p, "Shift_JIS") || strstart(p, "SJIS")) return "Windows-31J"; + /* check for conclusive modifier */ + if (strstart(p, "euro")) return "ISO-8859-15"; + /* check for language (and perhaps country) codes */ + if (strstart(l, "zh_TW")) return "Big5"; + if (strstart(l, "zh_HK")) return "Big5HKSCS"; /* no MIME charset */ + if (strstart(l, "zh")) return "GB2312"; + if (strstart(l, "ja")) return JA_CODESET; + if (strstart(l, "ko")) return "EUC-KR"; + if (strstart(l, "ru")) return "KOI8-R"; + if (strstart(l, "uk")) return "KOI8-U"; + if (strstart(l, "pl") || strstart(l, "hr") || + strstart(l, "hu") || strstart(l, "cs") || + strstart(l, "sk") || strstart(l, "sl")) return "ISO-8859-2"; + if (strstart(l, "eo") || strstart(l, "mt")) 
return "ISO-8859-3"; + if (strstart(l, "el")) return "ISO-8859-7"; + if (strstart(l, "he")) return "ISO-8859-8"; + if (strstart(l, "tr")) return "ISO-8859-9"; + if (strstart(l, "th")) return "TIS-620"; /* or ISO-8859-11 */ + if (strstart(l, "lt")) return "ISO-8859-13"; + if (strstart(l, "cy")) return "ISO-8859-14"; + if (strstart(l, "ro")) return "ISO-8859-2"; /* or ISO-8859-16 */ + if (strstart(l, "am") || strstart(l, "vi")) return "UTF-8"; + /* Send me further rules if you like, but don't forget that we are + * *only* interested in locale naming conventions on platforms + * that do not already provide an nl_langinfo(CODESET) implementation. */ + } + return NULL; +} + +#ifdef HAVE_LANGINFO_H +char *nl_langinfo(nl_item item) +{ + const char *codeset; + if (item != CODESET) + return NULL; + codeset = nl_langinfo_codeset(); + if (!codeset) codeset = C_CODESET; + return (char *)codeset; +} +#endif + +/* For a demo, compile with "gcc -W -Wall -o langinfo -D TEST langinfo.c" */ + +#ifdef TEST +#include +int main() +{ + printf("%s\n", nl_langinfo(CODESET)); + return 0; +} +#endif diff --git a/missing/lgamma_r.c b/missing/lgamma_r.c new file mode 100644 index 0000000..ba8535b --- /dev/null +++ b/missing/lgamma_r.c @@ -0,0 +1,68 @@ +/* lgamma_r.c - public domain implementation of function lgamma_r(3m) + +lgamma_r() is based on gamma(). modified by Tanaka Akira. 
+ +reference - Haruhiko Okumura: C-gengo niyoru saishin algorithm jiten + (New Algorithm handbook in C language) (Gijyutsu hyouron + sha, Tokyo, 1991) [in Japanese] + http://oku.edu.mie-u.ac.jp/~okumura/algo/ +*/ + +/*********************************************************** + gamma.c -- Gamma function +***********************************************************/ +#include +#include +#define PI 3.14159265358979324 /* $\pi$ */ +#define LOG_2PI 1.83787706640934548 /* $\log 2\pi$ */ +#define LOG_PI 1.14472988584940017 /* $\log_e \pi$ */ +#define N 8 + +#define B0 1 /* Bernoulli numbers */ +#define B1 (-1.0 / 2.0) +#define B2 ( 1.0 / 6.0) +#define B4 (-1.0 / 30.0) +#define B6 ( 1.0 / 42.0) +#define B8 (-1.0 / 30.0) +#define B10 ( 5.0 / 66.0) +#define B12 (-691.0 / 2730.0) +#define B14 ( 7.0 / 6.0) +#define B16 (-3617.0 / 510.0) + +static double +loggamma(double x) /* the natural logarithm of the Gamma function. */ +{ + double v, w; + + if (x == 1.0 || x == 2.0) return 0.0; + + v = 1; + while (x < N) { v *= x; x++; } + w = 1 / (x * x); + return ((((((((B16 / (16 * 15)) * w + (B14 / (14 * 13))) * w + + (B12 / (12 * 11))) * w + (B10 / (10 * 9))) * w + + (B8 / ( 8 * 7))) * w + (B6 / ( 6 * 5))) * w + + (B4 / ( 4 * 3))) * w + (B2 / ( 2 * 1))) / x + + 0.5 * LOG_2PI - log(v) - x + (x - 0.5) * log(x); +} + +/* the natural logarithm of the absolute value of the Gamma function */ +double +lgamma_r(double x, int *signp) +{ + if (x <= 0) { + double i, f, s; + f = modf(-x, &i); + if (f == 0.0) { /* pole error */ + *signp = 1; + errno = ERANGE; + return HUGE_VAL; + } + *signp = (fmod(i, 2.0) != 0.0) ? 
1 : -1; + s = sin(PI * f); + if (s < 0) s = -s; + return LOG_PI - log(s) - loggamma(1 - x); + } + *signp = 1; + return loggamma(x); +} diff --git a/missing/memcmp.c b/missing/memcmp.c new file mode 100644 index 0000000..5b3ad38 --- /dev/null +++ b/missing/memcmp.c @@ -0,0 +1,17 @@ +/* public domain rewrite of memcmp(3) */ + +#include + +int +memcmp(const void *s1, const void *s2, size_t len) +{ + register unsigned char *a = (unsigned char*)s1; + register unsigned char *b = (unsigned char*)s2; + register int tmp; + + for (; len; --len) { + if (tmp = *a++ - *b++) + return tmp; + } + return 0; +} diff --git a/missing/memmove.c b/missing/memmove.c new file mode 100644 index 0000000..19c5f18 --- /dev/null +++ b/missing/memmove.c @@ -0,0 +1,21 @@ +/* public domain rewrite of memcmp(3) */ + +#include + +void * +memmove(void *d, const void *s, size_t n) +{ + char *dst = (char *)d; + const char *src = (const char *)s; + + if (src < dst) { + src += n; + dst += n; + for (; n; --n) + *--dst = *--src; + } + else if (dst < src) + for (; n; --n) + *dst++ = *src++; + return d; +} diff --git a/missing/os2.c b/missing/os2.c new file mode 100644 index 0000000..3448cdc --- /dev/null +++ b/missing/os2.c @@ -0,0 +1,138 @@ +/* os/2 compatibility functions -- follows Ruby's license */ + +#include "ruby.h" +#include +#include +#include +#include +#include +#include + +#define INCL_DOS +#include + +int +chown(char *path, int owner, int group) +{ + return 0; +} + +#if 0 +int +link(char *from, char *to) +{ + return -1; +} +#endif + +#if defined(EMX_REPLACE_GETCWD) && (EMX_REPLACE_GETCWD) \ + || defined(EMX_REPLACE_CHDIR) && (EMX_REPLACE_CHDIR) +#include + +#if defined(EMX_REPLACE_GETCWD) && (EMX_REPLACE_GETCWD) +/* to handle the drive letter and DBCS characters within a given path */ +char * +getcwd(char *path, size_t len) +{ + return _getcwd2(path, (int)len); +} +#endif + +#if defined(EMX_REPLACE_CHDIR) && (EMX_REPLACE_CHDIR) +/* to handle the drive letter and DBCS characters within a given 
path */ +int +chdir(__const__ char *path) +{ + return _chdir2(path); +} +#endif +#endif + +typedef char* CHARP; + +int +do_spawn(cmd) +char *cmd; +{ + register char **a; + register char *s; + char **argv; + char *shell, *sw, *cmd2; + int status; + + if ((shell = getenv("RUBYSHELL")) != NULL && *shell != '\0') { + s = shell; + do + *s = isupper(*s) ? tolower(*s) : *s; + while (*++s); + if (strstr(shell, "cmd") || strstr(shell, "4os2")) + sw = "/c"; + else + sw = "-c"; + } else if ((shell = getenv("SHELL")) != NULL && *shell != '\0') { + s = shell; + do + *s = isupper(*s) ? tolower(*s) : *s; + while (*++s); + if (strstr(shell, "cmd") || strstr(shell, "4os2")) + sw = "/c"; + else + sw = "-c"; + } else if ((shell = getenv("COMSPEC")) != NULL && *shell != '\0') { + s = shell; + do + *s = isupper(*s) ? tolower(*s) : *s; + while (*++s); + if (strstr(shell, "cmd") || strstr(shell, "4os2")) + sw = "/c"; + else + sw = "-c"; + } + /* see if there are shell metacharacters in it */ + /*SUPPRESS 530*/ + /* for (s = cmd; *s && isalpha(*s); s++) ; + if (*s == '=') + goto doshell; */ + for (s = cmd; *s; s++) { + if (*sw == '-' && *s != ' ' && + !isalpha(*s) && index("$&*(){}[]'\";\\|?<>~`\n",*s)) { + if (*s == '\n' && !s[1]) { + *s = '\0'; + break; + } + goto doshell; + } else if (*sw == '/' && *s != ' ' && + !isalpha(*s) && index("^()<>|&\n",*s)) { + if (*s == '\n' && !s[1]) { + *s = '\0'; + break; + } + doshell: + status = spawnlp(P_WAIT,shell,shell,sw,cmd,(char*)NULL); + return status; + } + } + argv = ALLOC_N(CHARP,(strlen(cmd) / 2 + 2)); + cmd2 = ALLOC_N(char, (strlen(cmd) + 1)); + strcpy(cmd2, cmd); + a = argv; + for (s = cmd2; *s;) { + while (*s && isspace(*s)) s++; + if (*s) + *(a++) = s; + while (*s && !isspace(*s)) s++; + if (*s) + *s++ = '\0'; + } + *a = NULL; + if (argv[0]) { + if ((status = spawnvp(P_WAIT, argv[0], argv)) == -1) { + free(argv); + free(cmd2); + return -1; + } + } + free(cmd2); + free(argv); + return status; +} diff --git a/missing/strchr.c 
b/missing/strchr.c new file mode 100644 index 0000000..bebd7ba --- /dev/null +++ b/missing/strchr.c @@ -0,0 +1,28 @@ +/* public domain rewrite of strchr(3) and strrchr(3) */ + +char * +strchr(const char *s, int c) +{ + if (c == 0) return (char *)s + strlen(s); + while (*s) { + if (*s == c) + return (char *)s; + s++; + } + return 0; +} + +char * +strrchr(const char *s, int c) +{ + const char *save; + + if (c == 0) return (char *)s + strlen(s); + save = 0; + while (*s) { + if (*s == c) + save = s; + s++; + } + return (char *)save; +} diff --git a/missing/strerror.c b/missing/strerror.c new file mode 100644 index 0000000..023935a --- /dev/null +++ b/missing/strerror.c @@ -0,0 +1,16 @@ +/* public domain rewrite of strerror(3) */ + +extern int sys_nerr; +extern char *sys_errlist[]; + +static char msg[50]; + +char * +strerror(int error) +{ + if (error <= sys_nerr && error > 0) { + return sys_errlist[error]; + } + sprintf(msg, "Unknown error (%d)", error); + return msg; +} diff --git a/missing/strlcat.c b/missing/strlcat.c new file mode 100644 index 0000000..b309648 --- /dev/null +++ b/missing/strlcat.c @@ -0,0 +1,73 @@ +/* $OpenBSD: strlcat.c,v 1.8 2001/05/13 15:40:15 deraadt Exp $ */ + +/* + * Copyright (c) 1998 Todd C. Miller + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char *rcsid = "$OpenBSD: strlcat.c,v 1.8 2001/05/13 15:40:15 deraadt Exp $"; +#endif /* LIBC_SCCS and not lint */ + +#include +#include + +/* + * Appends src to string dst of size siz (unlike strncat, siz is the + * full size of dst, not space left). At most siz-1 characters + * will be copied. Always NUL terminates (unless siz <= strlen(dst)). + * Returns strlen(src) + MIN(siz, strlen(initial dst)). + * If retval >= siz, truncation occurred. 
+ */ +size_t +strlcat(dst, src, siz) + char *dst; + const char *src; + size_t siz; +{ + register char *d = dst; + register const char *s = src; + register size_t n = siz; + size_t dlen; + + /* Find the end of dst and adjust bytes left but don't go past end */ + while (n-- != 0 && *d != '\0') + d++; + dlen = d - dst; + n = siz - dlen; + + if (n == 0) + return(dlen + strlen(s)); + while (*s != '\0') { + if (n != 1) { + *d++ = *s; + n--; + } + s++; + } + *d = '\0'; + + return(dlen + (s - src)); /* count does not include NUL */ +} diff --git a/missing/strlcpy.c b/missing/strlcpy.c new file mode 100644 index 0000000..5f58696 --- /dev/null +++ b/missing/strlcpy.c @@ -0,0 +1,69 @@ +/* $OpenBSD: strlcpy.c,v 1.5 2001/05/13 15:40:16 deraadt Exp $ */ + +/* + * Copyright (c) 1998 Todd C. Miller + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char *rcsid = "$OpenBSD: strlcpy.c,v 1.5 2001/05/13 15:40:16 deraadt Exp $"; +#endif /* LIBC_SCCS and not lint */ + +#include +#include + +/* + * Copy src to string dst of size siz. At most siz-1 characters + * will be copied. Always NUL terminates (unless siz == 0). + * Returns strlen(src); if retval >= siz, truncation occurred. + */ +size_t +strlcpy(dst, src, siz) + char *dst; + const char *src; + size_t siz; +{ + register char *d = dst; + register const char *s = src; + register size_t n = siz; + + /* Copy as many bytes as will fit */ + if (n != 0 && --n != 0) { + do { + if ((*d++ = *s++) == 0) + break; + } while (--n != 0); + } + + /* Not enough room in dst, add NUL and traverse rest of src */ + if (n == 0) { + if (siz != 0) + *d = '\0'; /* NUL-terminate dst */ + while (*s++) + ; + } + + return(s - src - 1); /* count does not include NUL */ +} diff --git a/missing/strstr.c b/missing/strstr.c new file mode 100644 index 0000000..2e9c282 --- /dev/null +++ b/missing/strstr.c @@ -0,0 +1,25 @@ +/* public domain rewrite of strstr(3) */ + +char * +strstr(const char *haystack, const char *needle) +{ + const char *hend; + const char *a, *b; + + if (*needle == 0) return (char *)haystack; + hend = haystack + strlen(haystack) - strlen(needle) + 1; + while (haystack < hend) { + if (*haystack == *needle) { + a = haystack; + b = needle; + for (;;) { + if (*b == 0) return (char *)haystack; + if (*a++ != *b++) { + 
/*
 * Convert the initial part of nptr to a long, built on top of strtoul().
 *
 * Leading white space is skipped and a single optional '+' or '-' is
 * consumed here; the digits are then parsed by strtoul() in the given
 * base.  If endptr is not null, *endptr is set to the first character
 * that was not converted, or to nptr itself when no conversion could be
 * performed.
 *
 * The magnitude returned by strtoul() is converted to long as is; no
 * extra range clamping is done here.
 */
long
strtol(const char *nptr, char **endptr, int base)
{
    long result;
    unsigned long magnitude;
    int negative = 0;
    const char *p = nptr;

    /* <ctype.h> functions require an unsigned char value (or EOF) */
    while (isspace((unsigned char)*p)) {
        p++;
    }
    if (*p == '-') {
        negative = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /*
     * strtoul() would itself skip blanks and accept a sign, so inputs
     * like "--5" or "- 5" must be rejected before handing over.
     */
    if (isspace((unsigned char)*p) || *p == '-' || *p == '+') {
        if (endptr != 0) {
            *endptr = (char *)nptr;
        }
        return 0;
    }

    magnitude = strtoul(p, endptr, base);
    /* negate in unsigned arithmetic, exactly as -strtoul(...) would */
    result = negative ? (long)(0UL - magnitude) : (long)magnitude;

    /* nothing converted: report the original start, not the skipped one */
    if (endptr != 0 && *endptr == p) {
        *endptr = (char *)nptr;
    }
    return result;
}
-HUGE_VAL : HUGE_VAL; + } + if (x < 0) { + static double zero = 0.0; + double i, f; + f = modf(-x, &i); + if (f == 0.0) { /* Domain Error */ + errno = EDOM; + return zero/zero; + } + } + d = lgamma_r(x, &sign); + return sign * exp(d); +} + +#else + +#include +#define PI 3.14159265358979324 /* $\pi$ */ +#define LOG_2PI 1.83787706640934548 /* $\log 2\pi$ */ +#define N 8 + +#define B0 1 /* Bernoulli numbers */ +#define B1 (-1.0 / 2.0) +#define B2 ( 1.0 / 6.0) +#define B4 (-1.0 / 30.0) +#define B6 ( 1.0 / 42.0) +#define B8 (-1.0 / 30.0) +#define B10 ( 5.0 / 66.0) +#define B12 (-691.0 / 2730.0) +#define B14 ( 7.0 / 6.0) +#define B16 (-3617.0 / 510.0) + +static double +loggamma(double x) /* the natural logarithm of the Gamma function. */ +{ + double v, w; + + v = 1; + while (x < N) { v *= x; x++; } + w = 1 / (x * x); + return ((((((((B16 / (16 * 15)) * w + (B14 / (14 * 13))) * w + + (B12 / (12 * 11))) * w + (B10 / (10 * 9))) * w + + (B8 / ( 8 * 7))) * w + (B6 / ( 6 * 5))) * w + + (B4 / ( 4 * 3))) * w + (B2 / ( 2 * 1))) / x + + 0.5 * LOG_2PI - log(v) - x + (x - 0.5) * log(x); +} + +double tgamma(double x) /* Gamma function */ +{ + if (x == 0.0) { /* Pole Error */ + errno = ERANGE; + return 1/x < 0 ? -HUGE_VAL : HUGE_VAL; + } + if (x < 0) { + int sign; + static double zero = 0.0; + double i, f; + f = modf(-x, &i); + if (f == 0.0) { /* Domain Error */ + errno = EDOM; + return zero/zero; + } + sign = (fmod(i, 2.0) != 0.0) ? 1 : -1; + return sign * PI / (sin(PI * f) * exp(loggamma(1 - x))); + } + return exp(loggamma(x)); +} +#endif diff --git a/missing/vsnprintf.c b/missing/vsnprintf.c new file mode 100644 index 0000000..9d0e626 --- /dev/null +++ b/missing/vsnprintf.c @@ -0,0 +1,1217 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Chris Torek. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * IMPORTANT NOTE: + * -------------- + * From ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change + * paragraph 3 above is now null and void. + */ + +/* SNPRINTF.C + * fjc 7-31-97 Modified by Mib Software to be a standalone snprintf.c module. + * http://www.mibsoftware.com + * Mib Software does not warrant this software any differently than the + * University of California, Berkeley as described above. All warranties + * are disclaimed. Use this software at your own risk. 
+ * + * All code referencing FILE * functions was eliminated, since it could + * never be called. All header files and necessary files are collapsed + * into one file, internal functions are declared static. This should + * allow inclusion into libraries with less chance of namespace collisions. + * + * snprintf should be the only externally visible item. + * + * As of 7-31-97 FLOATING_POINT is NOT provided. The code is somewhat + * non-portable, so it is disabled. + */ + +/* Define FLOATING_POINT to get floating point. */ +/* +#define FLOATING_POINT +*/ + +#include +#define u_long unsigned long +#define u_short unsigned short +#define u_int unsigned int + +#if !defined(HAVE_STDARG_PROTOTYPES) +#if defined(__STDC__) +#define HAVE_STDARG_PROTOTYPES 1 +#endif +#endif + +#undef __P +#if defined(HAVE_STDARG_PROTOTYPES) +# include +# if !defined(__P) +# define __P(x) x +# endif +#else +# define __P(x) () +# if !defined(const) +# define const +# endif +# include +#endif +#ifndef _BSD_VA_LIST_ +#define _BSD_VA_LIST_ va_list +#endif + +#ifdef __STDC__ +# include +#else +# ifndef LONG_MAX +# ifdef HAVE_LIMITS_H +# include +# else + /* assuming 32bit(2's compliment) long */ +# define LONG_MAX 2147483647 +# endif +# endif +#endif + +#if defined(__hpux) && !defined(__GNUC__) && !defined(__STDC__) +#define const +#endif + +#if defined(sgi) +#undef __const +#define __const +#endif /* People who don't like const sys_error */ + +#include +#if defined(__hpux) && !defined(__GNUC__) || defined(__DECC) +#include +#endif + +#if !defined(__CYGWIN32__) && defined(__hpux) && !defined(__GNUC__) +#include +#endif + +#ifndef NULL +#define NULL 0 +#endif + +/* + * NB: to fit things in six character monocase externals, the stdio + * code uses the prefix `__s' for stdio objects, typically followed + * by a three-character attempt at a mnemonic. + */ + +/* stdio buffers */ +struct __sbuf { + unsigned char *_base; + size_t _size; +}; + + +/* + * stdio state variables. 
+ * + * The following always hold: + * + * if (_flags&(__SLBF|__SWR)) == (__SLBF|__SWR), + * _lbfsize is -_bf._size, else _lbfsize is 0 + * if _flags&__SRD, _w is 0 + * if _flags&__SWR, _r is 0 + * + * This ensures that the getc and putc macros (or inline functions) never + * try to write or read from a file that is in `read' or `write' mode. + * (Moreover, they can, and do, automatically switch from read mode to + * write mode, and back, on "r+" and "w+" files.) + * + * _lbfsize is used only to make the inline line-buffered output stream + * code as compact as possible. + * + * _ub, _up, and _ur are used when ungetc() pushes back more characters + * than fit in the current _bf, or when ungetc() pushes back a character + * that does not match the previous one in _bf. When this happens, + * _ub._base becomes non-nil (i.e., a stream has ungetc() data iff + * _ub._base!=NULL) and _up and _ur save the current values of _p and _r. + * + * NB: see WARNING above before changing the layout of this structure! 
/*
 * Cut-down stand-in for the stdio FILE object, used only as the output
 * sink of this file's string formatter.  The read-side field `_r' is
 * compiled out, and the `_ub', `_up' and `_ur' members mentioned in the
 * upstream layout comment are not present in this version.
 *
 * `vwrite' is the output callback: BSD__sprint() invokes it as
 * (*fp->vwrite)(fp, uio) to flush the queued I/O vectors.
 */
typedef struct __sFILE {
    unsigned char *_p;  /* current position in (some) buffer */
#if 0
    size_t  _r;         /* read space left for getc() */
#endif
    size_t  _w;         /* write space left for putc() */
    short   _flags;     /* flags, below; this FILE is free if 0 */
    short   _file;      /* fileno, if Unix descriptor, else -1 */
    struct  __sbuf _bf; /* the buffer (at least 1 byte, if !NULL) */
    size_t  _lbfsize;   /* 0 or -_bf._size, for inline putc */
    int     (*vwrite)(/* struct __sFILE*, struct __suio * */);
} FILE;
/*
 * Copy the memory regions described by uio into the string buffer of fp.
 *
 * Only the "fully buffered" (__SLBF clear, __SNBF clear) string case
 * (__SSTR) does any work; the other buffering modes are left as empty
 * branches.  For string output at most fp->_w bytes of each region are
 * copied, but the whole region is accounted as written, so that the
 * caller's byte count reflects the space needed rather than the space
 * used.
 *
 * Every return path yields 0.
 */
static int BSD__sfvwrite(fp, uio)
    register FILE *fp;
    register struct __suio *uio;
{
    register size_t len;
    register const char *p;
    register struct __siov *iov;
    register int w;

    /* nothing queued: done */
    if ((len = uio->uio_resid) == 0)
        return (0);
#ifndef __hpux
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
/* copy n bytes from the current region to the current buffer position */
#define COPY(n)   (void)memcpy((void *)fp->_p, (void *)p, (size_t)(n))

    /* load the first region; iov always points at the next one */
    iov = uio->uio_iov;
    p = iov->iov_base;
    len = iov->iov_len;
    iov++;
/* advance to the next non-empty region once the current one is used up */
#define GETIOV(extra_work) \
    while (len == 0) { \
        extra_work; \
        p = iov->iov_base; \
        len = iov->iov_len; \
        iov++; \
    }
    if (fp->_flags & __SNBF) {
    /* fjc 7-31-97 Will never happen.  We are working with
                       strings only
    */
    } else if ((fp->_flags & __SLBF) == 0) {
    /*
     * Fully buffered: fill partially full buffer, if any,
     * and then flush.  If there is no partial buffer, write
     * one _bf._size byte chunk directly (without copying).
     *
     * String output is a special case: write as many bytes
     * as fit, but pretend we wrote everything.  This makes
     * snprintf() return the number of bytes needed, rather
     * than the number used, and avoids its write function
     * (so that the write function can be invalid).
     */
        do {
            GETIOV(;);
            /*
             * NOTE(review): _w and len are size_t while w is int, so
             * the values are narrowed here -- confirm callers never
             * pass regions larger than INT_MAX.
             */
            w = fp->_w;
            if (fp->_flags & __SSTR) {
                if (len < w)
                    w = len;
                COPY(w);    /* copy MIN(fp->_w,len), */
                fp->_w -= w;
                fp->_p += w;
                w = len;    /* but pretend copied all */
            } else {
            /* fjc 7-31-97 Will never happen.  We are working with
                               strings only
            */
            }
            p += w;
            len -= w;
        } while ((uio->uio_resid -= w) != 0);   /* until all queued bytes are accounted for */
    } else {
    /* fjc 7-31-97 Will never happen.  We are working with
                       strings only
    */
    }
    return (0);
}
+ */ + +/* + * Flush out all the vectors defined by the given uio, + * then reset it so that it can be reused. + */ +static int +BSD__sprint(FILE *fp, register struct __suio *uio) +{ + register int err; + + if (uio->uio_resid == 0) { + uio->uio_iovcnt = 0; + return (0); + } + err = (*fp->vwrite)(fp, uio); + uio->uio_resid = 0; + uio->uio_iovcnt = 0; + return (err); +} + + +/* + * Helper function for `fprintf to unbuffered unix file': creates a + * temporary buffer. We only work on write-only files; this avoids + * worries about ungetc buffers and so forth. + */ +static int +BSD__sbprintf(register FILE *fp, const char *fmt, va_list ap) +{ +/* We don't support files. */ + return 0; +} + + +/* + * Macros for converting digits to letters and vice versa + */ +#define to_digit(c) ((c) - '0') +#define is_digit(c) ((unsigned)to_digit(c) <= 9) +#define to_char(n) ((n) + '0') + +#ifdef _HAVE_SANE_QUAD_ +/* + * Convert an unsigned long long to ASCII for printf purposes, returning + * a pointer to the first character of the string representation. + * Octal numbers can be forced to have a leading zero; hex numbers + * use the given digits. + */ +static char * +BSD__uqtoa(register u_quad_t val, char *endp, int base, int octzero, const char *xdigs) +{ + register char *cp = endp; + register long sval; + + /* + * Handle the three cases separately, in the hope of getting + * better/faster code. + */ + switch (base) { + case 10: + if (val < 10) { /* many numbers are 1 digit */ + *--cp = to_char(val); + return (cp); + } + /* + * On many machines, unsigned arithmetic is harder than + * signed arithmetic, so we do at most one unsigned mod and + * divide; this is sufficient to reduce the range of + * the incoming value to where signed arithmetic works. 
+ */ + if (val > LLONG_MAX) { + *--cp = to_char(val % 10); + sval = val / 10; + } else + sval = val; + do { + *--cp = to_char(sval % 10); + sval /= 10; + } while (sval != 0); + break; + + case 8: + do { + *--cp = to_char(val & 7); + val >>= 3; + } while (val); + if (octzero && *cp != '0') + *--cp = '0'; + break; + + case 16: + do { + *--cp = xdigs[val & 15]; + val >>= 4; + } while (val); + break; + + default: /* oops */ + /* + abort(); + */ + break; /* fjc 7-31-97. Don't reference abort() here */ + } + return (cp); +} +#endif /* _HAVE_SANE_QUAD_ */ + +/* + * Convert an unsigned long to ASCII for printf purposes, returning + * a pointer to the first character of the string representation. + * Octal numbers can be forced to have a leading zero; hex numbers + * use the given digits. + */ +static char * +BSD__ultoa(register u_long val, char *endp, int base, int octzero, const char *xdigs) +{ + register char *cp = endp; + register long sval; + + /* + * Handle the three cases separately, in the hope of getting + * better/faster code. + */ + switch (base) { + case 10: + if (val < 10) { /* many numbers are 1 digit */ + *--cp = to_char(val); + return (cp); + } + /* + * On many machines, unsigned arithmetic is harder than + * signed arithmetic, so we do at most one unsigned mod and + * divide; this is sufficient to reduce the range of + * the incoming value to where signed arithmetic works. + */ + if (val > LONG_MAX) { + *--cp = to_char(val % 10); + sval = val / 10; + } else + sval = val; + do { + *--cp = to_char(sval % 10); + sval /= 10; + } while (sval != 0); + break; + + case 8: + do { + *--cp = to_char(val & 7); + val >>= 3; + } while (val); + if (octzero && *cp != '0') + *--cp = '0'; + break; + + case 16: + do { + *--cp = xdigs[val & 15]; + val >>= 4; + } while (val); + break; + + default: /* oops */ + /* + abort(); + */ + break; /* fjc 7-31-97. 
Don't reference abort() here */ + } + return (cp); +} + +#ifdef FLOATING_POINT +#include +/* #include "floatio.h" */ + +#ifndef MAXEXP +# define MAXEXP 1024 +#endif + +#ifndef MAXFRACT +# define MAXFRACT 64 +#endif + +#define BUF (MAXEXP+MAXFRACT+1) /* + decimal point */ +#define DEFPREC 6 + +static char *cvt __P((double, int, int, char *, int *, int, int *, char *)); +static int exponent __P((char *, int, int)); + +#else /* no FLOATING_POINT */ + +#define BUF 68 + +#endif /* FLOATING_POINT */ + + +/* + * Flags used during conversion. + */ +#define ALT 0x001 /* alternate form */ +#define HEXPREFIX 0x002 /* add 0x or 0X prefix */ +#define LADJUST 0x004 /* left adjustment */ +#define LONGDBL 0x008 /* long double; unimplemented */ +#define LONGINT 0x010 /* long integer */ + +#ifdef _HAVE_SANE_QUAD_ +#define QUADINT 0x020 /* quad integer */ +#endif /* _HAVE_SANE_QUAD_ */ + +#define SHORTINT 0x040 /* short integer */ +#define ZEROPAD 0x080 /* zero (as opposed to blank) pad */ +#define FPT 0x100 /* Floating point number */ +static int +BSD_vfprintf(FILE *fp, const char *fmt0, va_list ap) +{ + register const char *fmt; /* format string */ + register int ch; /* character from fmt */ + register int n; /* handy integer (short term usage) */ + register const char *cp;/* handy char pointer (short term usage) */ + register struct __siov *iovp;/* for PRINT macro */ + register int flags; /* flags as above */ + int ret; /* return value accumulator */ + int width; /* width from format (%8d), or 0 */ + int prec; /* precision from format (%.3d), or -1 */ + char sign; /* sign prefix (' ', '+', '-', or \0) */ +#ifdef FLOATING_POINT + char softsign; /* temporary negative sign for floats */ + double _double = 0; /* double precision arguments %[eEfgG] */ + int expt; /* integer value of exponent */ + int expsize = 0; /* character count for expstr */ + int ndig = 0; /* actual number of digits returned by cvt */ + char expstr[7]; /* buffer for exponent string */ +#endif + u_long ulval; /* 
integer arguments %[diouxX] */ +#ifdef _HAVE_SANE_QUAD_ + u_quad_t uqval; /* %q integers */ +#endif /* _HAVE_SANE_QUAD_ */ + int base; /* base for [diouxX] conversion */ + int dprec; /* a copy of prec if [diouxX], 0 otherwise */ + int fieldsz; /* field size expanded by sign, etc */ + int realsz; /* field size expanded by dprec */ + int size; /* size of converted field or string */ + const char *xdigs = 0; /* digits for [xX] conversion */ +#define NIOV 8 + struct __suio uio; /* output information: summary */ + struct __siov iov[NIOV];/* ... and individual io vectors */ + char buf[BUF]; /* space for %c, %[diouxX], %[eEfgG] */ + char ox[2]; /* space for 0x hex-prefix */ + char *const ebuf = buf + sizeof(buf); + + /* + * Choose PADSIZE to trade efficiency vs. size. If larger printf + * fields occur frequently, increase PADSIZE and make the initializers + * below longer. + */ +#define PADSIZE 16 /* pad chunk size */ + static const char blanks[PADSIZE] = + {' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' '}; + static const char zeroes[PADSIZE] = + {'0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0'}; + + /* + * BEWARE, these `goto error' on error, and PAD uses `n'. + */ +#define PRINT(ptr, len) { \ + iovp->iov_base = (ptr); \ + iovp->iov_len = (len); \ + uio.uio_resid += (len); \ + iovp++; \ + if (++uio.uio_iovcnt >= NIOV) { \ + if (BSD__sprint(fp, &uio)) \ + goto error; \ + iovp = iov; \ + } \ +} +#define PAD(howmany, with) { \ + if ((n = (howmany)) > 0) { \ + while (n > PADSIZE) { \ + PRINT(with, PADSIZE); \ + n -= PADSIZE; \ + } \ + PRINT(with, n); \ + } \ +} +#define FLUSH() { \ + if (uio.uio_resid && BSD__sprint(fp, &uio)) \ + goto error; \ + uio.uio_iovcnt = 0; \ + iovp = iov; \ +} + + /* + * To extend shorts properly, we need both signed and unsigned + * argument extraction methods. + */ +#define SARG() \ + (flags&LONGINT ? va_arg(ap, long) : \ + flags&SHORTINT ? 
(long)(short)va_arg(ap, int) : \ + (long)va_arg(ap, int)) +#define UARG() \ + (flags&LONGINT ? va_arg(ap, u_long) : \ + flags&SHORTINT ? (u_long)(u_short)va_arg(ap, int) : \ + (u_long)va_arg(ap, u_int)) + + /* optimise fprintf(stderr) (and other unbuffered Unix files) */ + if ((fp->_flags & (__SNBF|__SWR|__SRW)) == (__SNBF|__SWR) && + fp->_file >= 0) + return (BSD__sbprintf(fp, fmt0, ap)); + + fmt = fmt0; + uio.uio_iov = iovp = iov; + uio.uio_resid = 0; + uio.uio_iovcnt = 0; + ret = 0; + xdigs = 0; + + /* + * Scan the format for conversions (`%' character). + */ + for (;;) { + for (cp = fmt; (ch = *fmt) != '\0' && ch != '%'; fmt++) + /* void */; + if ((n = fmt - cp) != 0) { + PRINT(cp, n); + ret += n; + } + if (ch == '\0') + goto done; + fmt++; /* skip over '%' */ + + flags = 0; + dprec = 0; + width = 0; + prec = -1; + sign = '\0'; + +rflag: ch = *fmt++; +reswitch: switch (ch) { + case ' ': + /* + * ``If the space and + flags both appear, the space + * flag will be ignored.'' + * -- ANSI X3J11 + */ + if (!sign) + sign = ' '; + goto rflag; + case '#': + flags |= ALT; + goto rflag; + case '*': + /* + * ``A negative field width argument is taken as a + * - flag followed by a positive field width.'' + * -- ANSI X3J11 + * They don't exclude field widths read from args. + */ + if ((width = va_arg(ap, int)) >= 0) + goto rflag; + width = -width; + /* FALLTHROUGH */ + case '-': + flags |= LADJUST; + goto rflag; + case '+': + sign = '+'; + goto rflag; + case '.': + if ((ch = *fmt++) == '*') { + n = va_arg(ap, int); + prec = n < 0 ? -1 : n; + goto rflag; + } + n = 0; + while (is_digit(ch)) { + n = 10 * n + to_digit(ch); + ch = *fmt++; + } + prec = n < 0 ? 
-1 : n; + goto reswitch; + case '0': + /* + * ``Note that 0 is taken as a flag, not as the + * beginning of a field width.'' + * -- ANSI X3J11 + */ + flags |= ZEROPAD; + goto rflag; + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + n = 0; + do { + n = 10 * n + to_digit(ch); + ch = *fmt++; + } while (is_digit(ch)); + width = n; + goto reswitch; +#ifdef FLOATING_POINT + case 'L': + flags |= LONGDBL; + goto rflag; +#endif + case 'h': + flags |= SHORTINT; + goto rflag; +#if SIZEOF_PTRDIFF_T == SIZEOF_LONG + case 't': +#endif + case 'l': + flags |= LONGINT; + goto rflag; +#ifdef _HAVE_SANE_QUAD_ +#if SIZEOF_PTRDIFF_T == SIZEOF_LONG_LONG + case 't': +#endif + case 'q': + flags |= QUADINT; + goto rflag; +#endif /* _HAVE_SANE_QUAD_ */ + case 'c': + cp = buf; + *buf = (char)va_arg(ap, int); + size = 1; + sign = '\0'; + break; + case 'D': + flags |= LONGINT; + /*FALLTHROUGH*/ + case 'd': + case 'i': +#ifdef _HAVE_SANE_QUAD_ + if (flags & QUADINT) { + uqval = va_arg(ap, quad_t); + if ((quad_t)uqval < 0) { + uqval = -uqval; + sign = '-'; + } + } else +#endif /* _HAVE_SANE_QUAD_ */ + { + ulval = SARG(); + if ((long)ulval < 0) { + ulval = -ulval; + sign = '-'; + } + } + base = 10; + goto number; +#ifdef FLOATING_POINT + case 'e': /* anomalous precision */ + case 'E': + if (prec != 0) + flags |= ALT; + prec = (prec == -1) ? 
+ DEFPREC + 1 : prec + 1; + /* FALLTHROUGH */ + goto fp_begin; + case 'f': /* always print trailing zeroes */ + if (prec != 0) + flags |= ALT; + case 'g': + case 'G': + if (prec == -1) + prec = DEFPREC; +fp_begin: _double = va_arg(ap, double); + /* do this before tricky precision changes */ + if (isinf(_double)) { + if (_double < 0) + sign = '-'; + cp = "Inf"; + size = 3; + break; + } + if (isnan(_double)) { + cp = "NaN"; + size = 3; + break; + } + flags |= FPT; + cp = cvt(_double, prec, flags, &softsign, + &expt, ch, &ndig, buf); + if (ch == 'g' || ch == 'G') { + if (expt <= -4 || (expt > prec && expt > 1)) + ch = (ch == 'g') ? 'e' : 'E'; + else + ch = 'g'; + } + if (ch <= 'e') { /* 'e' or 'E' fmt */ + --expt; + expsize = exponent(expstr, expt, ch); + size = expsize + ndig; + if (ndig > 1 || flags & ALT) + ++size; + } else if (ch == 'f') { /* f fmt */ + if (expt > 0) { + size = expt; + if (prec || flags & ALT) + size += prec + 1; + } else if (!prec) { /* "0" */ + size = 1; + } else /* "0.X" */ + size = prec + 2; + } else if (expt >= ndig) { /* fixed g fmt */ + size = expt; + if (flags & ALT) + ++size; + } else + size = ndig + (expt > 0 ? + 1 : 2 - expt); + + if (softsign) + sign = '-'; + break; +#endif /* FLOATING_POINT */ + case 'n': +#ifdef _HAVE_SANE_QUAD_ + if (flags & QUADINT) + *va_arg(ap, quad_t *) = ret; + else if (flags & LONGINT) +#else /* _HAVE_SANE_QUAD_ */ + if (flags & LONGINT) +#endif /* _HAVE_SANE_QUAD_ */ + *va_arg(ap, long *) = ret; + else if (flags & SHORTINT) + *va_arg(ap, short *) = ret; + else + *va_arg(ap, int *) = ret; + continue; /* no output */ + case 'O': + flags |= LONGINT; + /*FALLTHROUGH*/ + case 'o': +#ifdef _HAVE_SANE_QUAD_ + if (flags & QUADINT) + uqval = va_arg(ap, u_quad_t); + else +#endif /* _HAVE_SANE_QUAD_ */ + ulval = UARG(); + base = 8; + goto nosign; + case 'p': + /* + * ``The argument shall be a pointer to void. 
The + * value of the pointer is converted to a sequence + * of printable characters, in an implementation- + * defined manner.'' + * -- ANSI X3J11 + */ + prec = sizeof(void*)*CHAR_BIT/4; +#ifdef _HAVE_LLP64_ + uqval = (u_long)va_arg(ap, void *); + flags = (flags) | QUADINT | HEXPREFIX; +#else + ulval = (u_long)va_arg(ap, void *); +#ifdef _HAVE_SANE_QUAD_ + flags = (flags & ~QUADINT) | HEXPREFIX; +#else /* _HAVE_SANE_QUAD_ */ + flags = (flags) | HEXPREFIX; +#endif /* _HAVE_SANE_QUAD_ */ +#endif + base = 16; + xdigs = "0123456789abcdef"; + ch = 'x'; + goto nosign; + case 's': + if ((cp = va_arg(ap, char *)) == NULL) + cp = "(null)"; + if (prec >= 0) { + /* + * can't use strlen; can only look for the + * NUL in the first `prec' characters, and + * strlen() will go further. + */ + const char *p = (char *)memchr(cp, 0, prec); + + if (p != NULL) { + size = p - cp; + if (size > prec) + size = prec; + } else + size = prec; + } else + size = strlen(cp); + sign = '\0'; + break; + case 'U': + flags |= LONGINT; + /*FALLTHROUGH*/ + case 'u': +#ifdef _HAVE_SANE_QUAD_ + if (flags & QUADINT) + uqval = va_arg(ap, u_quad_t); + else +#endif /* _HAVE_SANE_QUAD_ */ + ulval = UARG(); + base = 10; + goto nosign; + case 'X': + xdigs = "0123456789ABCDEF"; + goto hex; + case 'x': + xdigs = "0123456789abcdef"; +hex: +#ifdef _HAVE_SANE_QUAD_ + if (flags & QUADINT) + uqval = va_arg(ap, u_quad_t); + else +#endif /* _HAVE_SANE_QUAD_ */ + ulval = UARG(); + base = 16; + /* leading 0x/X only if non-zero */ + if (flags & ALT && +#ifdef _HAVE_SANE_QUAD_ + (flags & QUADINT ? uqval != 0 : ulval != 0) +#else /* _HAVE_SANE_QUAD_ */ + ulval != 0 +#endif /* _HAVE_SANE_QUAD_ */ + ) + flags |= HEXPREFIX; + + /* unsigned conversions */ +nosign: sign = '\0'; + /* + * ``... diouXx conversions ... 
if a precision is + * specified, the 0 flag will be ignored.'' + * -- ANSI X3J11 + */ +number: if ((dprec = prec) >= 0) + flags &= ~ZEROPAD; + + /* + * ``The result of converting a zero value with an + * explicit precision of zero is no characters.'' + * -- ANSI X3J11 + */ +#ifdef _HAVE_SANE_QUAD_ + if (flags & QUADINT) { + if (uqval != 0 || prec != 0) + cp = BSD__uqtoa(uqval, ebuf, base, + flags & ALT, xdigs); + } else +#else /* _HAVE_SANE_QUAD_ */ +#endif /* _HAVE_SANE_QUAD_ */ + { + if (ulval != 0 || prec != 0) + cp = BSD__ultoa(ulval, ebuf, base, + flags & ALT, xdigs); + } + size = ebuf - cp; + break; + default: /* "%?" prints ?, unless ? is NUL */ + if (ch == '\0') + goto done; + /* pretend it was %c with argument ch */ + cp = buf; + *buf = ch; + size = 1; + sign = '\0'; + break; + } + + /* + * All reasonable formats wind up here. At this point, `cp' + * points to a string which (if not flags&LADJUST) should be + * padded out to `width' places. If flags&ZEROPAD, it should + * first be prefixed by any sign or other prefix; otherwise, + * it should be blank padded before the prefix is emitted. + * After any left-hand padding and prefixing, emit zeroes + * required by a decimal [diouxX] precision, then print the + * string proper, then emit zeroes required by any leftover + * floating precision; finally, if LADJUST, pad with blanks. + * + * Compute actual size, so we know how much to pad. + * fieldsz excludes decimal prec; realsz includes it. + */ + fieldsz = size; + if (sign) + fieldsz++; + else if (flags & HEXPREFIX) + fieldsz += 2; + realsz = dprec > fieldsz ? 
dprec : fieldsz; + + /* right-adjusting blank padding */ + if ((flags & (LADJUST|ZEROPAD)) == 0) + PAD(width - realsz, blanks); + + /* prefix */ + if (sign) { + PRINT(&sign, 1); + } else if (flags & HEXPREFIX) { + ox[0] = '0'; + ox[1] = ch; + PRINT(ox, 2); + } + + /* right-adjusting zero padding */ + if ((flags & (LADJUST|ZEROPAD)) == ZEROPAD) + PAD(width - realsz, zeroes); + + /* leading zeroes from decimal precision */ + PAD(dprec - fieldsz, zeroes); + + /* the string or number proper */ +#ifdef FLOATING_POINT + if ((flags & FPT) == 0) { + PRINT(cp, size); + } else { /* glue together f_p fragments */ + if (ch >= 'f') { /* 'f' or 'g' */ + if (_double == 0) { + /* kludge for __dtoa irregularity */ + if (ndig <= 1 && + (flags & ALT) == 0) { + PRINT("0", 1); + } else { + PRINT("0.", 2); + PAD(ndig - 1, zeroes); + } + } else if (expt == 0 && ndig == 0 && (flags & ALT) == 0) { + PRINT("0", 1); + } else if (expt <= 0) { + PRINT("0.", 2); + PAD(-expt, zeroes); + PRINT(cp, ndig); + } else if (expt >= ndig) { + PRINT(cp, ndig); + PAD(expt - ndig, zeroes); + if (flags & ALT) + PRINT(".", 1); + } else { + PRINT(cp, expt); + cp += expt; + PRINT(".", 1); + PRINT(cp, ndig-expt); + } + } else { /* 'e' or 'E' */ + if (ndig > 1 || flags & ALT) { + ox[0] = *cp++; + ox[1] = '.'; + PRINT(ox, 2); + if (_double || flags & ALT == 0) { + PRINT(cp, ndig-1); + } else /* 0.[0..] */ + /* __dtoa irregularity */ + PAD(ndig - 1, zeroes); + } else /* XeYYY */ + PRINT(cp, 1); + PRINT(expstr, expsize); + } + } +#else + PRINT(cp, size); +#endif + /* left-adjusting padding (always blank) */ + if (flags & LADJUST) + PAD(width - realsz, blanks); + + /* finally, adjust ret */ + ret += width > realsz ? width : realsz; + + FLUSH(); /* copy out the I/O vectors */ + } +done: + FLUSH(); +error: + return (__sferror(fp) ? 
EOF : ret); + /* NOTREACHED */ +} + +#ifdef FLOATING_POINT + +extern char *BSD__dtoa __P((double, int, int, int *, int *, char **)); + +static char * +cvt(value, ndigits, flags, sign, decpt, ch, length, buf) + double value; + int ndigits, flags, *decpt, ch, *length; + char *sign, *buf; +{ + int mode, dsgn; + char *digits, *bp, *rve; + + if (ch == 'f') + mode = 3; + else { + mode = 2; + } + if (value < 0) { + value = -value; + *sign = '-'; + } else if (value == 0.0 && 1.0/value < 0) { + *sign = '-'; + } else { + *sign = '\000'; + } + digits = BSD__dtoa(value, mode, ndigits, decpt, &dsgn, &rve); + memcpy(buf, digits, rve - digits); + xfree(digits); + rve = buf + (rve - digits); + digits = buf; + if (flags & ALT) { /* Print trailing zeros */ + bp = digits + ndigits; + if (ch == 'f') { + if (*digits == '0' && value) + *decpt = -ndigits + 1; + bp += *decpt; + } + if (value == 0) /* kludge for __dtoa irregularity */ + rve = bp; + while (rve < bp) + *rve++ = '0'; + } + *length = rve - digits; + return (digits); +} + +static int +exponent(p0, exp, fmtch) + char *p0; + int exp, fmtch; +{ + register char *p, *t; + char expbuf[MAXEXP]; + + p = p0; + *p++ = fmtch; + if (exp < 0) { + exp = -exp; + *p++ = '-'; + } + else + *p++ = '+'; + t = expbuf + MAXEXP; + if (exp > 9) { + do { + *--t = to_char(exp % 10); + } while ((exp /= 10) > 9); + *--t = to_char(exp); + for (; t < expbuf + MAXEXP; *p++ = *t++); + } + else { + *p++ = '0'; + *p++ = to_char(exp); + } + return (p - p0); +} +#endif /* FLOATING_POINT */ + +#ifndef HAVE_VSNPRINTF +int +vsnprintf(str, n, fmt, ap) + char *str; + size_t n; + const char *fmt; + _BSD_VA_LIST_ ap; +{ + int ret; + FILE f; + + if ((int)n < 1) + return (EOF); + f._flags = __SWR | __SSTR; + f._bf._base = f._p = (unsigned char *)str; + f._bf._size = f._w = n - 1; + f.vwrite = BSD__sfvwrite; + ret = BSD_vfprintf(&f, fmt, ap); + *f._p = 0; + return (ret); +} +#endif + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)snprintf.c 8.1 
/*
 * Format into the buffer str of size n.
 *
 * A local FILE is set up as a write-only string stream over str with
 * room for n - 1 bytes, BSD_vfprintf() does the formatting through
 * BSD__sfvwrite(), and the output is NUL-terminated afterwards.
 *
 * Returns EOF when (int)n < 1, otherwise whatever BSD_vfprintf()
 * returns.
 * NOTE(review): returning EOF for n == 0 differs from C99 snprintf,
 * which reports the length that would have been written -- confirm
 * callers do not rely on the C99 behaviour.
 */
int
#if defined(HAVE_STDARG_PROTOTYPES)
snprintf(char *str, size_t n, char const *fmt, ...)
#else
snprintf(str, n, fmt, va_alist)
char *str, *fmt;
size_t n;
va_dcl
#endif
{
    int ret;
    va_list ap;
    FILE f;

    /* also rejects sizes that do not fit in a positive int */
    if ((int)n < 1)
        return (EOF);

#if defined(HAVE_STDARG_PROTOTYPES)
    va_start(ap, fmt);
#else
    va_start(ap);
#endif
    f._flags = __SWR | __SSTR;                  /* writable string stream */
    f._bf._base = f._p = (unsigned char *)str;
    f._bf._size = f._w = n - 1;                 /* keep one byte for the NUL */
    f.vwrite = BSD__sfvwrite;
    ret = BSD_vfprintf(&f, fmt, ap);
    *f._p = 0;                                  /* terminate at the last byte written */
    va_end(ap);
    return (ret);
}
+ +module RbConfig + RUBY_VERSION == "#{version}" or + raise "ruby lib version (#{version}) doesn't match executable version (\#{RUBY_VERSION})" + +] + +v_fast = [] +v_others = [] +vars = {} +has_version = false +continued_name = nil +continued_line = nil +File.foreach "config.status" do |line| + next if /^#/ =~ line + name = nil + case line + when /^s([%,])@(\w+)@\1(?:\|\#_!!_\#\|)?(.*)\1/ + name = $2 + val = $3.gsub(/\\(?=,)/, '') + when /^S\["(\w+)"\]\s*=\s*"(.*)"\s*(\\)?$/ + name = $1 + val = $2 + if $3 + continued_line = [] + continued_line << val + continued_name = name + next + end + when /^"(.*)"\s*(\\)?$/ + if continued_line + continued_line << $1 + next if $2 + val = continued_line.join("") + name = continued_name + continued_line = nil + end + when /^(?:ac_given_)?INSTALL=(.*)/ + v_fast << " CONFIG[\"INSTALL\"] = " + $1 + "\n" + end + + if name + next if /^(?:ac_.*|configure_input|(?:top_)?srcdir|\w+OBJS)$/ =~ name + next if /^\$\(ac_\w+\)$/ =~ val + next if /^\$\{ac_\w+\}$/ =~ val + next if /^\$ac_\w+$/ =~ val + next if $install_name and /^RUBY_INSTALL_NAME$/ =~ name + next if $so_name and /^RUBY_SO_NAME$/ =~ name + next if /^(?:X|(?:MINI|RUN)RUBY$)/ =~ name + if /^program_transform_name$/ =~ name and /^s(\\?.)(.*)\1$/ =~ val + next if $install_name + sep = %r"#{Regexp.quote($1)}" + ptn = $2.sub(/\$\$/, '$').split(sep, 2) + name = "ruby_install_name" + val = "ruby".sub(/#{ptn[0]}/, ptn[1]) + end + val.gsub!(/ +(?!-)/, "=") if name == "configure_args" && /mswin32/ =~ RUBY_PLATFORM + val = val.gsub(/\$(?:\$|\{?(\w+)\}?)/) {$1 ? "$(#{$1})" : $&}.dump + if /^prefix$/ =~ name + val = "(TOPDIR || DESTDIR + #{val})" + end + v = " CONFIG[\"#{name}\"] #{vars[name] ? 
'<< "\n"' : '='} #{val}\n" + vars[name] = true + if fast[name] + v_fast << v + else + v_others << v + end + has_version = true if name == "MAJOR" + end +# break if /^CEOF/ +end + +drive = File::PATH_SEPARATOR == ';' + +prefix = '/lib/ruby/' + RUBY_VERSION + '/' + RUBY_PLATFORM +print " TOPDIR = File.dirname(__FILE__).chomp!(#{prefix.dump})\n" +print " DESTDIR = ", (drive ? "TOPDIR && TOPDIR[/\\A[a-z]:/i] || " : ""), "'' unless defined? DESTDIR\n" +print " CONFIG = {}\n" +print " CONFIG[\"DESTDIR\"] = DESTDIR\n" + +unless has_version + RUBY_VERSION.scan(/(\d+)\.(\d+)\.(\d+)/) { + print " CONFIG[\"MAJOR\"] = \"" + $1 + "\"\n" + print " CONFIG[\"MINOR\"] = \"" + $2 + "\"\n" + print " CONFIG[\"TEENY\"] = \"" + $3 + "\"\n" + } + patchlevel = IO.foreach(File.join(srcdir, "version.h")) {|l| + m = /^\s*#\s*define\s+RUBY_PATCHLEVEL\s+(\d+)/.match(l) and break m[1] + } + print " CONFIG[\"PATCHLEVEL\"] = \"#{patchlevel}\"\n" +end + +dest = drive ? /= \"(?!\$[\(\{])(?:[a-z]:)?/i : /= \"(?!\$[\(\{])/ +v_others.collect! do |x| + if /^\s*CONFIG\["(?!abs_|old)[a-z]+(?:_prefix|dir)"\]/ === x + x.sub(dest, '= "$(DESTDIR)') + else + x + end +end + +if $install_name + v_fast << " CONFIG[\"ruby_install_name\"] = \"" + $install_name + "\"\n" + v_fast << " CONFIG[\"RUBY_INSTALL_NAME\"] = \"" + $install_name + "\"\n" +end +if $so_name + v_fast << " CONFIG[\"RUBY_SO_NAME\"] = \"" + $so_name + "\"\n" +end + +print(*v_fast) +print(*v_others) +print <flags & NODE_TYPEMASK)>>NODE_TYPESHIFT)) +#define nd_set_type(n,t) \ + RNODE(n)->flags=((RNODE(n)->flags&~NODE_TYPEMASK)|(((t)<flags>>NODE_LSHIFT)&NODE_LMASK)) +#define nd_set_line(n,l) \ + RNODE(n)->flags=((RNODE(n)->flags&~(-1<> 8) & 0x0F) +#define NOEX_WITH(n, s) ((s << 8) | (n) | (ruby_running ? 
0 : NOEX_BASIC)) +#define NOEX_WITH_SAFE(n) NOEX_WITH(n, rb_safe_level()) + +#define CALL_PUBLIC 0 +#define CALL_FCALL 1 +#define CALL_VCALL 2 +#define CALL_SUPER 3 + +#define RUBY_VM_METHOD_NODE NODE_METHOD + +VALUE rb_parser_new(void); +VALUE rb_parser_end_seen_p(VALUE); +VALUE rb_parser_encoding(VALUE); + +NODE *rb_parser_compile_cstr(volatile VALUE, const char*, const char*, int, int); +NODE *rb_parser_compile_string(volatile VALUE, const char*, VALUE, int); +NODE *rb_parser_compile_file(volatile VALUE, const char*, VALUE, int); + +NODE *rb_compile_cstr(const char*, const char*, int, int); +NODE *rb_compile_string(const char*, VALUE, int); +NODE *rb_compile_file(const char*, VALUE, int); + +void rb_add_method(VALUE, ID, NODE *, int); +NODE *rb_node_newnode(enum node_type,VALUE,VALUE,VALUE); + +NODE* rb_method_node(VALUE klass, ID id); +int rb_node_arity(NODE* node); + +struct global_entry *rb_global_entry(ID); +VALUE rb_gvar_get(struct global_entry *); +VALUE rb_gvar_set(struct global_entry *, VALUE); +VALUE rb_gvar_defined(struct global_entry *); + +#if defined(__cplusplus) +#if 0 +{ /* satisfy cc-mode */ +#endif +} /* extern "C" { */ +#endif + +#endif /* RUBY_NODE_H */ diff --git a/numeric.c b/numeric.c new file mode 100644 index 0000000..777d6c2 --- /dev/null +++ b/numeric.c @@ -0,0 +1,3243 @@ +/********************************************************************** + + numeric.c - + + $Author: yugui $ + created at: Fri Aug 13 18:33:09 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/encoding.h" +#include +#include +#include + +#if defined(__FreeBSD__) && __FreeBSD__ < 4 +#include +#endif + +#ifdef HAVE_FLOAT_H +#include +#endif + +#ifdef HAVE_IEEEFP_H +#include +#endif + +/* use IEEE 64bit values if not defined */ +#ifndef FLT_RADIX +#define FLT_RADIX 2 +#endif +#ifndef FLT_ROUNDS +#define FLT_ROUNDS 1 +#endif +#ifndef DBL_MIN +#define 
DBL_MIN 2.2250738585072014e-308 +#endif +#ifndef DBL_MAX +#define DBL_MAX 1.7976931348623157e+308 +#endif +#ifndef DBL_MIN_EXP +#define DBL_MIN_EXP (-1021) +#endif +#ifndef DBL_MAX_EXP +#define DBL_MAX_EXP 1024 +#endif +#ifndef DBL_MIN_10_EXP +#define DBL_MIN_10_EXP (-307) +#endif +#ifndef DBL_MAX_10_EXP +#define DBL_MAX_10_EXP 308 +#endif +#ifndef DBL_DIG +#define DBL_DIG 15 +#endif +#ifndef DBL_MANT_DIG +#define DBL_MANT_DIG 53 +#endif +#ifndef DBL_EPSILON +#define DBL_EPSILON 2.2204460492503131e-16 +#endif + +#ifndef HAVE_ROUND +double +round(double x) +{ + double f; + + if (x > 0.0) { + f = floor(x); + x = f + (x - f >= 0.5); + } + else if (x < 0.0) { + f = ceil(x); + x = f - (f - x >= 0.5); + } + return x; +} +#elif defined(__BEOS__) +/* appears to be a bug in the BeOS headers */ +double round(double x); +#endif + +static ID id_coerce, id_to_i, id_eq; + +VALUE rb_cNumeric; +VALUE rb_cFloat; +VALUE rb_cInteger; +VALUE rb_cFixnum; + +VALUE rb_eZeroDivError; +VALUE rb_eFloatDomainError; + +void +rb_num_zerodiv(void) +{ + rb_raise(rb_eZeroDivError, "divided by 0"); +} + + +/* + * call-seq: + * num.coerce(numeric) => array + * + * If aNumeric is the same type as num, returns an array + * containing aNumeric and num. Otherwise, returns an + * array with both aNumeric and num represented as + * Float objects. This coercion mechanism is used by + * Ruby to handle mixed-type numeric operations: it is intended to + * find a compatible common type between the two operands of the operator. 
+ * + * 1.coerce(2.5) #=> [2.5, 1.0] + * 1.2.coerce(3) #=> [3.0, 1.2] + * 1.coerce(2) #=> [2, 1] + */ + +static VALUE +num_coerce(VALUE x, VALUE y) +{ + if (CLASS_OF(x) == CLASS_OF(y)) + return rb_assoc_new(y, x); + x = rb_Float(x); + y = rb_Float(y); + return rb_assoc_new(y, x); +} + +static VALUE +coerce_body(VALUE *x) +{ + return rb_funcall(x[1], id_coerce, 1, x[0]); +} + +static VALUE +coerce_rescue(VALUE *x) +{ + volatile VALUE v = rb_inspect(x[1]); + + rb_raise(rb_eTypeError, "%s can't be coerced into %s", + rb_special_const_p(x[1])? + RSTRING_PTR(v): + rb_obj_classname(x[1]), + rb_obj_classname(x[0])); + return Qnil; /* dummy */ +} + +static int +do_coerce(VALUE *x, VALUE *y, int err) +{ + VALUE ary; + VALUE a[2]; + + a[0] = *x; a[1] = *y; + + ary = rb_rescue(coerce_body, (VALUE)a, err?coerce_rescue:0, (VALUE)a); + if (TYPE(ary) != T_ARRAY || RARRAY_LEN(ary) != 2) { + if (err) { + rb_raise(rb_eTypeError, "coerce must return [x, y]"); + } + return Qfalse; + } + + *x = RARRAY_PTR(ary)[0]; + *y = RARRAY_PTR(ary)[1]; + return Qtrue; +} + +VALUE +rb_num_coerce_bin(VALUE x, VALUE y, ID func) +{ + do_coerce(&x, &y, Qtrue); + return rb_funcall(x, func, 1, y); +} + +VALUE +rb_num_coerce_cmp(VALUE x, VALUE y, ID func) +{ + if (do_coerce(&x, &y, Qfalse)) + return rb_funcall(x, func, 1, y); + return Qnil; +} + +VALUE +rb_num_coerce_relop(VALUE x, VALUE y, ID func) +{ + VALUE c, x0 = x, y0 = y; + + if (!do_coerce(&x, &y, Qfalse) || + NIL_P(c = rb_funcall(x, func, 1, y))) { + rb_cmperr(x0, y0); + return Qnil; /* not reached */ + } + return c; +} + +/* + * Trap attempts to add methods to Numeric objects. 
Always + * raises a TypeError + */ + +static VALUE +num_sadded(VALUE x, VALUE name) +{ + ID mid = rb_to_id(name); + /* ruby_frame = ruby_frame->prev; */ /* pop frame for "singleton_method_added" */ + /* Numerics should be values; singleton_methods should not be added to them */ + rb_remove_method_id(rb_singleton_class(x), mid); + rb_raise(rb_eTypeError, + "can't define singleton method \"%s\" for %s", + rb_id2name(mid), + rb_obj_classname(x)); + return Qnil; /* not reached */ +} + +/* :nodoc: */ +static VALUE +num_init_copy(VALUE x, VALUE y) +{ + /* Numerics are immutable values, which should not be copied */ + rb_raise(rb_eTypeError, "can't copy %s", rb_obj_classname(x)); + return Qnil; /* not reached */ +} + +/* + * call-seq: + * +num => num + * + * Unary Plus---Returns the receiver's value. + */ + +static VALUE +num_uplus(VALUE num) +{ + return num; +} + +/* + * call-seq: + * -num => numeric + * + * Unary Minus---Returns the receiver's value, negated. + */ + +static VALUE +num_uminus(VALUE num) +{ + VALUE zero; + + zero = INT2FIX(0); + do_coerce(&zero, &num, Qtrue); + + return rb_funcall(zero, '-', 1, num); +} + +/* + * call-seq: + * num.quo(numeric) => result + * + * Returns most exact division (rational for integers, float for floats). + */ + +static VALUE +num_quo(VALUE x, VALUE y) +{ + return rb_funcall(rb_rational_raw1(x), '/', 1, y); +} + + +/* + * call-seq: + * num.fdiv(numeric) => float + * + * Returns float division. + */ + +static VALUE +num_fdiv(VALUE x, VALUE y) +{ + return rb_funcall(rb_Float(x), '/', 1, y); +} + + +static VALUE num_floor(VALUE num); + +/* + * call-seq: + * num.div(numeric) => integer + * + * Uses / to perform division, then converts the result to + * an integer. Numeric does not define the / + * operator; this is left to subclasses. 
+ */ + +static VALUE +num_div(VALUE x, VALUE y) +{ + if (rb_equal(INT2FIX(0), y)) rb_num_zerodiv(); + return num_floor(rb_funcall(x, '/', 1, y)); +} + + +/* + * call-seq: + * num.divmod( aNumeric ) -> anArray + * + * Returns an array containing the quotient and modulus obtained by + * dividing num by aNumeric. If q, r = + * x.divmod(y), then + * + * q = floor(float(x)/float(y)) + * x = q*y + r + * + * The quotient is rounded toward -infinity, as shown in the following table: + * + * a | b | a.divmod(b) | a/b | a.modulo(b) | a.remainder(b) + * ------+-----+---------------+---------+-------------+--------------- + * 13 | 4 | 3, 1 | 3 | 1 | 1 + * ------+-----+---------------+---------+-------------+--------------- + * 13 | -4 | -4, -3 | -3 | -3 | 1 + * ------+-----+---------------+---------+-------------+--------------- + * -13 | 4 | -4, 3 | -4 | 3 | -1 + * ------+-----+---------------+---------+-------------+--------------- + * -13 | -4 | 3, -1 | 3 | -1 | -1 + * ------+-----+---------------+---------+-------------+--------------- + * 11.5 | 4 | 2, 3.5 | 2.875 | 3.5 | 3.5 + * ------+-----+---------------+---------+-------------+--------------- + * 11.5 | -4 | -3, -0.5 | -2.875 | -0.5 | 3.5 + * ------+-----+---------------+---------+-------------+--------------- + * -11.5 | 4 | -3, 0.5 | -2.875 | 0.5 | -3.5 + * ------+-----+---------------+---------+-------------+--------------- + * -11.5 | -4 | 2, -3.5 | 2.875 | -3.5 | -3.5 + * + * + * Examples + * + * 11.divmod(3) #=> [3, 2] + * 11.divmod(-3) #=> [-4, -1] + * 11.divmod(3.5) #=> [3, 0.5] + * (-11).divmod(3.5) #=> [-4, 3.0] + * (11.5).divmod(3.5) #=> [3, 1.0] + */ + +static VALUE +num_divmod(VALUE x, VALUE y) +{ + return rb_assoc_new(num_div(x, y), rb_funcall(x, '%', 1, y)); +} + +/* + * call-seq: + * num.modulo(numeric) => result + * + * Equivalent to + * num.divmod(aNumeric)[1]. 
+ */ + +static VALUE +num_modulo(VALUE x, VALUE y) +{ + return rb_funcall(x, '%', 1, y); +} + +/* + * call-seq: + * num.remainder(numeric) => result + * + * If num and numeric have different signs, returns + * mod-numeric; otherwise, returns mod. In + * both cases mod is the value + * num.modulo(numeric). The + * differences between remainder and modulo + * (%) are shown in the table under Numeric#divmod. + */ + +static VALUE +num_remainder(VALUE x, VALUE y) +{ + VALUE z = rb_funcall(x, '%', 1, y); + + if ((!rb_equal(z, INT2FIX(0))) && + ((RTEST(rb_funcall(x, '<', 1, INT2FIX(0))) && + RTEST(rb_funcall(y, '>', 1, INT2FIX(0)))) || + (RTEST(rb_funcall(x, '>', 1, INT2FIX(0))) && + RTEST(rb_funcall(y, '<', 1, INT2FIX(0)))))) { + return rb_funcall(z, '-', 1, y); + } + return z; +} + +/* + * call-seq: + * num.real? -> true or false + * + * Returns true if num is a Real + * (i.e. non Complex). + */ + +static VALUE +num_real_p(VALUE num) +{ + return Qtrue; +} + +/* + * call-seq: + * num.integer? -> true or false + * + * Returns true if num is an Integer + * (including Fixnum and Bignum). + */ + +static VALUE +num_int_p(VALUE num) +{ + return Qfalse; +} + +/* + * call-seq: + * num.abs => num or numeric + * + * Returns the absolute value of num. + * + * 12.abs #=> 12 + * (-34.56).abs #=> 34.56 + * -34.56.abs #=> 34.56 + */ + +static VALUE +num_abs(VALUE num) +{ + if (RTEST(rb_funcall(num, '<', 1, INT2FIX(0)))) { + return rb_funcall(num, rb_intern("-@"), 0); + } + return num; +} + + +/* + * call-seq: + * num.zero? => true or false + * + * Returns true if num has a zero value. + */ + +static VALUE +num_zero_p(VALUE num) +{ + if (rb_equal(num, INT2FIX(0))) { + return Qtrue; + } + return Qfalse; +} + + +/* + * call-seq: + * num.nonzero? => num or nil + * + * Returns num if num is not zero, nil + * otherwise. This behavior is useful when chaining comparisons: + * + * a = %w( z Bb bB bb BB a aA Aa AA A ) + * b = a.sort {|a,b| (a.downcase <=> b.downcase).nonzero? 
|| a <=> b } + * b #=> ["A", "a", "AA", "Aa", "aA", "BB", "Bb", "bB", "bb", "z"] + */ + +static VALUE +num_nonzero_p(VALUE num) +{ + if (RTEST(rb_funcall(num, rb_intern("zero?"), 0, 0))) { + return Qnil; + } + return num; +} + +/* + * call-seq: + * num.to_int => integer + * + * Invokes the child class's to_i method to convert + * num to an integer. + */ + +static VALUE +num_to_int(VALUE num) +{ + return rb_funcall(num, id_to_i, 0, 0); +} + + +/******************************************************************** + * + * Document-class: Float + * + * Float objects represent real numbers using the native + * architecture's double-precision floating point representation. + */ + +VALUE +rb_float_new(double d) +{ + NEWOBJ(flt, struct RFloat); + OBJSETUP(flt, rb_cFloat, T_FLOAT); + + flt->float_value = d; + return (VALUE)flt; +} + +/* + * call-seq: + * flt.to_s => string + * + * Returns a string containing a representation of self. As well as a + * fixed or exponential form of the number, the call may return + * ``NaN'', ``Infinity'', and + * ``-Infinity''. + */ + +static VALUE +flo_to_s(VALUE flt) +{ + char buf[32]; + double value = RFLOAT_VALUE(flt); + char *p, *e; + + if (isinf(value)) + return rb_usascii_str_new2(value < 0 ? "-Infinity" : "Infinity"); + else if(isnan(value)) + return rb_usascii_str_new2("NaN"); + + snprintf(buf, sizeof(buf), "%#.15g", value); /* ensure to print decimal point */ + if (!(e = strchr(buf, 'e'))) { + e = buf + strlen(buf); + } + if (!ISDIGIT(e[-1])) { /* reformat if ended with decimal point (ex 111111111111111.) 
*/ + snprintf(buf, sizeof(buf), "%#.14e", value); + if (!(e = strchr(buf, 'e'))) { + e = buf + strlen(buf); + } + } + p = e; + while (p[-1]=='0' && ISDIGIT(p[-2])) + p--; + memmove(p, e, strlen(e)+1); + return rb_usascii_str_new2(buf); +} + +/* + * MISSING: documentation + */ + +static VALUE +flo_coerce(VALUE x, VALUE y) +{ + return rb_assoc_new(rb_Float(y), x); +} + +/* + * call-seq: + * -float => float + * + * Returns float, negated. + */ + +static VALUE +flo_uminus(VALUE flt) +{ + return DBL2NUM(-RFLOAT_VALUE(flt)); +} + +/* + * call-seq: + * float + other => float + * + * Returns a new float which is the sum of float + * and other. + */ + +static VALUE +flo_plus(VALUE x, VALUE y) +{ + switch (TYPE(y)) { + case T_FIXNUM: + return DBL2NUM(RFLOAT_VALUE(x) + (double)FIX2LONG(y)); + case T_BIGNUM: + return DBL2NUM(RFLOAT_VALUE(x) + rb_big2dbl(y)); + case T_FLOAT: + return DBL2NUM(RFLOAT_VALUE(x) + RFLOAT_VALUE(y)); + default: + return rb_num_coerce_bin(x, y, '+'); + } +} + +/* + * call-seq: + * float + other => float + * + * Returns a new float which is the difference of float + * and other. + */ + +static VALUE +flo_minus(VALUE x, VALUE y) +{ + switch (TYPE(y)) { + case T_FIXNUM: + return DBL2NUM(RFLOAT_VALUE(x) - (double)FIX2LONG(y)); + case T_BIGNUM: + return DBL2NUM(RFLOAT_VALUE(x) - rb_big2dbl(y)); + case T_FLOAT: + return DBL2NUM(RFLOAT_VALUE(x) - RFLOAT_VALUE(y)); + default: + return rb_num_coerce_bin(x, y, '-'); + } +} + +/* + * call-seq: + * float * other => float + * + * Returns a new float which is the product of float + * and other. 
+ */ + +static VALUE +flo_mul(VALUE x, VALUE y) +{ + switch (TYPE(y)) { + case T_FIXNUM: + return DBL2NUM(RFLOAT_VALUE(x) * (double)FIX2LONG(y)); + case T_BIGNUM: + return DBL2NUM(RFLOAT_VALUE(x) * rb_big2dbl(y)); + case T_FLOAT: + return DBL2NUM(RFLOAT_VALUE(x) * RFLOAT_VALUE(y)); + default: + return rb_num_coerce_bin(x, y, '*'); + } +} + +/* + * call-seq: + * float / other => float + * + * Returns a new float which is the result of dividing + * float by other. + */ + +static VALUE +flo_div(VALUE x, VALUE y) +{ + long f_y; + double d; + + switch (TYPE(y)) { + case T_FIXNUM: + f_y = FIX2LONG(y); + return DBL2NUM(RFLOAT_VALUE(x) / (double)f_y); + case T_BIGNUM: + d = rb_big2dbl(y); + return DBL2NUM(RFLOAT_VALUE(x) / d); + case T_FLOAT: + return DBL2NUM(RFLOAT_VALUE(x) / RFLOAT_VALUE(y)); + default: + return rb_num_coerce_bin(x, y, '/'); + } +} + +static VALUE +flo_quo(VALUE x, VALUE y) +{ + return rb_funcall(x, '/', 1, y); +} + +static void +flodivmod(double x, double y, double *divp, double *modp) +{ + double div, mod; + +#ifdef HAVE_FMOD + mod = fmod(x, y); +#else + { + double z; + + modf(x/y, &z); + mod = x - z * y; + } +#endif + if (isinf(x) && !isinf(y) && !isnan(y)) + div = x; + else + div = (x - mod) / y; + if (y*mod < 0) { + mod += y; + div -= 1.0; + } + if (modp) *modp = mod; + if (divp) *divp = div; +} + + +/* + * call-seq: + * flt % other => float + * flt.modulo(other) => float + * + * Return the modulo after division of flt by other. 
+ * + * 6543.21.modulo(137) #=> 104.21 + * 6543.21.modulo(137.24) #=> 92.9299999999996 + */ + +static VALUE +flo_mod(VALUE x, VALUE y) +{ + double fy, mod; + + switch (TYPE(y)) { + case T_FIXNUM: + fy = (double)FIX2LONG(y); + break; + case T_BIGNUM: + fy = rb_big2dbl(y); + break; + case T_FLOAT: + fy = RFLOAT_VALUE(y); + break; + default: + return rb_num_coerce_bin(x, y, '%'); + } + flodivmod(RFLOAT_VALUE(x), fy, 0, &mod); + return DBL2NUM(mod); +} + +static VALUE +dbl2ival(double d) +{ + if (FIXABLE(d)) { + d = round(d); + return LONG2FIX((long)d); + } + else if (isnan(d) || isinf(d)) { + /* special case: cannot return integer value */ + return rb_float_new(d); + } + else { + return rb_dbl2big(d); + } +} + +/* + * call-seq: + * flt.divmod(numeric) => array + * + * See Numeric#divmod. + */ + +static VALUE +flo_divmod(VALUE x, VALUE y) +{ + double fy, div, mod; + volatile VALUE a, b; + + switch (TYPE(y)) { + case T_FIXNUM: + fy = (double)FIX2LONG(y); + break; + case T_BIGNUM: + fy = rb_big2dbl(y); + break; + case T_FLOAT: + fy = RFLOAT_VALUE(y); + break; + default: + return rb_num_coerce_bin(x, y, rb_intern("divmod")); + } + flodivmod(RFLOAT_VALUE(x), fy, &div, &mod); + a = dbl2ival(div); + b = DBL2NUM(mod); + return rb_assoc_new(a, b); +} + +/* + * call-seq: + * + * flt ** other => float + * + * Raises float the other power. + */ + +static VALUE +flo_pow(VALUE x, VALUE y) +{ + switch (TYPE(y)) { + case T_FIXNUM: + return DBL2NUM(pow(RFLOAT_VALUE(x), (double)FIX2LONG(y))); + case T_BIGNUM: + return DBL2NUM(pow(RFLOAT_VALUE(x), rb_big2dbl(y))); + case T_FLOAT: + return DBL2NUM(pow(RFLOAT_VALUE(x), RFLOAT_VALUE(y))); + default: + return rb_num_coerce_bin(x, y, rb_intern("**")); + } +} + +/* + * call-seq: + * num.eql?(numeric) => true or false + * + * Returns true if num and numeric are the + * same type and have equal values. 
+ * + * 1 == 1.0 #=> true + * 1.eql?(1.0) #=> false + * (1.0).eql?(1.0) #=> true + */ + +static VALUE +num_eql(VALUE x, VALUE y) +{ + if (TYPE(x) != TYPE(y)) return Qfalse; + + return rb_equal(x, y); +} + +/* + * call-seq: + * num <=> other -> 0 or nil + * + * Returns zero if num equals other, nil + * otherwise. + */ + +static VALUE +num_cmp(VALUE x, VALUE y) +{ + if (x == y) return INT2FIX(0); + return Qnil; +} + +static VALUE +num_equal(VALUE x, VALUE y) +{ + if (x == y) return Qtrue; + return rb_funcall(y, id_eq, 1, x); +} + +/* + * call-seq: + * flt == obj => true or false + * + * Returns true only if obj has the same value + * as flt. Contrast this with Float#eql?, which + * requires obj to be a Float. + * + * 1.0 == 1 #=> true + * + */ + +static VALUE +flo_eq(VALUE x, VALUE y) +{ + volatile double a, b; + + switch (TYPE(y)) { + case T_FIXNUM: + b = FIX2LONG(y); + break; + case T_BIGNUM: + b = rb_big2dbl(y); + break; + case T_FLOAT: + b = RFLOAT_VALUE(y); + if (isnan(b)) return Qfalse; + break; + default: + return num_equal(x, y); + } + a = RFLOAT_VALUE(x); + if (isnan(a)) return Qfalse; + return (a == b)?Qtrue:Qfalse; +} + +/* + * call-seq: + * flt.hash => integer + * + * Returns a hash code for this float. + */ + +static VALUE +flo_hash(VALUE num) +{ + double d; + int hash; + + d = RFLOAT_VALUE(num); + /* normalize -0.0 to 0.0 */ + if (d == 0.0) d = 0.0; + hash = rb_memhash(&d, sizeof(d)); + return INT2FIX(hash); +} + +VALUE +rb_dbl_cmp(double a, double b) +{ + if (isnan(a) || isnan(b)) return Qnil; + if (a == b) return INT2FIX(0); + if (a > b) return INT2FIX(1); + if (a < b) return INT2FIX(-1); + return Qnil; +} + +/* + * call-seq: + * flt <=> numeric => -1, 0, +1 + * + * Returns -1, 0, or +1 depending on whether flt is less than, + * equal to, or greater than numeric. This is the basis for the + * tests in Comparable. 
+ */ + +static VALUE +flo_cmp(VALUE x, VALUE y) +{ + double a, b; + + a = RFLOAT_VALUE(x); + if (isnan(a)) return Qnil; + switch (TYPE(y)) { + case T_FIXNUM: + b = (double)FIX2LONG(y); + break; + + case T_BIGNUM: + if (isinf(a)) { + if (a > 0.0) return INT2FIX(1); + else return INT2FIX(-1); + } + b = rb_big2dbl(y); + break; + + case T_FLOAT: + b = RFLOAT_VALUE(y); + break; + + default: + if (isinf(a) && (!rb_respond_to(y, rb_intern("infinite?")) || + !RTEST(rb_funcall(y, rb_intern("infinite?"), 0, 0)))) { + if (a > 0.0) return INT2FIX(1); + return INT2FIX(-1); + } + return rb_num_coerce_cmp(x, y, rb_intern("<=>")); + } + return rb_dbl_cmp(a, b); +} + +/* + * call-seq: + * flt > other => true or false + * + * true if flt is greater than other. + */ + +static VALUE +flo_gt(VALUE x, VALUE y) +{ + double a, b; + + a = RFLOAT_VALUE(x); + switch (TYPE(y)) { + case T_FIXNUM: + b = (double)FIX2LONG(y); + break; + + case T_BIGNUM: + b = rb_big2dbl(y); + break; + + case T_FLOAT: + b = RFLOAT_VALUE(y); + if (isnan(b)) return Qfalse; + break; + + default: + return rb_num_coerce_relop(x, y, '>'); + } + if (isnan(a)) return Qfalse; + return (a > b)?Qtrue:Qfalse; +} + +/* + * call-seq: + * flt >= other => true or false + * + * true if flt is greater than + * or equal to other. + */ + +static VALUE +flo_ge(VALUE x, VALUE y) +{ + double a, b; + + a = RFLOAT_VALUE(x); + switch (TYPE(y)) { + case T_FIXNUM: + b = (double)FIX2LONG(y); + break; + + case T_BIGNUM: + b = rb_big2dbl(y); + break; + + case T_FLOAT: + b = RFLOAT_VALUE(y); + if (isnan(b)) return Qfalse; + break; + + default: + return rb_num_coerce_relop(x, y, rb_intern(">=")); + } + if (isnan(a)) return Qfalse; + return (a >= b)?Qtrue:Qfalse; +} + +/* + * call-seq: + * flt < other => true or false + * + * true if flt is less than other. 
+ */ + +static VALUE +flo_lt(VALUE x, VALUE y) +{ + double a, b; + + a = RFLOAT_VALUE(x); + switch (TYPE(y)) { + case T_FIXNUM: + b = (double)FIX2LONG(y); + break; + + case T_BIGNUM: + b = rb_big2dbl(y); + break; + + case T_FLOAT: + b = RFLOAT_VALUE(y); + if (isnan(b)) return Qfalse; + break; + + default: + return rb_num_coerce_relop(x, y, '<'); + } + if (isnan(a)) return Qfalse; + return (a < b)?Qtrue:Qfalse; +} + +/* + * call-seq: + * flt <= other => true or false + * + * true if flt is less than + * or equal to other. + */ + +static VALUE +flo_le(VALUE x, VALUE y) +{ + double a, b; + + a = RFLOAT_VALUE(x); + switch (TYPE(y)) { + case T_FIXNUM: + b = (double)FIX2LONG(y); + break; + + case T_BIGNUM: + b = rb_big2dbl(y); + break; + + case T_FLOAT: + b = RFLOAT_VALUE(y); + if (isnan(b)) return Qfalse; + break; + + default: + return rb_num_coerce_relop(x, y, rb_intern("<=")); + } + if (isnan(a)) return Qfalse; + return (a <= b)?Qtrue:Qfalse; +} + +/* + * call-seq: + * flt.eql?(obj) => true or false + * + * Returns true only if obj is a + * Float with the same value as flt. Contrast this + * with Float#==, which performs type conversions. + * + * 1.0.eql?(1) #=> false + */ + +static VALUE +flo_eql(VALUE x, VALUE y) +{ + if (TYPE(y) == T_FLOAT) { + double a = RFLOAT_VALUE(x); + double b = RFLOAT_VALUE(y); + + if (isnan(a) || isnan(b)) return Qfalse; + if (a == b) return Qtrue; + } + return Qfalse; +} + +/* + * call-seq: + * flt.to_f => flt + * + * As flt is already a float, returns self. + */ + +static VALUE +flo_to_f(VALUE num) +{ + return num; +} + +/* + * call-seq: + * flt.abs => float + * + * Returns the absolute value of flt. + * + * (-34.56).abs #=> 34.56 + * -34.56.abs #=> 34.56 + * + */ + +static VALUE +flo_abs(VALUE flt) +{ + double val = fabs(RFLOAT_VALUE(flt)); + return DBL2NUM(val); +} + +/* + * call-seq: + * flt.zero? -> true or false + * + * Returns true if flt is 0.0. 
+ * + */ + +static VALUE +flo_zero_p(VALUE num) +{ + if (RFLOAT_VALUE(num) == 0.0) { + return Qtrue; + } + return Qfalse; +} + +/* + * call-seq: + * flt.nan? -> true or false + * + * Returns true if flt is an invalid IEEE floating + * point number. + * + * a = -1.0 #=> -1.0 + * a.nan? #=> false + * a = 0.0/0.0 #=> NaN + * a.nan? #=> true + */ + +static VALUE +flo_is_nan_p(VALUE num) +{ + double value = RFLOAT_VALUE(num); + + return isnan(value) ? Qtrue : Qfalse; +} + +/* + * call-seq: + * flt.infinite? -> nil, -1, +1 + * + * Returns nil, -1, or +1 depending on whether flt + * is finite, -infinity, or +infinity. + * + * (0.0).infinite? #=> nil + * (-1.0/0.0).infinite? #=> -1 + * (+1.0/0.0).infinite? #=> 1 + */ + +static VALUE +flo_is_infinite_p(VALUE num) +{ + double value = RFLOAT_VALUE(num); + + if (isinf(value)) { + return INT2FIX( value < 0 ? -1 : 1 ); + } + + return Qnil; +} + +/* + * call-seq: + * flt.finite? -> true or false + * + * Returns true if flt is a valid IEEE floating + * point number (it is not infinite, and nan? is + * false). + * + */ + +static VALUE +flo_is_finite_p(VALUE num) +{ + double value = RFLOAT_VALUE(num); + +#if HAVE_FINITE + if (!finite(value)) + return Qfalse; +#else + if (isinf(value) || isnan(value)) + return Qfalse; +#endif + + return Qtrue; +} + +/* + * call-seq: + * flt.floor => integer + * + * Returns the largest integer less than or equal to flt. + * + * 1.2.floor #=> 1 + * 2.0.floor #=> 2 + * (-1.2).floor #=> -2 + * (-2.0).floor #=> -2 + */ + +static VALUE +flo_floor(VALUE num) +{ + double f = floor(RFLOAT_VALUE(num)); + long val; + + if (!FIXABLE(f)) { + return rb_dbl2big(f); + } + val = f; + return LONG2FIX(val); +} + +/* + * call-seq: + * flt.ceil => integer + * + * Returns the smallest Integer greater than or equal to + * flt. 
+ * + * 1.2.ceil #=> 2 + * 2.0.ceil #=> 2 + * (-1.2).ceil #=> -1 + * (-2.0).ceil #=> -2 + */ + +static VALUE +flo_ceil(VALUE num) +{ + double f = ceil(RFLOAT_VALUE(num)); + long val; + + if (!FIXABLE(f)) { + return rb_dbl2big(f); + } + val = f; + return LONG2FIX(val); +} + +/* + * call-seq: + * flt.round([ndigits]) => integer or float + * + * Rounds flt to a given precision in decimal digits (default 0 digits). + * Precision may be negative. Returns a a floating point number when ndigits + * is more than one. + * + * 1.5.round #=> 2 + * (-1.5).round #=> -2 + */ + +static VALUE +flo_round(int argc, VALUE *argv, VALUE num) +{ + VALUE nd; + double number, f; + int ndigits = 0, i; + long val; + + if (argc > 0 && rb_scan_args(argc, argv, "01", &nd) == 1) { + ndigits = NUM2INT(nd); + } + number = RFLOAT_VALUE(num); + f = 1.0; + i = abs(ndigits); + while (--i >= 0) + f = f*10.0; + + if (isinf(f)) { + if (ndigits < 0) number = 0; + } + else { + if (ndigits < 0) number /= f; + else number *= f; + number = round(number); + if (ndigits < 0) number *= f; + else number /= f; + } + + if (ndigits > 0) return DBL2NUM(number); + + if (!FIXABLE(number)) { + return rb_dbl2big(number); + } + val = number; + return LONG2FIX(val); +} + +/* + * call-seq: + * flt.to_i => integer + * flt.to_int => integer + * flt.truncate => integer + * + * Returns flt truncated to an Integer. + */ + +static VALUE +flo_truncate(VALUE num) +{ + double f = RFLOAT_VALUE(num); + long val; + + if (f > 0.0) f = floor(f); + if (f < 0.0) f = ceil(f); + + if (!FIXABLE(f)) { + return rb_dbl2big(f); + } + val = f; + return LONG2FIX(val); +} + +/* + * call-seq: + * num.floor => integer + * + * Returns the largest integer less than or equal to num. + * Numeric implements this by converting anInteger + * to a Float and invoking Float#floor. 
+ * + * 1.floor #=> 1 + * (-1).floor #=> -1 + */ + +static VALUE +num_floor(VALUE num) +{ + return flo_floor(rb_Float(num)); +} + + +/* + * call-seq: + * num.ceil => integer + * + * Returns the smallest Integer greater than or equal to + * num. Class Numeric achieves this by converting + * itself to a Float then invoking + * Float#ceil. + * + * 1.ceil #=> 1 + * 1.2.ceil #=> 2 + * (-1.2).ceil #=> -1 + * (-1.0).ceil #=> -1 + */ + +static VALUE +num_ceil(VALUE num) +{ + return flo_ceil(rb_Float(num)); +} + +/* + * call-seq: + * num.round([ndigits]) => integer or float + * + * Rounds num to a given precision in decimal digits (default 0 digits). + * Precision may be negative. Returns a a floating point number when ndigits + * is more than one. Numeric implements this by converting itself + * to a Float and invoking Float#round. + */ + +static VALUE +num_round(int argc, VALUE* argv, VALUE num) +{ + return flo_round(argc, argv, rb_Float(num)); +} + +/* + * call-seq: + * num.truncate => integer + * + * Returns num truncated to an integer. Numeric + * implements this by converting its value to a float and invoking + * Float#truncate. + */ + +static VALUE +num_truncate(VALUE num) +{ + return flo_truncate(rb_Float(num)); +} + + +int +ruby_float_step(VALUE from, VALUE to, VALUE step, int excl) +{ + if (TYPE(from) == T_FLOAT || TYPE(to) == T_FLOAT || TYPE(step) == T_FLOAT) { + const double epsilon = DBL_EPSILON; + double beg = NUM2DBL(from); + double end = NUM2DBL(to); + double unit = NUM2DBL(step); + double n = (end - beg)/unit; + double err = (fabs(beg) + fabs(end) + fabs(end-beg)) / fabs(unit) * epsilon; + long i; + + if (isinf(unit)) { + if (unit > 0) rb_yield(DBL2NUM(beg)); + } + else { + if (err>0.5) err=0.5; + n = floor(n + err); + if (!excl) n++; + for (i=0; i num + * + * Invokes block with the sequence of numbers starting at + * num, incremented by step on each call. 
The loop + * finishes when the value to be passed to the block is greater than + * limit (if step is positive) or less than + * limit (if step is negative). If all the arguments are + * integers, the loop operates using an integer counter. If any of the + * arguments are floating point numbers, all are converted to floats, + * and the loop is executed floor(n + n*epsilon)+ 1 times, + * where n = (limit - num)/step. Otherwise, the loop + * starts at num, uses either the < or + * > operator to compare the counter against + * limit, and increments itself using the + + * operator. + * + * 1.step(10, 2) { |i| print i, " " } + * Math::E.step(Math::PI, 0.2) { |f| print f, " " } + * + * produces: + * + * 1 3 5 7 9 + * 2.71828182845905 2.91828182845905 3.11828182845905 + */ + +static VALUE +num_step(int argc, VALUE *argv, VALUE from) +{ + VALUE to, step; + + RETURN_ENUMERATOR(from, argc, argv); + if (argc == 1) { + to = argv[0]; + step = INT2FIX(1); + } + else { + if (argc == 2) { + to = argv[0]; + step = argv[1]; + } + else { + rb_raise(rb_eArgError, "wrong number of arguments"); + } + if (rb_equal(step, INT2FIX(0))) { + rb_raise(rb_eArgError, "step can't be 0"); + } + } + + if (FIXNUM_P(from) && FIXNUM_P(to) && FIXNUM_P(step)) { + long i, end, diff; + + i = FIX2LONG(from); + end = FIX2LONG(to); + diff = FIX2LONG(step); + + if (diff > 0) { + while (i <= end) { + rb_yield(LONG2FIX(i)); + i += diff; + } + } + else { + while (i >= end) { + rb_yield(LONG2FIX(i)); + i += diff; + } + } + } + else if (!ruby_float_step(from, to, step, Qfalse)) { + VALUE i = from; + ID cmp; + + if (RTEST(rb_funcall(step, '>', 1, INT2FIX(0)))) { + cmp = '>'; + } + else { + cmp = '<'; + } + for (;;) { + if (RTEST(rb_funcall(i, cmp, 1, to))) break; + rb_yield(i); + i = rb_funcall(i, '+', 1, step); + } + } + return from; +} + +SIGNED_VALUE +rb_num2long(VALUE val) +{ + again: + if (NIL_P(val)) { + rb_raise(rb_eTypeError, "no implicit conversion from nil to integer"); + } + + if (FIXNUM_P(val)) return 
FIX2LONG(val); + + switch (TYPE(val)) { + case T_FLOAT: + if (RFLOAT_VALUE(val) <= (double)LONG_MAX + && RFLOAT_VALUE(val) >= (double)LONG_MIN) { + return (SIGNED_VALUE)(RFLOAT_VALUE(val)); + } + else { + char buf[24]; + char *s; + + snprintf(buf, sizeof(buf), "%-.10g", RFLOAT_VALUE(val)); + if ((s = strchr(buf, ' ')) != 0) *s = '\0'; + rb_raise(rb_eRangeError, "float %s out of range of integer", buf); + } + + case T_BIGNUM: + return rb_big2long(val); + + default: + val = rb_to_int(val); + goto again; + } +} + +VALUE +rb_num2ulong(VALUE val) +{ + if (TYPE(val) == T_BIGNUM) { + return rb_big2ulong(val); + } + return (VALUE)rb_num2long(val); +} + +#if SIZEOF_INT < SIZEOF_VALUE +static void +check_int(SIGNED_VALUE num) +{ + const char *s; + + if (num < INT_MIN) { + s = "small"; + } + else if (num > INT_MAX) { + s = "big"; + } + else { + return; + } + rb_raise(rb_eRangeError, "integer %"PRIdVALUE " too %s to convert to `int'", num, s); +} + +static void +check_uint(VALUE num, VALUE sign) +{ + static const VALUE mask = ~(VALUE)UINT_MAX; + + if (RTEST(sign)) { + /* minus */ + if ((num & mask) != mask || (num & ~mask) <= INT_MAX + 1UL) + rb_raise(rb_eRangeError, "integer %"PRIdVALUE " too small to convert to `unsigned int'", num); + } + else { + /* plus */ + if ((num & mask) != 0) + rb_raise(rb_eRangeError, "integer %"PRIuVALUE " too big to convert to `unsigned int'", num); + } +} + +long +rb_num2int(VALUE val) +{ + long num = rb_num2long(val); + + check_int(num); + return num; +} + +long +rb_fix2int(VALUE val) +{ + long num = FIXNUM_P(val)?FIX2LONG(val):rb_num2long(val); + + check_int(num); + return num; +} + +unsigned long +rb_num2uint(VALUE val) +{ + unsigned long num = rb_num2ulong(val); + + check_uint(num, rb_funcall(val, '<', 1, INT2FIX(0))); + return num; +} + +unsigned long +rb_fix2uint(VALUE val) +{ + unsigned long num; + + if (!FIXNUM_P(val)) { + return rb_num2uint(val); + } + num = FIX2ULONG(val); + + check_uint(num, rb_funcall(val, '<', 1, INT2FIX(0))); + 
return num; +} +#else +long +rb_num2int(VALUE val) +{ + return rb_num2long(val); +} + +long +rb_fix2int(VALUE val) +{ + return FIX2INT(val); +} +#endif + +VALUE +rb_num2fix(VALUE val) +{ + long v; + + if (FIXNUM_P(val)) return val; + + v = rb_num2long(val); + if (!FIXABLE(v)) + rb_raise(rb_eRangeError, "integer %"PRIdVALUE " out of range of fixnum", v); + return LONG2FIX(v); +} + +#if HAVE_LONG_LONG + +LONG_LONG +rb_num2ll(VALUE val) +{ + if (NIL_P(val)) { + rb_raise(rb_eTypeError, "no implicit conversion from nil"); + } + + if (FIXNUM_P(val)) return (LONG_LONG)FIX2LONG(val); + + switch (TYPE(val)) { + case T_FLOAT: + if (RFLOAT_VALUE(val) <= (double)LLONG_MAX + && RFLOAT_VALUE(val) >= (double)LLONG_MIN) { + return (LONG_LONG)(RFLOAT_VALUE(val)); + } + else { + char buf[24]; + char *s; + + snprintf(buf, sizeof(buf), "%-.10g", RFLOAT_VALUE(val)); + if ((s = strchr(buf, ' ')) != 0) *s = '\0'; + rb_raise(rb_eRangeError, "float %s out of range of long long", buf); + } + + case T_BIGNUM: + return rb_big2ll(val); + + case T_STRING: + rb_raise(rb_eTypeError, "no implicit conversion from string"); + return Qnil; /* not reached */ + + case T_TRUE: + case T_FALSE: + rb_raise(rb_eTypeError, "no implicit conversion from boolean"); + return Qnil; /* not reached */ + + default: + val = rb_to_int(val); + return NUM2LL(val); + } +} + +unsigned LONG_LONG +rb_num2ull(VALUE val) +{ + if (TYPE(val) == T_BIGNUM) { + return rb_big2ull(val); + } + return (unsigned LONG_LONG)rb_num2ll(val); +} + +#endif /* HAVE_LONG_LONG */ + +/* + * Document-class: Integer + * + * Integer is the basis for the two concrete classes that + * hold whole numbers, Bignum and Fixnum. + * + */ + + +/* + * call-seq: + * int.to_i => int + * int.to_int => int + * int.floor => int + * int.ceil => int + * int.round => int + * int.truncate => int + * + * As int is already an Integer, all these + * methods simply return the receiver. 
+ */ + +static VALUE +int_to_i(VALUE num) +{ + return num; +} + +/* + * call-seq: + * int.integer? -> true + * + * Always returns true. + */ + +static VALUE +int_int_p(VALUE num) +{ + return Qtrue; +} + +/* + * call-seq: + * int.odd? -> true or false + * + * Returns true if int is an odd number. + */ + +static VALUE +int_odd_p(VALUE num) +{ + if (rb_funcall(num, '%', 1, INT2FIX(2)) != INT2FIX(0)) { + return Qtrue; + } + return Qfalse; +} + +/* + * call-seq: + * int.even? -> true or false + * + * Returns true if int is an even number. + */ + +static VALUE +int_even_p(VALUE num) +{ + if (rb_funcall(num, '%', 1, INT2FIX(2)) == INT2FIX(0)) { + return Qtrue; + } + return Qfalse; +} + +/* + * call-seq: + * fixnum.next => integer + * fixnum.succ => integer + * + * Returns the Integer equal to int + 1. + * + * 1.next #=> 2 + * (-1).next #=> 0 + */ + +static VALUE +fix_succ(VALUE num) +{ + long i = FIX2LONG(num) + 1; + return LONG2NUM(i); +} + +/* + * call-seq: + * int.next => integer + * int.succ => integer + * + * Returns the Integer equal to int + 1. + * + * 1.next #=> 2 + * (-1).next #=> 0 + */ + +static VALUE +int_succ(VALUE num) +{ + if (FIXNUM_P(num)) { + long i = FIX2LONG(num) + 1; + return LONG2NUM(i); + } + return rb_funcall(num, '+', 1, INT2FIX(1)); +} + +/* + * call-seq: + * int.pred => integer + * + * Returns the Integer equal to int - 1. + * + * 1.pred #=> 0 + * (-1).pred #=> -2 + */ + +static VALUE +int_pred(VALUE num) +{ + if (FIXNUM_P(num)) { + long i = FIX2LONG(num) - 1; + return LONG2NUM(i); + } + return rb_funcall(num, '-', 1, INT2FIX(1)); +} + +/* + * call-seq: + * int.chr([encoding]) => string + * + * Returns a string containing the character represented by the + * receiver's value according to +encoding+. 
+ * + * 65.chr #=> "A" + * 230.chr #=> "\346" + * 255.chr(Encoding::UTF_8) #=> "\303\277" + */ + +static VALUE +int_chr(int argc, VALUE *argv, VALUE num) +{ + char c; + int n; + long i = NUM2LONG(num); + rb_encoding *enc; + VALUE str; + + switch (argc) { + case 0: + if (i < 0 || 0xff < i) { + out_of_range: + rb_raise(rb_eRangeError, "%"PRIdVALUE " out of char range", i); + } + c = i; + if (i < 0x80) { + return rb_usascii_str_new(&c, 1); + } + else { + return rb_str_new(&c, 1); + } + case 1: + break; + default: + rb_raise(rb_eArgError, "wrong number of arguments (%d for 0 or 1)", argc); + break; + } + enc = rb_to_encoding(argv[0]); + if (!enc) enc = rb_ascii8bit_encoding(); + if (i < 0 || (n = rb_enc_codelen(i, enc)) <= 0) goto out_of_range; + str = rb_enc_str_new(0, n, enc); + rb_enc_mbcput(i, RSTRING_PTR(str), enc); + return str; +} + +/* + * call-seq: + * int.ord => int + * + * Returns the int itself. + * + * ?a.ord #=> 97 + * + * This method is intended for compatibility to + * character constant in Ruby 1.9. + * For example, ?a.ord returns 97 both in 1.8 and 1.9. + */ + +static VALUE +int_ord(num) + VALUE num; +{ + return num; +} + +/******************************************************************** + * + * Document-class: Fixnum + * + * A Fixnum holds Integer values that can be + * represented in a native machine word (minus 1 bit). If any operation + * on a Fixnum exceeds this range, the value is + * automatically converted to a Bignum. + * + * Fixnum objects have immediate value. This means that + * when they are assigned or passed as parameters, the actual object is + * passed, rather than a reference to that object. Assignment does not + * alias Fixnum objects. There is effectively only one + * Fixnum object instance for any given integer value, so, + * for example, you cannot add a singleton method to a + * Fixnum. + */ + + +/* + * call-seq: + * -fix => integer + * + * Negates fix (which might return a Bignum). 
+ */ + +static VALUE +fix_uminus(VALUE num) +{ + return LONG2NUM(-FIX2LONG(num)); +} + +VALUE +rb_fix2str(VALUE x, int base) +{ + extern const char ruby_digitmap[]; + char buf[SIZEOF_VALUE*CHAR_BIT + 2], *b = buf + sizeof buf; + long val = FIX2LONG(x); + int neg = 0; + + if (base < 2 || 36 < base) { + rb_raise(rb_eArgError, "invalid radix %d", base); + } + if (val == 0) { + return rb_usascii_str_new2("0"); + } + if (val < 0) { + val = -val; + neg = 1; + } + *--b = '\0'; + do { + *--b = ruby_digitmap[(int)(val % base)]; + } while (val /= base); + if (neg) { + *--b = '-'; + } + + return rb_usascii_str_new2(b); +} + +/* + * call-seq: + * fix.to_s( base=10 ) -> aString + * + * Returns a string containing the representation of fix radix + * base (between 2 and 36). + * + * 12345.to_s #=> "12345" + * 12345.to_s(2) #=> "11000000111001" + * 12345.to_s(8) #=> "30071" + * 12345.to_s(10) #=> "12345" + * 12345.to_s(16) #=> "3039" + * 12345.to_s(36) #=> "9ix" + * + */ +static VALUE +fix_to_s(int argc, VALUE *argv, VALUE x) +{ + int base; + + if (argc == 0) base = 10; + else { + VALUE b; + + rb_scan_args(argc, argv, "01", &b); + base = NUM2INT(b); + } + + return rb_fix2str(x, base); +} + +/* + * call-seq: + * fix + numeric => numeric_result + * + * Performs addition: the class of the resulting object depends on + * the class of numeric and on the magnitude of the + * result. + */ + +static VALUE +fix_plus(VALUE x, VALUE y) +{ + if (FIXNUM_P(y)) { + long a, b, c; + VALUE r; + + a = FIX2LONG(x); + b = FIX2LONG(y); + c = a + b; + r = LONG2NUM(c); + + return r; + } + switch (TYPE(y)) { + case T_BIGNUM: + return rb_big_plus(y, x); + case T_FLOAT: + return DBL2NUM((double)FIX2LONG(x) + RFLOAT_VALUE(y)); + default: + return rb_num_coerce_bin(x, y, '+'); + } +} + +/* + * call-seq: + * fix - numeric => numeric_result + * + * Performs subtraction: the class of the resulting object depends on + * the class of numeric and on the magnitude of the + * result. 
+ */ + +static VALUE +fix_minus(VALUE x, VALUE y) +{ + if (FIXNUM_P(y)) { + long a, b, c; + VALUE r; + + a = FIX2LONG(x); + b = FIX2LONG(y); + c = a - b; + r = LONG2NUM(c); + + return r; + } + switch (TYPE(y)) { + case T_BIGNUM: + x = rb_int2big(FIX2LONG(x)); + return rb_big_minus(x, y); + case T_FLOAT: + return DBL2NUM((double)FIX2LONG(x) - RFLOAT_VALUE(y)); + default: + return rb_num_coerce_bin(x, y, '-'); + } +} + +#define SQRT_LONG_MAX ((SIGNED_VALUE)1<<((SIZEOF_LONG*CHAR_BIT-1)/2)) +/*tests if N*N would overflow*/ +#define FIT_SQRT_LONG(n) (((n)=-SQRT_LONG_MAX)) + +/* + * call-seq: + * fix * numeric => numeric_result + * + * Performs multiplication: the class of the resulting object depends on + * the class of numeric and on the magnitude of the + * result. + */ + +static VALUE +fix_mul(VALUE x, VALUE y) +{ + if (FIXNUM_P(y)) { +#ifdef __HP_cc +/* avoids an optimization bug of HP aC++/ANSI C B3910B A.06.05 [Jul 25 2005] */ + volatile +#endif + SIGNED_VALUE a, b; +#if SIZEOF_VALUE * 2 <= SIZEOF_LONG_LONG + LONG_LONG d; +#else + SIGNED_VALUE c; + VALUE r; +#endif + + a = FIX2LONG(x); + b = FIX2LONG(y); + +#if SIZEOF_VALUE * 2 <= SIZEOF_LONG_LONG + d = (LONG_LONG)a * b; + if (FIXABLE(d)) return LONG2FIX(d); + return rb_ll2inum(d); +#else + if (FIT_SQRT_LONG(a) && FIT_SQRT_LONG(b)) + return LONG2FIX(a*b); + c = a * b; + r = LONG2FIX(c); + + if (a == 0) return x; + if (FIX2LONG(r) != c || c/a != b) { + r = rb_big_mul(rb_int2big(a), rb_int2big(b)); + } + return r; +#endif + } + switch (TYPE(y)) { + case T_BIGNUM: + return rb_big_mul(y, x); + case T_FLOAT: + return DBL2NUM((double)FIX2LONG(x) * RFLOAT_VALUE(y)); + default: + return rb_num_coerce_bin(x, y, '*'); + } +} + +static void +fixdivmod(long x, long y, long *divp, long *modp) +{ + long div, mod; + + if (y == 0) rb_num_zerodiv(); + if (y < 0) { + if (x < 0) + div = -x / -y; + else + div = - (x / -y); + } + else { + if (x < 0) + div = - (-x / y); + else + div = x / y; + } + mod = x - div*y; + if ((mod < 0 && y > 
0) || (mod > 0 && y < 0)) { + mod += y; + div -= 1; + } + if (divp) *divp = div; + if (modp) *modp = mod; +} + +/* + * call-seq: + * fix.fdiv(numeric) => float + * + * Returns the floating point result of dividing fix by + * numeric. + * + * 654321.fdiv(13731) #=> 47.6528293642124 + * 654321.fdiv(13731.24) #=> 47.6519964693647 + * + */ + +static VALUE +fix_fdiv(VALUE x, VALUE y) +{ + if (FIXNUM_P(y)) { + return DBL2NUM((double)FIX2LONG(x) / (double)FIX2LONG(y)); + } + switch (TYPE(y)) { + case T_BIGNUM: + return DBL2NUM((double)FIX2LONG(x) / rb_big2dbl(y)); + case T_FLOAT: + return DBL2NUM((double)FIX2LONG(x) / RFLOAT_VALUE(y)); + default: + return rb_num_coerce_bin(x, y, rb_intern("fdiv")); + } +} + +static VALUE +fix_divide(VALUE x, VALUE y, ID op) +{ + if (FIXNUM_P(y)) { + long div; + + fixdivmod(FIX2LONG(x), FIX2LONG(y), &div, 0); + return LONG2NUM(div); + } + switch (TYPE(y)) { + case T_BIGNUM: + x = rb_int2big(FIX2LONG(x)); + return rb_big_div(x, y); + case T_FLOAT: + { + double div; + + if (op == '/') { + div = (double)FIX2LONG(x) / RFLOAT_VALUE(y); + return DBL2NUM(div); + } + else { + if (RFLOAT_VALUE(y) == 0) rb_num_zerodiv(); + div = (double)FIX2LONG(x) / RFLOAT_VALUE(y); + return rb_dbl2big(floor(div)); + } + } + default: + return rb_num_coerce_bin(x, y, op); + } +} + +/* + * call-seq: + * fix / numeric => numeric_result + * + * Performs division: the class of the resulting object depends on + * the class of numeric and on the magnitude of the + * result. + */ + +static VALUE +fix_div(VALUE x, VALUE y) +{ + return fix_divide(x, y, '/'); +} + +/* + * call-seq: + * fix.div(numeric) => numeric_result + * + * Performs integer division: returns integer value. + */ + +static VALUE +fix_idiv(VALUE x, VALUE y) +{ + return fix_divide(x, y, rb_intern("div")); +} + +/* + * call-seq: + * fix % other => Numeric + * fix.modulo(other) => Numeric + * + * Returns fix modulo other. + * See Numeric.divmod for more information. 
+ */ + +static VALUE +fix_mod(VALUE x, VALUE y) +{ + if (FIXNUM_P(y)) { + long mod; + + fixdivmod(FIX2LONG(x), FIX2LONG(y), 0, &mod); + return LONG2NUM(mod); + } + switch (TYPE(y)) { + case T_BIGNUM: + x = rb_int2big(FIX2LONG(x)); + return rb_big_modulo(x, y); + case T_FLOAT: + { + double mod; + + flodivmod((double)FIX2LONG(x), RFLOAT_VALUE(y), 0, &mod); + return DBL2NUM(mod); + } + default: + return rb_num_coerce_bin(x, y, '%'); + } +} + +/* + * call-seq: + * fix.divmod(numeric) => array + * + * See Numeric#divmod. + */ +static VALUE +fix_divmod(VALUE x, VALUE y) +{ + if (FIXNUM_P(y)) { + long div, mod; + + fixdivmod(FIX2LONG(x), FIX2LONG(y), &div, &mod); + + return rb_assoc_new(LONG2NUM(div), LONG2NUM(mod)); + } + switch (TYPE(y)) { + case T_BIGNUM: + x = rb_int2big(FIX2LONG(x)); + return rb_big_divmod(x, y); + case T_FLOAT: + { + double div, mod; + volatile VALUE a, b; + + flodivmod((double)FIX2LONG(x), RFLOAT_VALUE(y), &div, &mod); + a = dbl2ival(div); + b = DBL2NUM(mod); + return rb_assoc_new(a, b); + } + default: + return rb_num_coerce_bin(x, y, rb_intern("divmod")); + } +} + +static VALUE +int_pow(long x, unsigned long y) +{ + int neg = x < 0; + long z = 1; + + if (neg) x = -x; + if (y & 1) + z = x; + else + neg = 0; + y &= ~1; + do { + while (y % 2 == 0) { + if (!FIT_SQRT_LONG(x)) { + VALUE v; + bignum: + v = rb_big_pow(rb_int2big(x), LONG2NUM(y)); + if (z != 1) v = rb_big_mul(rb_int2big(neg ? -z : z), v); + return v; + } + x = x * x; + y >>= 1; + } + { + long xz = x * z; + if (!POSFIXABLE(xz) || xz / x != z) { + goto bignum; + } + z = xz; + } + } while (--y); + if (neg) z = -z; + return LONG2NUM(z); +} + +/* + * call-seq: + * fix ** other => Numeric + * + * Raises fix to the other power, which may + * be negative or fractional. 
+ * + * 2 ** 3 #=> 8 + * 2 ** -1 #=> 0.5 + * 2 ** 0.5 #=> 1.4142135623731 + */ + +static VALUE +fix_pow(VALUE x, VALUE y) +{ + static const double zero = 0.0; + long a = FIX2LONG(x); + + if (FIXNUM_P(y)) { + long b = FIX2LONG(y); + + if (b < 0) + return rb_funcall(rb_rational_raw1(x), rb_intern("**"), 1, y); + + if (b == 0) return INT2FIX(1); + if (b == 1) return x; + if (a == 0) { + if (b > 0) return INT2FIX(0); + return DBL2NUM(1.0 / zero); + } + if (a == 1) return INT2FIX(1); + if (a == -1) { + if (b % 2 == 0) + return INT2FIX(1); + else + return INT2FIX(-1); + } + return int_pow(a, b); + } + switch (TYPE(y)) { + case T_BIGNUM: + + if (rb_funcall(y, '<', 1, INT2FIX(0))) + return rb_funcall(rb_rational_raw1(x), rb_intern("**"), 1, y); + + if (a == 0) return INT2FIX(0); + if (a == 1) return INT2FIX(1); + if (a == -1) { + if (int_even_p(y)) return INT2FIX(1); + else return INT2FIX(-1); + } + x = rb_int2big(FIX2LONG(x)); + return rb_big_pow(x, y); + case T_FLOAT: + if (RFLOAT_VALUE(y) == 0.0) return DBL2NUM(1.0); + if (a == 0) { + return DBL2NUM(RFLOAT_VALUE(y) < 0 ? (1.0 / zero) : 0.0); + } + if (a == 1) return DBL2NUM(1.0); + return DBL2NUM(pow((double)a, RFLOAT_VALUE(y))); + default: + return rb_num_coerce_bin(x, y, rb_intern("**")); + } +} + +/* + * call-seq: + * fix == other + * + * Return true if fix equals other + * numerically. + * + * 1 == 2 #=> false + * 1 == 1.0 #=> true + */ + +static VALUE +fix_equal(VALUE x, VALUE y) +{ + if (x == y) return Qtrue; + if (FIXNUM_P(y)) return Qfalse; + switch (TYPE(y)) { + case T_BIGNUM: + return rb_big_eq(y, x); + case T_FLOAT: + return (double)FIX2LONG(x) == RFLOAT_VALUE(y) ? Qtrue : Qfalse; + default: + return num_equal(x, y); + } +} + +/* + * call-seq: + * fix <=> numeric => -1, 0, +1 + * + * Comparison---Returns -1, 0, or +1 depending on whether fix is + * less than, equal to, or greater than numeric. This is the + * basis for the tests in Comparable. 
+ */ + +static VALUE +fix_cmp(VALUE x, VALUE y) +{ + if (x == y) return INT2FIX(0); + if (FIXNUM_P(y)) { + if (FIX2LONG(x) > FIX2LONG(y)) return INT2FIX(1); + return INT2FIX(-1); + } + switch (TYPE(y)) { + case T_BIGNUM: + return rb_big_cmp(rb_int2big(FIX2LONG(x)), y); + case T_FLOAT: + return rb_dbl_cmp((double)FIX2LONG(x), RFLOAT_VALUE(y)); + default: + return rb_num_coerce_cmp(x, y, rb_intern("<=>")); + } +} + +/* + * call-seq: + * fix > other => true or false + * + * Returns true if the value of fix is + * greater than that of other. + */ + +static VALUE +fix_gt(VALUE x, VALUE y) +{ + if (FIXNUM_P(y)) { + if (FIX2LONG(x) > FIX2LONG(y)) return Qtrue; + return Qfalse; + } + switch (TYPE(y)) { + case T_BIGNUM: + return FIX2INT(rb_big_cmp(rb_int2big(FIX2LONG(x)), y)) > 0 ? Qtrue : Qfalse; + case T_FLOAT: + return (double)FIX2LONG(x) > RFLOAT_VALUE(y) ? Qtrue : Qfalse; + default: + return rb_num_coerce_relop(x, y, '>'); + } +} + +/* + * call-seq: + * fix >= other => true or false + * + * Returns true if the value of fix is + * greater than or equal to that of other. + */ + +static VALUE +fix_ge(VALUE x, VALUE y) +{ + if (FIXNUM_P(y)) { + if (FIX2LONG(x) >= FIX2LONG(y)) return Qtrue; + return Qfalse; + } + switch (TYPE(y)) { + case T_BIGNUM: + return FIX2INT(rb_big_cmp(rb_int2big(FIX2LONG(x)), y)) >= 0 ? Qtrue : Qfalse; + case T_FLOAT: + return (double)FIX2LONG(x) >= RFLOAT_VALUE(y) ? Qtrue : Qfalse; + default: + return rb_num_coerce_relop(x, y, rb_intern(">=")); + } +} + +/* + * call-seq: + * fix < other => true or false + * + * Returns true if the value of fix is + * less than that of other. + */ + +static VALUE +fix_lt(VALUE x, VALUE y) +{ + if (FIXNUM_P(y)) { + if (FIX2LONG(x) < FIX2LONG(y)) return Qtrue; + return Qfalse; + } + switch (TYPE(y)) { + case T_BIGNUM: + return FIX2INT(rb_big_cmp(rb_int2big(FIX2LONG(x)), y)) < 0 ? Qtrue : Qfalse; + case T_FLOAT: + return (double)FIX2LONG(x) < RFLOAT_VALUE(y) ? 
Qtrue : Qfalse; + default: + return rb_num_coerce_relop(x, y, '<'); + } +} + +/* + * call-seq: + * fix <= other => true or false + * + * Returns true if the value of fix is + * less than or equal to that of other. + */ + +static VALUE +fix_le(VALUE x, VALUE y) +{ + if (FIXNUM_P(y)) { + if (FIX2LONG(x) <= FIX2LONG(y)) return Qtrue; + return Qfalse; + } + switch (TYPE(y)) { + case T_BIGNUM: + return FIX2INT(rb_big_cmp(rb_int2big(FIX2LONG(x)), y)) <= 0 ? Qtrue : Qfalse; + case T_FLOAT: + return (double)FIX2LONG(x) <= RFLOAT_VALUE(y) ? Qtrue : Qfalse; + default: + return rb_num_coerce_relop(x, y, rb_intern("<=")); + } +} + +/* + * call-seq: + * ~fix => integer + * + * One's complement: returns a number where each bit is flipped. + */ + +static VALUE +fix_rev(VALUE num) +{ + long val = FIX2LONG(num); + + val = ~val; + return LONG2NUM(val); +} + +static VALUE +bit_coerce(VALUE x) +{ + while (!FIXNUM_P(x) && TYPE(x) != T_BIGNUM) { + if (TYPE(x) == T_FLOAT) { + rb_raise(rb_eTypeError, "can't convert Float into Integer"); + } + x = rb_to_int(x); + } + return x; +} + +/* + * call-seq: + * fix & other => integer + * + * Bitwise AND. + */ + +static VALUE +fix_and(VALUE x, VALUE y) +{ + long val; + + if (!FIXNUM_P(y = bit_coerce(y))) { + return rb_big_and(y, x); + } + val = FIX2LONG(x) & FIX2LONG(y); + return LONG2NUM(val); +} + +/* + * call-seq: + * fix | other => integer + * + * Bitwise OR. + */ + +static VALUE +fix_or(VALUE x, VALUE y) +{ + long val; + + if (!FIXNUM_P(y = bit_coerce(y))) { + return rb_big_or(y, x); + } + val = FIX2LONG(x) | FIX2LONG(y); + return LONG2NUM(val); +} + +/* + * call-seq: + * fix ^ other => integer + * + * Bitwise EXCLUSIVE OR. 
+ */ + +static VALUE +fix_xor(VALUE x, VALUE y) +{ + long val; + + if (!FIXNUM_P(y = bit_coerce(y))) { + return rb_big_xor(y, x); + } + val = FIX2LONG(x) ^ FIX2LONG(y); + return LONG2NUM(val); +} + +static VALUE fix_lshift(long, unsigned long); +static VALUE fix_rshift(long, unsigned long); + +/* + * call-seq: + * fix << count => integer + * + * Shifts _fix_ left _count_ positions (right if _count_ is negative). + */ + +static VALUE +rb_fix_lshift(VALUE x, VALUE y) +{ + long val, width; + + val = NUM2LONG(x); + if (!FIXNUM_P(y)) + return rb_big_lshift(rb_int2big(val), y); + width = FIX2LONG(y); + if (width < 0) + return fix_rshift(val, (unsigned long)-width); + return fix_lshift(val, width); +} + +static VALUE +fix_lshift(long val, unsigned long width) +{ + if (width > (SIZEOF_LONG*CHAR_BIT-1) + || ((unsigned long)val)>>(SIZEOF_LONG*CHAR_BIT-1-width) > 0) { + return rb_big_lshift(rb_int2big(val), ULONG2NUM(width)); + } + val = val << width; + return LONG2NUM(val); +} + +/* + * call-seq: + * fix >> count => integer + * + * Shifts _fix_ right _count_ positions (left if _count_ is negative). + */ + +static VALUE +rb_fix_rshift(VALUE x, VALUE y) +{ + long i, val; + + val = FIX2LONG(x); + if (!FIXNUM_P(y)) + return rb_big_rshift(rb_int2big(val), y); + i = FIX2LONG(y); + if (i == 0) return x; + if (i < 0) + return fix_lshift(val, (unsigned long)-i); + return fix_rshift(val, i); +} + +static VALUE +fix_rshift(long val, unsigned long i) +{ + if (i >= sizeof(long)*CHAR_BIT-1) { + if (val < 0) return INT2FIX(-1); + return INT2FIX(0); + } + val = RSHIFT(val, i); + return LONG2FIX(val); +} + +/* + * call-seq: + * fix[n] => 0, 1 + * + * Bit Reference---Returns the nth bit in the binary + * representation of fix, where fix[0] is the least + * significant bit. 
+ * + * a = 0b11001100101010 + * 30.downto(0) do |n| print a[n] end + * + * produces: + * + * 0000000000000000011001100101010 + */ + +static VALUE +fix_aref(VALUE fix, VALUE idx) +{ + long val = FIX2LONG(fix); + long i; + + idx = rb_to_int(idx); + if (!FIXNUM_P(idx)) { + idx = rb_big_norm(idx); + if (!FIXNUM_P(idx)) { + if (!RBIGNUM_SIGN(idx) || val >= 0) + return INT2FIX(0); + return INT2FIX(1); + } + } + i = FIX2LONG(idx); + + if (i < 0) return INT2FIX(0); + if (SIZEOF_LONG*CHAR_BIT-1 < i) { + if (val < 0) return INT2FIX(1); + return INT2FIX(0); + } + if (val & (1L< float + * + * Converts fix to a Float. + * + */ + +static VALUE +fix_to_f(VALUE num) +{ + double val; + + val = (double)FIX2LONG(num); + + return DBL2NUM(val); +} + +/* + * call-seq: + * fix.abs -> aFixnum + * + * Returns the absolute value of fix. + * + * -12345.abs #=> 12345 + * 12345.abs #=> 12345 + * + */ + +static VALUE +fix_abs(VALUE fix) +{ + long i = FIX2LONG(fix); + + if (i < 0) i = -i; + + return LONG2NUM(i); +} + + + +/* + * call-seq: + * fix.size -> fixnum + * + * Returns the number of bytes in the machine representation + * of a Fixnum. + * + * 1.size #=> 4 + * -1.size #=> 4 + * 2147483647.size #=> 4 + */ + +static VALUE +fix_size(VALUE fix) +{ + return INT2FIX(sizeof(long)); +} + +/* + * call-seq: + * int.upto(limit) {|i| block } => int + * + * Iterates block, passing in integer values from int + * up to and including limit. 
+ * + * 5.upto(10) { |i| print i, " " } + * + * produces: + * + * 5 6 7 8 9 10 + */ + +static VALUE +int_upto(VALUE from, VALUE to) +{ + RETURN_ENUMERATOR(from, 1, &to); + if (FIXNUM_P(from) && FIXNUM_P(to)) { + long i, end; + + end = FIX2LONG(to); + for (i = FIX2LONG(from); i <= end; i++) { + rb_yield(LONG2FIX(i)); + } + } + else { + VALUE i = from, c; + + while (!(c = rb_funcall(i, '>', 1, to))) { + rb_yield(i); + i = rb_funcall(i, '+', 1, INT2FIX(1)); + } + if (NIL_P(c)) rb_cmperr(i, to); + } + return from; +} + +/* + * call-seq: + * int.downto(limit) {|i| block } => int + * + * Iterates block, passing decreasing values from int + * down to and including limit. + * + * 5.downto(1) { |n| print n, ".. " } + * print " Liftoff!\n" + * + * produces: + * + * 5.. 4.. 3.. 2.. 1.. Liftoff! + */ + +static VALUE +int_downto(VALUE from, VALUE to) +{ + RETURN_ENUMERATOR(from, 1, &to); + if (FIXNUM_P(from) && FIXNUM_P(to)) { + long i, end; + + end = FIX2LONG(to); + for (i=FIX2LONG(from); i >= end; i--) { + rb_yield(LONG2FIX(i)); + } + } + else { + VALUE i = from, c; + + while (!(c = rb_funcall(i, '<', 1, to))) { + rb_yield(i); + i = rb_funcall(i, '-', 1, INT2FIX(1)); + } + if (NIL_P(c)) rb_cmperr(i, to); + } + return from; +} + +/* + * call-seq: + * int.times {|i| block } => int + * + * Iterates block int times, passing in values from zero to + * int - 1. 
+ * + * 5.times do |i| + * print i, " " + * end + * + * produces: + * + * 0 1 2 3 4 + */ + +static VALUE +int_dotimes(VALUE num) +{ + RETURN_ENUMERATOR(num, 0, 0); + + if (FIXNUM_P(num)) { + long i, end; + + end = FIX2LONG(num); + for (i=0; i 0) { + return rb_Float(num); + } + if (ndigits == 0) { + return num; + } + ndigits = -ndigits; + if (ndigits < 0) { + rb_raise(rb_eArgError, "ndigits out of range"); + } + f = int_pow(10, ndigits); + if (FIXNUM_P(num) && FIXNUM_P(f)) { + SIGNED_VALUE x = FIX2LONG(num), y = FIX2LONG(f); + int neg = x < 0; + if (neg) x = -x; + x = (x + y / 2) / y * y; + if (neg) x = -x; + return LONG2NUM(x); + } + h = rb_funcall(f, '/', 1, INT2FIX(2)); + r = rb_funcall(num, '%', 1, f); + n = rb_funcall(num, '-', 1, r); + if (!RTEST(rb_funcall(r, '<', 1, h))) { + n = rb_funcall(n, '+', 1, f); + } + return n; +} + +/* + * call-seq: + * fix.zero? => true or false + * + * Returns true if fix is zero. + * + */ + +static VALUE +fix_zero_p(VALUE num) +{ + if (FIX2LONG(num) == 0) { + return Qtrue; + } + return Qfalse; +} + +/* + * call-seq: + * fix.odd? -> true or false + * + * Returns true if fix is an odd number. + */ + +static VALUE +fix_odd_p(VALUE num) +{ + if (num & 2) { + return Qtrue; + } + return Qfalse; +} + +/* + * call-seq: + * fix.even? -> true or false + * + * Returns true if fix is an even number. + */ + +static VALUE +fix_even_p(VALUE num) +{ + if (num & 2) { + return Qfalse; + } + return Qtrue; +} + +void +Init_Numeric(void) +{ +#undef rb_intern +#define rb_intern(str) rb_intern_const(str) + +#if defined(__FreeBSD__) && __FreeBSD__ < 4 + /* allow divide by zero -- Inf */ + fpsetmask(fpgetmask() & ~(FP_X_DZ|FP_X_INV|FP_X_OFL)); +#elif defined(_UNICOSMP) + /* Turn off floating point exceptions for divide by zero, etc. */ + _set_Creg(0, 0); +#elif defined(__BORLANDC__) + /* Turn off floating point exceptions for overflow, etc. 
*/ + _control87(MCW_EM, MCW_EM); +#endif + id_coerce = rb_intern("coerce"); + id_to_i = rb_intern("to_i"); + id_eq = rb_intern("=="); + + rb_eZeroDivError = rb_define_class("ZeroDivisionError", rb_eStandardError); + rb_eFloatDomainError = rb_define_class("FloatDomainError", rb_eRangeError); + rb_cNumeric = rb_define_class("Numeric", rb_cObject); + + rb_define_method(rb_cNumeric, "singleton_method_added", num_sadded, 1); + rb_include_module(rb_cNumeric, rb_mComparable); + rb_define_method(rb_cNumeric, "initialize_copy", num_init_copy, 1); + rb_define_method(rb_cNumeric, "coerce", num_coerce, 1); + + rb_define_method(rb_cNumeric, "+@", num_uplus, 0); + rb_define_method(rb_cNumeric, "-@", num_uminus, 0); + rb_define_method(rb_cNumeric, "<=>", num_cmp, 1); + rb_define_method(rb_cNumeric, "eql?", num_eql, 1); + rb_define_method(rb_cNumeric, "quo", num_quo, 1); + rb_define_method(rb_cNumeric, "fdiv", num_fdiv, 1); + rb_define_method(rb_cNumeric, "div", num_div, 1); + rb_define_method(rb_cNumeric, "divmod", num_divmod, 1); + rb_define_method(rb_cNumeric, "modulo", num_modulo, 1); + rb_define_method(rb_cNumeric, "remainder", num_remainder, 1); + rb_define_method(rb_cNumeric, "abs", num_abs, 0); + rb_define_method(rb_cNumeric, "magnitude", num_abs, 0); + rb_define_method(rb_cNumeric, "to_int", num_to_int, 0); + + rb_define_method(rb_cNumeric, "real?", num_real_p, 0); + rb_define_method(rb_cNumeric, "integer?", num_int_p, 0); + rb_define_method(rb_cNumeric, "zero?", num_zero_p, 0); + rb_define_method(rb_cNumeric, "nonzero?", num_nonzero_p, 0); + + rb_define_method(rb_cNumeric, "floor", num_floor, 0); + rb_define_method(rb_cNumeric, "ceil", num_ceil, 0); + rb_define_method(rb_cNumeric, "round", num_round, -1); + rb_define_method(rb_cNumeric, "truncate", num_truncate, 0); + rb_define_method(rb_cNumeric, "step", num_step, -1); + + rb_cInteger = rb_define_class("Integer", rb_cNumeric); + rb_undef_alloc_func(rb_cInteger); + rb_undef_method(CLASS_OF(rb_cInteger), "new"); + + 
rb_define_method(rb_cInteger, "integer?", int_int_p, 0); + rb_define_method(rb_cInteger, "odd?", int_odd_p, 0); + rb_define_method(rb_cInteger, "even?", int_even_p, 0); + rb_define_method(rb_cInteger, "upto", int_upto, 1); + rb_define_method(rb_cInteger, "downto", int_downto, 1); + rb_define_method(rb_cInteger, "times", int_dotimes, 0); + rb_define_method(rb_cInteger, "succ", int_succ, 0); + rb_define_method(rb_cInteger, "next", int_succ, 0); + rb_define_method(rb_cInteger, "pred", int_pred, 0); + rb_define_method(rb_cInteger, "chr", int_chr, -1); + rb_define_method(rb_cInteger, "ord", int_ord, 0); + rb_define_method(rb_cInteger, "to_i", int_to_i, 0); + rb_define_method(rb_cInteger, "to_int", int_to_i, 0); + rb_define_method(rb_cInteger, "floor", int_to_i, 0); + rb_define_method(rb_cInteger, "ceil", int_to_i, 0); + rb_define_method(rb_cInteger, "truncate", int_to_i, 0); + rb_define_method(rb_cInteger, "round", int_round, -1); + + rb_cFixnum = rb_define_class("Fixnum", rb_cInteger); + + rb_define_method(rb_cFixnum, "to_s", fix_to_s, -1); + + rb_define_method(rb_cFixnum, "-@", fix_uminus, 0); + rb_define_method(rb_cFixnum, "+", fix_plus, 1); + rb_define_method(rb_cFixnum, "-", fix_minus, 1); + rb_define_method(rb_cFixnum, "*", fix_mul, 1); + rb_define_method(rb_cFixnum, "/", fix_div, 1); + rb_define_method(rb_cFixnum, "div", fix_idiv, 1); + rb_define_method(rb_cFixnum, "%", fix_mod, 1); + rb_define_method(rb_cFixnum, "modulo", fix_mod, 1); + rb_define_method(rb_cFixnum, "divmod", fix_divmod, 1); + rb_define_method(rb_cFixnum, "fdiv", fix_fdiv, 1); + rb_define_method(rb_cFixnum, "**", fix_pow, 1); + + rb_define_method(rb_cFixnum, "abs", fix_abs, 0); + rb_define_method(rb_cFixnum, "magnitude", fix_abs, 0); + + rb_define_method(rb_cFixnum, "==", fix_equal, 1); + rb_define_method(rb_cFixnum, "===", fix_equal, 1); + rb_define_method(rb_cFixnum, "<=>", fix_cmp, 1); + rb_define_method(rb_cFixnum, ">", fix_gt, 1); + rb_define_method(rb_cFixnum, ">=", fix_ge, 1); + 
rb_define_method(rb_cFixnum, "<", fix_lt, 1); + rb_define_method(rb_cFixnum, "<=", fix_le, 1); + + rb_define_method(rb_cFixnum, "~", fix_rev, 0); + rb_define_method(rb_cFixnum, "&", fix_and, 1); + rb_define_method(rb_cFixnum, "|", fix_or, 1); + rb_define_method(rb_cFixnum, "^", fix_xor, 1); + rb_define_method(rb_cFixnum, "[]", fix_aref, 1); + + rb_define_method(rb_cFixnum, "<<", rb_fix_lshift, 1); + rb_define_method(rb_cFixnum, ">>", rb_fix_rshift, 1); + + rb_define_method(rb_cFixnum, "to_f", fix_to_f, 0); + rb_define_method(rb_cFixnum, "size", fix_size, 0); + rb_define_method(rb_cFixnum, "zero?", fix_zero_p, 0); + rb_define_method(rb_cFixnum, "odd?", fix_odd_p, 0); + rb_define_method(rb_cFixnum, "even?", fix_even_p, 0); + rb_define_method(rb_cFixnum, "succ", fix_succ, 0); + + rb_cFloat = rb_define_class("Float", rb_cNumeric); + + rb_undef_alloc_func(rb_cFloat); + rb_undef_method(CLASS_OF(rb_cFloat), "new"); + + rb_define_const(rb_cFloat, "ROUNDS", INT2FIX(FLT_ROUNDS)); + rb_define_const(rb_cFloat, "RADIX", INT2FIX(FLT_RADIX)); + rb_define_const(rb_cFloat, "MANT_DIG", INT2FIX(DBL_MANT_DIG)); + rb_define_const(rb_cFloat, "DIG", INT2FIX(DBL_DIG)); + rb_define_const(rb_cFloat, "MIN_EXP", INT2FIX(DBL_MIN_EXP)); + rb_define_const(rb_cFloat, "MAX_EXP", INT2FIX(DBL_MAX_EXP)); + rb_define_const(rb_cFloat, "MIN_10_EXP", INT2FIX(DBL_MIN_10_EXP)); + rb_define_const(rb_cFloat, "MAX_10_EXP", INT2FIX(DBL_MAX_10_EXP)); + rb_define_const(rb_cFloat, "MIN", DBL2NUM(DBL_MIN)); + rb_define_const(rb_cFloat, "MAX", DBL2NUM(DBL_MAX)); + rb_define_const(rb_cFloat, "EPSILON", DBL2NUM(DBL_EPSILON)); + + rb_define_method(rb_cFloat, "to_s", flo_to_s, 0); + rb_define_method(rb_cFloat, "coerce", flo_coerce, 1); + rb_define_method(rb_cFloat, "-@", flo_uminus, 0); + rb_define_method(rb_cFloat, "+", flo_plus, 1); + rb_define_method(rb_cFloat, "-", flo_minus, 1); + rb_define_method(rb_cFloat, "*", flo_mul, 1); + rb_define_method(rb_cFloat, "/", flo_div, 1); + rb_define_method(rb_cFloat, "quo", 
flo_quo, 1); + rb_define_method(rb_cFloat, "fdiv", flo_quo, 1); + rb_define_method(rb_cFloat, "%", flo_mod, 1); + rb_define_method(rb_cFloat, "modulo", flo_mod, 1); + rb_define_method(rb_cFloat, "divmod", flo_divmod, 1); + rb_define_method(rb_cFloat, "**", flo_pow, 1); + rb_define_method(rb_cFloat, "==", flo_eq, 1); + rb_define_method(rb_cFloat, "===", flo_eq, 1); + rb_define_method(rb_cFloat, "<=>", flo_cmp, 1); + rb_define_method(rb_cFloat, ">", flo_gt, 1); + rb_define_method(rb_cFloat, ">=", flo_ge, 1); + rb_define_method(rb_cFloat, "<", flo_lt, 1); + rb_define_method(rb_cFloat, "<=", flo_le, 1); + rb_define_method(rb_cFloat, "eql?", flo_eql, 1); + rb_define_method(rb_cFloat, "hash", flo_hash, 0); + rb_define_method(rb_cFloat, "to_f", flo_to_f, 0); + rb_define_method(rb_cFloat, "abs", flo_abs, 0); + rb_define_method(rb_cFloat, "magnitude", flo_abs, 0); + rb_define_method(rb_cFloat, "zero?", flo_zero_p, 0); + + rb_define_method(rb_cFloat, "to_i", flo_truncate, 0); + rb_define_method(rb_cFloat, "to_int", flo_truncate, 0); + rb_define_method(rb_cFloat, "floor", flo_floor, 0); + rb_define_method(rb_cFloat, "ceil", flo_ceil, 0); + rb_define_method(rb_cFloat, "round", flo_round, -1); + rb_define_method(rb_cFloat, "truncate", flo_truncate, 0); + + rb_define_method(rb_cFloat, "nan?", flo_is_nan_p, 0); + rb_define_method(rb_cFloat, "infinite?", flo_is_infinite_p, 0); + rb_define_method(rb_cFloat, "finite?", flo_is_finite_p, 0); +} diff --git a/object.c b/object.c new file mode 100644 index 0000000..c68e04c --- /dev/null +++ b/object.c @@ -0,0 +1,2662 @@ +/********************************************************************** + + object.c - + + $Author: yugui $ + created at: Thu Jul 15 12:01:24 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/st.h" +#include "ruby/util.h" +#include +#include +#include +#include +#include + +VALUE rb_cBasicObject; +VALUE rb_mKernel; +VALUE rb_cObject; +VALUE rb_cModule; +VALUE rb_cClass; +VALUE rb_cData; + +VALUE rb_cNilClass; +VALUE rb_cTrueClass; +VALUE rb_cFalseClass; + +static ID id_eq, id_eql, id_match, id_inspect, id_init_copy; + +/* + * call-seq: + * obj === other => true or false + * + * Case Equality---For class Object, effectively the same + * as calling #==, but typically overridden by descendents + * to provide meaningful semantics in case statements. + */ + +VALUE +rb_equal(VALUE obj1, VALUE obj2) +{ + VALUE result; + + if (obj1 == obj2) return Qtrue; + result = rb_funcall(obj1, id_eq, 1, obj2); + if (RTEST(result)) return Qtrue; + return Qfalse; +} + +int +rb_eql(VALUE obj1, VALUE obj2) +{ + return RTEST(rb_funcall(obj1, id_eql, 1, obj2)); +} + +/* + * call-seq: + * obj == other => true or false + * obj.equal?(other) => true or false + * obj.eql?(other) => true or false + * + * Equality---At the Object level, == returns + * true only if obj and other are the + * same object. Typically, this method is overridden in descendent + * classes to provide class-specific meaning. + * + * Unlike ==, the equal? method should never be + * overridden by subclasses: it is used to determine object identity + * (that is, a.equal?(b) iff a is the same + * object as b). + * + * The eql? method returns true if + * obj and anObject have the same value. Used by + * Hash to test members for equality. For objects of + * class Object, eql? is synonymous with + * ==. Subclasses normally continue this tradition, but + * there are exceptions. Numeric types, for example, + * perform type conversion across ==, but not across + * eql?, so: + * + * 1 == 1.0 #=> true + * 1.eql? 
1.0 #=> false + */ + +VALUE +rb_obj_equal(VALUE obj1, VALUE obj2) +{ + if (obj1 == obj2) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * !obj => true or false + * + * Boolean negate. + */ + +VALUE +rb_obj_not(VALUE obj) +{ + return RTEST(obj) ? Qfalse : Qtrue; +} + +/* + * call-seq: + * obj != other => true or false + * + * Returns true if two objects are not-equal, otherwise false. + */ + +VALUE +rb_obj_not_equal(VALUE obj1, VALUE obj2) +{ + VALUE result = rb_funcall(obj1, id_eq, 1, obj2); + return RTEST(result) ? Qfalse : Qtrue; +} + +VALUE +rb_class_real(VALUE cl) +{ + if (cl == 0) + return 0; + while ((RBASIC(cl)->flags & FL_SINGLETON) || BUILTIN_TYPE(cl) == T_ICLASS) { + cl = RCLASS_SUPER(cl); + } + return cl; +} + +/* + * call-seq: + * obj.class => class + * + * Returns the class of obj, now preferred over + * Object#type, as an object's type in Ruby is only + * loosely tied to that object's class. This method must always be + * called with an explicit receiver, as class is also a + * reserved word in Ruby. 
+ * + * 1.class #=> Fixnum + * self.class #=> Object + */ + +VALUE +rb_obj_class(VALUE obj) +{ + return rb_class_real(CLASS_OF(obj)); +} + +static void +init_copy(VALUE dest, VALUE obj) +{ + if (OBJ_FROZEN(dest)) { + rb_raise(rb_eTypeError, "[bug] frozen object (%s) allocated", rb_obj_classname(dest)); + } + RBASIC(dest)->flags &= ~(T_MASK|FL_EXIVAR); + RBASIC(dest)->flags |= RBASIC(obj)->flags & (T_MASK|FL_EXIVAR|FL_TAINT|FL_UNTRUSTED); + rb_copy_generic_ivar(dest, obj); + rb_gc_copy_finalizer(dest, obj); + switch (TYPE(obj)) { + case T_OBJECT: + if (!(RBASIC(dest)->flags & ROBJECT_EMBED) && ROBJECT_IVPTR(dest)) { + xfree(ROBJECT_IVPTR(dest)); + ROBJECT(dest)->as.heap.ivptr = 0; + ROBJECT(dest)->as.heap.numiv = 0; + ROBJECT(dest)->as.heap.iv_index_tbl = 0; + } + if (RBASIC(obj)->flags & ROBJECT_EMBED) { + MEMCPY(ROBJECT(dest)->as.ary, ROBJECT(obj)->as.ary, VALUE, ROBJECT_EMBED_LEN_MAX); + RBASIC(dest)->flags |= ROBJECT_EMBED; + } + else { + long len = ROBJECT(obj)->as.heap.numiv; + VALUE *ptr = ALLOC_N(VALUE, len); + MEMCPY(ptr, ROBJECT(obj)->as.heap.ivptr, VALUE, len); + ROBJECT(dest)->as.heap.ivptr = ptr; + ROBJECT(dest)->as.heap.numiv = len; + ROBJECT(dest)->as.heap.iv_index_tbl = ROBJECT(obj)->as.heap.iv_index_tbl; + RBASIC(dest)->flags &= ~ROBJECT_EMBED; + } + break; + case T_CLASS: + case T_MODULE: + if (RCLASS_IV_TBL(dest)) { + st_free_table(RCLASS_IV_TBL(dest)); + RCLASS_IV_TBL(dest) = 0; + } + if (RCLASS_IV_TBL(obj)) { + RCLASS_IV_TBL(dest) = st_copy(RCLASS_IV_TBL(obj)); + } + break; + } + rb_funcall(dest, id_init_copy, 1, obj); +} + +/* + * call-seq: + * obj.clone -> an_object + * + * Produces a shallow copy of obj---the instance variables of + * obj are copied, but not the objects they reference. Copies + * the frozen and tainted state of obj. See also the discussion + * under Object#dup. 
+ * + * class Klass + * attr_accessor :str + * end + * s1 = Klass.new #=> # + * s1.str = "Hello" #=> "Hello" + * s2 = s1.clone #=> # + * s2.str[1,4] = "i" #=> "i" + * s1.inspect #=> "#" + * s2.inspect #=> "#" + * + * This method may have class-specific behavior. If so, that + * behavior will be documented under the #+initialize_copy+ method of + * the class. + */ + +VALUE +rb_obj_clone(VALUE obj) +{ + VALUE clone; + + if (rb_special_const_p(obj)) { + rb_raise(rb_eTypeError, "can't clone %s", rb_obj_classname(obj)); + } + clone = rb_obj_alloc(rb_obj_class(obj)); + RBASIC(clone)->klass = rb_singleton_class_clone(obj); + RBASIC(clone)->flags = (RBASIC(obj)->flags | FL_TEST(clone, FL_TAINT) | FL_TEST(clone, FL_UNTRUSTED)) & ~(FL_FREEZE|FL_FINALIZE); + init_copy(clone, obj); + RBASIC(clone)->flags |= RBASIC(obj)->flags & FL_FREEZE; + + return clone; +} + +/* + * call-seq: + * obj.dup -> an_object + * + * Produces a shallow copy of obj---the instance variables of + * obj are copied, but not the objects they reference. + * dup copies the tainted state of obj. See also + * the discussion under Object#clone. In general, + * clone and dup may have different semantics + * in descendent classes. While clone is used to duplicate + * an object, including its internal state, dup typically + * uses the class of the descendent object to create the new instance. + * + * This method may have class-specific behavior. If so, that + * behavior will be documented under the #+initialize_copy+ method of + * the class. 
+ */ + +VALUE +rb_obj_dup(VALUE obj) +{ + VALUE dup; + + if (rb_special_const_p(obj)) { + rb_raise(rb_eTypeError, "can't dup %s", rb_obj_classname(obj)); + } + dup = rb_obj_alloc(rb_obj_class(obj)); + init_copy(dup, obj); + + return dup; +} + +/* :nodoc: */ +VALUE +rb_obj_init_copy(VALUE obj, VALUE orig) +{ + if (obj == orig) return obj; + rb_check_frozen(obj); + if (TYPE(obj) != TYPE(orig) || rb_obj_class(obj) != rb_obj_class(orig)) { + rb_raise(rb_eTypeError, "initialize_copy should take same class object"); + } + return obj; +} + +/* + * call-seq: + * obj.to_s => string + * + * Returns a string representing obj. The default + * to_s prints the object's class and an encoding of the + * object id. As a special case, the top-level object that is the + * initial execution context of Ruby programs returns ``main.'' + */ + +VALUE +rb_any_to_s(VALUE obj) +{ + const char *cname = rb_obj_classname(obj); + VALUE str; + + str = rb_sprintf("#<%s:%p>", cname, (void*)obj); + OBJ_INFECT(str, obj); + + return str; +} + +VALUE +rb_inspect(VALUE obj) +{ + return rb_obj_as_string(rb_funcall(obj, id_inspect, 0, 0)); +} + +static int +inspect_i(ID id, VALUE value, VALUE str) +{ + VALUE str2; + const char *ivname; + + /* need not to show internal data */ + if (CLASS_OF(value) == 0) return ST_CONTINUE; + if (!rb_is_instance_id(id)) return ST_CONTINUE; + if (RSTRING_PTR(str)[0] == '-') { /* first element */ + RSTRING_PTR(str)[0] = '#'; + rb_str_cat2(str, " "); + } + else { + rb_str_cat2(str, ", "); + } + ivname = rb_id2name(id); + rb_str_cat2(str, ivname); + rb_str_cat2(str, "="); + str2 = rb_inspect(value); + rb_str_append(str, str2); + OBJ_INFECT(str, str2); + + return ST_CONTINUE; +} + +static VALUE +inspect_obj(VALUE obj, VALUE str, int recur) +{ + if (recur) { + rb_str_cat2(str, " ..."); + } + else { + rb_ivar_foreach(obj, inspect_i, str); + } + rb_str_cat2(str, ">"); + RSTRING_PTR(str)[0] = '#'; + OBJ_INFECT(str, obj); + + return str; +} + +/* + * call-seq: + * obj.inspect => 
string + * + * Returns a string containing a human-readable representation of + * obj. If not overridden, uses the to_s method to + * generate the string. + * + * [ 1, 2, 3..4, 'five' ].inspect #=> "[1, 2, 3..4, \"five\"]" + * Time.new.inspect #=> "2008-03-08 19:43:39 +0900" + */ + + +static VALUE +rb_obj_inspect(VALUE obj) +{ + + if (TYPE(obj) == T_OBJECT) { + int has_ivar = 0; + VALUE *ptr = ROBJECT_IVPTR(obj); + long len = ROBJECT_NUMIV(obj); + long i; + + for (i = 0; i < len; i++) { + if (ptr[i] != Qundef) { + has_ivar = 1; + break; + } + } + + if (has_ivar) { + VALUE str; + const char *c = rb_obj_classname(obj); + + str = rb_sprintf("-<%s:%p", c, (void*)obj); + return rb_exec_recursive(inspect_obj, obj, str); + } + } + return rb_funcall(obj, rb_intern("to_s"), 0, 0); +} + + +/* + * call-seq: + * obj.instance_of?(class) => true or false + * + * Returns true if obj is an instance of the given + * class. See also Object#kind_of?. + */ + +VALUE +rb_obj_is_instance_of(VALUE obj, VALUE c) +{ + switch (TYPE(c)) { + case T_MODULE: + case T_CLASS: + case T_ICLASS: + break; + default: + rb_raise(rb_eTypeError, "class or module required"); + } + + if (rb_obj_class(obj) == c) return Qtrue; + return Qfalse; +} + + +/* + * call-seq: + * obj.is_a?(class) => true or false + * obj.kind_of?(class) => true or false + * + * Returns true if class is the class of + * obj, or if class is one of the superclasses of + * obj or modules included in obj. + * + * module M; end + * class A + * include M + * end + * class B < A; end + * class C < B; end + * b = B.new + * b.instance_of? A #=> false + * b.instance_of? B #=> true + * b.instance_of? C #=> false + * b.instance_of? M #=> false + * b.kind_of? A #=> true + * b.kind_of? B #=> true + * b.kind_of? C #=> false + * b.kind_of? 
M #=> true + */ + +VALUE +rb_obj_is_kind_of(VALUE obj, VALUE c) +{ + VALUE cl = CLASS_OF(obj); + + switch (TYPE(c)) { + case T_MODULE: + case T_CLASS: + case T_ICLASS: + break; + + default: + rb_raise(rb_eTypeError, "class or module required"); + } + + while (cl) { + if (cl == c || RCLASS_M_TBL(cl) == RCLASS_M_TBL(c)) + return Qtrue; + cl = RCLASS_SUPER(cl); + } + return Qfalse; +} + + +/* + * call-seq: + * obj.tap{|x|...} => obj + * + * Yields x to the block, and then returns x. + * The primary purpose of this method is to "tap into" a method chain, + * in order to perform operations on intermediate results within the chain. + * + * (1..10) .tap {|x| puts "original: #{x.inspect}"} + * .to_a .tap {|x| puts "array: #{x.inspect}"} + * .select {|x| x%2==0} .tap {|x| puts "evens: #{x.inspect}"} + * .map { |x| x*x } .tap {|x| puts "squares: #{x.inspect}"} + * + */ + +VALUE +rb_obj_tap(VALUE obj) +{ + rb_yield(obj); + return obj; +} + + +/* + * Document-method: inherited + * + * call-seq: + * inherited(subclass) + * + * Callback invoked whenever a subclass of the current class is created. + * + * Example: + * + * class Foo + * def self.inherited(subclass) + * puts "New subclass: #{subclass}" + * end + * end + * + * class Bar < Foo + * end + * + * class Baz < Bar + * end + * + * produces: + * + * New subclass: Bar + * New subclass: Baz + */ + +/* + * Document-method: singleton_method_added + * + * call-seq: + * singleton_method_added(symbol) + * + * Invoked as a callback whenever a singleton method is added to the + * receiver. 
+ * + * module Chatty + * def Chatty.singleton_method_added(id) + * puts "Adding #{id.id2name}" + * end + * def self.one() end + * def two() end + * def Chatty.three() end + * end + * + * produces: + * + * Adding singleton_method_added + * Adding one + * Adding three + * + */ + +/* + * Document-method: singleton_method_removed + * + * call-seq: + * singleton_method_removed(symbol) + * + * Invoked as a callback whenever a singleton method is removed from + * the receiver. + * + * module Chatty + * def Chatty.singleton_method_removed(id) + * puts "Removing #{id.id2name}" + * end + * def self.one() end + * def two() end + * def Chatty.three() end + * class <produces: + * + * Removing three + * Removing one + */ + +/* + * Document-method: singleton_method_undefined + * + * call-seq: + * singleton_method_undefined(symbol) + * + * Invoked as a callback whenever a singleton method is undefined in + * the receiver. + * + * module Chatty + * def Chatty.singleton_method_undefined(id) + * puts "Undefining #{id.id2name}" + * end + * def Chatty.one() end + * class << self + * undef_method(:one) + * end + * end + * + * produces: + * + * Undefining one + */ + + +/* + * Document-method: included + * + * call-seq: + * included( othermod ) + * + * Callback invoked whenever the receiver is included in another + * module or class. This should be used in preference to + * Module.append_features if your code wants to perform some + * action when a module is included in another. + * + * module A + * def A.included(mod) + * puts "#{self} included in #{mod}" + * end + * end + * module Enumerable + * include A + * end + */ + + +/* + * Not documented + */ + +static VALUE +rb_obj_dummy(void) +{ + return Qnil; +} + +/* + * call-seq: + * obj.tainted? => true or false + * + * Returns true if the object is tainted. 
+ */ + +VALUE +rb_obj_tainted(VALUE obj) +{ + if (OBJ_TAINTED(obj)) + return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * obj.taint -> obj + * + * Marks obj as tainted---if the $SAFE level is + * set appropriately, many method calls which might alter the running + * programs environment will refuse to accept tainted strings. + */ + +VALUE +rb_obj_taint(VALUE obj) +{ + rb_secure(4); + if (!OBJ_TAINTED(obj)) { + if (OBJ_FROZEN(obj)) { + rb_error_frozen("object"); + } + OBJ_TAINT(obj); + } + return obj; +} + + +/* + * call-seq: + * obj.untaint => obj + * + * Removes the taint from obj. + */ + +VALUE +rb_obj_untaint(VALUE obj) +{ + rb_secure(3); + if (OBJ_TAINTED(obj)) { + if (OBJ_FROZEN(obj)) { + rb_error_frozen("object"); + } + FL_UNSET(obj, FL_TAINT); + } + return obj; +} + +/* + * call-seq: + * obj.untrusted? => true or false + * + * Returns true if the object is untrusted. + */ + +VALUE +rb_obj_untrusted(VALUE obj) +{ + if (OBJ_UNTRUSTED(obj)) + return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * obj.untrust -> obj + * + * Marks obj as untrusted. + */ + +VALUE +rb_obj_untrust(VALUE obj) +{ + rb_secure(4); + if (!OBJ_UNTRUSTED(obj)) { + if (OBJ_FROZEN(obj)) { + rb_error_frozen("object"); + } + OBJ_UNTRUST(obj); + } + return obj; +} + + +/* + * call-seq: + * obj.trust => obj + * + * Removes the untrusted mark from obj. + */ + +VALUE +rb_obj_trust(VALUE obj) +{ + rb_secure(3); + if (OBJ_UNTRUSTED(obj)) { + if (OBJ_FROZEN(obj)) { + rb_error_frozen("object"); + } + FL_UNSET(obj, FL_UNTRUSTED); + } + return obj; +} + +void +rb_obj_infect(VALUE obj1, VALUE obj2) +{ + OBJ_INFECT(obj1, obj2); +} + +static st_table *immediate_frozen_tbl = 0; + +/* + * call-seq: + * obj.freeze => obj + * + * Prevents further modifications to obj. A + * RuntimeError will be raised if modification is attempted. + * There is no way to unfreeze a frozen object. See also + * Object#frozen?. 
+ * + * a = [ "a", "b", "c" ] + * a.freeze + * a << "z" + * + * produces: + * + * prog.rb:3:in `<<': can't modify frozen array (RuntimeError) + * from prog.rb:3 + */ + +VALUE +rb_obj_freeze(VALUE obj) +{ + if (!OBJ_FROZEN(obj)) { + if (rb_safe_level() >= 4 && !OBJ_UNTRUSTED(obj)) { + rb_raise(rb_eSecurityError, "Insecure: can't freeze object"); + } + OBJ_FREEZE(obj); + if (SPECIAL_CONST_P(obj)) { + if (!immediate_frozen_tbl) { + immediate_frozen_tbl = st_init_numtable(); + } + st_insert(immediate_frozen_tbl, obj, (st_data_t)Qtrue); + } + } + return obj; +} + +/* + * call-seq: + * obj.frozen? => true or false + * + * Returns the freeze status of obj. + * + * a = [ "a", "b", "c" ] + * a.freeze #=> ["a", "b", "c"] + * a.frozen? #=> true + */ + +VALUE +rb_obj_frozen_p(VALUE obj) +{ + if (OBJ_FROZEN(obj)) return Qtrue; + if (SPECIAL_CONST_P(obj)) { + if (!immediate_frozen_tbl) return Qfalse; + if (st_lookup(immediate_frozen_tbl, obj, 0)) return Qtrue; + } + return Qfalse; +} + + +/* + * Document-class: NilClass + * + * The class of the singleton object nil. + */ + +/* + * call-seq: + * nil.to_i => 0 + * + * Always returns zero. + * + * nil.to_i #=> 0 + */ + + +static VALUE +nil_to_i(VALUE obj) +{ + return INT2FIX(0); +} + +/* + * call-seq: + * nil.to_f => 0.0 + * + * Always returns zero. + * + * nil.to_f #=> 0.0 + */ + +static VALUE +nil_to_f(VALUE obj) +{ + return DBL2NUM(0.0); +} + +/* + * call-seq: + * nil.to_s => "" + * + * Always returns the empty string. + */ + +static VALUE +nil_to_s(VALUE obj) +{ + return rb_usascii_str_new(0, 0); +} + +/* + * Document-method: to_a + * + * call-seq: + * nil.to_a => [] + * + * Always returns an empty array. + * + * nil.to_a #=> [] + */ + +static VALUE +nil_to_a(VALUE obj) +{ + return rb_ary_new2(0); +} + +/* + * call-seq: + * nil.inspect => "nil" + * + * Always returns the string "nil". 
+ */ + +static VALUE +nil_inspect(VALUE obj) +{ + return rb_usascii_str_new2("nil"); +} + +/*********************************************************************** + * Document-class: TrueClass + * + * The global value true is the only instance of class + * TrueClass and represents a logically true value in + * boolean expressions. The class provides operators allowing + * true to be used in logical expressions. + */ + + +/* + * call-seq: + * true.to_s => "true" + * + * The string representation of true is "true". + */ + +static VALUE +true_to_s(VALUE obj) +{ + return rb_usascii_str_new2("true"); +} + + +/* + * call-seq: + * true & obj => true or false + * + * And---Returns false if obj is + * nil or false, true otherwise. + */ + +static VALUE +true_and(VALUE obj, VALUE obj2) +{ + return RTEST(obj2)?Qtrue:Qfalse; +} + +/* + * call-seq: + * true | obj => true + * + * Or---Returns true. As anObject is an argument to + * a method call, it is always evaluated; there is no short-circuit + * evaluation in this case. + * + * true | puts("or") + * true || puts("logical or") + * + * produces: + * + * or + */ + +static VALUE +true_or(VALUE obj, VALUE obj2) +{ + return Qtrue; +} + + +/* + * call-seq: + * true ^ obj => !obj + * + * Exclusive Or---Returns true if obj is + * nil or false, false + * otherwise. + */ + +static VALUE +true_xor(VALUE obj, VALUE obj2) +{ + return RTEST(obj2)?Qfalse:Qtrue; +} + + +/* + * Document-class: FalseClass + * + * The global value false is the only instance of class + * FalseClass and represents a logically false value in + * boolean expressions. The class provides operators allowing + * false to participate correctly in logical expressions. + * + */ + +/* + * call-seq: + * false.to_s => "false" + * + * 'nuf said... + */ + +static VALUE +false_to_s(VALUE obj) +{ + return rb_usascii_str_new2("false"); +} + +/* + * call-seq: + * false & obj => false + * nil & obj => false + * + * And---Returns false. 
obj is always + * evaluated as it is the argument to a method call---there is no + * short-circuit evaluation in this case. + */ + +static VALUE +false_and(VALUE obj, VALUE obj2) +{ + return Qfalse; +} + + +/* + * call-seq: + * false | obj => true or false + * nil | obj => true or false + * + * Or---Returns false if obj is + * nil or false; true otherwise. + */ + +static VALUE +false_or(VALUE obj, VALUE obj2) +{ + return RTEST(obj2)?Qtrue:Qfalse; +} + + + +/* + * call-seq: + * false ^ obj => true or false + * nil ^ obj => true or false + * + * Exclusive Or---If obj is nil or + * false, returns false; otherwise, returns + * true. + * + */ + +static VALUE +false_xor(VALUE obj, VALUE obj2) +{ + return RTEST(obj2)?Qtrue:Qfalse; +} + +/* + * call_seq: + * nil.nil? => true + * + * Only the object nil responds true to nil?. + */ + +static VALUE +rb_true(VALUE obj) +{ + return Qtrue; +} + +/* + * call_seq: + * nil.nil? => true + * .nil? => false + * + * Only the object nil responds true to nil?. + */ + + +static VALUE +rb_false(VALUE obj) +{ + return Qfalse; +} + + +/* + * call-seq: + * obj =~ other => nil + * + * Pattern Match---Overridden by descendents (notably + * Regexp and String) to provide meaningful + * pattern-match semantics. + */ + +static VALUE +rb_obj_match(VALUE obj1, VALUE obj2) +{ + return Qnil; +} + +/* + * call-seq: + * obj !~ other => true or false + * + * Returns true if two objects do not match (using the =~ + * method), otherwise false. + */ + +static VALUE +rb_obj_not_match(VALUE obj1, VALUE obj2) +{ + VALUE result = rb_funcall(obj1, id_match, 1, obj2); + return RTEST(result) ? Qfalse : Qtrue; +} + + +/*********************************************************************** + * + * Document-class: Module + * + * A Module is a collection of methods and constants. The + * methods in a module may be instance methods or module methods. + * Instance methods appear as methods in a class when the module is + * included, module methods do not. 
Conversely, module methods may be + * called without creating an encapsulating object, while instance + * methods may not. (See Module#module_function) + * + * In the descriptions that follow, the parameter syml refers + * to a symbol, which is either a quoted string or a + * Symbol (such as :name). + * + * module Mod + * include Math + * CONST = 1 + * def meth + * # ... + * end + * end + * Mod.class #=> Module + * Mod.constants #=> [:CONST, :PI, :E] + * Mod.instance_methods #=> [:meth] + * + */ + +/* + * call-seq: + * mod.to_s => string + * + * Return a string representing this module or class. For basic + * classes and modules, this is the name. For singletons, we + * show information on the thing we're attached to as well. + */ + +static VALUE +rb_mod_to_s(VALUE klass) +{ + if (FL_TEST(klass, FL_SINGLETON)) { + VALUE s = rb_usascii_str_new2("#<"); + VALUE v = rb_iv_get(klass, "__attached__"); + + rb_str_cat2(s, "Class:"); + switch (TYPE(v)) { + case T_CLASS: case T_MODULE: + rb_str_append(s, rb_inspect(v)); + break; + default: + rb_str_append(s, rb_any_to_s(v)); + break; + } + rb_str_cat2(s, ">"); + + return s; + } + return rb_str_dup(rb_class_name(klass)); +} + +/* + * call-seq: + * mod.freeze + * + * Prevents further modifications to mod. + */ + +static VALUE +rb_mod_freeze(VALUE mod) +{ + rb_class_name(mod); + return rb_obj_freeze(mod); +} + +/* + * call-seq: + * mod === obj => true or false + * + * Case Equality---Returns true if anObject is an + * instance of mod or one of mod's descendents. Of + * limited use for modules, but can be used in case + * statements to classify objects by class. + */ + +static VALUE +rb_mod_eqq(VALUE mod, VALUE arg) +{ + return rb_obj_is_kind_of(arg, mod); +} + +/* + * call-seq: + * mod <= other => true, false, or nil + * + * Returns true if mod is a subclass of other or + * is the same as other. Returns + * nil if there's no relationship between the two. 
+ * (Think of the relationship in terms of the class definition: + * "class A arg */ + while (arg) { + if (RCLASS_M_TBL(arg) == RCLASS_M_TBL(start)) + return Qfalse; + arg = RCLASS_SUPER(arg); + } + return Qnil; +} + +/* + * call-seq: + * mod < other => true, false, or nil + * + * Returns true if mod is a subclass of other. Returns + * nil if there's no relationship between the two. + * (Think of the relationship in terms of the class definition: + * "class A= other => true, false, or nil + * + * Returns true if mod is an ancestor of other, or the + * two modules are the same. Returns + * nil if there's no relationship between the two. + * (Think of the relationship in terms of the class definition: + * "class AA"). + * + */ + +static VALUE +rb_mod_ge(VALUE mod, VALUE arg) +{ + switch (TYPE(arg)) { + case T_MODULE: + case T_CLASS: + break; + default: + rb_raise(rb_eTypeError, "compared with non class/module"); + } + + return rb_class_inherited_p(arg, mod); +} + +/* + * call-seq: + * mod > other => true, false, or nil + * + * Returns true if mod is an ancestor of other. Returns + * nil if there's no relationship between the two. + * (Think of the relationship in terms of the class definition: + * "class AA"). + * + */ + +static VALUE +rb_mod_gt(VALUE mod, VALUE arg) +{ + if (mod == arg) return Qfalse; + return rb_mod_ge(mod, arg); +} + +/* + * call-seq: + * mod <=> other_mod => -1, 0, +1, or nil + * + * Comparison---Returns -1 if mod includes other_mod, 0 if + * mod is the same as other_mod, and +1 if mod is + * included by other_mod. Returns nil if mod + * has no relationship with other_mod or if other_mod is + * not a module. 
+ */ + +static VALUE +rb_mod_cmp(VALUE mod, VALUE arg) +{ + VALUE cmp; + + if (mod == arg) return INT2FIX(0); + switch (TYPE(arg)) { + case T_MODULE: + case T_CLASS: + break; + default: + return Qnil; + } + + cmp = rb_class_inherited_p(mod, arg); + if (NIL_P(cmp)) return Qnil; + if (cmp) { + return INT2FIX(-1); + } + return INT2FIX(1); +} + +static VALUE +rb_module_s_alloc(VALUE klass) +{ + VALUE mod = rb_module_new(); + + RBASIC(mod)->klass = klass; + return mod; +} + +static VALUE +rb_class_s_alloc(VALUE klass) +{ + return rb_class_boot(0); +} + +/* + * call-seq: + * Module.new => mod + * Module.new {|mod| block } => mod + * + * Creates a new anonymous module. If a block is given, it is passed + * the module object, and the block is evaluated in the context of this + * module using module_eval. + * + * Fred = Module.new do + * def meth1 + * "hello" + * end + * def meth2 + * "bye" + * end + * end + * a = "my string" + * a.extend(Fred) #=> "my string" + * a.meth1 #=> "hello" + * a.meth2 #=> "bye" + */ + +static VALUE +rb_mod_initialize(VALUE module) +{ + extern VALUE rb_mod_module_exec(int argc, VALUE *argv, VALUE mod); + + if (rb_block_given_p()) { + rb_mod_module_exec(1, &module, module); + } + return Qnil; +} + +/* + * call-seq: + * Class.new(super_class=Object) => a_class + * + * Creates a new anonymous (unnamed) class with the given superclass + * (or Object if no parameter is given). You can give a + * class a name by assigning the class object to a constant. 
+ * + */ + +static VALUE +rb_class_initialize(int argc, VALUE *argv, VALUE klass) +{ + VALUE super; + + if (RCLASS_SUPER(klass) != 0) { + rb_raise(rb_eTypeError, "already initialized class"); + } + if (argc == 0) { + super = rb_cObject; + } + else { + rb_scan_args(argc, argv, "01", &super); + rb_check_inheritable(super); + } + RCLASS_SUPER(klass) = super; + rb_make_metaclass(klass, RBASIC(super)->klass); + rb_class_inherited(super, klass); + rb_mod_initialize(klass); + + return klass; +} + +/* + * call-seq: + * class.allocate() => obj + * + * Allocates space for a new object of class's class and does not + * call initialize on the new instance. The returned object must be an + * instance of class. + * + * klass = Class.new do + * def initialize(*args) + * @initialized = true + * end + * + * def initialized? + * @initialized || false + * end + * end + * + * klass.allocate.initialized? #=> false + * + */ + +VALUE +rb_obj_alloc(VALUE klass) +{ + VALUE obj; + + if (RCLASS_SUPER(klass) == 0 && klass != rb_cBasicObject) { + rb_raise(rb_eTypeError, "can't instantiate uninitialized class"); + } + if (FL_TEST(klass, FL_SINGLETON)) { + rb_raise(rb_eTypeError, "can't create instance of singleton class"); + } + obj = rb_funcall(klass, ID_ALLOCATOR, 0, 0); + if (rb_obj_class(obj) != rb_class_real(klass)) { + rb_raise(rb_eTypeError, "wrong instance allocation"); + } + return obj; +} + +static VALUE +rb_class_allocate_instance(VALUE klass) +{ + NEWOBJ(obj, struct RObject); + OBJSETUP(obj, klass, T_OBJECT); + return (VALUE)obj; +} + +/* + * call-seq: + * class.new(args, ...) => obj + * + * Calls allocate to create a new object of + * class's class, then invokes that object's + * initialize method, passing it args. + * This is the method that ends up getting called whenever + * an object is constructed using .new. 
+ * + */ + +VALUE +rb_class_new_instance(int argc, VALUE *argv, VALUE klass) +{ + VALUE obj; + + obj = rb_obj_alloc(klass); + rb_obj_call_init(obj, argc, argv); + + return obj; +} + +/* + * call-seq: + * class.superclass -> a_super_class or nil + * + * Returns the superclass of class, or nil. + * + * File.superclass #=> IO + * IO.superclass #=> Object + * Object.superclass #=> BasicObject + * class Foo; end + * class Bar < Foo; end + * Bar.superclass #=> Foo + * + * returns nil when the given class hasn't a parent class: + * + * BasicObject.superclass #=> nil + * + */ + +static VALUE +rb_class_superclass(VALUE klass) +{ + VALUE super = RCLASS_SUPER(klass); + + if (!super) { + if (klass == rb_cBasicObject) return Qnil; + rb_raise(rb_eTypeError, "uninitialized class"); + } + while (TYPE(super) == T_ICLASS) { + super = RCLASS_SUPER(super); + } + if (!super) { + return Qnil; + } + return super; +} + +/* + * call-seq: + * attr_reader(symbol, ...) => nil + * attr(symbol, ...) => nil + * + * Creates instance variables and corresponding methods that return the + * value of each instance variable. Equivalent to calling + * ``attr:name'' on each name in turn. + */ + +static VALUE +rb_mod_attr_reader(int argc, VALUE *argv, VALUE klass) +{ + int i; + + for (i=0; i nil + * + * Creates an accessor method to allow assignment to the attribute + * aSymbol.id2name. + */ + +static VALUE +rb_mod_attr_writer(int argc, VALUE *argv, VALUE klass) +{ + int i; + + for (i=0; i nil + * + * Defines a named attribute for this module, where the name is + * symbol.id2name, creating an instance variable + * (@name) and a corresponding access method to read it. + * Also creates a method called name= to set the attribute. 
+ * + * module Mod + * attr_accessor(:one, :two) + * end + * Mod.instance_methods.sort #=> [:one, :one=, :two, :two=] + */ + +static VALUE +rb_mod_attr_accessor(int argc, VALUE *argv, VALUE klass) +{ + int i; + + for (i=0; i obj + * + * Returns the value of the named constant in mod. + * + * Math.const_get(:PI) #=> 3.14159265358979 + * + * If the constant is not defined or is defined by the ancestors and + * +inherit+ is false, +NameError+ will be raised. + */ + +static VALUE +rb_mod_const_get(int argc, VALUE *argv, VALUE mod) +{ + VALUE name, recur; + ID id; + + if (argc == 1) { + name = argv[0]; + recur = Qtrue; + } + else { + rb_scan_args(argc, argv, "11", &name, &recur); + } + id = rb_to_id(name); + if (!rb_is_const_id(id)) { + rb_name_error(id, "wrong constant name %s", rb_id2name(id)); + } + return RTEST(recur) ? rb_const_get(mod, id) : rb_const_get_at(mod, id); +} + +/* + * call-seq: + * mod.const_set(sym, obj) => obj + * + * Sets the named constant to the given object, returning that object. + * Creates a new constant if no constant with the given name previously + * existed. + * + * Math.const_set("HIGH_SCHOOL_PI", 22.0/7.0) #=> 3.14285714285714 + * Math::HIGH_SCHOOL_PI - Math::PI #=> 0.00126448926734968 + */ + +static VALUE +rb_mod_const_set(VALUE mod, VALUE name, VALUE value) +{ + ID id = rb_to_id(name); + + if (!rb_is_const_id(id)) { + rb_name_error(id, "wrong constant name %s", rb_id2name(id)); + } + rb_const_set(mod, id, value); + return value; +} + +/* + * call-seq: + * mod.const_defined?(sym, inherit=true) => true or false + * + * Returns true if a constant with the given name is + * defined by mod, or its ancestors if +inherit+ is not false. + * + * Math.const_defined? "PI" #=> true + * IO.const_defined? "SYNC" #=> true + * IO.const_defined? 
"SYNC", false #=> false + */ + +static VALUE +rb_mod_const_defined(int argc, VALUE *argv, VALUE mod) +{ + VALUE name, recur; + ID id; + + if (argc == 1) { + name = argv[0]; + recur = Qtrue; + } + else { + rb_scan_args(argc, argv, "11", &name, &recur); + } + id = rb_to_id(name); + if (!rb_is_const_id(id)) { + rb_name_error(id, "wrong constant name %s", rb_id2name(id)); + } + return RTEST(recur) ? rb_const_defined(mod, id) : rb_const_defined_at(mod, id); +} + +/* + * call-seq: + * obj.methods => array + * + * Returns a list of the names of methods publicly accessible in + * obj. This will include all the methods accessible in + * obj's ancestors. + * + * class Klass + * def kMethod() + * end + * end + * k = Klass.new + * k.methods[0..9] #=> ["kMethod", "freeze", "nil?", "is_a?", + * # "class", "instance_variable_set", + * # "methods", "extend", "__send__", "instance_eval"] + * k.methods.length #=> 42 + */ + +static VALUE +rb_obj_methods(int argc, VALUE *argv, VALUE obj) +{ + retry: + if (argc == 0) { + VALUE args[1]; + + args[0] = Qtrue; + return rb_class_instance_methods(1, args, CLASS_OF(obj)); + } + else { + VALUE recur; + + rb_scan_args(argc, argv, "1", &recur); + if (RTEST(recur)) { + argc = 0; + goto retry; + } + return rb_obj_singleton_methods(argc, argv, obj); + } +} + +/* + * call-seq: + * obj.protected_methods(all=true) => array + * + * Returns the list of protected methods accessible to obj. If + * the all parameter is set to false, only those methods + * in the receiver will be listed. + */ + +static VALUE +rb_obj_protected_methods(int argc, VALUE *argv, VALUE obj) +{ + if (argc == 0) { /* hack to stop warning */ + VALUE args[1]; + + args[0] = Qtrue; + return rb_class_protected_instance_methods(1, args, CLASS_OF(obj)); + } + return rb_class_protected_instance_methods(argc, argv, CLASS_OF(obj)); +} + +/* + * call-seq: + * obj.private_methods(all=true) => array + * + * Returns the list of private methods accessible to obj. 
If + * the all parameter is set to false, only those methods + * in the receiver will be listed. + */ + +static VALUE +rb_obj_private_methods(int argc, VALUE *argv, VALUE obj) +{ + if (argc == 0) { /* hack to stop warning */ + VALUE args[1]; + + args[0] = Qtrue; + return rb_class_private_instance_methods(1, args, CLASS_OF(obj)); + } + return rb_class_private_instance_methods(argc, argv, CLASS_OF(obj)); +} + +/* + * call-seq: + * obj.public_methods(all=true) => array + * + * Returns the list of public methods accessible to obj. If + * the all parameter is set to false, only those methods + * in the receiver will be listed. + */ + +static VALUE +rb_obj_public_methods(int argc, VALUE *argv, VALUE obj) +{ + if (argc == 0) { /* hack to stop warning */ + VALUE args[1]; + + args[0] = Qtrue; + return rb_class_public_instance_methods(1, args, CLASS_OF(obj)); + } + return rb_class_public_instance_methods(argc, argv, CLASS_OF(obj)); +} + +/* + * call-seq: + * obj.instance_variable_get(symbol) => obj + * + * Returns the value of the given instance variable, or nil if the + * instance variable is not set. The @ part of the + * variable name should be included for regular instance + * variables. Throws a NameError exception if the + * supplied symbol is not valid as an instance variable name. + * + * class Fred + * def initialize(p1, p2) + * @a, @b = p1, p2 + * end + * end + * fred = Fred.new('cat', 99) + * fred.instance_variable_get(:@a) #=> "cat" + * fred.instance_variable_get("@b") #=> 99 + */ + +static VALUE +rb_obj_ivar_get(VALUE obj, VALUE iv) +{ + ID id = rb_to_id(iv); + + if (!rb_is_instance_id(id)) { + rb_name_error(id, "`%s' is not allowed as an instance variable name", rb_id2name(id)); + } + return rb_ivar_get(obj, id); +} + +/* + * call-seq: + * obj.instance_variable_set(symbol, obj) => obj + * + * Sets the instance variable names by symbol to + * object, thereby frustrating the efforts of the class's + * author to attempt to provide proper encapsulation. 
The variable + * did not have to exist prior to this call. + * + * class Fred + * def initialize(p1, p2) + * @a, @b = p1, p2 + * end + * end + * fred = Fred.new('cat', 99) + * fred.instance_variable_set(:@a, 'dog') #=> "dog" + * fred.instance_variable_set(:@c, 'cat') #=> "cat" + * fred.inspect #=> "#" + */ + +static VALUE +rb_obj_ivar_set(VALUE obj, VALUE iv, VALUE val) +{ + ID id = rb_to_id(iv); + + if (!rb_is_instance_id(id)) { + rb_name_error(id, "`%s' is not allowed as an instance variable name", rb_id2name(id)); + } + return rb_ivar_set(obj, id, val); +} + +/* + * call-seq: + * obj.instance_variable_defined?(symbol) => true or false + * + * Returns true if the given instance variable is + * defined in obj. + * + * class Fred + * def initialize(p1, p2) + * @a, @b = p1, p2 + * end + * end + * fred = Fred.new('cat', 99) + * fred.instance_variable_defined?(:@a) #=> true + * fred.instance_variable_defined?("@b") #=> true + * fred.instance_variable_defined?("@c") #=> false + */ + +static VALUE +rb_obj_ivar_defined(VALUE obj, VALUE iv) +{ + ID id = rb_to_id(iv); + + if (!rb_is_instance_id(id)) { + rb_name_error(id, "`%s' is not allowed as an instance variable name", rb_id2name(id)); + } + return rb_ivar_defined(obj, id); +} + +/* + * call-seq: + * mod.class_variable_get(symbol) => obj + * + * Returns the value of the given class variable (or throws a + * NameError exception). The @@ part of the + * variable name should be included for regular class variables + * + * class Fred + * @@foo = 99 + * end + * Fred.class_variable_get(:@@foo) #=> 99 + */ + +static VALUE +rb_mod_cvar_get(VALUE obj, VALUE iv) +{ + ID id = rb_to_id(iv); + + if (!rb_is_class_id(id)) { + rb_name_error(id, "`%s' is not allowed as a class variable name", rb_id2name(id)); + } + return rb_cvar_get(obj, id); +} + +/* + * call-seq: + * obj.class_variable_set(symbol, obj) => obj + * + * Sets the class variable names by symbol to + * object. 
+ * + * class Fred + * @@foo = 99 + * def foo + * @@foo + * end + * end + * Fred.class_variable_set(:@@foo, 101) #=> 101 + * Fred.new.foo #=> 101 + */ + +static VALUE +rb_mod_cvar_set(VALUE obj, VALUE iv, VALUE val) +{ + ID id = rb_to_id(iv); + + if (!rb_is_class_id(id)) { + rb_name_error(id, "`%s' is not allowed as a class variable name", rb_id2name(id)); + } + rb_cvar_set(obj, id, val); + return val; +} + +/* + * call-seq: + * obj.class_variable_defined?(symbol) => true or false + * + * Returns true if the given class variable is defined + * in obj. + * + * class Fred + * @@foo = 99 + * end + * Fred.class_variable_defined?(:@@foo) #=> true + * Fred.class_variable_defined?(:@@bar) #=> false + */ + +static VALUE +rb_mod_cvar_defined(VALUE obj, VALUE iv) +{ + ID id = rb_to_id(iv); + + if (!rb_is_class_id(id)) { + rb_name_error(id, "`%s' is not allowed as a class variable name", rb_id2name(id)); + } + return rb_cvar_defined(obj, id); +} + +static struct conv_method_tbl { + const char *method; + ID id; +} conv_method_names[] = { + {"to_int", 0}, + {"to_ary", 0}, + {"to_str", 0}, + {"to_sym", 0}, + {"to_hash", 0}, + {"to_proc", 0}, + {"to_io", 0}, + {"to_a", 0}, + {"to_s", 0}, + {NULL, 0} +}; + +static VALUE +convert_type(VALUE val, const char *tname, const char *method, int raise) +{ + ID m = 0; + int i; + + for (i=0; conv_method_names[i].method; i++) { + if (conv_method_names[i].method[0] == method[0] && + strcmp(conv_method_names[i].method, method) == 0) { + m = conv_method_names[i].id; + break; + } + } + if (!m) m = rb_intern(method); + if (!rb_respond_to(val, m)) { + if (raise) { + rb_raise(rb_eTypeError, "can't convert %s into %s", + NIL_P(val) ? "nil" : + val == Qtrue ? "true" : + val == Qfalse ? 
"false" : + rb_obj_classname(val), + tname); + } + else { + return Qnil; + } + } + return rb_funcall(val, m, 0); +} + +VALUE +rb_convert_type(VALUE val, int type, const char *tname, const char *method) +{ + VALUE v; + + if (TYPE(val) == type) return val; + v = convert_type(val, tname, method, Qtrue); + if (TYPE(v) != type) { + const char *cname = rb_obj_classname(val); + rb_raise(rb_eTypeError, "can't convert %s to %s (%s#%s gives %s)", + cname, tname, cname, method, rb_obj_classname(v)); + } + return v; +} + +VALUE +rb_check_convert_type(VALUE val, int type, const char *tname, const char *method) +{ + VALUE v; + + /* always convert T_DATA */ + if (TYPE(val) == type && type != T_DATA) return val; + v = convert_type(val, tname, method, Qfalse); + if (NIL_P(v)) return Qnil; + if (TYPE(v) != type) { + const char *cname = rb_obj_classname(val); + rb_raise(rb_eTypeError, "can't convert %s to %s (%s#%s gives %s)", + cname, tname, cname, method, rb_obj_classname(v)); + } + return v; +} + + +static VALUE +rb_to_integer(VALUE val, const char *method) +{ + VALUE v; + + if (FIXNUM_P(val)) return val; + v = convert_type(val, "Integer", method, Qtrue); + if (!rb_obj_is_kind_of(v, rb_cInteger)) { + const char *cname = rb_obj_classname(val); + rb_raise(rb_eTypeError, "can't convert %s to Integer (%s#%s gives %s)", + cname, cname, method, rb_obj_classname(v)); + } + return v; +} + +VALUE +rb_check_to_integer(VALUE val, const char *method) +{ + VALUE v; + + if (FIXNUM_P(val)) return val; + v = convert_type(val, "Integer", method, Qfalse); + if (!rb_obj_is_kind_of(v, rb_cInteger)) { + return Qnil; + } + return v; +} + +VALUE +rb_to_int(VALUE val) +{ + return rb_to_integer(val, "to_int"); +} + +VALUE +rb_Integer(VALUE val) +{ + VALUE tmp; + + switch (TYPE(val)) { + case T_FLOAT: + if (RFLOAT_VALUE(val) <= (double)FIXNUM_MAX + && RFLOAT_VALUE(val) >= (double)FIXNUM_MIN) { + break; + } + return rb_dbl2big(RFLOAT_VALUE(val)); + + case T_FIXNUM: + case T_BIGNUM: + return val; + + case 
T_STRING: + return rb_str_to_inum(val, 0, Qtrue); + + case T_NIL: + rb_raise(rb_eTypeError, "can't convert nil into Integer"); + break; + + default: + break; + } + tmp = convert_type(val, "Integer", "to_int", Qfalse); + if (NIL_P(tmp)) { + return rb_to_integer(val, "to_i"); + } + return tmp; +} + +/* + * call-seq: + * Integer(arg) => integer + * + * Converts arg to a Fixnum or Bignum. + * Numeric types are converted directly (with floating point numbers + * being truncated). If arg is a String, leading + * radix indicators (0, 0b, and + * 0x) are honored. Others are converted using + * to_int and to_i. This behavior is + * different from that of String#to_i. + * + * Integer(123.999) #=> 123 + * Integer("0x1a") #=> 26 + * Integer(Time.new) #=> 1204973019 + */ + +static VALUE +rb_f_integer(VALUE obj, VALUE arg) +{ + return rb_Integer(arg); +} + +double +rb_cstr_to_dbl(const char *p, int badcheck) +{ + const char *q; + char *end; + double d; + const char *ellipsis = ""; + int w; +#define OutOfRange() (((w = end - p) > 20) ? 
(w = 20, ellipsis = "...") : (ellipsis = "")) + + if (!p) return 0.0; + q = p; + while (ISSPACE(*p)) p++; + d = strtod(p, &end); + if (errno == ERANGE) { + OutOfRange(); + rb_warning("Float %.*s%s out of range", w, p, ellipsis); + errno = 0; + } + if (p == end) { + if (badcheck) { + bad: + rb_invalid_str(q, "Float()"); + } + return d; + } + if (*end) { + char buf[DBL_DIG * 4 + 10]; + char *n = buf; + char *e = buf + sizeof(buf) - 1; + char prev = 0; + + while (p < end && n < e) prev = *n++ = *p++; + while (*p) { + if (*p == '_') { + /* remove underscores between digits */ + if (badcheck) { + if (n == buf || !ISDIGIT(prev)) goto bad; + ++p; + if (!ISDIGIT(*p)) goto bad; + } + else { + while (*++p == '_'); + continue; + } + } + prev = *p++; + if (n < e) *n++ = prev; + } + *n = '\0'; + p = buf; + d = strtod(p, &end); + if (errno == ERANGE) { + OutOfRange(); + rb_warning("Float %.*s%s out of range", w, p, ellipsis); + errno = 0; + } + if (badcheck) { + if (!end || p == end) goto bad; + while (*end && ISSPACE(*end)) end++; + if (*end) goto bad; + } + } + if (errno == ERANGE) { + errno = 0; + OutOfRange(); + rb_raise(rb_eArgError, "Float %.*s%s out of range", w, q, ellipsis); + } + return d; +} + +double +rb_str_to_dbl(VALUE str, int badcheck) +{ + char *s; + long len; + + StringValue(str); + s = RSTRING_PTR(str); + len = RSTRING_LEN(str); + if (s) { + if (s[len]) { /* no sentinel somehow */ + char *p = ALLOCA_N(char, len+1); + + MEMCPY(p, s, char, len); + p[len] = '\0'; + s = p; + } + if (badcheck && len != strlen(s)) { + rb_raise(rb_eArgError, "string for Float contains null byte"); + } + } + return rb_cstr_to_dbl(s, badcheck); +} + +VALUE +rb_Float(VALUE val) +{ + switch (TYPE(val)) { + case T_FIXNUM: + return DBL2NUM((double)FIX2LONG(val)); + + case T_FLOAT: + return val; + + case T_BIGNUM: + return DBL2NUM(rb_big2dbl(val)); + + case T_STRING: + return DBL2NUM(rb_str_to_dbl(val, Qtrue)); + + case T_NIL: + rb_raise(rb_eTypeError, "can't convert nil into Float"); + 
break; + + default: + return rb_convert_type(val, T_FLOAT, "Float", "to_f"); + } +} + +/* + * call-seq: + * Float(arg) => float + * + * Returns arg converted to a float. Numeric types are converted + * directly, the rest are converted using arg.to_f. As of Ruby + * 1.8, converting nil generates a TypeError. + * + * Float(1) #=> 1.0 + * Float("123.456") #=> 123.456 + */ + +static VALUE +rb_f_float(VALUE obj, VALUE arg) +{ + return rb_Float(arg); +} + +VALUE +rb_to_float(VALUE val) +{ + if (TYPE(val) == T_FLOAT) return val; + if (!rb_obj_is_kind_of(val, rb_cNumeric)) { + rb_raise(rb_eTypeError, "can't convert %s into Float", + NIL_P(val) ? "nil" : + val == Qtrue ? "true" : + val == Qfalse ? "false" : + rb_obj_classname(val)); + } + return rb_convert_type(val, T_FLOAT, "Float", "to_f"); +} + +double +rb_num2dbl(VALUE val) +{ + switch (TYPE(val)) { + case T_FLOAT: + return RFLOAT_VALUE(val); + + case T_STRING: + rb_raise(rb_eTypeError, "no implicit conversion to float from string"); + break; + + case T_NIL: + rb_raise(rb_eTypeError, "no implicit conversion to float from nil"); + break; + + default: + break; + } + + return RFLOAT_VALUE(rb_Float(val)); +} + +char* +rb_str2cstr(VALUE str, long *len) +{ + StringValue(str); + if (len) *len = RSTRING_LEN(str); + else if (RTEST(ruby_verbose) && RSTRING_LEN(str) != strlen(RSTRING_PTR(str))) { + rb_warn("string contains \\0 character"); + } + return RSTRING_PTR(str); +} + +VALUE +rb_String(VALUE val) +{ + return rb_convert_type(val, T_STRING, "String", "to_s"); +} + + +/* + * call-seq: + * String(arg) => string + * + * Converts arg to a String by calling its + * to_s method. 
+ * + * String(self) #=> "main" + * String(self.class) #=> "Object" + * String(123456) #=> "123456" + */ + +static VALUE +rb_f_string(VALUE obj, VALUE arg) +{ + return rb_String(arg); +} + +VALUE +rb_Array(VALUE val) +{ + VALUE tmp = rb_check_array_type(val); + + if (NIL_P(tmp)) { + tmp = rb_check_convert_type(val, T_ARRAY, "Array", "to_a"); + if (NIL_P(tmp)) { + return rb_ary_new3(1, val); + } + } + return tmp; +} + +/* + * call-seq: + * Array(arg) => array + * + * Returns arg as an Array. First tries to call + * arg.to_ary, then arg.to_a. + * + * Array(1..5) #=> [1, 2, 3, 4, 5] + */ + +static VALUE +rb_f_array(VALUE obj, VALUE arg) +{ + return rb_Array(arg); +} + +static VALUE +boot_defclass(const char *name, VALUE super) +{ + extern st_table *rb_class_tbl; + VALUE obj = rb_class_boot(super); + ID id = rb_intern(name); + + rb_name_class(obj, id); + st_add_direct(rb_class_tbl, id, obj); + rb_const_set((rb_cObject ? rb_cObject : obj), id, obj); + return obj; +} + +static void +boot_defmetametaclass(VALUE klass, VALUE metametaclass) +{ + RBASIC(RBASIC(klass)->klass)->klass = metametaclass; +} + +/* + * Document-class: Class + * + * Classes in Ruby are first-class objects---each is an instance of + * class Class. + * + * When a new class is created (typically using class Name ... + * end), an object of type Class is created and + * assigned to a global constant (Name in this case). When + * Name.new is called to create a new object, the + * new method in Class is run by default. + * This can be demonstrated by overriding new in + * Class: + * + * class Class + * alias oldNew new + * def new(*args) + * print "Creating a new ", self.name, "\n" + * oldNew(*args) + * end + * end + * + * + * class Name + * end + * + * + * n = Name.new + * + * produces: + * + * Creating a new Name + * + * Classes, modules, and objects are interrelated. In the diagram + * that follows, the vertical arrows represent inheritance, and the + * parentheses meta-classes. 
All metaclasses are instances
 *  of the class `Class'.
 *
 *                            +-----------------+
 *                            |                 |
 *          BasicObject-->(BasicObject)         |
 *               ^           ^                  |
 *               |           |                  |
 *            Object---->(Object)               |
 *             ^  ^        ^  ^                 |
 *             |  |        |  |                 |
 *             |  |  +-----+  +---------+       |
 *             |  |  |                  |       |
 *             |  +-----------+         |       |
 *             |     |        |         |       |
 *      +------+     |     Module--->(Module)   |
 *      |            |        ^         ^       |
 * OtherClass-->(OtherClass)  |         |       |
 *                            |         |       |
 *                          Class---->(Class)   |
 *                            ^                 |
 *                            |                 |
 *                            +-----------------+
 */


/*
 *  BasicObject is the parent class of all classes in Ruby.
 *  It's an explicit blank class.  Object, the root of Ruby's
 *  class hierarchy, is a direct subclass of BasicObject.  Its
 *  methods are therefore available to all objects unless explicitly
 *  overridden.
 *
 *  Object mixes in the Kernel module, making
 *  the built-in kernel functions globally accessible. Although the
 *  instance methods of Object are defined by the
 *  Kernel module, we have chosen to document them here for
 *  clarity.
 *
 *  In the descriptions of Object's methods, the parameter symbol refers
 *  to a symbol, which is either a quoted string or a
 *  Symbol (such as :name).
+ */ + +void +Init_Object(void) +{ + int i; + +#undef rb_intern +#define rb_intern(str) rb_intern_const(str) + + VALUE metaclass; + + rb_cBasicObject = boot_defclass("BasicObject", 0); + rb_cObject = boot_defclass("Object", rb_cBasicObject); + rb_cModule = boot_defclass("Module", rb_cObject); + rb_cClass = boot_defclass("Class", rb_cModule); + + metaclass = rb_make_metaclass(rb_cBasicObject, rb_cClass); + metaclass = rb_make_metaclass(rb_cObject, metaclass); + metaclass = rb_make_metaclass(rb_cModule, metaclass); + metaclass = rb_make_metaclass(rb_cClass, metaclass); + boot_defmetametaclass(rb_cModule, metaclass); + boot_defmetametaclass(rb_cObject, metaclass); + boot_defmetametaclass(rb_cBasicObject, metaclass); + + rb_define_private_method(rb_cBasicObject, "initialize", rb_obj_dummy, 0); + rb_define_alloc_func(rb_cBasicObject, rb_class_allocate_instance); + rb_define_method(rb_cBasicObject, "==", rb_obj_equal, 1); + rb_define_method(rb_cBasicObject, "equal?", rb_obj_equal, 1); + rb_define_method(rb_cBasicObject, "!", rb_obj_not, 0); + rb_define_method(rb_cBasicObject, "!=", rb_obj_not_equal, 1); + + rb_define_private_method(rb_cBasicObject, "singleton_method_added", rb_obj_dummy, 1); + rb_define_private_method(rb_cBasicObject, "singleton_method_removed", rb_obj_dummy, 1); + rb_define_private_method(rb_cBasicObject, "singleton_method_undefined", rb_obj_dummy, 1); + + rb_mKernel = rb_define_module("Kernel"); + rb_include_module(rb_cObject, rb_mKernel); + rb_define_private_method(rb_cClass, "inherited", rb_obj_dummy, 1); + rb_define_private_method(rb_cModule, "included", rb_obj_dummy, 1); + rb_define_private_method(rb_cModule, "extended", rb_obj_dummy, 1); + rb_define_private_method(rb_cModule, "method_added", rb_obj_dummy, 1); + rb_define_private_method(rb_cModule, "method_removed", rb_obj_dummy, 1); + rb_define_private_method(rb_cModule, "method_undefined", rb_obj_dummy, 1); + + rb_define_method(rb_mKernel, "nil?", rb_false, 0); + rb_define_method(rb_mKernel, 
"===", rb_equal, 1); + rb_define_method(rb_mKernel, "=~", rb_obj_match, 1); + rb_define_method(rb_mKernel, "!~", rb_obj_not_match, 1); + rb_define_method(rb_mKernel, "eql?", rb_obj_equal, 1); + + rb_define_method(rb_mKernel, "class", rb_obj_class, 0); + rb_define_method(rb_mKernel, "clone", rb_obj_clone, 0); + rb_define_method(rb_mKernel, "dup", rb_obj_dup, 0); + rb_define_method(rb_mKernel, "initialize_copy", rb_obj_init_copy, 1); + + rb_define_method(rb_mKernel, "taint", rb_obj_taint, 0); + rb_define_method(rb_mKernel, "tainted?", rb_obj_tainted, 0); + rb_define_method(rb_mKernel, "untaint", rb_obj_untaint, 0); + rb_define_method(rb_mKernel, "untrust", rb_obj_untrust, 0); + rb_define_method(rb_mKernel, "untrusted?", rb_obj_untrusted, 0); + rb_define_method(rb_mKernel, "trust", rb_obj_trust, 0); + rb_define_method(rb_mKernel, "freeze", rb_obj_freeze, 0); + rb_define_method(rb_mKernel, "frozen?", rb_obj_frozen_p, 0); + + rb_define_method(rb_mKernel, "to_s", rb_any_to_s, 0); + rb_define_method(rb_mKernel, "inspect", rb_obj_inspect, 0); + rb_define_method(rb_mKernel, "methods", rb_obj_methods, -1); + rb_define_method(rb_mKernel, "singleton_methods", rb_obj_singleton_methods, -1); /* in class.c */ + rb_define_method(rb_mKernel, "protected_methods", rb_obj_protected_methods, -1); + rb_define_method(rb_mKernel, "private_methods", rb_obj_private_methods, -1); + rb_define_method(rb_mKernel, "public_methods", rb_obj_public_methods, -1); + rb_define_method(rb_mKernel, "instance_variables", rb_obj_instance_variables, 0); /* in variable.c */ + rb_define_method(rb_mKernel, "instance_variable_get", rb_obj_ivar_get, 1); + rb_define_method(rb_mKernel, "instance_variable_set", rb_obj_ivar_set, 2); + rb_define_method(rb_mKernel, "instance_variable_defined?", rb_obj_ivar_defined, 1); + rb_define_private_method(rb_mKernel, "remove_instance_variable", + rb_obj_remove_instance_variable, 1); /* in variable.c */ + + rb_define_method(rb_mKernel, "instance_of?", rb_obj_is_instance_of, 1); 
+ rb_define_method(rb_mKernel, "kind_of?", rb_obj_is_kind_of, 1); + rb_define_method(rb_mKernel, "is_a?", rb_obj_is_kind_of, 1); + rb_define_method(rb_mKernel, "tap", rb_obj_tap, 0); + + rb_define_global_function("sprintf", rb_f_sprintf, -1); /* in sprintf.c */ + rb_define_global_function("format", rb_f_sprintf, -1); /* in sprintf.c */ + + rb_define_global_function("Integer", rb_f_integer, 1); + rb_define_global_function("Float", rb_f_float, 1); + + rb_define_global_function("String", rb_f_string, 1); + rb_define_global_function("Array", rb_f_array, 1); + + rb_cNilClass = rb_define_class("NilClass", rb_cObject); + rb_define_method(rb_cNilClass, "to_i", nil_to_i, 0); + rb_define_method(rb_cNilClass, "to_f", nil_to_f, 0); + rb_define_method(rb_cNilClass, "to_s", nil_to_s, 0); + rb_define_method(rb_cNilClass, "to_a", nil_to_a, 0); + rb_define_method(rb_cNilClass, "inspect", nil_inspect, 0); + rb_define_method(rb_cNilClass, "&", false_and, 1); + rb_define_method(rb_cNilClass, "|", false_or, 1); + rb_define_method(rb_cNilClass, "^", false_xor, 1); + + rb_define_method(rb_cNilClass, "nil?", rb_true, 0); + rb_undef_alloc_func(rb_cNilClass); + rb_undef_method(CLASS_OF(rb_cNilClass), "new"); + rb_define_global_const("NIL", Qnil); + + rb_define_method(rb_cModule, "freeze", rb_mod_freeze, 0); + rb_define_method(rb_cModule, "===", rb_mod_eqq, 1); + rb_define_method(rb_cModule, "==", rb_obj_equal, 1); + rb_define_method(rb_cModule, "<=>", rb_mod_cmp, 1); + rb_define_method(rb_cModule, "<", rb_mod_lt, 1); + rb_define_method(rb_cModule, "<=", rb_class_inherited_p, 1); + rb_define_method(rb_cModule, ">", rb_mod_gt, 1); + rb_define_method(rb_cModule, ">=", rb_mod_ge, 1); + rb_define_method(rb_cModule, "initialize_copy", rb_mod_init_copy, 1); /* in class.c */ + rb_define_method(rb_cModule, "to_s", rb_mod_to_s, 0); + rb_define_method(rb_cModule, "included_modules", rb_mod_included_modules, 0); /* in class.c */ + rb_define_method(rb_cModule, "include?", rb_mod_include_p, 1); /* in 
class.c */ + rb_define_method(rb_cModule, "name", rb_mod_name, 0); /* in variable.c */ + rb_define_method(rb_cModule, "ancestors", rb_mod_ancestors, 0); /* in class.c */ + + rb_define_private_method(rb_cModule, "attr", rb_mod_attr, -1); + rb_define_private_method(rb_cModule, "attr_reader", rb_mod_attr_reader, -1); + rb_define_private_method(rb_cModule, "attr_writer", rb_mod_attr_writer, -1); + rb_define_private_method(rb_cModule, "attr_accessor", rb_mod_attr_accessor, -1); + + rb_define_alloc_func(rb_cModule, rb_module_s_alloc); + rb_define_method(rb_cModule, "initialize", rb_mod_initialize, 0); + rb_define_method(rb_cModule, "instance_methods", rb_class_instance_methods, -1); /* in class.c */ + rb_define_method(rb_cModule, "public_instance_methods", + rb_class_public_instance_methods, -1); /* in class.c */ + rb_define_method(rb_cModule, "protected_instance_methods", + rb_class_protected_instance_methods, -1); /* in class.c */ + rb_define_method(rb_cModule, "private_instance_methods", + rb_class_private_instance_methods, -1); /* in class.c */ + + rb_define_method(rb_cModule, "constants", rb_mod_constants, -1); /* in variable.c */ + rb_define_method(rb_cModule, "const_get", rb_mod_const_get, -1); + rb_define_method(rb_cModule, "const_set", rb_mod_const_set, 2); + rb_define_method(rb_cModule, "const_defined?", rb_mod_const_defined, -1); + rb_define_private_method(rb_cModule, "remove_const", + rb_mod_remove_const, 1); /* in variable.c */ + rb_define_method(rb_cModule, "const_missing", + rb_mod_const_missing, 1); /* in variable.c */ + rb_define_method(rb_cModule, "class_variables", + rb_mod_class_variables, 0); /* in variable.c */ + rb_define_method(rb_cModule, "remove_class_variable", + rb_mod_remove_cvar, 1); /* in variable.c */ + rb_define_method(rb_cModule, "class_variable_get", rb_mod_cvar_get, 1); + rb_define_method(rb_cModule, "class_variable_set", rb_mod_cvar_set, 2); + rb_define_method(rb_cModule, "class_variable_defined?", rb_mod_cvar_defined, 1); + + 
rb_define_method(rb_cClass, "allocate", rb_obj_alloc, 0); + rb_define_method(rb_cClass, "new", rb_class_new_instance, -1); + rb_define_method(rb_cClass, "initialize", rb_class_initialize, -1); + rb_define_method(rb_cClass, "initialize_copy", rb_class_init_copy, 1); /* in class.c */ + rb_define_method(rb_cClass, "superclass", rb_class_superclass, 0); + rb_define_alloc_func(rb_cClass, rb_class_s_alloc); + rb_undef_method(rb_cClass, "extend_object"); + rb_undef_method(rb_cClass, "append_features"); + + rb_cData = rb_define_class("Data", rb_cObject); + rb_undef_alloc_func(rb_cData); + + rb_cTrueClass = rb_define_class("TrueClass", rb_cObject); + rb_define_method(rb_cTrueClass, "to_s", true_to_s, 0); + rb_define_method(rb_cTrueClass, "&", true_and, 1); + rb_define_method(rb_cTrueClass, "|", true_or, 1); + rb_define_method(rb_cTrueClass, "^", true_xor, 1); + rb_undef_alloc_func(rb_cTrueClass); + rb_undef_method(CLASS_OF(rb_cTrueClass), "new"); + rb_define_global_const("TRUE", Qtrue); + + rb_cFalseClass = rb_define_class("FalseClass", rb_cObject); + rb_define_method(rb_cFalseClass, "to_s", false_to_s, 0); + rb_define_method(rb_cFalseClass, "&", false_and, 1); + rb_define_method(rb_cFalseClass, "|", false_or, 1); + rb_define_method(rb_cFalseClass, "^", false_xor, 1); + rb_undef_alloc_func(rb_cFalseClass); + rb_undef_method(CLASS_OF(rb_cFalseClass), "new"); + rb_define_global_const("FALSE", Qfalse); + + id_eq = rb_intern("=="); + id_eql = rb_intern("eql?"); + id_match = rb_intern("=~"); + id_inspect = rb_intern("inspect"); + id_init_copy = rb_intern("initialize_copy"); + + for (i=0; conv_method_names[i].method; i++) { + conv_method_names[i].id = rb_intern(conv_method_names[i].method); + } +} diff --git a/pack.c b/pack.c new file mode 100644 index 0000000..ab07ae6 --- /dev/null +++ b/pack.c @@ -0,0 +1,2180 @@ +/********************************************************************** + + pack.c - + + $Author: yugui $ + created at: Thu Feb 10 15:17:05 JST 1994 + + Copyright (C) 
1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/encoding.h" +#include +#include +#include + +#define SIZE16 2 +#define SIZE32 4 + +#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 +# define NATINT_PACK +#endif + +#ifdef NATINT_PACK +# define OFF16B(p) ((char*)(p) + (natint?0:(sizeof(short) - SIZE16))) +# define OFF32B(p) ((char*)(p) + (natint?0:(sizeof(long) - SIZE32))) +# define NATINT_LEN(type,len) (natint?sizeof(type):(len)) +# ifdef WORDS_BIGENDIAN +# define OFF16(p) OFF16B(p) +# define OFF32(p) OFF32B(p) +# endif +# define NATINT_HTOVS(x) (natint?htovs(x):htov16(x)) +# define NATINT_HTOVL(x) (natint?htovl(x):htov32(x)) +# define NATINT_HTONS(x) (natint?htons(x):hton16(x)) +# define NATINT_HTONL(x) (natint?htonl(x):hton32(x)) +#else +# define NATINT_LEN(type,len) sizeof(type) +# define NATINT_HTOVS(x) htovs(x) +# define NATINT_HTOVL(x) htovl(x) +# define NATINT_HTONS(x) htons(x) +# define NATINT_HTONL(x) htonl(x) +#endif + +#ifndef OFF16 +# define OFF16(p) (char*)(p) +# define OFF32(p) (char*)(p) +#endif +#ifndef OFF16B +# define OFF16B(p) (char*)(p) +# define OFF32B(p) (char*)(p) +#endif + +#define define_swapx(x, xtype) \ +static xtype \ +TOKEN_PASTE(swap,x)(xtype z) \ +{ \ + xtype r; \ + xtype *zp; \ + unsigned char *s, *t; \ + int i; \ + \ + zp = xmalloc(sizeof(xtype)); \ + *zp = z; \ + s = (unsigned char*)zp; \ + t = xmalloc(sizeof(xtype)); \ + for (i=0; i>8)&0xFF)) +#endif +#if SIZEOF_SHORT == 2 +#define swaps(x) swap16(x) +#else +#if SIZEOF_SHORT == 4 +#define swaps(x) ((((x)&0xFF)<<24) \ + |(((x)>>24)&0xFF) \ + |(((x)&0x0000FF00)<<8) \ + |(((x)&0x00FF0000)>>8) ) +#else +define_swapx(s,short) +#endif +#endif + +#ifndef swap32 +#define swap32(x) ((((x)&0xFF)<<24) \ + |(((x)>>24)&0xFF) \ + |(((x)&0x0000FF00)<<8) \ + |(((x)&0x00FF0000)>>8) ) +#endif +#if SIZEOF_LONG == 4 +#define swapl(x) swap32(x) +#else +#if SIZEOF_LONG == 8 +#define swapl(x) 
((((x)&0x00000000000000FF)<<56) \ + |(((x)&0xFF00000000000000)>>56) \ + |(((x)&0x000000000000FF00)<<40) \ + |(((x)&0x00FF000000000000)>>40) \ + |(((x)&0x0000000000FF0000)<<24) \ + |(((x)&0x0000FF0000000000)>>24) \ + |(((x)&0x00000000FF000000)<<8) \ + |(((x)&0x000000FF00000000)>>8)) +#else +define_swapx(l,long) +#endif +#endif + +#if SIZEOF_FLOAT == 4 +#if SIZEOF_LONG == 4 /* SIZEOF_FLOAT == 4 == SIZEOF_LONG */ +#define swapf(x) swapl(x) +#define FLOAT_SWAPPER unsigned long +#else +#if SIZEOF_SHORT == 4 /* SIZEOF_FLOAT == 4 == SIZEOF_SHORT */ +#define swapf(x) swaps(x) +#define FLOAT_SWAPPER unsigned short +#else /* SIZEOF_FLOAT == 4 but undivide by known size of int */ +define_swapx(f,float) +#endif /* #if SIZEOF_SHORT == 4 */ +#endif /* #if SIZEOF_LONG == 4 */ +#else /* SIZEOF_FLOAT != 4 */ +define_swapx(f,float) +#endif /* #if SIZEOF_FLOAT == 4 */ + +#if SIZEOF_DOUBLE == 8 +#if SIZEOF_LONG == 8 /* SIZEOF_DOUBLE == 8 == SIZEOF_LONG */ +#define swapd(x) swapl(x) +#define DOUBLE_SWAPPER unsigned long +#else +#if SIZEOF_LONG == 4 /* SIZEOF_DOUBLE == 8 && 4 == SIZEOF_LONG */ +static double +swapd(const double d) +{ + double dtmp = d; + unsigned long utmp[2]; + unsigned long utmp0; + + utmp[0] = 0; utmp[1] = 0; + memcpy(utmp,&dtmp,sizeof(double)); + utmp0 = utmp[0]; + utmp[0] = swapl(utmp[1]); + utmp[1] = swapl(utmp0); + memcpy(&dtmp,utmp,sizeof(double)); + return dtmp; +} +#else +#if SIZEOF_SHORT == 4 /* SIZEOF_DOUBLE == 8 && 4 == SIZEOF_SHORT */ +static double +swapd(const double d) +{ + double dtmp = d; + unsigned short utmp[2]; + unsigned short utmp0; + + utmp[0] = 0; utmp[1] = 0; + memcpy(utmp,&dtmp,sizeof(double)); + utmp0 = utmp[0]; + utmp[0] = swaps(utmp[1]); + utmp[1] = swaps(utmp0); + memcpy(&dtmp,utmp,sizeof(double)); + return dtmp; +} +#else /* SIZEOF_DOUBLE == 8 but undivied by known size of int */ +define_swapx(d, double) +#endif /* #if SIZEOF_SHORT == 4 */ +#endif /* #if SIZEOF_LONG == 4 */ +#endif /* #if SIZEOF_LONG == 8 */ +#else /* SIZEOF_DOUBLE != 8 
*/ +define_swapx(d, double) +#endif /* #if SIZEOF_DOUBLE == 8 */ + +#undef define_swapx + +#ifdef DYNAMIC_ENDIAN +#ifdef ntohs +#undef ntohs +#undef ntohl +#undef htons +#undef htonl +#endif +static int +endian(void) +{ + static int init = 0; + static int endian_value; + char *p; + + if (init) return endian_value; + init = 1; + p = (char*)&init; + return endian_value = p[0]?0:1; +} + +#define ntohs(x) (endian()?(x):swaps(x)) +#define ntohl(x) (endian()?(x):swapl(x)) +#define ntohf(x) (endian()?(x):swapf(x)) +#define ntohd(x) (endian()?(x):swapd(x)) +#define htons(x) (endian()?(x):swaps(x)) +#define htonl(x) (endian()?(x):swapl(x)) +#define htonf(x) (endian()?(x):swapf(x)) +#define htond(x) (endian()?(x):swapd(x)) +#define htovs(x) (endian()?swaps(x):(x)) +#define htovl(x) (endian()?swapl(x):(x)) +#define htovf(x) (endian()?swapf(x):(x)) +#define htovd(x) (endian()?swapd(x):(x)) +#define vtohs(x) (endian()?swaps(x):(x)) +#define vtohl(x) (endian()?swapl(x):(x)) +#define vtohf(x) (endian()?swapf(x):(x)) +#define vtohd(x) (endian()?swapd(x):(x)) +# ifdef NATINT_PACK +#define htov16(x) (endian()?swap16(x):(x)) +#define htov32(x) (endian()?swap32(x):(x)) +#define hton16(x) (endian()?(x):swap16(x)) +#define hton32(x) (endian()?(x):swap32(x)) +# endif +#else +#ifdef WORDS_BIGENDIAN +#ifndef ntohs +#define ntohs(x) (x) +#define ntohl(x) (x) +#define htons(x) (x) +#define htonl(x) (x) +#endif +#define ntohf(x) (x) +#define ntohd(x) (x) +#define htonf(x) (x) +#define htond(x) (x) +#define htovs(x) swaps(x) +#define htovl(x) swapl(x) +#define htovf(x) swapf(x) +#define htovd(x) swapd(x) +#define vtohs(x) swaps(x) +#define vtohl(x) swapl(x) +#define vtohf(x) swapf(x) +#define vtohd(x) swapd(x) +# ifdef NATINT_PACK +#define htov16(x) swap16(x) +#define htov32(x) swap32(x) +#define hton16(x) (x) +#define hton32(x) (x) +# endif +#else /* LITTLE ENDIAN */ +#ifdef ntohs +#undef ntohs +#undef ntohl +#undef htons +#undef htonl +#endif +#define ntohs(x) swaps(x) +#define ntohl(x) 
swapl(x) +#define htons(x) swaps(x) +#define htonl(x) swapl(x) +#define ntohf(x) swapf(x) +#define ntohd(x) swapd(x) +#define htonf(x) swapf(x) +#define htond(x) swapd(x) +#define htovs(x) (x) +#define htovl(x) (x) +#define htovf(x) (x) +#define htovd(x) (x) +#define vtohs(x) (x) +#define vtohl(x) (x) +#define vtohf(x) (x) +#define vtohd(x) (x) +# ifdef NATINT_PACK +#define htov16(x) (x) +#define htov32(x) (x) +#define hton16(x) swap16(x) +#define hton32(x) swap32(x) +# endif +#endif +#endif + +#ifdef FLOAT_SWAPPER +#define FLOAT_CONVWITH(y) FLOAT_SWAPPER y; +#define HTONF(x,y) (memcpy(&y,&x,sizeof(float)), \ + y = htonf((FLOAT_SWAPPER)y), \ + memcpy(&x,&y,sizeof(float)), \ + x) +#define HTOVF(x,y) (memcpy(&y,&x,sizeof(float)), \ + y = htovf((FLOAT_SWAPPER)y), \ + memcpy(&x,&y,sizeof(float)), \ + x) +#define NTOHF(x,y) (memcpy(&y,&x,sizeof(float)), \ + y = ntohf((FLOAT_SWAPPER)y), \ + memcpy(&x,&y,sizeof(float)), \ + x) +#define VTOHF(x,y) (memcpy(&y,&x,sizeof(float)), \ + y = vtohf((FLOAT_SWAPPER)y), \ + memcpy(&x,&y,sizeof(float)), \ + x) +#else +#define FLOAT_CONVWITH(y) +#define HTONF(x,y) htonf(x) +#define HTOVF(x,y) htovf(x) +#define NTOHF(x,y) ntohf(x) +#define VTOHF(x,y) vtohf(x) +#endif + +#ifdef DOUBLE_SWAPPER +#define DOUBLE_CONVWITH(y) DOUBLE_SWAPPER y; +#define HTOND(x,y) (memcpy(&y,&x,sizeof(double)), \ + y = htond((DOUBLE_SWAPPER)y), \ + memcpy(&x,&y,sizeof(double)), \ + x) +#define HTOVD(x,y) (memcpy(&y,&x,sizeof(double)), \ + y = htovd((DOUBLE_SWAPPER)y), \ + memcpy(&x,&y,sizeof(double)), \ + x) +#define NTOHD(x,y) (memcpy(&y,&x,sizeof(double)), \ + y = ntohd((DOUBLE_SWAPPER)y), \ + memcpy(&x,&y,sizeof(double)), \ + x) +#define VTOHD(x,y) (memcpy(&y,&x,sizeof(double)), \ + y = vtohd((DOUBLE_SWAPPER)y), \ + memcpy(&x,&y,sizeof(double)), \ + x) +#else +#define DOUBLE_CONVWITH(y) +#define HTOND(x,y) htond(x) +#define HTOVD(x,y) htovd(x) +#define NTOHD(x,y) ntohd(x) +#define VTOHD(x,y) vtohd(x) +#endif + +unsigned long rb_big2ulong_pack(VALUE x); + 
+static unsigned long +num2i32(VALUE x) +{ + x = rb_to_int(x); /* is nil OK? (should not) */ + + if (FIXNUM_P(x)) return FIX2LONG(x); + if (TYPE(x) == T_BIGNUM) { + return rb_big2ulong_pack(x); + } + rb_raise(rb_eTypeError, "can't convert %s to `integer'", rb_obj_classname(x)); + return 0; /* not reached */ +} + +#if SIZEOF_LONG == SIZE32 +# define EXTEND32(x) +#else +/* invariant in modulo 1<<31 */ +# define EXTEND32(x) do { if (!natint) {(x) = (((1L<<31)-1-(x))^~(~0L<<31));}} while(0) +#endif +#if SIZEOF_SHORT == SIZE16 +# define EXTEND16(x) +#else +# define EXTEND16(x) do { if (!natint) {(x) = (short)(((1<<15)-1-(x))^~(~0<<15));}} while(0) +#endif + +#ifdef HAVE_LONG_LONG +# define QUAD_SIZE sizeof(LONG_LONG) +#else +# define QUAD_SIZE 8 +#endif +static const char toofew[] = "too few arguments"; + +static void encodes(VALUE,const char*,long,int,int); +static void qpencode(VALUE,VALUE,long); + +static unsigned long utf8_to_uv(const char*,long*); + +/* + * call-seq: + * arr.pack ( aTemplateString ) -> aBinaryString + * + * Packs the contents of arr into a binary sequence according to + * the directives in aTemplateString (see the table below) + * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count, + * which gives the width of the resulting field. The remaining + * directives also may take a count, indicating the number of array + * elements to convert. If the count is an asterisk + * (``*''), all remaining array elements will be + * converted. Any of the directives ``sSiIlL'' may be + * followed by an underscore (``_'') to use the underlying + * platform's native size for the specified type; otherwise, they use a + * platform-independent size. Spaces are ignored in the template + * string. See also String#unpack. + * + * a = [ "a", "b", "c" ] + * n = [ 65, 66, 67 ] + * a.pack("A3A3A3") #=> "a b c " + * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000" + * n.pack("ccc") #=> "ABC" + * + * Directives for +pack+. 
+ * + * Directive Meaning + * --------------------------------------------------------------- + * @ | Moves to absolute position + * A | arbitrary binary string (space padded, count is width) + * a | arbitrary binary string (null padded, count is width) + * B | Bit string (descending bit order) + * b | Bit string (ascending bit order) + * C | Unsigned byte (C unsigned char) + * c | Byte (C char) + * D, d | Double-precision float, native format + * E | Double-precision float, little-endian byte order + * e | Single-precision float, little-endian byte order + * F, f | Single-precision float, native format + * G | Double-precision float, network (big-endian) byte order + * g | Single-precision float, network (big-endian) byte order + * H | Hex string (high nibble first) + * h | Hex string (low nibble first) + * I | Unsigned integer + * i | Integer + * L | Unsigned long + * l | Long + * M | Quoted printable, MIME encoding (see RFC2045) + * m | Base64 encoded string (see RFC 2045, count is width) + * | (if count is 0, no line feed are added, see RFC 4648) + * N | Long, network (big-endian) byte order + * n | Short, network (big-endian) byte-order + * P | Pointer to a structure (fixed-length string) + * p | Pointer to a null-terminated string + * Q, q | 64-bit number + * S | Unsigned short + * s | Short + * U | UTF-8 + * u | UU-encoded string + * V | Long, little-endian byte order + * v | Short, little-endian byte order + * w | BER-compressed integer\fnm + * X | Back up a byte + * x | Null byte + * Z | Same as ``a'', except that null is added with * + */ + +static VALUE +pack_pack(VALUE ary, VALUE fmt) +{ + static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0"; + static const char spc10[] = " "; + const char *p, *pend; + VALUE res, from, associates = 0; + char type; + long items, len, idx, plen; + const char *ptr; + int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */ +#ifdef NATINT_PACK + int natint; /* native integer */ +#endif + + StringValue(fmt); + p = 
RSTRING_PTR(fmt); + pend = p + RSTRING_LEN(fmt); + res = rb_str_buf_new(0); + + items = RARRAY_LEN(ary); + idx = 0; + +#define TOO_FEW (rb_raise(rb_eArgError, toofew), 0) +#define THISFROM (items > 0 ? RARRAY_PTR(ary)[idx] : TOO_FEW) +#define NEXTFROM (items-- > 0 ? RARRAY_PTR(ary)[idx++] : TOO_FEW) + + while (p < pend) { + if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) { + rb_raise(rb_eRuntimeError, "format string modified"); + } + type = *p++; /* get data type */ +#ifdef NATINT_PACK + natint = 0; +#endif + + if (ISSPACE(type)) continue; + if (type == '#') { + while ((p < pend) && (*p != '\n')) { + p++; + } + continue; + } + if (*p == '_' || *p == '!') { + static const char natstr[] = "sSiIlL"; + + if (strchr(natstr, type)) { +#ifdef NATINT_PACK + natint = 1; +#endif + p++; + } + else { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); + } + } + if (*p == '*') { /* set data length */ + len = strchr("@Xxu", type) ? 0 + : strchr("PMm", type) ? 1 + : items; + p++; + } + else if (ISDIGIT(*p)) { + errno = 0; + len = STRTOUL(p, (char**)&p, 10); + if (errno) { + rb_raise(rb_eRangeError, "pack length too big"); + } + } + else { + len = 1; + } + + switch (type) { + case 'U': + /* if encoding is US-ASCII, upgrade to UTF-8 */ + if (enc_info == 1) enc_info = 2; + break; + case 'm': case 'M': case 'u': + /* keep US-ASCII (do nothing) */ + break; + default: + /* fall back to BINARY */ + enc_info = 0; + break; + } + switch (type) { + case 'A': case 'a': case 'Z': + case 'B': case 'b': + case 'H': case 'h': + from = NEXTFROM; + if (NIL_P(from)) { + ptr = ""; + plen = 0; + } + else { + StringValue(from); + ptr = RSTRING_PTR(from); + plen = RSTRING_LEN(from); + OBJ_INFECT(res, from); + } + + if (p[-1] == '*') + len = plen; + + switch (type) { + case 'a': /* arbitrary binary string (null padded) */ + case 'A': /* arbitrary binary string (ASCII space padded) */ + case 'Z': /* null terminated string */ + if (plen >= len) { + rb_str_buf_cat(res, ptr, len); + 
if (p[-1] == '*' && type == 'Z') + rb_str_buf_cat(res, nul10, 1); + } + else { + rb_str_buf_cat(res, ptr, plen); + len -= plen; + while (len >= 10) { + rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10); + len -= 10; + } + rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len); + } + break; + + case 'b': /* bit string (ascending) */ + { + int byte = 0; + long i, j = 0; + + if (len > plen) { + j = (len - plen + 1)/2; + len = plen; + } + for (i=0; i++ < len; ptr++) { + if (*ptr & 1) + byte |= 128; + if (i & 7) + byte >>= 1; + else { + char c = byte & 0xff; + rb_str_buf_cat(res, &c, 1); + byte = 0; + } + } + if (len & 7) { + char c; + byte >>= 7 - (len & 7); + c = byte & 0xff; + rb_str_buf_cat(res, &c, 1); + } + len = j; + goto grow; + } + break; + + case 'B': /* bit string (descending) */ + { + int byte = 0; + long i, j = 0; + + if (len > plen) { + j = (len - plen + 1)/2; + len = plen; + } + for (i=0; i++ < len; ptr++) { + byte |= *ptr & 1; + if (i & 7) + byte <<= 1; + else { + char c = byte & 0xff; + rb_str_buf_cat(res, &c, 1); + byte = 0; + } + } + if (len & 7) { + char c; + byte <<= 7 - (len & 7); + c = byte & 0xff; + rb_str_buf_cat(res, &c, 1); + } + len = j; + goto grow; + } + break; + + case 'h': /* hex string (low nibble first) */ + { + int byte = 0; + long i, j = 0; + + if (len > plen) { + j = (len + 1) / 2 - (plen + 1) / 2; + len = plen; + } + for (i=0; i++ < len; ptr++) { + if (ISALPHA(*ptr)) + byte |= (((*ptr & 15) + 9) & 15) << 4; + else + byte |= (*ptr & 15) << 4; + if (i & 1) + byte >>= 4; + else { + char c = byte & 0xff; + rb_str_buf_cat(res, &c, 1); + byte = 0; + } + } + if (len & 1) { + char c = byte & 0xff; + rb_str_buf_cat(res, &c, 1); + } + len = j; + goto grow; + } + break; + + case 'H': /* hex string (high nibble first) */ + { + int byte = 0; + long i, j = 0; + + if (len > plen) { + j = (len + 1) / 2 - (plen + 1) / 2; + len = plen; + } + for (i=0; i++ < len; ptr++) { + if (ISALPHA(*ptr)) + byte |= ((*ptr & 15) + 9) & 15; + else + byte |= *ptr & 15; 
+ if (i & 1) + byte <<= 4; + else { + char c = byte & 0xff; + rb_str_buf_cat(res, &c, 1); + byte = 0; + } + } + if (len & 1) { + char c = byte & 0xff; + rb_str_buf_cat(res, &c, 1); + } + len = j; + goto grow; + } + break; + } + break; + + case 'c': /* signed char */ + case 'C': /* unsigned char */ + while (len-- > 0) { + char c; + + from = NEXTFROM; + c = num2i32(from); + rb_str_buf_cat(res, &c, sizeof(char)); + } + break; + + case 's': /* signed short */ + case 'S': /* unsigned short */ + while (len-- > 0) { + short s; + + from = NEXTFROM; + s = num2i32(from); + rb_str_buf_cat(res, OFF16(&s), NATINT_LEN(short,2)); + } + break; + + case 'i': /* signed int */ + case 'I': /* unsigned int */ + while (len-- > 0) { + int i; + + from = NEXTFROM; + i = num2i32(from); + rb_str_buf_cat(res, (char*)&i, sizeof(int)); + } + break; + + case 'l': /* signed long */ + case 'L': /* unsigned long */ + while (len-- > 0) { + long l; + + from = NEXTFROM; + l = num2i32(from); + rb_str_buf_cat(res, OFF32(&l), NATINT_LEN(long,4)); + } + break; + + case 'q': /* signed quad (64bit) int */ + case 'Q': /* unsigned quad (64bit) int */ + while (len-- > 0) { + char tmp[QUAD_SIZE]; + + from = NEXTFROM; + rb_quad_pack(tmp, from); + rb_str_buf_cat(res, (char*)&tmp, QUAD_SIZE); + } + break; + + case 'n': /* unsigned short (network byte-order) */ + while (len-- > 0) { + unsigned short s; + + from = NEXTFROM; + s = num2i32(from); + s = NATINT_HTONS(s); + rb_str_buf_cat(res, OFF16(&s), NATINT_LEN(short,2)); + } + break; + + case 'N': /* unsigned long (network byte-order) */ + while (len-- > 0) { + unsigned long l; + + from = NEXTFROM; + l = num2i32(from); + l = NATINT_HTONL(l); + rb_str_buf_cat(res, OFF32(&l), NATINT_LEN(long,4)); + } + break; + + case 'v': /* unsigned short (VAX byte-order) */ + while (len-- > 0) { + unsigned short s; + + from = NEXTFROM; + s = num2i32(from); + s = NATINT_HTOVS(s); + rb_str_buf_cat(res, OFF16(&s), NATINT_LEN(short,2)); + } + break; + + case 'V': /* unsigned long (VAX 
byte-order) */ + while (len-- > 0) { + unsigned long l; + + from = NEXTFROM; + l = num2i32(from); + l = NATINT_HTOVL(l); + rb_str_buf_cat(res, OFF32(&l), NATINT_LEN(long,4)); + } + break; + + case 'f': /* single precision float in native format */ + case 'F': /* ditto */ + while (len-- > 0) { + float f; + + from = NEXTFROM; + f = RFLOAT_VALUE(rb_to_float(from)); + rb_str_buf_cat(res, (char*)&f, sizeof(float)); + } + break; + + case 'e': /* single precision float in VAX byte-order */ + while (len-- > 0) { + float f; + FLOAT_CONVWITH(ftmp); + + from = NEXTFROM; + f = RFLOAT_VALUE(rb_Float(from)); + f = HTOVF(f,ftmp); + rb_str_buf_cat(res, (char*)&f, sizeof(float)); + } + break; + + case 'E': /* double precision float in VAX byte-order */ + while (len-- > 0) { + double d; + DOUBLE_CONVWITH(dtmp); + + from = NEXTFROM; + d = RFLOAT_VALUE(rb_Float(from)); + d = HTOVD(d,dtmp); + rb_str_buf_cat(res, (char*)&d, sizeof(double)); + } + break; + + case 'd': /* double precision float in native format */ + case 'D': /* ditto */ + while (len-- > 0) { + double d; + + from = NEXTFROM; + d = RFLOAT_VALUE(rb_Float(from)); + rb_str_buf_cat(res, (char*)&d, sizeof(double)); + } + break; + + case 'g': /* single precision float in network byte-order */ + while (len-- > 0) { + float f; + FLOAT_CONVWITH(ftmp); + + from = NEXTFROM; + f = RFLOAT_VALUE(rb_Float(from)); + f = HTONF(f,ftmp); + rb_str_buf_cat(res, (char*)&f, sizeof(float)); + } + break; + + case 'G': /* double precision float in network byte-order */ + while (len-- > 0) { + double d; + DOUBLE_CONVWITH(dtmp); + + from = NEXTFROM; + d = RFLOAT_VALUE(rb_Float(from)); + d = HTOND(d,dtmp); + rb_str_buf_cat(res, (char*)&d, sizeof(double)); + } + break; + + case 'x': /* null byte */ + grow: + while (len >= 10) { + rb_str_buf_cat(res, nul10, 10); + len -= 10; + } + rb_str_buf_cat(res, nul10, len); + break; + + case 'X': /* back up byte */ + shrink: + plen = RSTRING_LEN(res); + if (plen < len) + rb_raise(rb_eArgError, "X outside of 
string"); + rb_str_set_len(res, plen - len); + break; + + case '@': /* null fill to absolute position */ + len -= RSTRING_LEN(res); + if (len > 0) goto grow; + len = -len; + if (len > 0) goto shrink; + break; + + case '%': + rb_raise(rb_eArgError, "%% is not supported"); + break; + + case 'U': /* Unicode character */ + while (len-- > 0) { + SIGNED_VALUE l; + char buf[8]; + int le; + + from = NEXTFROM; + from = rb_to_int(from); + l = NUM2LONG(from); + if (l < 0) { + rb_raise(rb_eRangeError, "pack(U): value out of range"); + } + le = rb_uv_to_utf8(buf, l); + rb_str_buf_cat(res, (char*)buf, le); + } + break; + + case 'u': /* uuencoded string */ + case 'm': /* base64 encoded string */ + from = NEXTFROM; + StringValue(from); + ptr = RSTRING_PTR(from); + plen = RSTRING_LEN(from); + + if (len == 0 && type == 'm') { + encodes(res, ptr, plen, type, 0); + ptr += plen; + break; + } + if (len <= 2) + len = 45; + else + len = len / 3 * 3; + while (plen > 0) { + long todo; + + if (plen > len) + todo = len; + else + todo = plen; + encodes(res, ptr, todo, type, 1); + plen -= todo; + ptr += todo; + } + break; + + case 'M': /* quoted-printable encoded string */ + from = rb_obj_as_string(NEXTFROM); + if (len <= 1) + len = 72; + qpencode(res, from, len); + break; + + case 'P': /* pointer to packed byte string */ + from = THISFROM; + if (!NIL_P(from)) { + StringValue(from); + if (RSTRING_LEN(from) < len) { + rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)", + RSTRING_LEN(from), len); + } + } + len = 1; + /* FALL THROUGH */ + case 'p': /* pointer to string */ + while (len-- > 0) { + char *t; + from = NEXTFROM; + if (NIL_P(from)) { + t = 0; + } + else { + t = StringValuePtr(from); + } + if (!associates) { + associates = rb_ary_new(); + } + rb_ary_push(associates, from); + rb_obj_taint(from); + rb_str_buf_cat(res, (char*)&t, sizeof(char*)); + } + break; + + case 'w': /* BER compressed integer */ + while (len-- > 0) { + unsigned long ul; + VALUE buf = rb_str_new(0, 0); + char 
c, *bufs, *bufe; + + from = NEXTFROM; + if (TYPE(from) == T_BIGNUM) { + VALUE big128 = rb_uint2big(128); + while (TYPE(from) == T_BIGNUM) { + from = rb_big_divmod(from, big128); + c = NUM2INT(RARRAY_PTR(from)[1]) | 0x80; /* mod */ + rb_str_buf_cat(buf, &c, sizeof(char)); + from = RARRAY_PTR(from)[0]; /* div */ + } + } + + { + long l = NUM2LONG(from); + if (l < 0) { + rb_raise(rb_eArgError, "can't compress negative numbers"); + } + ul = l; + } + + while (ul) { + c = ((ul & 0x7f) | 0x80); + rb_str_buf_cat(buf, &c, sizeof(char)); + ul >>= 7; + } + + if (RSTRING_LEN(buf)) { + bufs = RSTRING_PTR(buf); + bufe = bufs + RSTRING_LEN(buf) - 1; + *bufs &= 0x7f; /* clear continue bit */ + while (bufs < bufe) { /* reverse */ + c = *bufs; + *bufs++ = *bufe; + *bufe-- = c; + } + rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf)); + } + else { + c = 0; + rb_str_buf_cat(res, &c, sizeof(char)); + } + } + break; + + default: + break; + } + } + + if (associates) { + rb_str_associate(res, associates); + } + OBJ_INFECT(res, fmt); + switch (enc_info) { + case 1: + ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT); + break; + case 2: + rb_enc_set_index(res, rb_utf8_encindex()); + break; + default: + /* do nothing, keep ASCII-8BIT */ + break; + } + return res; +} + +static const char uu_table[] = +"`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"; +static const char b64_table[] = +"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +static void +encodes(VALUE str, const char *s, long len, int type, int tail_lf) +{ + char buff[4096]; + long i = 0; + const char *trans = type == 'u' ? 
uu_table : b64_table; + int padding; + + if (type == 'u') { + buff[i++] = len + ' '; + padding = '`'; + } + else { + padding = '='; + } + while (len >= 3) { + while (len >= 3 && sizeof(buff)-i >= 4) { + buff[i++] = trans[077 & (*s >> 2)]; + buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; + buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))]; + buff[i++] = trans[077 & s[2]]; + s += 3; + len -= 3; + } + if (sizeof(buff)-i < 4) { + rb_str_buf_cat(str, buff, i); + i = 0; + } + } + + if (len == 2) { + buff[i++] = trans[077 & (*s >> 2)]; + buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; + buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))]; + buff[i++] = padding; + } + else if (len == 1) { + buff[i++] = trans[077 & (*s >> 2)]; + buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))]; + buff[i++] = padding; + buff[i++] = padding; + } + if (tail_lf) buff[i++] = '\n'; + rb_str_buf_cat(str, buff, i); +} + +static const char hex_table[] = "0123456789ABCDEF"; + +static void +qpencode(VALUE str, VALUE from, long len) +{ + char buff[1024]; + long i = 0, n = 0, prev = EOF; + unsigned char *s = (unsigned char*)RSTRING_PTR(from); + unsigned char *send = s + RSTRING_LEN(from); + + while (s < send) { + if ((*s > 126) || + (*s < 32 && *s != '\n' && *s != '\t') || + (*s == '=')) { + buff[i++] = '='; + buff[i++] = hex_table[*s >> 4]; + buff[i++] = hex_table[*s & 0x0f]; + n += 3; + prev = EOF; + } + else if (*s == '\n') { + if (prev == ' ' || prev == '\t') { + buff[i++] = '='; + buff[i++] = *s; + } + buff[i++] = *s; + n = 0; + prev = *s; + } + else { + buff[i++] = *s; + n++; + prev = *s; + } + if (n > len) { + buff[i++] = '='; + buff[i++] = '\n'; + n = 0; + prev = '\n'; + } + if (i > 1024 - 5) { + rb_str_buf_cat(str, buff, i); + i = 0; + } + s++; + } + if (n > 0) { + buff[i++] = '='; + buff[i++] = '\n'; + } + if (i > 0) { + rb_str_buf_cat(str, buff, i); + } +} + +static inline int +hex2num(char c) +{ + 
switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return c - '0'; + case 'a': case 'b': case 'c': + case 'd': case 'e': case 'f': + return c - 'a' + 10; + case 'A': case 'B': case 'C': + case 'D': case 'E': case 'F': + return c - 'A' + 10; + default: + return -1; + } +} + +#define PACK_LENGTH_ADJUST_SIZE(sz) do { \ + tmp = 0; \ + if (len > (send-s)/sz) { \ + if (!star) { \ + tmp = len-(send-s)/sz; \ + } \ + len = (send-s)/sz; \ + } \ +} while (0) + +#ifdef NATINT_PACK +#define PACK_LENGTH_ADJUST(type,sz) do { \ + int t__len = NATINT_LEN(type,(sz)); \ + PACK_LENGTH_ADJUST_SIZE(t__len); \ +} while (0) +#else +#define PACK_LENGTH_ADJUST(type,sz) \ + PACK_LENGTH_ADJUST_SIZE(sizeof(type)) +#endif + +#define PACK_ITEM_ADJUST() while (tmp--) rb_ary_push(ary, Qnil) + +static VALUE +infected_str_new(const char *ptr, long len, VALUE str) +{ + VALUE s = rb_str_new(ptr, len); + + OBJ_INFECT(s, str); + return s; +} + +/* + * call-seq: + * str.unpack(format) => anArray + * + * Decodes str (which may contain binary data) according to the + * format string, returning an array of each value extracted. The + * format string consists of a sequence of single-character directives, + * summarized in the table at the end of this entry. + * Each directive may be followed + * by a number, indicating the number of times to repeat with this + * directive. An asterisk (``*'') will use up all + * remaining elements. The directives sSiIlL may each be + * followed by an underscore (``_'') to use the underlying + * platform's native size for the specified type; otherwise, it uses a + * platform-independent consistent size. Spaces are ignored in the + * format string. See also Array#pack. 
+ * + * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "] + * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"] + * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "] + * "aa".unpack('b8B8') #=> ["10000110", "01100001"] + * "aaa".unpack('h2H2c') #=> ["16", "61", 97] + * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534] + * "now=20is".unpack('M*') #=> ["now is"] + * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"] + * + * This table summarizes the various formats and the Ruby classes + * returned by each. + * + * Format | Returns | Function + * -------+---------+----------------------------------------- + * A | String | arbitrary binary string with trailing + * | | nulls and ASCII spaces removed + * -------+---------+----------------------------------------- + * a | String | arbitrary binary string + * -------+---------+----------------------------------------- + * B | String | extract bits from each character (msb first) + * -------+---------+----------------------------------------- + * b | String | extract bits from each character (lsb first) + * -------+---------+----------------------------------------- + * C | Fixnum | extract a byte (C char) as an unsigned integer + * -------+---------+----------------------------------------- + * c | Fixnum | extract a byte (C char) as an integer + * -------+---------+----------------------------------------- + * d,D | Float | treat sizeof(double) characters as + * | | a native double + * -------+---------+----------------------------------------- + * E | Float | treat sizeof(double) characters as + * | | a double in little-endian byte order + * -------+---------+----------------------------------------- + * e | Float | treat sizeof(float) characters as + * | | a float in little-endian byte order + * -------+---------+----------------------------------------- + * f,F | Float | treat sizeof(float) characters as + * | | a native float + * -------+---------+----------------------------------------- + * G | 
Float | treat sizeof(double) characters as + * | | a double in network byte order + * -------+---------+----------------------------------------- + * g | Float | treat sizeof(float) characters as a + * | | float in network byte order + * -------+---------+----------------------------------------- + * H | String | extract hex nibbles from each character + * | | (most significant first) + * -------+---------+----------------------------------------- + * h | String | extract hex nibbles from each character + * | | (least significant first) + * -------+---------+----------------------------------------- + * I | Integer | treat sizeof(int) (modified by _) + * | | successive characters as an unsigned + * | | native integer + * -------+---------+----------------------------------------- + * i | Integer | treat sizeof(int) (modified by _) + * | | successive characters as a signed + * | | native integer + * -------+---------+----------------------------------------- + * L | Integer | treat four (modified by _) successive + * | | characters as an unsigned native + * | | long integer + * -------+---------+----------------------------------------- + * l | Integer | treat four (modified by _) successive + * | | characters as a signed native + * | | long integer + * -------+---------+----------------------------------------- + * M | String | quoted-printable + * -------+---------+----------------------------------------- + * m | String | base64-encoded (RFC 2045) (default) + * | | base64-encoded (RFC 4648) if followed by 0 + * -------+---------+----------------------------------------- + * N | Integer | treat four characters as an unsigned + * | | long in network byte order + * -------+---------+----------------------------------------- + * n | Fixnum | treat two characters as an unsigned + * | | short in network byte order + * -------+---------+----------------------------------------- + * P | String | treat sizeof(char *) characters as a + * | | pointer, and return \emph{len} 
characters + * | | from the referenced location + * -------+---------+----------------------------------------- + * p | String | treat sizeof(char *) characters as a + * | | pointer to a null-terminated string + * -------+---------+----------------------------------------- + * Q | Integer | treat 8 characters as an unsigned + * | | quad word (64 bits) + * -------+---------+----------------------------------------- + * q | Integer | treat 8 characters as a signed + * | | quad word (64 bits) + * -------+---------+----------------------------------------- + * S | Fixnum | treat two (different if _ used) + * | | successive characters as an unsigned + * | | short in native byte order + * -------+---------+----------------------------------------- + * s | Fixnum | Treat two (different if _ used) + * | | successive characters as a signed short + * | | in native byte order + * -------+---------+----------------------------------------- + * U | Integer | UTF-8 characters as unsigned integers + * -------+---------+----------------------------------------- + * u | String | UU-encoded + * -------+---------+----------------------------------------- + * V | Fixnum | treat four characters as an unsigned + * | | long in little-endian byte order + * -------+---------+----------------------------------------- + * v | Fixnum | treat two characters as an unsigned + * | | short in little-endian byte order + * -------+---------+----------------------------------------- + * w | Integer | BER-compressed integer (see Array.pack) + * -------+---------+----------------------------------------- + * X | --- | skip backward one character + * -------+---------+----------------------------------------- + * x | --- | skip forward one character + * -------+---------+----------------------------------------- + * Z | String | with trailing nulls removed + * | | upto first null with * + * -------+---------+----------------------------------------- + * @ | --- | skip to the offset given by the + * | | 
length argument + * -------+---------+----------------------------------------- + */ + +static VALUE +pack_unpack(VALUE str, VALUE fmt) +{ + static const char hexdigits[] = "0123456789abcdef"; + char *s, *send; + char *p, *pend; + VALUE ary; + char type; + long len; + int tmp, star; +#ifdef NATINT_PACK + int natint; /* native integer */ +#endif + int block_p = rb_block_given_p(); +#define UNPACK_PUSH(item) do {\ + VALUE item_val = (item);\ + if (block_p) {\ + rb_yield(item_val);\ + }\ + else {\ + rb_ary_push(ary, item_val);\ + }\ + } while (0) + + StringValue(str); + StringValue(fmt); + s = RSTRING_PTR(str); + send = s + RSTRING_LEN(str); + p = RSTRING_PTR(fmt); + pend = p + RSTRING_LEN(fmt); + + ary = block_p ? Qnil : rb_ary_new(); + while (p < pend) { + type = *p++; +#ifdef NATINT_PACK + natint = 0; +#endif + + if (ISSPACE(type)) continue; + if (type == '#') { + while ((p < pend) && (*p != '\n')) { + p++; + } + continue; + } + star = 0; + if (*p == '_' || *p == '!') { + static const char natstr[] = "sSiIlL"; + + if (strchr(natstr, type)) { +#ifdef NATINT_PACK + natint = 1; +#endif + p++; + } + else { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); + } + } + if (p >= pend) + len = 1; + else if (*p == '*') { + star = 1; + len = send - s; + p++; + } + else if (ISDIGIT(*p)) { + errno = 0; + len = STRTOUL(p, (char**)&p, 10); + if (errno) { + rb_raise(rb_eRangeError, "pack length too big"); + } + } + else { + len = (type != '@'); + } + + switch (type) { + case '%': + rb_raise(rb_eArgError, "%% is not supported"); + break; + + case 'A': + if (len > send - s) len = send - s; + { + long end = len; + char *t = s + len - 1; + + while (t >= s) { + if (*t != ' ' && *t != '\0') break; + t--; len--; + } + UNPACK_PUSH(infected_str_new(s, len, str)); + s += end; + } + break; + + case 'Z': + { + char *t = s; + + if (len > send-s) len = send-s; + while (t < s+len && *t) t++; + UNPACK_PUSH(infected_str_new(s, t-s, str)); + if (t < send) t++; + s = star ? 
t : s+len; + } + break; + + case 'a': + if (len > send - s) len = send - s; + UNPACK_PUSH(infected_str_new(s, len, str)); + s += len; + break; + + case 'b': + { + VALUE bitstr; + char *t; + int bits; + long i; + + if (p[-1] == '*' || len > (send - s) * 8) + len = (send - s) * 8; + bits = 0; + UNPACK_PUSH(bitstr = rb_str_new(0, len)); + t = RSTRING_PTR(bitstr); + for (i=0; i>= 1; + else bits = *s++; + *t++ = (bits & 1) ? '1' : '0'; + } + } + break; + + case 'B': + { + VALUE bitstr; + char *t; + int bits; + long i; + + if (p[-1] == '*' || len > (send - s) * 8) + len = (send - s) * 8; + bits = 0; + UNPACK_PUSH(bitstr = rb_str_new(0, len)); + t = RSTRING_PTR(bitstr); + for (i=0; i (send - s) * 2) + len = (send - s) * 2; + bits = 0; + UNPACK_PUSH(bitstr = rb_str_new(0, len)); + t = RSTRING_PTR(bitstr); + for (i=0; i>= 4; + else + bits = *s++; + *t++ = hexdigits[bits & 15]; + } + } + break; + + case 'H': + { + VALUE bitstr; + char *t; + int bits; + long i; + + if (p[-1] == '*' || len > (send - s) * 2) + len = (send - s) * 2; + bits = 0; + UNPACK_PUSH(bitstr = rb_str_new(0, len)); + t = RSTRING_PTR(bitstr); + for (i=0; i> 4) & 15]; + } + } + break; + + case 'c': + PACK_LENGTH_ADJUST(char,sizeof(char)); + while (len-- > 0) { + int c = *s++; + if (c > (char)127) c-=256; + UNPACK_PUSH(INT2FIX(c)); + } + PACK_ITEM_ADJUST(); + break; + + case 'C': + PACK_LENGTH_ADJUST(unsigned char,sizeof(unsigned char)); + while (len-- > 0) { + unsigned char c = *s++; + UNPACK_PUSH(INT2FIX(c)); + } + PACK_ITEM_ADJUST(); + break; + + case 's': + PACK_LENGTH_ADJUST(short,2); + while (len-- > 0) { + short tmp = 0; + memcpy(OFF16(&tmp), s, NATINT_LEN(short,2)); + EXTEND16(tmp); + s += NATINT_LEN(short,2); + UNPACK_PUSH(INT2FIX(tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'S': + PACK_LENGTH_ADJUST(unsigned short,2); + while (len-- > 0) { + unsigned short tmp = 0; + memcpy(OFF16(&tmp), s, NATINT_LEN(unsigned short,2)); + s += NATINT_LEN(unsigned short,2); + UNPACK_PUSH(INT2FIX(tmp)); + } + 
PACK_ITEM_ADJUST(); + break; + + case 'i': + PACK_LENGTH_ADJUST(int,sizeof(int)); + while (len-- > 0) { + int tmp; + memcpy(&tmp, s, sizeof(int)); + s += sizeof(int); + UNPACK_PUSH(INT2NUM(tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'I': + PACK_LENGTH_ADJUST(unsigned int,sizeof(unsigned int)); + while (len-- > 0) { + unsigned int tmp; + memcpy(&tmp, s, sizeof(unsigned int)); + s += sizeof(unsigned int); + UNPACK_PUSH(UINT2NUM(tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'l': + PACK_LENGTH_ADJUST(long,4); + while (len-- > 0) { + long tmp = 0; + memcpy(OFF32(&tmp), s, NATINT_LEN(long,4)); + EXTEND32(tmp); + s += NATINT_LEN(long,4); + UNPACK_PUSH(LONG2NUM(tmp)); + } + PACK_ITEM_ADJUST(); + break; + case 'L': + PACK_LENGTH_ADJUST(unsigned long,4); + while (len-- > 0) { + unsigned long tmp = 0; + memcpy(OFF32(&tmp), s, NATINT_LEN(unsigned long,4)); + s += NATINT_LEN(unsigned long,4); + UNPACK_PUSH(ULONG2NUM(tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'q': + PACK_LENGTH_ADJUST_SIZE(QUAD_SIZE); + while (len-- > 0) { + char *tmp = (char*)s; + s += QUAD_SIZE; + UNPACK_PUSH(rb_quad_unpack(tmp, 1)); + } + PACK_ITEM_ADJUST(); + break; + case 'Q': + PACK_LENGTH_ADJUST_SIZE(QUAD_SIZE); + while (len-- > 0) { + char *tmp = (char*)s; + s += QUAD_SIZE; + UNPACK_PUSH(rb_quad_unpack(tmp, 0)); + } + break; + + case 'n': + PACK_LENGTH_ADJUST(unsigned short,2); + while (len-- > 0) { + unsigned short tmp = 0; + memcpy(OFF16B(&tmp), s, NATINT_LEN(unsigned short,2)); + s += NATINT_LEN(unsigned short,2); + UNPACK_PUSH(UINT2NUM(ntohs(tmp))); + } + PACK_ITEM_ADJUST(); + break; + + case 'N': + PACK_LENGTH_ADJUST(unsigned long,4); + while (len-- > 0) { + unsigned long tmp = 0; + memcpy(OFF32B(&tmp), s, NATINT_LEN(unsigned long,4)); + s += NATINT_LEN(unsigned long,4); + UNPACK_PUSH(ULONG2NUM(ntohl(tmp))); + } + PACK_ITEM_ADJUST(); + break; + + case 'v': + PACK_LENGTH_ADJUST(unsigned short,2); + while (len-- > 0) { + unsigned short tmp = 0; + memcpy(&tmp, s, NATINT_LEN(unsigned 
short,2)); + s += NATINT_LEN(unsigned short,2); + UNPACK_PUSH(UINT2NUM(vtohs(tmp))); + } + PACK_ITEM_ADJUST(); + break; + + case 'V': + PACK_LENGTH_ADJUST(unsigned long,4); + while (len-- > 0) { + unsigned long tmp = 0; + memcpy(&tmp, s, NATINT_LEN(long,4)); + s += NATINT_LEN(long,4); + UNPACK_PUSH(ULONG2NUM(vtohl(tmp))); + } + PACK_ITEM_ADJUST(); + break; + + case 'f': + case 'F': + PACK_LENGTH_ADJUST(float,sizeof(float)); + while (len-- > 0) { + float tmp; + memcpy(&tmp, s, sizeof(float)); + s += sizeof(float); + UNPACK_PUSH(DBL2NUM((double)tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'e': + PACK_LENGTH_ADJUST(float,sizeof(float)); + while (len-- > 0) { + float tmp; + FLOAT_CONVWITH(ftmp); + + memcpy(&tmp, s, sizeof(float)); + s += sizeof(float); + tmp = VTOHF(tmp,ftmp); + UNPACK_PUSH(DBL2NUM((double)tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'E': + PACK_LENGTH_ADJUST(double,sizeof(double)); + while (len-- > 0) { + double tmp; + DOUBLE_CONVWITH(dtmp); + + memcpy(&tmp, s, sizeof(double)); + s += sizeof(double); + tmp = VTOHD(tmp,dtmp); + UNPACK_PUSH(DBL2NUM(tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'D': + case 'd': + PACK_LENGTH_ADJUST(double,sizeof(double)); + while (len-- > 0) { + double tmp; + memcpy(&tmp, s, sizeof(double)); + s += sizeof(double); + UNPACK_PUSH(DBL2NUM(tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'g': + PACK_LENGTH_ADJUST(float,sizeof(float)); + while (len-- > 0) { + float tmp; + FLOAT_CONVWITH(ftmp;) + + memcpy(&tmp, s, sizeof(float)); + s += sizeof(float); + tmp = NTOHF(tmp,ftmp); + UNPACK_PUSH(DBL2NUM((double)tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'G': + PACK_LENGTH_ADJUST(double,sizeof(double)); + while (len-- > 0) { + double tmp; + DOUBLE_CONVWITH(dtmp); + + memcpy(&tmp, s, sizeof(double)); + s += sizeof(double); + tmp = NTOHD(tmp,dtmp); + UNPACK_PUSH(DBL2NUM(tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'U': + if (len > send - s) len = send - s; + while (len > 0 && s < send) { + long alen = 
send - s; + unsigned long l; + + l = utf8_to_uv(s, &alen); + s += alen; len--; + UNPACK_PUSH(ULONG2NUM(l)); + } + break; + + case 'u': + { + VALUE buf = infected_str_new(0, (send - s)*3/4, str); + char *ptr = RSTRING_PTR(buf); + long total = 0; + + while (s < send && *s > ' ' && *s < 'a') { + long a,b,c,d; + char hunk[4]; + + hunk[3] = '\0'; + len = (*s++ - ' ') & 077; + total += len; + if (total > RSTRING_LEN(buf)) { + len -= total - RSTRING_LEN(buf); + total = RSTRING_LEN(buf); + } + + while (len > 0) { + long mlen = len > 3 ? 3 : len; + + if (s < send && *s >= ' ') + a = (*s++ - ' ') & 077; + else + a = 0; + if (s < send && *s >= ' ') + b = (*s++ - ' ') & 077; + else + b = 0; + if (s < send && *s >= ' ') + c = (*s++ - ' ') & 077; + else + c = 0; + if (s < send && *s >= ' ') + d = (*s++ - ' ') & 077; + else + d = 0; + hunk[0] = a << 2 | b >> 4; + hunk[1] = b << 4 | c >> 2; + hunk[2] = c << 6 | d; + memcpy(ptr, hunk, mlen); + ptr += mlen; + len -= mlen; + } + if (*s == '\r') s++; + if (*s == '\n') s++; + else if (s < send && (s+1 == send || s[1] == '\n')) + s += 2; /* possible checksum byte */ + } + + rb_str_set_len(buf, total); + UNPACK_PUSH(buf); + } + break; + + case 'm': + { + VALUE buf = infected_str_new(0, (send - s)*3/4, str); + char *ptr = RSTRING_PTR(buf); + int a = -1,b = -1,c = 0,d = 0; + static signed char b64_xtable[256]; + + if (b64_xtable['/'] <= 0) { + int i; + + for (i = 0; i < 256; i++) { + b64_xtable[i] = -1; + } + for (i = 0; i < 64; i++) { + b64_xtable[(unsigned char)b64_table[i]] = i; + } + } + if (len == 0) { + while (s < send) { + a = b = c = d = -1; + a = b64_xtable[(unsigned char)*s++]; + if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64"); + b = b64_xtable[(unsigned char)*s++]; + if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64"); + if (*s == '=') { + if (s + 2 == send && *(s + 1) == '=') break; + rb_raise(rb_eArgError, "invalid base64"); + } + c = b64_xtable[(unsigned char)*s++]; + if (s >= send || c == 
-1) rb_raise(rb_eArgError, "invalid base64"); + if (s + 1 == send && *s == '=') break; + d = b64_xtable[(unsigned char)*s++]; + if (d == -1) rb_raise(rb_eArgError, "invalid base64"); + *ptr++ = a << 2 | b >> 4; + *ptr++ = b << 4 | c >> 2; + *ptr++ = c << 6 | d; + } + if (c == -1) { + *ptr++ = a << 2 | b >> 4; + if (b & 0xf) rb_raise(rb_eArgError, "invalid base64"); + } + else if (d == -1) { + *ptr++ = a << 2 | b >> 4; + *ptr++ = b << 4 | c >> 2; + if (c & 0x3) rb_raise(rb_eArgError, "invalid base64"); + } + } + else { + while (s < send) { + a = b = c = d = -1; + while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} + if (s >= send) break; + s++; + while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} + if (s >= send) break; + s++; + while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} + if (*s == '=' || s >= send) break; + s++; + while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} + if (*s == '=' || s >= send) break; + s++; + *ptr++ = a << 2 | b >> 4; + *ptr++ = b << 4 | c >> 2; + *ptr++ = c << 6 | d; + } + if (a != -1 && b != -1) { + if (c == -1 && *s == '=') + *ptr++ = a << 2 | b >> 4; + else if (c != -1 && *s == '=') { + *ptr++ = a << 2 | b >> 4; + *ptr++ = b << 4 | c >> 2; + } + } + } + rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); + UNPACK_PUSH(buf); + } + break; + + case 'M': + { + VALUE buf = infected_str_new(0, send - s, str); + char *ptr = RSTRING_PTR(buf); + int c1, c2; + + while (s < send) { + if (*s == '=') { + if (++s == send) break; + if (s+1 < send && *s == '\r' && *(s+1) == '\n') + s++; + if (*s != '\n') { + if ((c1 = hex2num(*s)) == -1) break; + if (++s == send) break; + if ((c2 = hex2num(*s)) == -1) break; + *ptr++ = c1 << 4 | c2; + } + } + else { + *ptr++ = *s; + } + s++; + } + rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); + ENCODING_CODERANGE_SET(buf, rb_usascii_encindex(), ENC_CODERANGE_7BIT); + UNPACK_PUSH(buf); + } + break; + + case '@': + 
if (len > RSTRING_LEN(str)) + rb_raise(rb_eArgError, "@ outside of string"); + s = RSTRING_PTR(str) + len; + break; + + case 'X': + if (len > s - RSTRING_PTR(str)) + rb_raise(rb_eArgError, "X outside of string"); + s -= len; + break; + + case 'x': + if (len > send - s) + rb_raise(rb_eArgError, "x outside of string"); + s += len; + break; + + case 'P': + if (sizeof(char *) <= send - s) { + VALUE tmp = Qnil; + char *t; + + memcpy(&t, s, sizeof(char *)); + s += sizeof(char *); + + if (t) { + VALUE a, *p, *pend; + + if (!(a = rb_str_associated(str))) { + rb_raise(rb_eArgError, "no associated pointer"); + } + p = RARRAY_PTR(a); + pend = p + RARRAY_LEN(a); + while (p < pend) { + if (TYPE(*p) == T_STRING && RSTRING_PTR(*p) == t) { + if (len < RSTRING_LEN(*p)) { + tmp = rb_tainted_str_new(t, len); + rb_str_associate(tmp, a); + } + else { + tmp = *p; + } + break; + } + p++; + } + if (p == pend) { + rb_raise(rb_eArgError, "non associated pointer"); + } + } + UNPACK_PUSH(tmp); + } + break; + + case 'p': + if (len > (send - s) / sizeof(char *)) + len = (send - s) / sizeof(char *); + while (len-- > 0) { + if (send - s < sizeof(char *)) + break; + else { + VALUE tmp = Qnil; + char *t; + + memcpy(&t, s, sizeof(char *)); + s += sizeof(char *); + + if (t) { + VALUE a, *p, *pend; + + if (!(a = rb_str_associated(str))) { + rb_raise(rb_eArgError, "no associated pointer"); + } + p = RARRAY_PTR(a); + pend = p + RARRAY_LEN(a); + while (p < pend) { + if (TYPE(*p) == T_STRING && RSTRING_PTR(*p) == t) { + tmp = *p; + break; + } + p++; + } + if (p == pend) { + rb_raise(rb_eArgError, "non associated pointer"); + } + } + UNPACK_PUSH(tmp); + } + } + break; + + case 'w': + { + unsigned long ul = 0; + unsigned long ulmask = 0xfeUL << ((sizeof(unsigned long) - 1) * 8); + + while (len > 0 && s < send) { + ul <<= 7; + ul |= (*s & 0x7f); + if (!(*s++ & 0x80)) { + UNPACK_PUSH(ULONG2NUM(ul)); + len--; + ul = 0; + } + else if (ul & ulmask) { + VALUE big = rb_uint2big(ul); + VALUE big128 = 
rb_uint2big(128); + while (s < send) { + big = rb_big_mul(big, big128); + big = rb_big_plus(big, rb_uint2big(*s & 0x7f)); + if (!(*s++ & 0x80)) { + UNPACK_PUSH(big); + len--; + ul = 0; + break; + } + } + } + } + } + break; + + default: + break; + } + } + + return ary; +} + +#define BYTEWIDTH 8 + +int +rb_uv_to_utf8(char buf[6], unsigned long uv) +{ + if (uv <= 0x7f) { + buf[0] = (char)uv; + return 1; + } + if (uv <= 0x7ff) { + buf[0] = ((uv>>6)&0xff)|0xc0; + buf[1] = (uv&0x3f)|0x80; + return 2; + } + if (uv <= 0xffff) { + buf[0] = ((uv>>12)&0xff)|0xe0; + buf[1] = ((uv>>6)&0x3f)|0x80; + buf[2] = (uv&0x3f)|0x80; + return 3; + } + if (uv <= 0x1fffff) { + buf[0] = ((uv>>18)&0xff)|0xf0; + buf[1] = ((uv>>12)&0x3f)|0x80; + buf[2] = ((uv>>6)&0x3f)|0x80; + buf[3] = (uv&0x3f)|0x80; + return 4; + } + if (uv <= 0x3ffffff) { + buf[0] = ((uv>>24)&0xff)|0xf8; + buf[1] = ((uv>>18)&0x3f)|0x80; + buf[2] = ((uv>>12)&0x3f)|0x80; + buf[3] = ((uv>>6)&0x3f)|0x80; + buf[4] = (uv&0x3f)|0x80; + return 5; + } + if (uv <= 0x7fffffff) { + buf[0] = ((uv>>30)&0xff)|0xfc; + buf[1] = ((uv>>24)&0x3f)|0x80; + buf[2] = ((uv>>18)&0x3f)|0x80; + buf[3] = ((uv>>12)&0x3f)|0x80; + buf[4] = ((uv>>6)&0x3f)|0x80; + buf[5] = (uv&0x3f)|0x80; + return 6; + } + rb_raise(rb_eRangeError, "pack(U): value out of range"); +} + +static const unsigned long utf8_limits[] = { + 0x0, /* 1 */ + 0x80, /* 2 */ + 0x800, /* 3 */ + 0x10000, /* 4 */ + 0x200000, /* 5 */ + 0x4000000, /* 6 */ + 0x80000000, /* 7 */ +}; + +static unsigned long +utf8_to_uv(const char *p, long *lenp) +{ + int c = *p++ & 0xff; + unsigned long uv = c; + long n; + + if (!(uv & 0x80)) { + *lenp = 1; + return uv; + } + if (!(uv & 0x40)) { + *lenp = 1; + rb_raise(rb_eArgError, "malformed UTF-8 character"); + } + + if (!(uv & 0x20)) { n = 2; uv &= 0x1f; } + else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; } + else if (!(uv & 0x08)) { n = 4; uv &= 0x07; } + else if (!(uv & 0x04)) { n = 5; uv &= 0x03; } + else if (!(uv & 0x02)) { n = 6; uv &= 0x01; } + else { + *lenp 
= 1; + rb_raise(rb_eArgError, "malformed UTF-8 character"); + } + if (n > *lenp) { + rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)", + n, *lenp); + } + *lenp = n--; + if (n != 0) { + while (n--) { + c = *p++ & 0xff; + if ((c & 0xc0) != 0x80) { + *lenp -= n + 1; + rb_raise(rb_eArgError, "malformed UTF-8 character"); + } + else { + c &= 0x3f; + uv = uv << 6 | c; + } + } + } + n = *lenp - 1; + if (uv < utf8_limits[n]) { + rb_raise(rb_eArgError, "redundant UTF-8 sequence"); + } + return uv; +} + +/* Registers Array#pack and String#unpack, each defined with one argument. */ +void +Init_pack(void) +{ + rb_define_method(rb_cArray, "pack", pack_pack, 1); + rb_define_method(rb_cString, "unpack", pack_unpack, 1); +} diff --git a/parse.y b/parse.y new file mode 100644 index 0000000..abd7017 --- /dev/null +++ b/parse.y @@ -0,0 +1,10491 @@ +/********************************************************************** + + parse.y - + + $Author: yugui $ + created at: Fri May 28 18:02:42 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +%{ + +#define YYDEBUG 1 +#define YYERROR_VERBOSE 1 +#define YYSTACK_USE_ALLOCA 0 + +#include "ruby/ruby.h" +#include "ruby/st.h" +#include "ruby/encoding.h" +#include "node.h" +#include "parse.h" +#include "id.h" +#include "regenc.h" +/* NOTE(review): the header names of the next three directives were missing (bare "#include"); restored as the standard headers this prologue presumably used -- confirm against the original patch. */ +#include <stdio.h> +#include <errno.h> +#include <ctype.h> + +#define YYMALLOC(size) rb_parser_malloc(parser, size) +#define YYREALLOC(ptr, size) rb_parser_realloc(parser, ptr, size) +#define YYCALLOC(nelem, size) rb_parser_calloc(parser, nelem, size) +#define YYFREE(ptr) rb_parser_free(parser, ptr) +#define malloc YYMALLOC +#define realloc YYREALLOC +#define calloc YYCALLOC +#define free YYFREE + +#ifndef RIPPER +static ID register_symid(ID, const char *, long, rb_encoding *); +#define REGISTER_SYMID(id, name) register_symid(id, name, strlen(name), enc) +#include "id.c" +#endif + +#define is_notop_id(id) ((id)>tLAST_TOKEN) +#define is_local_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_LOCAL) +#define
is_global_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_GLOBAL) +#define is_instance_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_INSTANCE) +#define is_attrset_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_ATTRSET) +#define is_const_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_CONST) +#define is_class_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_CLASS) +#define is_junk_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_JUNK) + +#define is_asgn_or_id(id) ((is_notop_id(id)) && \ + (((id)&ID_SCOPE_MASK) == ID_GLOBAL || \ + ((id)&ID_SCOPE_MASK) == ID_INSTANCE || \ + ((id)&ID_SCOPE_MASK) == ID_CLASS)) + +enum lex_state_e { + EXPR_BEG, /* ignore newline, +/- is a sign. */ + EXPR_END, /* newline significant, +/- is an operator. */ + EXPR_ENDARG, /* ditto, and unbound braces. */ + EXPR_ARG, /* newline significant, +/- is an operator. */ + EXPR_CMDARG, /* newline significant, +/- is an operator. */ + EXPR_MID, /* newline significant, +/- is an operator. */ + EXPR_FNAME, /* ignore newline, no reserved words. */ + EXPR_DOT, /* right after `.' or `::', no reserved words. */ + EXPR_CLASS, /* immediate after `class', no here document. */ + EXPR_VALUE /* alike EXPR_BEG but label is disallowed. 
*/ +}; + +/* stack_type holds a stack of one-bit flags: the widest unsigned integer available (unsigned LONG_LONG when HAVE_LONG_LONG is defined). */ +# ifdef HAVE_LONG_LONG +typedef unsigned LONG_LONG stack_type; +# else +typedef unsigned long stack_type; +# endif + +/* BITSTACK_*: the newest flag lives in bit 0. PUSH shifts left and stores n&1; POP shifts right; LEXPOP shifts right but ORs the old bit 0 into the new bit 0; SET_P tests bit 0. */ +# define BITSTACK_PUSH(stack, n) (stack = (stack<<1)|((n)&1)) +# define BITSTACK_POP(stack) (stack = stack >> 1) +# define BITSTACK_LEXPOP(stack) (stack = (stack >> 1) | (stack & 1)) +# define BITSTACK_SET_P(stack) (stack&1) + +#define COND_PUSH(n) BITSTACK_PUSH(cond_stack, n) +#define COND_POP() BITSTACK_POP(cond_stack) +#define COND_LEXPOP() BITSTACK_LEXPOP(cond_stack) +#define COND_P() BITSTACK_SET_P(cond_stack) + +#define CMDARG_PUSH(n) BITSTACK_PUSH(cmdarg_stack, n) +#define CMDARG_POP() BITSTACK_POP(cmdarg_stack) +#define CMDARG_LEXPOP() BITSTACK_LEXPOP(cmdarg_stack) +#define CMDARG_P() BITSTACK_SET_P(cmdarg_stack) + +/* Growable array of IDs: tbl holds capa slots of which the first pos are in use; tables are chained through prev. */ +struct vtable { + ID *tbl; + int pos; + int capa; + struct vtable *prev; +}; + +struct local_vars { + struct vtable *args; + struct vtable *vars; + struct local_vars *prev; +}; + +/* DVARS_INHERIT and DVARS_TOPSCOPE are sentinel values rather than real tables: POINTER_P() masks off the low two bits, so it is zero (false) for both of them. */ +#define DVARS_INHERIT ((void*)1) +#define DVARS_TOPSCOPE NULL +#define DVARS_SPECIAL_P(tbl) (!POINTER_P(tbl)) +#define POINTER_P(val) ((VALUE)(val) & ~(VALUE)3) + +/* Returns the number of IDs stored in tbl, or 0 when tbl is a sentinel (non-pointer) value. */ +static int +vtable_size(const struct vtable *tbl) +{ + if (POINTER_P(tbl)) { + return tbl->pos; + } + else { + return 0; + } +} + +/* Set to non-zero to trace vtable operations with printf. */ +#define VTBL_DEBUG 0 + +/* Allocates an empty table with room for 8 IDs and links it to prev. */ +static struct vtable * +vtable_alloc(struct vtable *prev) +{ + struct vtable *tbl = ALLOC(struct vtable); + tbl->pos = 0; + tbl->capa = 8; + tbl->tbl = ALLOC_N(ID, tbl->capa); + tbl->prev = prev; + if (VTBL_DEBUG) printf("vtable_alloc: %p\n", (void *)tbl); + return tbl; +} + +/* Releases tbl and its ID array; sentinel (non-pointer) values are left alone. Does not follow prev. */ +static void +vtable_free(struct vtable *tbl) +{ + if (VTBL_DEBUG)printf("vtable_free: %p\n", (void *)tbl); + if (POINTER_P(tbl)) { + if (tbl->tbl) { + xfree(tbl->tbl); + } + xfree(tbl); + } +} + +/* Appends id to tbl; passing a sentinel (non-pointer) value is reported through rb_bug(). */ +static void +vtable_add(struct vtable *tbl, ID id) +{ + if (!POINTER_P(tbl)) { + rb_bug("vtable_add: vtable is not allocated (%p)", (void *)tbl); + } + if (VTBL_DEBUG) printf("vtable_add: %p, %s\n", (void *)tbl, rb_id2name(id)); + + if (tbl->pos == tbl->capa) { +
tbl->capa = tbl->capa * 2; + REALLOC_N(tbl->tbl, ID, tbl->capa); + } + tbl->tbl[tbl->pos++] = id; +} + +static int +vtable_included(const struct vtable * tbl, ID id) +{ + int i; + + if (POINTER_P(tbl)) { + for (i = 0; i < tbl->pos; i++) { + if (tbl->tbl[i] == id) { + return 1; + } + } + } + return 0; +} + + +#ifndef RIPPER +typedef struct token_info { + const char *token; + int linenum; + int column; + int nonspc; + struct token_info *next; +} token_info; +#endif + +/* + Structure of Lexer Buffer: + + lex_pbeg tokp lex_p lex_pend + | | | | + |-----------+--------------+------------| + |<------------>| + token +*/ +struct parser_params { + int is_ripper; + NODE *heap; + + YYSTYPE *parser_yylval; + VALUE eofp; + + NODE *parser_lex_strterm; + enum lex_state_e parser_lex_state; + stack_type parser_cond_stack; + stack_type parser_cmdarg_stack; + int parser_class_nest; + int parser_paren_nest; + int parser_lpar_beg; + int parser_in_single; + int parser_in_def; + int parser_compile_for_eval; + VALUE parser_cur_mid; + int parser_in_defined; + char *parser_tokenbuf; + int parser_tokidx; + int parser_toksiz; + VALUE parser_lex_input; + VALUE parser_lex_lastline; + VALUE parser_lex_nextline; + const char *parser_lex_pbeg; + const char *parser_lex_p; + const char *parser_lex_pend; + int parser_heredoc_end; + int parser_command_start; + NODE *parser_deferred_nodes; + int parser_lex_gets_ptr; + VALUE (*parser_lex_gets)(struct parser_params*,VALUE); + struct local_vars *parser_lvtbl; + int parser_ruby__end__seen; + int line_count; + int has_shebang; + char *parser_ruby_sourcefile; /* current source file */ + int parser_ruby_sourceline; /* current line no. 
*/ + rb_encoding *enc; + rb_encoding *utf8; + + int parser_yydebug; + +#ifndef RIPPER + /* Ruby core only */ + NODE *parser_eval_tree_begin; + NODE *parser_eval_tree; + VALUE debug_lines; + VALUE coverage; + int nerr; + + token_info *parser_token_info; +#else + /* Ripper only */ + VALUE parser_ruby_sourcefile_string; + const char *tokp; + VALUE delayed; + int delayed_line; + int delayed_col; + + VALUE value; + VALUE result; + VALUE parsing_thread; + int toplevel_p; +#endif +}; + +#define UTF8_ENC() (parser->utf8 ? parser->utf8 : \ + (parser->utf8 = rb_utf8_encoding())) +#define STR_NEW(p,n) rb_enc_str_new((p),(n),parser->enc) +#define STR_NEW0() rb_enc_str_new(0,0,parser->enc) +#define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc) +#define STR_NEW3(p,n,e,func) parser_str_new((p),(n),(e),(func),parser->enc) +#define ENC_SINGLE(cr) ((cr)==ENC_CODERANGE_7BIT) +#define TOK_INTERN(mb) rb_intern3(tok(), toklen(), parser->enc) + +#ifdef YYMALLOC +void *rb_parser_malloc(struct parser_params *, size_t); +void *rb_parser_realloc(struct parser_params *, void *, size_t); +void *rb_parser_calloc(struct parser_params *, size_t, size_t); +void rb_parser_free(struct parser_params *, void *); +#endif + +static int parser_yyerror(struct parser_params*, const char*); +#define yyerror(msg) parser_yyerror(parser, msg) + +#define YYLEX_PARAM parser + +#define lex_strterm (parser->parser_lex_strterm) +#define lex_state (parser->parser_lex_state) +#define cond_stack (parser->parser_cond_stack) +#define cmdarg_stack (parser->parser_cmdarg_stack) +#define class_nest (parser->parser_class_nest) +#define paren_nest (parser->parser_paren_nest) +#define lpar_beg (parser->parser_lpar_beg) +#define in_single (parser->parser_in_single) +#define in_def (parser->parser_in_def) +#define compile_for_eval (parser->parser_compile_for_eval) +#define cur_mid (parser->parser_cur_mid) +#define in_defined (parser->parser_in_defined) +#define tokenbuf (parser->parser_tokenbuf) +#define tokidx 
(parser->parser_tokidx) +#define toksiz (parser->parser_toksiz) +#define lex_input (parser->parser_lex_input) +#define lex_lastline (parser->parser_lex_lastline) +#define lex_nextline (parser->parser_lex_nextline) +#define lex_pbeg (parser->parser_lex_pbeg) +#define lex_p (parser->parser_lex_p) +#define lex_pend (parser->parser_lex_pend) +#define heredoc_end (parser->parser_heredoc_end) +#define command_start (parser->parser_command_start) +#define deferred_nodes (parser->parser_deferred_nodes) +#define lex_gets_ptr (parser->parser_lex_gets_ptr) +#define lex_gets (parser->parser_lex_gets) +#define lvtbl (parser->parser_lvtbl) +#define ruby__end__seen (parser->parser_ruby__end__seen) +#define ruby_sourceline (parser->parser_ruby_sourceline) +#define ruby_sourcefile (parser->parser_ruby_sourcefile) +#define yydebug (parser->parser_yydebug) +#ifdef RIPPER +#else +#define ruby_eval_tree (parser->parser_eval_tree) +#define ruby_eval_tree_begin (parser->parser_eval_tree_begin) +#define ruby_debug_lines (parser->debug_lines) +#define ruby_coverage (parser->coverage) +#endif + +static int yylex(void*, void*); + +#ifndef RIPPER +#define yyparse ruby_yyparse + +static NODE* node_newnode(struct parser_params *, enum node_type, VALUE, VALUE, VALUE); +#define rb_node_newnode(type, a1, a2, a3) node_newnode(parser, type, a1, a2, a3) + +static NODE *cond_gen(struct parser_params*,NODE*); +#define cond(node) cond_gen(parser, node) +static NODE *logop_gen(struct parser_params*,enum node_type,NODE*,NODE*); +#define logop(type,node1,node2) logop_gen(parser, type, node1, node2) + +static NODE *newline_node(NODE*); +static void fixpos(NODE*,NODE*); + +static int value_expr_gen(struct parser_params*,NODE*); +static void void_expr_gen(struct parser_params*,NODE*); +static NODE *remove_begin(NODE*); +#define value_expr(node) value_expr_gen(parser, (node) = remove_begin(node)) +#define void_expr0(node) void_expr_gen(parser, (node)) +#define void_expr(node) void_expr0((node) = 
remove_begin(node)) +static void void_stmts_gen(struct parser_params*,NODE*); +#define void_stmts(node) void_stmts_gen(parser, node) +static void reduce_nodes_gen(struct parser_params*,NODE**); +#define reduce_nodes(n) reduce_nodes_gen(parser,n) +static void block_dup_check_gen(struct parser_params*,NODE*,NODE*); +#define block_dup_check(n1,n2) block_dup_check_gen(parser,n1,n2) + +static NODE *block_append_gen(struct parser_params*,NODE*,NODE*); +#define block_append(h,t) block_append_gen(parser,h,t) +static NODE *list_append_gen(struct parser_params*,NODE*,NODE*); +#define list_append(l,i) list_append_gen(parser,l,i) +static NODE *list_concat_gen(struct parser_params*,NODE*,NODE*); +#define list_concat(h,t) list_concat_gen(parser,h,t) +static NODE *arg_append_gen(struct parser_params*,NODE*,NODE*); +#define arg_append(h,t) arg_append_gen(parser,h,t) +static NODE *arg_concat_gen(struct parser_params*,NODE*,NODE*); +#define arg_concat(h,t) arg_concat_gen(parser,h,t) +static NODE *literal_concat_gen(struct parser_params*,NODE*,NODE*); +#define literal_concat(h,t) literal_concat_gen(parser,h,t) +static NODE *new_evstr_gen(struct parser_params*,NODE*); +#define new_evstr(n) new_evstr_gen(parser,n) +static NODE *evstr2dstr_gen(struct parser_params*,NODE*); +#define evstr2dstr(n) evstr2dstr_gen(parser,n) +static NODE *splat_array(NODE*); + +static NODE *call_bin_op_gen(struct parser_params*,NODE*,ID,NODE*); +#define call_bin_op(recv,id,arg1) call_bin_op_gen(parser, recv,id,arg1) +static NODE *call_uni_op_gen(struct parser_params*,NODE*,ID); +#define call_uni_op(recv,id) call_uni_op_gen(parser, recv,id) + +static NODE *new_args_gen(struct parser_params*,NODE*,NODE*,ID,NODE*,ID); +#define new_args(f,o,r,p,b) new_args_gen(parser, f,o,r,p,b) + +static NODE *negate_lit(NODE*); +static NODE *ret_args_gen(struct parser_params*,NODE*); +#define ret_args(node) ret_args_gen(parser, node) +static NODE *arg_blk_pass(NODE*,NODE*); +static NODE *new_yield_gen(struct 
parser_params*,NODE*); +#define new_yield(node) new_yield_gen(parser, node) + +static NODE *gettable_gen(struct parser_params*,ID); +#define gettable(id) gettable_gen(parser,id) +static NODE *assignable_gen(struct parser_params*,ID,NODE*); +#define assignable(id,node) assignable_gen(parser, id, node) + +static NODE *aryset_gen(struct parser_params*,NODE*,NODE*); +#define aryset(node1,node2) aryset_gen(parser, node1, node2) +static NODE *attrset_gen(struct parser_params*,NODE*,ID); +#define attrset(node,id) attrset_gen(parser, node, id) + +static void rb_backref_error_gen(struct parser_params*,NODE*); +#define rb_backref_error(n) rb_backref_error_gen(parser,n) +static NODE *node_assign_gen(struct parser_params*,NODE*,NODE*); +#define node_assign(node1, node2) node_assign_gen(parser, node1, node2) + +static NODE *match_op_gen(struct parser_params*,NODE*,NODE*); +#define match_op(node1,node2) match_op_gen(parser, node1, node2) + +static ID *local_tbl_gen(struct parser_params*); +#define local_tbl() local_tbl_gen(parser) + +static void fixup_nodes(NODE **); + +extern int rb_dvar_defined(ID); +extern int rb_local_defined(ID); +extern int rb_parse_in_eval(void); +extern int rb_parse_in_main(void); + +static VALUE reg_compile_gen(struct parser_params*, VALUE, int); +#define reg_compile(str,options) reg_compile_gen(parser, str, options) +static void reg_fragment_setenc_gen(struct parser_params*, VALUE, int); +#define reg_fragment_setenc(str,options) reg_fragment_setenc_gen(parser, str, options) +static void reg_fragment_check_gen(struct parser_params*, VALUE, int); +#define reg_fragment_check(str,options) reg_fragment_check_gen(parser, str, options) +static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match); +#define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,regexp,match) + +#define get_id(id) (id) +#define get_value(val) (val) +#else +#define remove_begin(node) (node) +#define rb_dvar_defined(id) 0 
+#define rb_local_defined(id) 0 +static ID ripper_get_id(VALUE); +#define get_id(id) ripper_get_id(id) +static VALUE ripper_get_value(VALUE); +#define get_value(val) ripper_get_value(val) +static VALUE assignable_gen(struct parser_params*,VALUE); +#define assignable(lhs,node) assignable_gen(parser, lhs) +#endif /* !RIPPER */ + +static ID formal_argument_gen(struct parser_params*, ID); +#define formal_argument(id) formal_argument_gen(parser, id) +static ID shadowing_lvar_gen(struct parser_params*,ID); +#define shadowing_lvar(name) shadowing_lvar_gen(parser, name) +static void new_bv_gen(struct parser_params*,ID); +#define new_bv(id) new_bv_gen(parser, id) + +static void local_push_gen(struct parser_params*,int); +#define local_push(top) local_push_gen(parser,top) +static void local_pop_gen(struct parser_params*); +#define local_pop() local_pop_gen(parser) +static int local_var_gen(struct parser_params*, ID); +#define local_var(id) local_var_gen(parser, id) /* expression macro like its siblings: no trailing ';' (dyna_var() expands to this) */ +static int arg_var_gen(struct parser_params*, ID); +#define arg_var(id) arg_var_gen(parser, id) +static int local_id_gen(struct parser_params*, ID); +#define local_id(id) local_id_gen(parser, id) +static ID internal_id_gen(struct parser_params*); +#define internal_id() internal_id_gen(parser) + +static const struct vtable *dyna_push_gen(struct parser_params *); +#define dyna_push() dyna_push_gen(parser) +static void dyna_pop_gen(struct parser_params*, const struct vtable *); +#define dyna_pop(node) dyna_pop_gen(parser, node) +static int dyna_in_block_gen(struct parser_params*); +#define dyna_in_block() dyna_in_block_gen(parser) +#define dyna_var(id) local_var(id) +static int dvar_defined_gen(struct parser_params*,ID); +#define dvar_defined(id) dvar_defined_gen(parser, id) +static int dvar_curr_gen(struct parser_params*,ID); +#define dvar_curr(id) dvar_curr_gen(parser, id) + +static int lvar_defined_gen(struct parser_params*, ID); +#define lvar_defined(id) lvar_defined_gen(parser, id) + +#define RE_OPTION_ONCE
(1<<16) +#define RE_OPTION_ENCODING_SHIFT 8 +/* RE_OPTION_ENCODING packs the low 8 bits of e at bit RE_OPTION_ENCODING_SHIFT; RE_OPTION_ENCODING_IDX extracts those 8 bits again. */ +#define RE_OPTION_ENCODING(e) (((e)&0xff)<<RE_OPTION_ENCODING_SHIFT) +#define RE_OPTION_ENCODING_IDX(o) (((o)>>RE_OPTION_ENCODING_SHIFT)&0xff) +#define RE_OPTION_ENCODING_NONE(o) ((o)&RE_OPTION_ARG_ENCODING_NONE) +#define RE_OPTION_MASK 0xff +#define RE_OPTION_ARG_ENCODING_NONE 32 + +#define NODE_STRTERM NODE_ZARRAY /* nothing to gc */ +#define NODE_HEREDOC NODE_ARRAY /* 1, 3 to gc */ +#define SIGN_EXTEND(x,n) (((1<<(n)-1)^((x)&~(~0<<(n))))-(1<<(n)-1)) +#define nd_func u1.id +#if SIZEOF_SHORT == 2 +#define nd_term(node) ((signed short)(node)->u2.id) +#else +#define nd_term(node) SIGN_EXTEND((node)->u2.id, CHAR_BIT*2) +#endif +#define nd_paren(node) (char)((node)->u2.id >> CHAR_BIT*2) +#define nd_nest u3.cnt + +/****** Ripper *******/ + +#ifdef RIPPER +#define RIPPER_VERSION "0.1.0" + +#include "eventids1.c" +#include "eventids2.c" +static ID ripper_id_gets; + +static VALUE ripper_dispatch0(struct parser_params*,ID); +static VALUE ripper_dispatch1(struct parser_params*,ID,VALUE); +static VALUE ripper_dispatch2(struct parser_params*,ID,VALUE,VALUE); +static VALUE ripper_dispatch3(struct parser_params*,ID,VALUE,VALUE,VALUE); +static VALUE ripper_dispatch4(struct parser_params*,ID,VALUE,VALUE,VALUE,VALUE); +static VALUE ripper_dispatch5(struct parser_params*,ID,VALUE,VALUE,VALUE,VALUE,VALUE); + +#define dispatch0(n) ripper_dispatch0(parser, TOKEN_PASTE(ripper_id_, n)) +#define dispatch1(n,a) ripper_dispatch1(parser, TOKEN_PASTE(ripper_id_, n), a) +#define dispatch2(n,a,b) ripper_dispatch2(parser, TOKEN_PASTE(ripper_id_, n), a, b) +#define dispatch3(n,a,b,c) ripper_dispatch3(parser, TOKEN_PASTE(ripper_id_, n), a, b, c) +#define dispatch4(n,a,b,c,d) ripper_dispatch4(parser, TOKEN_PASTE(ripper_id_, n), a, b, c, d) +#define dispatch5(n,a,b,c,d,e) ripper_dispatch5(parser, TOKEN_PASTE(ripper_id_, n), a, b, c, d, e) + +#define yyparse ripper_yyparse + +static VALUE ripper_intern(const char*); +static VALUE ripper_id2sym(ID); +#ifdef __GNUC__ +#define ripper_id2sym(id) ((id) < 256 && rb_ispunct(id)
? \ + ID2SYM(id) : ripper_id2sym(id)) +#endif + +#define arg_new() dispatch0(args_new) +#define arg_add(l,a) dispatch2(args_add, l, a) +#define arg_prepend(l,a) dispatch2(args_prepend, l, a) +#define arg_add_star(l,a) dispatch2(args_add_star, l, a) +#define arg_add_block(l,b) dispatch2(args_add_block, l, b) +#define arg_add_optblock(l,b) ((b)==Qundef? l : dispatch2(args_add_block, l, b)) +#define bare_assoc(v) dispatch1(bare_assoc_hash, v) +#define arg_add_assocs(l,b) arg_add(l, bare_assoc(b)) + +#define args2mrhs(a) dispatch1(mrhs_new_from_args, a) +#define mrhs_new() dispatch0(mrhs_new) +#define mrhs_add(l,a) dispatch2(mrhs_add, l, a) +#define mrhs_add_star(l,a) dispatch2(mrhs_add_star, l, a) + +#define mlhs_new() dispatch0(mlhs_new) +#define mlhs_add(l,a) dispatch2(mlhs_add, l, a) +#define mlhs_add_star(l,a) dispatch2(mlhs_add_star, l, a) + +#define params_new(pars, opts, rest, pars2, blk) \ + dispatch5(params, pars, opts, rest, pars2, blk) + +#define blockvar_new(p,v) dispatch2(block_var, p, v) +#define blockvar_add_star(l,a) dispatch2(block_var_add_star, l, a) +#define blockvar_add_block(l,a) dispatch2(block_var_add_block, l, a) + +#define method_optarg(m,a) ((a)==Qundef ? m : dispatch2(method_add_arg,m,a)) +#define method_arg(m,a) dispatch2(method_add_arg,m,a) +#define method_add_block(m,b) dispatch2(method_add_block, m, b) + +#define escape_Qundef(x) ((x)==Qundef ? 
Qnil : (x)) + +#define FIXME 0 + +#endif /* RIPPER */ + +#ifndef RIPPER +# define ifndef_ripper(x) x +#else +# define ifndef_ripper(x) +#endif + +#ifndef RIPPER +# define rb_warn0(fmt) rb_compile_warn(ruby_sourcefile, ruby_sourceline, fmt) +# define rb_warnI(fmt,a) rb_compile_warn(ruby_sourcefile, ruby_sourceline, fmt, a) +# define rb_warnS(fmt,a) rb_compile_warn(ruby_sourcefile, ruby_sourceline, fmt, a) +# define rb_warning0(fmt) rb_compile_warning(ruby_sourcefile, ruby_sourceline, fmt) +# define rb_warningS(fmt,a) rb_compile_warning(ruby_sourcefile, ruby_sourceline, fmt, a) +#else +# define rb_warn0(fmt) ripper_warn0(parser, fmt) +# define rb_warnI(fmt,a) ripper_warnI(parser, fmt, a) +# define rb_warnS(fmt,a) ripper_warnS(parser, fmt, a) +# define rb_warning0(fmt) ripper_warning0(parser, fmt) +# define rb_warningS(fmt,a) ripper_warningS(parser, fmt, a) +static void ripper_warn0(struct parser_params*, const char*); +static void ripper_warnI(struct parser_params*, const char*, int); +#if 0 +static void ripper_warnS(struct parser_params*, const char*, const char*); +#endif +static void ripper_warning0(struct parser_params*, const char*); +static void ripper_warningS(struct parser_params*, const char*, const char*); +#endif + +#ifdef RIPPER +static void ripper_compile_error(struct parser_params*, const char *fmt, ...); +# define rb_compile_error ripper_compile_error +# define compile_error ripper_compile_error +# define PARSER_ARG parser, +#else +# define compile_error parser->nerr++,rb_compile_error +# define PARSER_ARG ruby_sourcefile, ruby_sourceline, +#endif + +/* Older versions of Yacc set YYMAXDEPTH to a very low value by default (150, + for instance). This is too low for Ruby to parse some files, such as + date/format.rb, therefore bump the value up to at least Bison's default. 
*/ +#ifdef OLD_YACC +#ifndef YYMAXDEPTH +#define YYMAXDEPTH 10000 +#endif +#endif + +#ifndef RIPPER +static void token_info_push(struct parser_params*, const char *token); +static void token_info_pop(struct parser_params*, const char *token); +#endif +%} + +%pure_parser +%parse-param {struct parser_params *parser} + +%union { /* semantic value; each member name doubles as a type tag in the token/type declarations below */ + VALUE val; + NODE *node; + ID id; + int num; + const struct vtable *vars; +} + +/*%%%*/ +%token +/*% +%token <val> +%*/ + keyword_class + keyword_module + keyword_def + keyword_undef + keyword_begin + keyword_rescue + keyword_ensure + keyword_end + keyword_if + keyword_unless + keyword_then + keyword_elsif + keyword_else + keyword_case + keyword_when + keyword_while + keyword_until + keyword_for + keyword_break + keyword_next + keyword_redo + keyword_retry + keyword_in + keyword_do + keyword_do_cond + keyword_do_block + keyword_do_LAMBDA + keyword_return + keyword_yield + keyword_super + keyword_self + keyword_nil + keyword_true + keyword_false + keyword_and + keyword_or + keyword_not + modifier_if + modifier_unless + modifier_while + modifier_until + modifier_rescue + keyword_alias + keyword_defined + keyword_BEGIN + keyword_END + keyword__LINE__ + keyword__FILE__ + keyword__ENCODING__ + +%token <id> tIDENTIFIER tFID tGVAR tIVAR tCONSTANT tCVAR tLABEL +%token <node> tINTEGER tFLOAT tSTRING_CONTENT tCHAR +%token <node> tNTH_REF tBACK_REF +%token <num> tREGEXP_END + +%type <node> singleton strings string string1 xstring regexp +%type <node> string_contents xstring_contents string_content +%type <node> words qwords word_list qword_list word +%type <node> literal numeric dsym cpath +%type <node> top_compstmt top_stmts top_stmt +%type <node> bodystmt compstmt stmts stmt expr arg primary command command_call method_call +%type <node> expr_value arg_value primary_value +%type <node> if_tail opt_else case_body cases opt_rescue exc_list exc_var opt_ensure +%type <node> args call_args opt_call_args +%type <node> paren_args opt_paren_args +%type <node> command_args aref_args opt_block_arg block_arg var_ref var_lhs +%type <node> mrhs superclass block_call 
block_command +%type <node> f_block_optarg f_block_opt +%type <node> f_arglist f_args f_arg f_arg_item f_optarg f_marg f_marg_list f_margs +%type <node> assoc_list assocs assoc undef_list backref string_dvar for_var +%type <node> block_param opt_block_param block_param_def f_opt +%type <node> bv_decls opt_bv_decl bvar +%type <node> lambda f_larglist lambda_body +%type <node> brace_block cmd_brace_block do_block lhs none fitem +%type <node> mlhs mlhs_head mlhs_basic mlhs_item mlhs_node mlhs_post mlhs_inner +%type <id> fsym variable sym symbol operation operation2 operation3 +%type <id> cname fname op f_rest_arg f_block_arg opt_f_block_arg f_norm_arg f_bad_arg +/*%%%*/ +/*% +%type <val> program reswords then do dot_or_colon +%*/ +%token tUPLUS /* unary+ */ +%token tUMINUS /* unary- */ +%token tPOW /* ** */ +%token tCMP /* <=> */ +%token tEQ /* == */ +%token tEQQ /* === */ +%token tNEQ /* != */ +%token tGEQ /* >= */ +%token tLEQ /* <= */ +%token tANDOP tOROP /* && and || */ +%token tMATCH tNMATCH /* =~ and !~ */ +%token tDOT2 tDOT3 /* .. and ... */ +%token tAREF tASET /* [] and []= */ +%token tLSHFT tRSHFT /* << and >> */ +%token tCOLON2 /* :: */ +%token tCOLON3 /* :: at EXPR_BEG */ +%token <id> tOP_ASGN /* +=, -= etc. */ +%token tASSOC /* => */ +%token tLPAREN /* ( */ +%token tLPAREN_ARG /* ( */ +%token tRPAREN /* ) */ +%token tLBRACK /* [ */ +%token tLBRACE /* { */ +%token tLBRACE_ARG /* { */ +%token tSTAR /* * */ +%token tAMPER /* & */ +%token tLAMBDA /* -> */ +%token tSYMBEG tSTRING_BEG tXSTRING_BEG tREGEXP_BEG tWORDS_BEG tQWORDS_BEG +%token tSTRING_DBEG tSTRING_DVAR tSTRING_END tLAMBEG + +/* + * precedence table: entries run from lowest to highest precedence + */ + +%nonassoc tLOWEST +%nonassoc tLBRACE_ARG + +%nonassoc modifier_if modifier_unless modifier_while modifier_until +%left keyword_or keyword_and +%right keyword_not +%nonassoc keyword_defined +%right '=' tOP_ASGN +%left modifier_rescue +%right '?' 
':' +%nonassoc tDOT2 tDOT3 +%left tOROP +%left tANDOP +%nonassoc tCMP tEQ tEQQ tNEQ tMATCH tNMATCH +%left '>' tGEQ '<' tLEQ +%left '|' '^' +%left '&' +%left tLSHFT tRSHFT +%left '+' '-' +%left '*' '/' '%' +%right tUMINUS_NUM tUMINUS +%right tPOW +%right '!' '~' tUPLUS + +%nonassoc idNULL +%nonassoc idRespond_to +%nonassoc idIFUNC +%nonassoc idCFUNC +%nonassoc idThrowState +%nonassoc id_core_set_method_alias +%nonassoc id_core_set_variable_alias +%nonassoc id_core_undef_method +%nonassoc id_core_define_method +%nonassoc id_core_define_singleton_method +%nonassoc id_core_set_postexe + +%token tLAST_TOKEN + +%% +program : { + lex_state = EXPR_BEG; + /*%%%*/ + $<num>$ = compile_for_eval || rb_parse_in_main(); /* mid-rule value (untyped slot, so the num member is named explicitly); read back in the final action */ + local_push($<num>$); + /*% + local_push(0); + %*/ + } + top_compstmt + { + /*%%%*/ + if ($2 && !$<num>1) { /* only when the mid-rule flag above was zero */ + /* last expression should not be void */ + if (nd_type($2) != NODE_BLOCK) void_expr($2); + else { + NODE *node = $2; + while (node->nd_next) { /* walk to the last element of the block list */ + node = node->nd_next; + } + void_expr(node->nd_head); + } + } + ruby_eval_tree = NEW_SCOPE(0, block_append(ruby_eval_tree, $2)); + /*% + $$ = $2; + parser->result = dispatch1(program, $$); + %*/ + local_pop(); + } + ; + +top_compstmt : top_stmts opt_terms + { + /*%%%*/ + void_stmts($1); + fixup_nodes(&deferred_nodes); + /*% + %*/ + $$ = $1; + } + ; + +top_stmts : none + { + /*%%%*/ + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch2(stmts_add, dispatch0(stmts_new), + dispatch0(void_stmt)); + %*/ + } + | top_stmt + { + /*%%%*/ + $$ = newline_node($1); + /*% + $$ = dispatch2(stmts_add, dispatch0(stmts_new), $1); + %*/ + } + | top_stmts terms top_stmt + { + /*%%%*/ + $$ = block_append($1, newline_node($3)); + /*% + $$ = dispatch2(stmts_add, $1, $3); + %*/ + } + | error top_stmt + { + $$ = remove_begin($2); + } + ; + +top_stmt : stmt + | keyword_BEGIN + { + if (in_def || in_single) { + yyerror("BEGIN in method"); + } + /*%%%*/ + /* local_push(0); */ + /*% + %*/ + } + '{' top_compstmt '}' + { + /*%%%*/ + ruby_eval_tree_begin = 
block_append(ruby_eval_tree_begin, + $4); + /* NEW_PREEXE($4)); */ + /* local_pop(); */ + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch1(BEGIN, $4); + %*/ + } + ; + +bodystmt : compstmt + opt_rescue + opt_else + opt_ensure + { + /*%%%*/ + $$ = $1; + if ($2) { + $$ = NEW_RESCUE($1, $2, $3); + } + else if ($3) { + rb_warn0("else without rescue is useless"); + $$ = block_append($$, $3); + } + if ($4) { + if ($$) { + $$ = NEW_ENSURE($$, $4); + } + else { + $$ = block_append($4, NEW_NIL()); + } + } + fixpos($$, $1); + /*% + $$ = dispatch4(bodystmt, + escape_Qundef($1), + escape_Qundef($2), + escape_Qundef($3), + escape_Qundef($4)); + %*/ + } + ; + +compstmt : stmts opt_terms + { + /*%%%*/ + void_stmts($1); + fixup_nodes(&deferred_nodes); + /*% + %*/ + $$ = $1; + } + ; + +stmts : none + { + /*%%%*/ + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch2(stmts_add, dispatch0(stmts_new), + dispatch0(void_stmt)); + %*/ + } + | stmt + { + /*%%%*/ + $$ = newline_node($1); + /*% + $$ = dispatch2(stmts_add, dispatch0(stmts_new), $1); + %*/ + } + | stmts terms stmt + { + /*%%%*/ + $$ = block_append($1, newline_node($3)); + /*% + $$ = dispatch2(stmts_add, $1, $3); + %*/ + } + | error stmt + { + $$ = remove_begin($2); + } + ; + +stmt : keyword_alias fitem {lex_state = EXPR_FNAME;} fitem + { + /*%%%*/ + $$ = NEW_ALIAS($2, $4); + /*% + $$ = dispatch2(alias, $2, $4); + %*/ + } + | keyword_alias tGVAR tGVAR + { + /*%%%*/ + $$ = NEW_VALIAS($2, $3); + /*% + $$ = dispatch2(var_alias, $2, $3); + %*/ + } + | keyword_alias tGVAR tBACK_REF + { + /*%%%*/ + char buf[3]; + + sprintf(buf, "$%c", (char)$3->nd_nth); + $$ = NEW_VALIAS($2, rb_intern(buf)); + /*% + $$ = dispatch2(var_alias, $2, $3); + %*/ + } + | keyword_alias tGVAR tNTH_REF + { + /*%%%*/ + yyerror("can't make alias for the number variables"); + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch2(var_alias, $2, $3); + $$ = dispatch1(alias_error, $$); + %*/ + } + | keyword_undef undef_list + { + /*%%%*/ + $$ = $2; + /*% + $$ = dispatch1(undef, $2); + %*/ + } + 
| stmt modifier_if expr_value + { + /*%%%*/ + $$ = NEW_IF(cond($3), remove_begin($1), 0); /* condition first, modified statement as the body, no else part */ + fixpos($$, $3); + /*% + $$ = dispatch2(if_mod, $3, $1); + %*/ + } + | stmt modifier_unless expr_value + { + /*%%%*/ + $$ = NEW_UNLESS(cond($3), remove_begin($1), 0); + fixpos($$, $3); + /*% + $$ = dispatch2(unless_mod, $3, $1); + %*/ + } + | stmt modifier_while expr_value + { + /*%%%*/ + if ($1 && nd_type($1) == NODE_BEGIN) { /* begin-wrapped statement: loop over its nd_body and pass 0 as the last argument */ + $$ = NEW_WHILE(cond($3), $1->nd_body, 0); + } + else { + $$ = NEW_WHILE(cond($3), $1, 1); + } + /*% + $$ = dispatch2(while_mod, $3, $1); + %*/ + } + | stmt modifier_until expr_value + { + /*%%%*/ + if ($1 && nd_type($1) == NODE_BEGIN) { /* same begin-wrapped special case as modifier_while */ + $$ = NEW_UNTIL(cond($3), $1->nd_body, 0); + } + else { + $$ = NEW_UNTIL(cond($3), $1, 1); + } + /*% + $$ = dispatch2(until_mod, $3, $1); + %*/ + } + | stmt modifier_rescue stmt + { + /*%%%*/ + NODE *resq = NEW_RESBODY(0, remove_begin($3), 0); + $$ = NEW_RESCUE(remove_begin($1), resq, 0); /* NOTE(review): the ripper branch below passes (handler, body) to rescue_mod, while the arg rule passes (body, handler) - confirm which order handlers expect */ + /*% + $$ = dispatch2(rescue_mod, $3, $1); + %*/ + } + | keyword_END '{' compstmt '}' + { + if (in_def || in_single) { /* only warned about here; BEGIN inside a method is reported through yyerror in top_stmt */ + rb_warn0("END in method; use at_exit"); + } + /*%%%*/ + $$ = NEW_POSTEXE(NEW_NODE( + NODE_SCOPE, 0 /* tbl */, $3 /* body */, 0 /* args */)); + /*% + $$ = dispatch1(END, $3); + %*/ + } + | lhs '=' command_call + { + /*%%%*/ + value_expr($3); + $$ = node_assign($1, $3); + /*% + $$ = dispatch2(assign, $1, $3); + %*/ + } + | mlhs '=' command_call + { + /*%%%*/ + value_expr($3); + $1->nd_value = $3; /* the multiple-assignment node built by mlhs receives its right-hand side here */ + $$ = $1; + /*% + $$ = dispatch2(massign, $1, $3); + %*/ + } + | var_lhs tOP_ASGN command_call + { + /*%%%*/ + value_expr($3); + if ($1) { + ID vid = $1->nd_vid; + if ($2 == tOROP) { /* operator id is tOROP: build an or-assignment node */ + $1->nd_value = $3; + $$ = NEW_OP_ASGN_OR(gettable(vid), $1); + if (is_asgn_or_id(vid)) { + $$->nd_aid = vid; + } + } + else if ($2 == tANDOP) { /* operator id is tANDOP: build an and-assignment node */ + $1->nd_value = $3; + $$ = NEW_OP_ASGN_AND(gettable(vid), $1); + } + else { /* any other operator: assign the result of calling it on the current value */ + $$ = $1; + $$->nd_value = NEW_CALL(gettable(vid), $2, NEW_LIST($3)); + } + } + else { /* no assignable target was produced: fall back to an empty begin node */ + $$ = NEW_BEGIN(0); + } + /*% + 
$$ = dispatch3(opassign, $1, $2, $3); + %*/ + } + | primary_value '[' opt_call_args rbracket tOP_ASGN command_call + { + /*%%%*/ + NODE *args; + + value_expr($6); + if (!$3) $3 = NEW_ZARRAY(); + args = arg_concat($3, $6); + if ($5 == tOROP) { + $5 = 0; + } + else if ($5 == tANDOP) { + $5 = 1; + } + $$ = NEW_OP_ASGN1($1, $5, args); + fixpos($$, $1); + /*% + $$ = dispatch2(aref_field, $1, escape_Qundef($3)); + $$ = dispatch3(opassign, $$, $5, $6); + %*/ + } + | primary_value '.' tIDENTIFIER tOP_ASGN command_call + { + /*%%%*/ + value_expr($5); + if ($4 == tOROP) { + $4 = 0; + } + else if ($4 == tANDOP) { + $4 = 1; + } + $$ = NEW_OP_ASGN2($1, $3, $4, $5); + fixpos($$, $1); + /*% + $$ = dispatch3(field, $1, ripper_id2sym('.'), $3); + $$ = dispatch3(opassign, $$, $4, $5); + %*/ + } + | primary_value '.' tCONSTANT tOP_ASGN command_call + { + /*%%%*/ + value_expr($5); + if ($4 == tOROP) { + $4 = 0; + } + else if ($4 == tANDOP) { + $4 = 1; + } + $$ = NEW_OP_ASGN2($1, $3, $4, $5); + fixpos($$, $1); + /*% + $$ = dispatch3(field, $1, ripper_id2sym('.'), $3); + $$ = dispatch3(opassign, $$, $4, $5); + %*/ + } + | primary_value tCOLON2 tIDENTIFIER tOP_ASGN command_call + { + /*%%%*/ + value_expr($5); + if ($4 == tOROP) { + $4 = 0; + } + else if ($4 == tANDOP) { + $4 = 1; + } + $$ = NEW_OP_ASGN2($1, $3, $4, $5); + fixpos($$, $1); + /*% + $$ = dispatch3(field, $1, ripper_intern("::"), $3); + $$ = dispatch3(opassign, $$, $4, $5); + %*/ + } + | backref tOP_ASGN command_call + { + /*%%%*/ + rb_backref_error($1); + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch2(assign, dispatch1(var_field, $1), $3); + $$ = dispatch1(assign_error, $$); + %*/ + } + | lhs '=' mrhs + { + /*%%%*/ + value_expr($3); + $$ = node_assign($1, $3); + /*% + $$ = dispatch2(assign, $1, $3); + %*/ + } + | mlhs '=' arg_value + { + /*%%%*/ + $1->nd_value = $3; + $$ = $1; + /*% + $$ = dispatch2(massign, $1, $3); + %*/ + } + | mlhs '=' mrhs + { + /*%%%*/ + $1->nd_value = $3; + $$ = $1; + /*% + $$ = dispatch2(massign, $1, $3); 
+ %*/ + } + | expr + ; + +expr : command_call + | expr keyword_and expr + { + /*%%%*/ + $$ = logop(NODE_AND, $1, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("and"), $3); + %*/ + } + | expr keyword_or expr + { + /*%%%*/ + $$ = logop(NODE_OR, $1, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("or"), $3); + %*/ + } + | keyword_not opt_nl expr + { + /*%%%*/ + $$ = call_uni_op(cond($3), '!'); + /*% + $$ = dispatch2(unary, ripper_intern("not"), $3); + %*/ + } + | '!' command_call + { + /*%%%*/ + $$ = call_uni_op(cond($2), '!'); + /*% + $$ = dispatch2(unary, ripper_id2sym('!'), $2); + %*/ + } + | arg + ; + +expr_value : expr + { + /*%%%*/ + value_expr($1); + $$ = $1; + if (!$$) $$ = NEW_NIL(); + /*% + $$ = $1; + %*/ + } + ; + +command_call : command + | block_command + | keyword_return call_args + { + /*%%%*/ + $$ = NEW_RETURN(ret_args($2)); + /*% + $$ = dispatch1(return, $2); + %*/ + } + | keyword_break call_args + { + /*%%%*/ + $$ = NEW_BREAK(ret_args($2)); + /*% + $$ = dispatch1(break, $2); + %*/ + } + | keyword_next call_args + { + /*%%%*/ + $$ = NEW_NEXT(ret_args($2)); + /*% + $$ = dispatch1(next, $2); + %*/ + } + ; + +block_command : block_call + | block_call '.' 
operation2 command_args + { + /*%%%*/ + $$ = NEW_CALL($1, $3, $4); + /*% + $$ = dispatch3(call, $1, ripper_id2sym('.'), $3); + $$ = method_arg($$, $4); + %*/ + } + | block_call tCOLON2 operation2 command_args + { + /*%%%*/ + $$ = NEW_CALL($1, $3, $4); + /*% + $$ = dispatch3(call, $1, ripper_intern("::"), $3); + $$ = method_arg($$, $4); + %*/ + } + ; + +cmd_brace_block : tLBRACE_ARG + { + $<vars>1 = dyna_push(); /* tLBRACE_ARG has no declared type; its slot keeps the dyna_push() result for dyna_pop() below */ + /*%%%*/ + $<num>$ = ruby_sourceline; /* mid-rule value: source line when the block opens, applied with nd_set_line() below */ + /*% + %*/ + } + opt_block_param + compstmt + '}' + { + /*%%%*/ + $$ = NEW_ITER($3,$4); + nd_set_line($$, $<num>2); + /*% + $$ = dispatch2(brace_block, escape_Qundef($3), $4); + %*/ + dyna_pop($<vars>1); + } + ; + +command : operation command_args %prec tLOWEST + { + /*%%%*/ + $$ = NEW_FCALL($1, $2); + fixpos($$, $2); + /*% + $$ = dispatch2(command, $1, $2); + %*/ + } + | operation command_args cmd_brace_block + { + /*%%%*/ + block_dup_check($2,$3); + $3->nd_iter = NEW_FCALL($1, $2); /* the iter node from cmd_brace_block receives the call it is attached to */ + $$ = $3; + fixpos($$, $2); + /*% + $$ = dispatch2(command, $1, $2); + $$ = method_add_block($$, $3); + %*/ + } + | primary_value '.' operation2 command_args %prec tLOWEST + { + /*%%%*/ + $$ = NEW_CALL($1, $3, $4); + fixpos($$, $1); + /*% + $$ = dispatch4(command_call, $1, ripper_id2sym('.'), $3, $4); + %*/ + } + | primary_value '.' 
operation2 command_args cmd_brace_block + { + /*%%%*/ + block_dup_check($4,$5); + $5->nd_iter = NEW_CALL($1, $3, $4); + $$ = $5; + fixpos($$, $1); + /*% + $$ = dispatch4(command_call, $1, ripper_id2sym('.'), $3, $4); + $$ = method_add_block($$, $5); + %*/ + } + | primary_value tCOLON2 operation2 command_args %prec tLOWEST + { + /*%%%*/ + $$ = NEW_CALL($1, $3, $4); + fixpos($$, $1); + /*% + $$ = dispatch4(command_call, $1, ripper_intern("::"), $3, $4); + %*/ + } + | primary_value tCOLON2 operation2 command_args cmd_brace_block + { + /*%%%*/ + block_dup_check($4,$5); + $5->nd_iter = NEW_CALL($1, $3, $4); + $$ = $5; + fixpos($$, $1); + /*% + $$ = dispatch4(command_call, $1, ripper_intern("::"), $3, $4); + $$ = method_add_block($$, $5); + %*/ + } + | keyword_super command_args + { + /*%%%*/ + $$ = NEW_SUPER($2); + fixpos($$, $2); + /*% + $$ = dispatch1(super, $2); + %*/ + } + | keyword_yield command_args + { + /*%%%*/ + $$ = new_yield($2); + fixpos($$, $2); + /*% + $$ = dispatch1(yield, $2); + %*/ + } + ; + +mlhs : mlhs_basic + | tLPAREN mlhs_inner rparen + { + /*%%%*/ + $$ = $2; + /*% + $$ = dispatch1(mlhs_paren, $2); + %*/ + } + ; + +mlhs_inner : mlhs_basic + | tLPAREN mlhs_inner rparen + { + /*%%%*/ + $$ = NEW_MASGN(NEW_LIST($2), 0); + /*% + $$ = dispatch1(mlhs_paren, $2); + %*/ + } + ; + +mlhs_basic : mlhs_head + { + /*%%%*/ + $$ = NEW_MASGN($1, 0); + /*% + $$ = $1; + %*/ + } + | mlhs_head mlhs_item + { + /*%%%*/ + $$ = NEW_MASGN(list_append($1,$2), 0); + /*% + $$ = mlhs_add($1, $2); + %*/ + } + | mlhs_head tSTAR mlhs_node + { + /*%%%*/ + $$ = NEW_MASGN($1, $3); + /*% + $$ = mlhs_add_star($1, $3); + %*/ + } + | mlhs_head tSTAR mlhs_node ',' mlhs_post + { + /*%%%*/ + $$ = NEW_MASGN($1, NEW_POSTARG($3,$5)); + /*% + $1 = mlhs_add_star($1, $3); + $$ = mlhs_add($1, $5); + %*/ + } + | mlhs_head tSTAR + { + /*%%%*/ + $$ = NEW_MASGN($1, -1); + /*% + $$ = mlhs_add_star($1, Qnil); + %*/ + } + | mlhs_head tSTAR ',' mlhs_post + { + /*%%%*/ + $$ = NEW_MASGN($1, NEW_POSTARG(-1, 
$4)); /* the ripper branch appends the post-splat list as well, like the named-splat form */ + /*% + $$ = mlhs_add(mlhs_add_star($1, Qnil), $4); + %*/ + } + | tSTAR mlhs_node + { + /*%%%*/ + $$ = NEW_MASGN(0, $2); + /*% + $$ = mlhs_add_star(mlhs_new(), $2); + %*/ + } + | tSTAR mlhs_node ',' mlhs_post + { + /*%%%*/ + $$ = NEW_MASGN(0, NEW_POSTARG($2,$4)); /* splat target followed by post-splat targets; the ripper branch keeps both */ + /*% + $$ = mlhs_add(mlhs_add_star(mlhs_new(), $2), $4); + %*/ + } + | tSTAR + { + /*%%%*/ + $$ = NEW_MASGN(0, -1); /* -1 stands in for the splat node when the splat has no target */ + /*% + $$ = mlhs_add_star(mlhs_new(), Qnil); + %*/ + } + | tSTAR ',' mlhs_post + { + /*%%%*/ + $$ = NEW_MASGN(0, NEW_POSTARG(-1, $3)); /* anonymous splat followed by post-splat targets; the ripper branch keeps the list */ + /*% + $$ = mlhs_add(mlhs_add_star(mlhs_new(), Qnil), $3); + %*/ + } + ; + +mlhs_item : mlhs_node + | tLPAREN mlhs_inner rparen + { + /*%%%*/ + $$ = $2; + /*% + $$ = dispatch1(mlhs_paren, $2); + %*/ + } + ; + +mlhs_head : mlhs_item ',' + { + /*%%%*/ + $$ = NEW_LIST($1); + /*% + $$ = mlhs_add(mlhs_new(), $1); + %*/ + } + | mlhs_head mlhs_item ',' + { + /*%%%*/ + $$ = list_append($1, $2); + /*% + $$ = mlhs_add($1, $2); + %*/ + } + ; + +mlhs_post : mlhs_item + { + /*%%%*/ + $$ = NEW_LIST($1); + /*% + $$ = mlhs_add(mlhs_new(), $1); + %*/ + } + | mlhs_post ',' mlhs_item + { + /*%%%*/ + $$ = list_append($1, $3); + /*% + $$ = mlhs_add($1, $3); + %*/ + } + ; + +mlhs_node : variable + { + $$ = assignable($1, 0); + } + | primary_value '[' opt_call_args rbracket + { + /*%%%*/ + $$ = aryset($1, $3); + /*% + $$ = dispatch2(aref_field, $1, escape_Qundef($3)); + %*/ + } + | primary_value '.' tIDENTIFIER + { + /*%%%*/ + $$ = attrset($1, $3); + /*% + $$ = dispatch3(field, $1, ripper_id2sym('.'), $3); + %*/ + } + | primary_value tCOLON2 tIDENTIFIER + { + /*%%%*/ + $$ = attrset($1, $3); + /*% + $$ = dispatch2(const_path_field, $1, $3); + %*/ + } + | primary_value '.' 
tCONSTANT + { + /*%%%*/ + $$ = attrset($1, $3); + /*% + $$ = dispatch3(field, $1, ripper_id2sym('.'), $3); + %*/ + } + | primary_value tCOLON2 tCONSTANT + { + /*%%%*/ + if (in_def || in_single) + yyerror("dynamic constant assignment"); + $$ = NEW_CDECL(0, 0, NEW_COLON2($1, $3)); + /*% + if (in_def || in_single) + yyerror("dynamic constant assignment"); + $$ = dispatch2(const_path_field, $1, $3); + %*/ + } + | tCOLON3 tCONSTANT + { + /*%%%*/ + if (in_def || in_single) + yyerror("dynamic constant assignment"); + $$ = NEW_CDECL(0, 0, NEW_COLON3($2)); + /*% + $$ = dispatch1(top_const_field, $2); + %*/ + } + | backref + { + /*%%%*/ + rb_backref_error($1); + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch1(var_field, $1); + $$ = dispatch1(assign_error, $$); + %*/ + } + ; + +lhs : variable + { + $$ = assignable($1, 0); + /*%%%*/ + if (!$$) $$ = NEW_BEGIN(0); + /*% + $$ = dispatch1(var_field, $$); + %*/ + } + | primary_value '[' opt_call_args rbracket + { + /*%%%*/ + $$ = aryset($1, $3); + /*% + $$ = dispatch2(aref_field, $1, escape_Qundef($3)); + %*/ + } + | primary_value '.' tIDENTIFIER + { + /*%%%*/ + $$ = attrset($1, $3); + /*% + $$ = dispatch3(field, $1, ripper_id2sym('.'), $3); + %*/ + } + | primary_value tCOLON2 tIDENTIFIER + { + /*%%%*/ + $$ = attrset($1, $3); + /*% + $$ = dispatch3(field, $1, ripper_intern("::"), $3); + %*/ + } + | primary_value '.' 
tCONSTANT + { + /*%%%*/ + $$ = attrset($1, $3); + /*% + $$ = dispatch3(field, $1, ripper_id2sym('.'), $3); + %*/ + } + | primary_value tCOLON2 tCONSTANT + { + /*%%%*/ + if (in_def || in_single) + yyerror("dynamic constant assignment"); + $$ = NEW_CDECL(0, 0, NEW_COLON2($1, $3)); + /*% + $$ = dispatch2(const_path_field, $1, $3); + if (in_def || in_single) { + $$ = dispatch1(assign_error, $$); + } + %*/ + } + | tCOLON3 tCONSTANT + { + /*%%%*/ + if (in_def || in_single) + yyerror("dynamic constant assignment"); + $$ = NEW_CDECL(0, 0, NEW_COLON3($2)); + /*% + $$ = dispatch1(top_const_field, $2); + if (in_def || in_single) { + $$ = dispatch1(assign_error, $$); + } + %*/ + } + | backref + { + /*%%%*/ + rb_backref_error($1); + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch1(assign_error, $1); + %*/ + } + ; + +cname : tIDENTIFIER + { + /*%%%*/ + yyerror("class/module name must be CONSTANT"); /* the result is left at yacc's default, the identifier's own value */ + /*% + $$ = dispatch1(class_name_error, $1); + %*/ + } + | tCONSTANT + ; + +cpath : tCOLON3 cname + { + /*%%%*/ + $$ = NEW_COLON3($2); + /*% + $$ = dispatch1(top_const_ref, $2); + %*/ + } + | cname + { + /*%%%*/ + $$ = NEW_COLON2(0, $1); /* read the id from the cname slot itself rather than through the node-typed result */ + /*% + $$ = dispatch1(const_ref, $1); + %*/ + } + | primary_value tCOLON2 cname + { + /*%%%*/ + $$ = NEW_COLON2($1, $3); + /*% + $$ = dispatch2(const_path_ref, $1, $3); + %*/ + } + ; + +fname : tIDENTIFIER + | tCONSTANT + | tFID + | op + { + /*%%%*/ + lex_state = EXPR_END; + $$ = $1; + /*% + lex_state = EXPR_END; + $$ = $1; + %*/ + } + | reswords + { + /*%%%*/ + lex_state = EXPR_END; + $$ = $<id>1; /* reswords has a declared type only in the ripper build, so the id member is named explicitly here */ + /*% + lex_state = EXPR_END; + $$ = $1; + %*/ + } + ; + +fsym : fname + | symbol + ; + +fitem : fsym + { + /*%%%*/ + $$ = NEW_LIT(ID2SYM($1)); + /*% + $$ = dispatch1(symbol_literal, $1); + %*/ + } + | dsym + ; + +undef_list : fitem + { + /*%%%*/ + $$ = NEW_UNDEF($1); + /*% + $$ = rb_ary_new3(1, $1); + %*/ + } + | undef_list ',' {lex_state = EXPR_FNAME;} fitem + { + /*%%%*/ + $$ = block_append($1, NEW_UNDEF($4)); /* the fitem is the fourth slot because the mid-rule action occupies the third */ + /*% + rb_ary_push($1, $4); + %*/ + } + ; + +op : 
'|' { ifndef_ripper($$ = '|'); } + | '^' { ifndef_ripper($$ = '^'); } + | '&' { ifndef_ripper($$ = '&'); } + | tCMP { ifndef_ripper($$ = tCMP); } + | tEQ { ifndef_ripper($$ = tEQ); } + | tEQQ { ifndef_ripper($$ = tEQQ); } + | tMATCH { ifndef_ripper($$ = tMATCH); } + | tNMATCH { ifndef_ripper($$ = tNMATCH); } + | '>' { ifndef_ripper($$ = '>'); } + | tGEQ { ifndef_ripper($$ = tGEQ); } + | '<' { ifndef_ripper($$ = '<'); } + | tLEQ { ifndef_ripper($$ = tLEQ); } + | tNEQ { ifndef_ripper($$ = tNEQ); } + | tLSHFT { ifndef_ripper($$ = tLSHFT); } + | tRSHFT { ifndef_ripper($$ = tRSHFT); } + | '+' { ifndef_ripper($$ = '+'); } + | '-' { ifndef_ripper($$ = '-'); } + | '*' { ifndef_ripper($$ = '*'); } + | tSTAR { ifndef_ripper($$ = '*'); } + | '/' { ifndef_ripper($$ = '/'); } + | '%' { ifndef_ripper($$ = '%'); } + | tPOW { ifndef_ripper($$ = tPOW); } + | '!' { ifndef_ripper($$ = '!'); } + | '~' { ifndef_ripper($$ = '~'); } + | tUPLUS { ifndef_ripper($$ = tUPLUS); } + | tUMINUS { ifndef_ripper($$ = tUMINUS); } + | tAREF { ifndef_ripper($$ = tAREF); } + | tASET { ifndef_ripper($$ = tASET); } + | '`' { ifndef_ripper($$ = '`'); } + ; + +reswords : keyword__LINE__ | keyword__FILE__ | keyword__ENCODING__ + | keyword_BEGIN | keyword_END + | keyword_alias | keyword_and | keyword_begin + | keyword_break | keyword_case | keyword_class | keyword_def + | keyword_defined | keyword_do | keyword_else | keyword_elsif + | keyword_end | keyword_ensure | keyword_false + | keyword_for | keyword_in | keyword_module | keyword_next + | keyword_nil | keyword_not | keyword_or | keyword_redo + | keyword_rescue | keyword_retry | keyword_return | keyword_self + | keyword_super | keyword_then | keyword_true | keyword_undef + | keyword_when | keyword_yield | keyword_if | keyword_unless + | keyword_while | keyword_until + ; + +arg : lhs '=' arg + { + /*%%%*/ + value_expr($3); + $$ = node_assign($1, $3); + /*% + $$ = dispatch2(assign, $1, $3); + %*/ + } + | lhs '=' arg modifier_rescue arg + { + /*%%%*/ + 
value_expr($3); + $3 = NEW_RESCUE($3, NEW_RESBODY(0,$5,0), 0); + $$ = node_assign($1, $3); + /*% + $$ = dispatch2(assign, $1, dispatch2(rescue_mod, $3, $5)); + %*/ + } + | var_lhs tOP_ASGN arg + { + /*%%%*/ + value_expr($3); + if ($1) { + ID vid = $1->nd_vid; + if ($2 == tOROP) { + $1->nd_value = $3; + $$ = NEW_OP_ASGN_OR(gettable(vid), $1); + if (is_asgn_or_id(vid)) { + $$->nd_aid = vid; + } + } + else if ($2 == tANDOP) { + $1->nd_value = $3; + $$ = NEW_OP_ASGN_AND(gettable(vid), $1); + } + else { + $$ = $1; + $$->nd_value = NEW_CALL(gettable(vid), $2, NEW_LIST($3)); + } + } + else { + $$ = NEW_BEGIN(0); + } + /*% + $$ = dispatch3(opassign, $1, $2, $3); + %*/ + } + | var_lhs tOP_ASGN arg modifier_rescue arg + { + /*%%%*/ + value_expr($3); + $3 = NEW_RESCUE($3, NEW_RESBODY(0,$5,0), 0); + if ($1) { + ID vid = $1->nd_vid; + if ($2 == tOROP) { + $1->nd_value = $3; + $$ = NEW_OP_ASGN_OR(gettable(vid), $1); + if (is_asgn_or_id(vid)) { + $$->nd_aid = vid; + } + } + else if ($2 == tANDOP) { + $1->nd_value = $3; + $$ = NEW_OP_ASGN_AND(gettable(vid), $1); + } + else { + $$ = $1; + $$->nd_value = NEW_CALL(gettable(vid), $2, NEW_LIST($3)); + } + } + else { + $$ = NEW_BEGIN(0); + } + /*% + $3 = dispatch2(rescue_mod, $3, $5); + $$ = dispatch3(opassign, $1, $2, $3); + %*/ + } + | primary_value '[' opt_call_args rbracket tOP_ASGN arg + { + /*%%%*/ + NODE *args; + + value_expr($6); + if (!$3) $3 = NEW_ZARRAY(); + args = arg_concat($3, $6); + if ($5 == tOROP) { + $5 = 0; + } + else if ($5 == tANDOP) { + $5 = 1; + } + $$ = NEW_OP_ASGN1($1, $5, args); + fixpos($$, $1); + /*% + $1 = dispatch2(aref_field, $1, escape_Qundef($3)); + $$ = dispatch3(opassign, $1, $5, $6); + %*/ + } + | primary_value '.' 
tIDENTIFIER tOP_ASGN arg + { + /*%%%*/ + value_expr($5); + if ($4 == tOROP) { + $4 = 0; + } + else if ($4 == tANDOP) { + $4 = 1; + } + $$ = NEW_OP_ASGN2($1, $3, $4, $5); + fixpos($$, $1); + /*% + $1 = dispatch3(field, $1, ripper_id2sym('.'), $3); + $$ = dispatch3(opassign, $1, $4, $5); + %*/ + } + | primary_value '.' tCONSTANT tOP_ASGN arg + { + /*%%%*/ + value_expr($5); + if ($4 == tOROP) { + $4 = 0; + } + else if ($4 == tANDOP) { + $4 = 1; + } + $$ = NEW_OP_ASGN2($1, $3, $4, $5); + fixpos($$, $1); + /*% + $1 = dispatch3(field, $1, ripper_id2sym('.'), $3); + $$ = dispatch3(opassign, $1, $4, $5); + %*/ + } + | primary_value tCOLON2 tIDENTIFIER tOP_ASGN arg + { + /*%%%*/ + value_expr($5); + if ($4 == tOROP) { + $4 = 0; + } + else if ($4 == tANDOP) { + $4 = 1; + } + $$ = NEW_OP_ASGN2($1, $3, $4, $5); + fixpos($$, $1); + /*% + $1 = dispatch3(field, $1, ripper_intern("::"), $3); + $$ = dispatch3(opassign, $1, $4, $5); + %*/ + } + | primary_value tCOLON2 tCONSTANT tOP_ASGN arg + { + /*%%%*/ + yyerror("constant re-assignment"); + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch2(const_path_field, $1, $3); + $$ = dispatch3(opassign, $$, $4, $5); + $$ = dispatch1(assign_error, $$); + %*/ + } + | tCOLON3 tCONSTANT tOP_ASGN arg + { + /*%%%*/ + yyerror("constant re-assignment"); + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch1(top_const_field, $2); + $$ = dispatch3(opassign, $$, $3, $4); + $$ = dispatch1(assign_error, $$); + %*/ + } + | backref tOP_ASGN arg + { + /*%%%*/ + rb_backref_error($1); + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch1(var_field, $1); + $$ = dispatch3(opassign, $$, $2, $3); + $$ = dispatch1(assign_error, $$); + %*/ + } + | arg tDOT2 arg + { + /*%%%*/ + value_expr($1); + value_expr($3); + $$ = NEW_DOT2($1, $3); + if (nd_type($1) == NODE_LIT && FIXNUM_P($1->nd_lit) && + nd_type($3) == NODE_LIT && FIXNUM_P($3->nd_lit)) { + deferred_nodes = list_append(deferred_nodes, $$); + } + /*% + $$ = dispatch2(dot2, $1, $3); + %*/ + } + | arg tDOT3 arg + { + /*%%%*/ + value_expr($1); 
+ value_expr($3); + $$ = NEW_DOT3($1, $3); + if (nd_type($1) == NODE_LIT && FIXNUM_P($1->nd_lit) && + nd_type($3) == NODE_LIT && FIXNUM_P($3->nd_lit)) { + deferred_nodes = list_append(deferred_nodes, $$); + } + /*% + $$ = dispatch2(dot3, $1, $3); + %*/ + } + | arg '+' arg + { + /*%%%*/ + $$ = call_bin_op($1, '+', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('+'), $3); + %*/ + } + | arg '-' arg + { + /*%%%*/ + $$ = call_bin_op($1, '-', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('-'), $3); + %*/ + } + | arg '*' arg + { + /*%%%*/ + $$ = call_bin_op($1, '*', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('*'), $3); + %*/ + } + | arg '/' arg + { + /*%%%*/ + $$ = call_bin_op($1, '/', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('/'), $3); + %*/ + } + | arg '%' arg + { + /*%%%*/ + $$ = call_bin_op($1, '%', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('%'), $3); + %*/ + } + | arg tPOW arg + { + /*%%%*/ + $$ = call_bin_op($1, tPOW, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("**"), $3); + %*/ + } + | tUMINUS_NUM tINTEGER tPOW arg + { + /*%%%*/ + $$ = NEW_CALL(call_bin_op($2, tPOW, $4), tUMINUS, 0); + /*% + $$ = dispatch3(binary, $2, ripper_intern("**"), $4); + $$ = dispatch2(unary, ripper_intern("-@"), $$); + %*/ + } + | tUMINUS_NUM tFLOAT tPOW arg + { + /*%%%*/ + $$ = NEW_CALL(call_bin_op($2, tPOW, $4), tUMINUS, 0); + /*% + $$ = dispatch3(binary, $2, ripper_intern("**"), $4); + $$ = dispatch2(unary, ripper_intern("-@"), $$); + %*/ + } + | tUPLUS arg + { + /*%%%*/ + $$ = call_uni_op($2, tUPLUS); + /*% + $$ = dispatch2(unary, ripper_intern("+@"), $2); + %*/ + } + | tUMINUS arg + { + /*%%%*/ + $$ = call_uni_op($2, tUMINUS); + /*% + $$ = dispatch2(unary, ripper_intern("-@"), $2); + %*/ + } + | arg '|' arg + { + /*%%%*/ + $$ = call_bin_op($1, '|', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('|'), $3); + %*/ + } + | arg '^' arg + { + /*%%%*/ + $$ = call_bin_op($1, '^', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('^'), $3); + %*/ + } + | arg '&' arg + { 
+ /*%%%*/ + $$ = call_bin_op($1, '&', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('&'), $3); + %*/ + } + | arg tCMP arg + { + /*%%%*/ + $$ = call_bin_op($1, tCMP, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("<=>"), $3); + %*/ + } + | arg '>' arg + { + /*%%%*/ + $$ = call_bin_op($1, '>', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('>'), $3); + %*/ + } + | arg tGEQ arg + { + /*%%%*/ + $$ = call_bin_op($1, tGEQ, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern(">="), $3); + %*/ + } + | arg '<' arg + { + /*%%%*/ + $$ = call_bin_op($1, '<', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('<'), $3); + %*/ + } + | arg tLEQ arg + { + /*%%%*/ + $$ = call_bin_op($1, tLEQ, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("<="), $3); + %*/ + } + | arg tEQ arg + { + /*%%%*/ + $$ = call_bin_op($1, tEQ, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("=="), $3); + %*/ + } + | arg tEQQ arg + { + /*%%%*/ + $$ = call_bin_op($1, tEQQ, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("==="), $3); + %*/ + } + | arg tNEQ arg + { + /*%%%*/ + $$ = call_bin_op($1, tNEQ, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("!="), $3); + %*/ + } + | arg tMATCH arg + { + /*%%%*/ + $$ = match_op($1, $3); + if (nd_type($1) == NODE_LIT && TYPE($1->nd_lit) == T_REGEXP) { + $$ = reg_named_capture_assign($1->nd_lit, $$); + } + /*% + $$ = dispatch3(binary, $1, ripper_intern("=~"), $3); + %*/ + } + | arg tNMATCH arg + { + /*%%%*/ + $$ = call_bin_op($1, tNMATCH, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("!~"), $3); + %*/ + } + | '!' 
arg + { + /*%%%*/ + $$ = call_uni_op(cond($2), '!'); + /*% + $$ = dispatch2(unary, ID2SYM('!'), $2); + %*/ + } + | '~' arg + { + /*%%%*/ + $$ = call_uni_op($2, '~'); + /*% + $$ = dispatch2(unary, ID2SYM('~'), $2); + %*/ + } + | arg tLSHFT arg + { + /*%%%*/ + $$ = call_bin_op($1, tLSHFT, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("<<"), $3); + %*/ + } + | arg tRSHFT arg + { + /*%%%*/ + $$ = call_bin_op($1, tRSHFT, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern(">>"), $3); + %*/ + } + | arg tANDOP arg + { + /*%%%*/ + $$ = logop(NODE_AND, $1, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("&&"), $3); + %*/ + } + | arg tOROP arg + { + /*%%%*/ + $$ = logop(NODE_OR, $1, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("||"), $3); + %*/ + } + | keyword_defined opt_nl {in_defined = 1;} arg /* in_defined is set by the mid-rule action and cleared in the final action */ + { + /*%%%*/ + in_defined = 0; + $$ = NEW_DEFINED($4); + /*% + in_defined = 0; + $$ = dispatch1(defined, $4); + %*/ + } + | arg '?' arg opt_nl ':' arg /* ternary: built as NEW_IF on cond($1) in the parser build */ + { + /*%%%*/ + value_expr($1); + $$ = NEW_IF(cond($1), $3, $6); + fixpos($$, $1); + /*% + $$ = dispatch3(ifop, $1, $3, $6); + %*/ + } + | primary + { + $$ = $1; + } + ; + +arg_value : arg /* arg used as a value: checked with value_expr(); a null node is replaced by NEW_NIL() in the parser build */ + { + /*%%%*/ + value_expr($1); + $$ = $1; + if (!$$) $$ = NEW_NIL(); + /*% + $$ = $1; + %*/ + } + ; + +aref_args : none /* args and/or trailing assocs; in the parser build the assocs are wrapped in a single NEW_HASH */ + | args trailer + { + $$ = $1; + } + | args ',' assocs trailer + { + /*%%%*/ + $$ = arg_append($1, NEW_HASH($3)); + /*% + $$ = arg_add_assocs($1, $3); + %*/ + } + | assocs trailer + { + /*%%%*/ + $$ = NEW_LIST(NEW_HASH($1)); + /*% + $$ = arg_add_assocs(arg_new(), $1); + %*/ + } + ; + +paren_args : '(' opt_call_args rparen /* parenthesized call arguments; the parser build passes $2 through unchanged */ + { + /*%%%*/ + $$ = $2; + /*% + $$ = dispatch1(arg_paren, escape_Qundef($2)); + %*/ + } + ; + +opt_paren_args : none + | paren_args + ; + +opt_call_args : none + | call_args + ; + +call_args : command /* call arguments: a command, args, assocs (wrapped in NEW_HASH) or a lone block_arg; arg_blk_pass() is given the list and the optional block argument */ + { + /*%%%*/ + $$ = NEW_LIST($1); + /*% + $$ = arg_add(arg_new(), $1); + %*/ + } + | args opt_block_arg + { + /*%%%*/ + $$ = arg_blk_pass($1, $2); + /*% + $$ = arg_add_optblock($1, $2);
+ %*/ + } + | assocs opt_block_arg + { + /*%%%*/ + $$ = NEW_LIST(NEW_HASH($1)); + $$ = arg_blk_pass($$, $2); + /*% + $$ = arg_add_assocs(arg_new(), $1); + $$ = arg_add_optblock($$, $2); + %*/ + } + | args ',' assocs opt_block_arg + { + /*%%%*/ + $$ = arg_append($1, NEW_HASH($3)); + $$ = arg_blk_pass($$, $4); + /*% + $$ = arg_add_optblock(arg_add_assocs($1, $3), $4); + %*/ + } + | block_arg + /*%c%*/ + /*%c + { + $$ = arg_add_block(arg_new(), $1); + } + %*/ + ; + +command_args : { /* $1 keeps the current cmdarg_stack so the final action can restore it */ + $$ = cmdarg_stack; + CMDARG_PUSH(1); + } + call_args + { + /* CMDARG_POP() */ + cmdarg_stack = $1; + $$ = $2; + } + ; + +block_arg : tAMPER arg_value /* wrapped in NEW_BLOCK_PASS in the parser build */ + { + /*%%%*/ + $$ = NEW_BLOCK_PASS($2); + /*% + $$ = $2; + %*/ + } + ; + +opt_block_arg : ',' block_arg /* optional trailing block argument; 0 after a bare trailing comma or when absent */ + { + $$ = $2; + } + | ',' + { + $$ = 0; + } + | none + { + $$ = 0; + } + ; + +args : arg_value /* comma separated values and tSTAR splats */ + { + /*%%%*/ + $$ = NEW_LIST($1); + /*% + $$ = arg_add(arg_new(), $1); + %*/ + } + | tSTAR arg_value + { + /*%%%*/ + $$ = NEW_SPLAT($2); + /*% + $$ = arg_add_star(arg_new(), $2); + %*/ + } + | args ',' arg_value /* NOTE(review): splat_array() is defined outside this chunk; when it returns non-zero the new value is appended to that result */ + { + /*%%%*/ + NODE *n1; + if ((n1 = splat_array($1)) != 0) { + $$ = list_append(n1, $3); + } + else { + $$ = arg_append($1, $3); + } + /*% + $$ = arg_add($1, $3); + %*/ + } + | args ',' tSTAR arg_value + { + /*%%%*/ + NODE *n1; + if ((nd_type($4) == NODE_ARRAY) && (n1 = splat_array($1)) != 0) { + $$ = list_concat(n1, $4); + } + else { + $$ = arg_concat($1, $4); + } + /*% + $$ = arg_add_star($1, $4); + %*/ + } + ; + +mrhs : args ',' arg_value /* like args, but every alternative contains a comma or a leading splat */ + { + /*%%%*/ + NODE *n1; + if ((n1 = splat_array($1)) != 0) { + $$ = list_append(n1, $3); + } + else { + $$ = arg_append($1, $3); + } + /*% + $$ = mrhs_add(args2mrhs($1), $3); + %*/ + } + | args ',' tSTAR arg_value + { + /*%%%*/ + NODE *n1; + if (nd_type($4) == NODE_ARRAY && + (n1 = splat_array($1)) != 0) { + $$ = list_concat(n1, $4); + } + else { + $$ = arg_concat($1, $4); + } + /*% + $$ = mrhs_add_star(args2mrhs($1), $4); + %*/ + } + | tSTAR arg_value + { + /*%%%*/ + $$ = NEW_SPLAT($2); + /*%
+ $$ = mrhs_add_star(mrhs_new(), $2); + %*/ + } + ; + +primary : literal /* primary expressions: literals, references, grouping, calls with blocks and the keyword statements (begin/if/unless/while/until/case/for/class/module/def) */ + | strings + | xstring + | regexp + | words + | qwords + | var_ref + | backref + | tFID + { + /*%%%*/ + $$ = NEW_FCALL($1, 0); + /*% + $$ = method_arg(dispatch1(fcall, $1), arg_new()); + %*/ + } + | k_begin /* the mid-rule action records ruby_sourceline as $2; the final action stamps it on the node */ + { + /*%%%*/ + $$ = ruby_sourceline; + /*% + %*/ + } + bodystmt + k_end + { + /*%%%*/ + if ($3 == NULL) { + $$ = NEW_NIL(); + } + else { + if (nd_type($3) == NODE_RESCUE || + nd_type($3) == NODE_ENSURE) + nd_set_line($3, $2); + $$ = NEW_BEGIN($3); + } + nd_set_line($$, $2); + /*% + $$ = dispatch1(begin, $3); + %*/ + } + | tLPAREN_ARG expr {lex_state = EXPR_ENDARG;} rparen /* the warning is issued in both the parser and the ripper build */ + { + rb_warning0("(...) interpreted as grouped expression"); + /*%%%*/ + $$ = $2; + /*% + $$ = dispatch1(paren, $2); + %*/ + } + | tLPAREN compstmt ')' + { + /*%%%*/ + $$ = $2; + /*% + $$ = dispatch1(paren, $2); + %*/ + } + | primary_value tCOLON2 tCONSTANT + { + /*%%%*/ + $$ = NEW_COLON2($1, $3); + /*% + $$ = dispatch2(const_path_ref, $1, $3); + %*/ + } + | tCOLON3 tCONSTANT + { + /*%%%*/ + $$ = NEW_COLON3($2); + /*% + $$ = dispatch1(top_const_ref, $2); + %*/ + } + | tLBRACK aref_args ']' + { + /*%%%*/ + if ($2 == 0) { + $$ = NEW_ZARRAY(); /* zero length array */ + } + else { + $$ = $2; + } + /*% + $$ = dispatch1(array, escape_Qundef($2)); + %*/ + } + | tLBRACE assoc_list '}' + { + /*%%%*/ + $$ = NEW_HASH($2); + /*% + $$ = dispatch1(hash, escape_Qundef($2)); + %*/ + } + | keyword_return + { + /*%%%*/ + $$ = NEW_RETURN(0); + /*% + $$ = dispatch0(return0); + %*/ + } + | keyword_yield '(' call_args rparen + { + /*%%%*/ + $$ = new_yield($3); + /*% + $$ = dispatch1(yield, dispatch1(paren, $3)); + %*/ + } + | keyword_yield '(' rparen + { + /*%%%*/ + $$ = NEW_YIELD(0, Qfalse); + /*% + $$ = dispatch1(yield, dispatch1(paren, arg_new())); + %*/ + } + | keyword_yield + { + /*%%%*/ + $$ = NEW_YIELD(0, Qfalse); + /*% + $$ = dispatch0(yield0); + %*/ + } + | keyword_defined opt_nl '(' {in_defined = 1;} expr rparen + { + /*%%%*/ +
in_defined = 0; + $$ = NEW_DEFINED($5); + /*% + in_defined = 0; + $$ = dispatch1(defined, $5); + %*/ + } + | keyword_not '(' expr rparen + { + /*%%%*/ + $$ = call_uni_op(cond($3), '!'); + /*% + $$ = dispatch2(unary, ripper_intern("not"), $3); + %*/ + } + | keyword_not '(' rparen + { + /*%%%*/ + $$ = call_uni_op(cond(NEW_NIL()), '!'); + /*% + $$ = dispatch2(unary, ripper_intern("not"), Qnil); + %*/ + } + | operation brace_block /* bare method name with a block: the block's nd_iter becomes an FCALL without arguments */ + { + /*%%%*/ + $2->nd_iter = NEW_FCALL($1, 0); + $$ = $2; + fixpos($2->nd_iter, $2); + /*% + $$ = method_arg(dispatch1(fcall, $1), arg_new()); + $$ = method_add_block($$, $2); + %*/ + } + | method_call + | method_call brace_block + { + /*%%%*/ + block_dup_check($1->nd_args, $2); + $2->nd_iter = $1; + $$ = $2; + fixpos($$, $1); + /*% + $$ = method_add_block($1, $2); + %*/ + } + | tLAMBDA lambda + { + $$ = $2; + } + | k_if expr_value then + compstmt + if_tail + k_end + { + /*%%%*/ + $$ = NEW_IF(cond($2), $4, $5); + fixpos($$, $2); + /*% + $$ = dispatch3(if, $2, $4, escape_Qundef($5)); + %*/ + } + | k_unless expr_value then + compstmt + opt_else + k_end + { + /*%%%*/ + $$ = NEW_UNLESS(cond($2), $4, $5); + fixpos($$, $2); + /*% + $$ = dispatch3(unless, $2, $4, escape_Qundef($5)); + %*/ + } + | k_while {COND_PUSH(1);} expr_value do {COND_POP();} /* COND is pushed only while the condition and the do are parsed; the mid-rule actions shift the body to $6 */ + compstmt + k_end + { + /*%%%*/ + $$ = NEW_WHILE(cond($3), $6, 1); + fixpos($$, $3); + /*% + $$ = dispatch2(while, $3, $6); + %*/ + } + | k_until {COND_PUSH(1);} expr_value do {COND_POP();} + compstmt + k_end + { + /*%%%*/ + $$ = NEW_UNTIL(cond($3), $6, 1); + fixpos($$, $3); + /*% + $$ = dispatch2(until, $3, $6); + %*/ + } + | k_case expr_value opt_terms + case_body + k_end + { + /*%%%*/ + $$ = NEW_CASE($2, $4); + fixpos($$, $2); + /*% + $$ = dispatch2(case, $2, $4); + %*/ + } + | k_case opt_terms case_body k_end /* case without a subject expression */ + { + /*%%%*/ + $$ = NEW_CASE(0, $3); + /*% + $$ = dispatch2(case, Qnil, $3); + %*/ + } + | k_for for_var keyword_in + {COND_PUSH(1);} + expr_value do + {COND_POP();} + compstmt + k_end + { +
/*%%%*/ + /* + * for a, b, c in e + * #=> + * e.each{|*x| a, b, c = x} + * + * for a in e + * #=> + * e.each{|x| a, = x} + */ + ID id = internal_id(); + ID *tbl = ALLOC_N(ID, 2); + NODE *m = NEW_ARGS_AUX(0, 0); + NODE *args, *scope; + + if (nd_type($2) == NODE_MASGN) { + /* if args.length == 1 && args[0].kind_of?(Array) + * args = args[0] + * end + */ + NODE *one = NEW_LIST(NEW_LIT(INT2FIX(1))); + NODE *zero = NEW_LIST(NEW_LIT(INT2FIX(0))); + m->nd_next = block_append( + NEW_IF( + NEW_NODE(NODE_AND, + NEW_CALL(NEW_CALL(NEW_DVAR(id), rb_intern("length"), 0), + rb_intern("=="), one), + NEW_CALL(NEW_CALL(NEW_DVAR(id), rb_intern("[]"), zero), + rb_intern("kind_of?"), NEW_LIST(NEW_LIT(rb_cArray))), + 0), + NEW_DASGN_CURR(id, + NEW_CALL(NEW_DVAR(id), rb_intern("[]"), zero)), + 0), + node_assign($2, NEW_DVAR(id))); + + args = new_args(m, 0, id, 0, 0); + } + else { + if (nd_type($2) == NODE_LASGN || + nd_type($2) == NODE_DASGN || + nd_type($2) == NODE_DASGN_CURR) { + $2->nd_value = NEW_DVAR(id); + m->nd_plen = 1; + m->nd_next = $2; + args = new_args(m, 0, 0, 0, 0); + } + else { + m->nd_next = node_assign(NEW_MASGN(NEW_LIST($2), 0), NEW_DVAR(id)); + args = new_args(m, 0, id, 0, 0); + } + } + scope = NEW_NODE(NODE_SCOPE, tbl, $8, args); + tbl[0] = 1; tbl[1] = id; + $$ = NEW_FOR(0, $5, scope); + fixpos($$, $2); + /*% + $$ = dispatch3(for, $2, $5, $8); + %*/ + } + | k_class cpath superclass /* reports an error when in_def or in_single is set; $4 records the starting line */ + { + if (in_def || in_single) + yyerror("class definition in method body"); + local_push(0); + /*%%%*/ + $$ = ruby_sourceline; + /*% + %*/ + } + bodystmt + k_end + { + /*%%%*/ + $$ = NEW_CLASS($2, $5, $3); + nd_set_line($$, $4); + /*% + $$ = dispatch3(class, $2, $3, $5); + %*/ + local_pop(); + } + | k_class tLSHFT expr /* in_def and in_single are saved as $4 and $6, zeroed for the body and restored in the final action */ + { + $$ = in_def; + in_def = 0; + } + term + { + $$ = in_single; + in_single = 0; + local_push(0); + } + bodystmt + k_end + { + /*%%%*/ + $$ = NEW_SCLASS($3, $7); + fixpos($$, $3); + /*% + $$ = dispatch2(sclass, $3, $7); + %*/ + local_pop(); + in_def = $4; + in_single = $6;
+ } + | k_module cpath /* reports an error when in_def or in_single is set; $3 records the starting line */ + { + if (in_def || in_single) + yyerror("module definition in method body"); + local_push(0); + /*%%%*/ + $$ = ruby_sourceline; + /*% + %*/ + } + bodystmt + k_end + { + /*%%%*/ + $$ = NEW_MODULE($2, $4); + nd_set_line($$, $3); + /*% + $$ = dispatch2(module, $2, $4); + %*/ + local_pop(); + } + | k_def fname /* cur_mid is saved as $3 and restored after the body; in_def is incremented around the definition */ + { + $$ = cur_mid; + cur_mid = $2; + in_def++; + local_push(0); + } + f_arglist + bodystmt + k_end + { + /*%%%*/ + NODE *body = remove_begin($5); + reduce_nodes(&body); + $$ = NEW_DEFN($2, $4, body, NOEX_PRIVATE); + fixpos($$, $4); + /*% + $$ = dispatch3(def, $2, $4, $5); + %*/ + local_pop(); + in_def--; + cur_mid = $3; + } + | k_def singleton dot_or_colon {lex_state = EXPR_FNAME;} fname /* singleton method definition; in_single is incremented around the definition */ + { + in_single++; + lex_state = EXPR_END; /* force for args */ + local_push(0); + } + f_arglist + bodystmt + k_end + { + /*%%%*/ + NODE *body = remove_begin($8); + reduce_nodes(&body); + $$ = NEW_DEFS($2, $5, $7, body); + fixpos($$, $2); + /*% + $$ = dispatch5(defs, $2, $3, $5, $7, $8); + %*/ + local_pop(); + in_single--; + } + | keyword_break + { + /*%%%*/ + $$ = NEW_BREAK(0); + /*% + $$ = dispatch1(break, arg_new()); + %*/ + } + | keyword_next + { + /*%%%*/ + $$ = NEW_NEXT(0); + /*% + $$ = dispatch1(next, arg_new()); + %*/ + } + | keyword_redo + { + /*%%%*/ + $$ = NEW_REDO(); + /*% + $$ = dispatch0(redo); + %*/ + } + | keyword_retry + { + /*%%%*/ + $$ = NEW_RETRY(); + /*% + $$ = dispatch0(retry); + %*/ + } + ; + +primary_value : primary /* primary used as a value: checked with value_expr(); a null node is replaced by NEW_NIL() in the parser build */ + { + /*%%%*/ + value_expr($1); + $$ = $1; + if (!$$) $$ = NEW_NIL(); + /*% + $$ = $1; + %*/ + } + ; + +k_begin : keyword_begin /* k_* wrappers: when ruby_verbose is set the parser build records the keyword with token_info_push() */ + { +#ifndef RIPPER + if (RTEST(ruby_verbose)) token_info_push(parser, "begin"); +#endif + } + +k_if : keyword_if + { +#ifndef RIPPER + if (RTEST(ruby_verbose)) token_info_push(parser, "if"); +#endif + } + +k_unless : keyword_unless + { +#ifndef RIPPER + if (RTEST(ruby_verbose)) token_info_push(parser, "unless"); +#endif + } + +k_while : keyword_while + { +#ifndef RIPPER + if
(RTEST(ruby_verbose)) token_info_push(parser, "while"); +#endif + } + +k_until : keyword_until + { +#ifndef RIPPER + if (RTEST(ruby_verbose)) token_info_push(parser, "until"); +#endif + } + +k_case : keyword_case + { +#ifndef RIPPER + if (RTEST(ruby_verbose)) token_info_push(parser, "case"); +#endif + } + +k_for : keyword_for + { +#ifndef RIPPER + if (RTEST(ruby_verbose)) token_info_push(parser, "for"); +#endif + } + +k_class : keyword_class + { +#ifndef RIPPER + if (RTEST(ruby_verbose)) token_info_push(parser, "class"); +#endif + } + +k_module : keyword_module + { +#ifndef RIPPER + if (RTEST(ruby_verbose)) token_info_push(parser, "module"); +#endif + } + +k_def : keyword_def + { +#ifndef RIPPER + if (RTEST(ruby_verbose)) token_info_push(parser, "def"); +#endif + } + +k_end : keyword_end /* counterpart of the k_* rules above: calls token_info_pop() instead of token_info_push() */ + { +#ifndef RIPPER + if (RTEST(ruby_verbose)) token_info_pop(parser, "end"); /* POP */ +#endif + } + +then : term + /*%c%*/ + /*%c + { $$ = Qnil; } + %*/ + | keyword_then + | term keyword_then + /*%c%*/ + /*%c + { $$ = $2; } + %*/ + ; + +do : term + /*%c%*/ + /*%c + { $$ = Qnil; } + %*/ + | keyword_do_cond + ; + +if_tail : opt_else /* an elsif clause becomes a NEW_IF whose else slot is the following if_tail */ + | keyword_elsif expr_value then + compstmt + if_tail + { + /*%%%*/ + $$ = NEW_IF(cond($2), $4, $5); + fixpos($$, $2); + /*% + $$ = dispatch3(elsif, $2, $4, escape_Qundef($5)); + %*/ + } + ; + +opt_else : none + | keyword_else compstmt + { + /*%%%*/ + $$ = $2; + /*% + $$ = dispatch1(else, $2); + %*/ + } + ; + +for_var : lhs + | mlhs + ; + +f_marg : f_norm_arg /* one target of a destructuring parameter: a name made assignable, or a nested parenthesized f_margs */ + { + $$ = assignable($1, 0); + /*%%%*/ + /*% + $$ = dispatch1(mlhs_paren, $$); + %*/ + } + | tLPAREN f_margs rparen + { + /*%%%*/ + $$ = $2; + /*% + $$ = dispatch1(mlhs_paren, $2); + %*/ + } + ; + +f_marg_list : f_marg + { + /*%%%*/ + $$ = NEW_LIST($1); + /*% + $$ = mlhs_add(mlhs_new(), $1); + %*/ + } + | f_marg_list ',' f_marg + { + /*%%%*/ + $$ = list_append($1, $3); + /*% + $$ = mlhs_add($1, $3); + %*/ + } + ; + +f_margs : f_marg_list /* destructuring pattern built as NEW_MASGN(head list, splat); -1 stands for an unnamed splat and NEW_POSTARG carries the list after the splat */ + { + /*%%%*/ + $$ = NEW_MASGN($1, 0); + /*% + $$ = $1; +
%*/ + } + | f_marg_list ',' tSTAR f_norm_arg + { + $$ = assignable($4, 0); + /*%%%*/ + $$ = NEW_MASGN($1, $$); + /*% + $$ = mlhs_add_star($1, $$); + %*/ + } + | f_marg_list ',' tSTAR f_norm_arg ',' f_marg_list /* NOTE(review): the ripper branch of this alternative never uses $6 */ + { + $$ = assignable($4, 0); + /*%%%*/ + $$ = NEW_MASGN($1, NEW_POSTARG($$, $6)); + /*% + $$ = mlhs_add_star($1, $$); + %*/ + } + | f_marg_list ',' tSTAR + { + /*%%%*/ + $$ = NEW_MASGN($1, -1); + /*% + $$ = mlhs_add_star($1, Qnil); + %*/ + } + | f_marg_list ',' tSTAR ',' f_marg_list /* NOTE(review): the ripper branch passes the trailing list $5 where the other alternatives pass the splat target */ + { + /*%%%*/ + $$ = NEW_MASGN($1, NEW_POSTARG(-1, $5)); + /*% + $$ = mlhs_add_star($1, $5); + %*/ + } + | tSTAR f_norm_arg + { + $$ = assignable($2, 0); + /*%%%*/ + $$ = NEW_MASGN(0, $$); + /*% + $$ = mlhs_add_star(mlhs_new(), $$); + %*/ + } + | tSTAR f_norm_arg ',' f_marg_list /* NOTE(review): the ripper branch carries an upstream "Check me" TODO */ + { + $$ = assignable($2, 0); + /*%%%*/ + $$ = NEW_MASGN(0, NEW_POSTARG($$, $4)); + /*% + #if 0 + TODO: Check me + #endif + $$ = mlhs_add_star($$, $4); + %*/ + } + | tSTAR + { + /*%%%*/ + $$ = NEW_MASGN(0, -1); + /*% + $$ = mlhs_add_star(mlhs_new(), Qnil); + %*/ + } + | tSTAR ',' f_marg_list /* NOTE(review): the ripper branch of this alternative never uses $3 */ + { + /*%%%*/ + $$ = NEW_MASGN(0, NEW_POSTARG(-1, $3)); + /*% + $$ = mlhs_add_star(mlhs_new(), Qnil); + %*/ + } + ; + +block_param : f_arg ',' f_block_optarg ',' f_rest_arg opt_f_block_arg /* block parameters; new_args() receives (required, optional, rest, post-required, block) with 0 for an absent part */ + { + /*%%%*/ + $$ = new_args($1, $3, $5, 0, $6); + /*% + $$ = params_new($1, $3, $5, Qnil, escape_Qundef($6)); + %*/ + } + | f_arg ',' f_block_optarg ',' f_rest_arg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, $3, $5, $7, $8); + /*% + $$ = params_new($1, $3, $5, $7, escape_Qundef($8)); + %*/ + } + | f_arg ',' f_block_optarg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, $3, 0, 0, $4); + /*% + $$ = params_new($1, $3, Qnil, Qnil, escape_Qundef($4)); + %*/ + } + | f_arg ',' f_block_optarg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, $3, 0, $5, $6); + /*% + $$ = params_new($1, $3, Qnil, $5, escape_Qundef($6)); + %*/ + } + | f_arg ',' f_rest_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, 0, $3, 0,
$4); + /*% + $$ = params_new($1, Qnil, $3, Qnil, escape_Qundef($4)); + %*/ + } + | f_arg ',' /* trailing comma: the parser build passes the literal 1 in the rest slot; ripper dispatches excessed_comma */ + { + /*%%%*/ + $$ = new_args($1, 0, 1, 0, 0); + /*% + $$ = params_new($1, Qnil, Qnil, Qnil, Qnil); + dispatch1(excessed_comma, $$); + %*/ + } + | f_arg ',' f_rest_arg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, 0, $3, $5, $6); + /*% + $$ = params_new($1, Qnil, $3, $5, escape_Qundef($6)); + %*/ + } + | f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, 0, 0, 0, $2); + /*% + $$ = params_new($1, Qnil,Qnil, Qnil, escape_Qundef($2)); + %*/ + } + | f_block_optarg ',' f_rest_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, $1, $3, 0, $4); + /*% + $$ = params_new(Qnil, $1, $3, Qnil, escape_Qundef($4)); + %*/ + } + | f_block_optarg ',' f_rest_arg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, $1, $3, $5, $6); + /*% + $$ = params_new(Qnil, $1, $3, $5, escape_Qundef($6)); + %*/ + } + | f_block_optarg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, $1, 0, 0, $2); + /*% + $$ = params_new(Qnil, $1, Qnil, Qnil,escape_Qundef($2)); + %*/ + } + | f_block_optarg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, $1, 0, $3, $4); + /*% + $$ = params_new(Qnil, $1, Qnil, $3, escape_Qundef($4)); + %*/ + } + | f_rest_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, 0, $1, 0, $2); + /*% + $$ = params_new(Qnil, Qnil, $1, Qnil, escape_Qundef($2)); + %*/ + } + | f_rest_arg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, 0, $1, $3, $4); + /*% + $$ = params_new(Qnil, Qnil, $1, $3, escape_Qundef($4)); + %*/ + } + | f_block_arg + { + /*%%%*/ + $$ = new_args(0, 0, 0, 0, $1); + /*% + $$ = params_new(Qnil, Qnil, Qnil, Qnil, $1); + %*/ + } + ; + +opt_block_param : none + | block_param_def /* command_start is set once the parameter list has been parsed */ + { + command_start = Qtrue; + } + ; + +block_param_def : '|' opt_bv_decl '|' /* parameter list between bars; the tOROP alternative covers an empty list written as a single token */ + { + /*%%%*/ + $$ = 0; + /*% + $$ = blockvar_new(params_new(Qnil,Qnil,Qnil,Qnil,Qnil), + escape_Qundef($2)); + %*/ + } + | tOROP + { + /*%%%*/ + $$ = 0; + /*% + $$ =
blockvar_new(params_new(Qnil,Qnil,Qnil,Qnil,Qnil), + Qnil); + %*/ + } + | '|' block_param opt_bv_decl '|' + { + /*%%%*/ + $$ = $2; + /*% + $$ = blockvar_new(escape_Qundef($2), escape_Qundef($3)); + %*/ + } + ; + + +opt_bv_decl : none /* optional declarations after ';'; the parser build yields 0, ripper keeps the list */ + | ';' bv_decls + { + /*%%%*/ + $$ = 0; + /*% + $$ = $2; + %*/ + } + ; + +bv_decls : bvar + /*%c%*/ + /*%c + { + $$ = rb_ary_new3(1, $1); + } + %*/ + | bv_decls ',' bvar + /*%c%*/ + /*%c + { + rb_ary_push($$, $3); + } + %*/ + ; + +bvar : tIDENTIFIER /* the identifier is registered with new_bv() in both builds */ + { + new_bv(get_id($1)); + /*%%%*/ + /*% + $$ = get_value($1); + %*/ + } + | f_bad_arg + { + $$ = 0; + } + ; + +lambda : { /* $1 keeps the dyna_push() result and $2 the previous lpar_beg; the final action restores lpar_beg and calls dyna_pop($1) */ + $$ = dyna_push(); + } + { + $$ = lpar_beg; + lpar_beg = ++paren_nest; + } + f_larglist + lambda_body + { + lpar_beg = $2; + /*%%%*/ + $$ = $3; + $$->nd_body = NEW_SCOPE($3->nd_head, $4); + /*% + $$ = dispatch2(lambda, $3, $4); + %*/ + dyna_pop($1); + } + ; + +f_larglist : '(' f_args opt_bv_decl rparen /* lambda parameters, with or without parentheses */ + { + /*%%%*/ + $$ = NEW_LAMBDA($2); + /*% + $$ = dispatch1(paren, $2); + %*/ + } + | f_args + { + /*%%%*/ + $$ = NEW_LAMBDA($1); + /*% + $$ = $1; + %*/ + } + ; + +lambda_body : tLAMBEG compstmt '}' + { + $$ = $2; + } + | keyword_do_LAMBDA compstmt keyword_end + { + $$ = $2; + } + ; + +do_block : keyword_do_block /* $1 is overwritten with the dyna_push() result; $2 records the starting line */ + { + $1 = dyna_push(); + /*%%%*/ + $$ = ruby_sourceline; + /*% %*/ + } + opt_block_param + compstmt + keyword_end + { + /*%%%*/ + $$ = NEW_ITER($3,$4); + nd_set_line($$, $2); + /*% + $$ = dispatch2(do_block, escape_Qundef($3), $4); + %*/ + dyna_pop($1); + } + ; + +block_call : command do_block /* a block attached to a yield node is reported as a compile error */ + { + /*%%%*/ + if (nd_type($1) == NODE_YIELD) { + compile_error(PARSER_ARG "block given to yield"); + } + else { + block_dup_check($1->nd_args, $2); + } + $2->nd_iter = $1; + $$ = $2; + fixpos($$, $1); + /*% + $$ = method_add_block($1, $2); + %*/ + } + | block_call '.'
operation2 opt_paren_args + { + /*%%%*/ + $$ = NEW_CALL($1, $3, $4); + /*% + $$ = dispatch3(call, $1, ripper_id2sym('.'), $3); + $$ = method_optarg($$, $4); + %*/ + } + | block_call tCOLON2 operation2 opt_paren_args + { + /*%%%*/ + $$ = NEW_CALL($1, $3, $4); + /*% + $$ = dispatch3(call, $1, ripper_intern("::"), $3); + $$ = method_optarg($$, $4); + %*/ + } + ; + +method_call : operation paren_args /* calls written with parentheses, brackets or super; in the ripper build the second dispatch3(call, ...) argument is the separator that was written */ + { + /*%%%*/ + $$ = NEW_FCALL($1, $2); + fixpos($$, $2); + /*% + $$ = method_arg(dispatch1(fcall, $1), $2); + %*/ + } + | primary_value '.' operation2 opt_paren_args + { + /*%%%*/ + $$ = NEW_CALL($1, $3, $4); + fixpos($$, $1); + /*% + $$ = dispatch3(call, $1, ripper_id2sym('.'), $3); + $$ = method_optarg($$, $4); + %*/ + } + | primary_value tCOLON2 operation2 paren_args /* recv::meth(args): ripper reports the "::" separator, like every other tCOLON2 alternative of block_call and method_call */ + { + /*%%%*/ + $$ = NEW_CALL($1, $3, $4); + fixpos($$, $1); + /*% + $$ = dispatch3(call, $1, ripper_intern("::"), $3); + $$ = method_optarg($$, $4); + %*/ + } + | primary_value tCOLON2 operation3 + { + /*%%%*/ + $$ = NEW_CALL($1, $3, 0); + /*% + $$ = dispatch3(call, $1, ripper_intern("::"), $3); + %*/ + } + | primary_value '.'
paren_args /* a receiver followed directly by arguments calls the method named "call" */ + { + /*%%%*/ + $$ = NEW_CALL($1, rb_intern("call"), $3); + fixpos($$, $1); + /*% + $$ = dispatch3(call, $1, ripper_id2sym('.'), + ripper_intern("call")); + $$ = method_optarg($$, $3); + %*/ + } + | primary_value tCOLON2 paren_args + { + /*%%%*/ + $$ = NEW_CALL($1, rb_intern("call"), $3); + fixpos($$, $1); + /*% + $$ = dispatch3(call, $1, ripper_intern("::"), + ripper_intern("call")); + $$ = method_optarg($$, $3); + %*/ + } + | keyword_super paren_args + { + /*%%%*/ + $$ = NEW_SUPER($2); + /*% + $$ = dispatch1(super, $2); + %*/ + } + | keyword_super + { + /*%%%*/ + $$ = NEW_ZSUPER(); + /*% + $$ = dispatch0(zsuper); + %*/ + } + | primary_value '[' opt_call_args rbracket /* element reference: an FCALL of tAREF when the receiver node is NODE_SELF, otherwise a CALL */ + { + /*%%%*/ + if ($1 && nd_type($1) == NODE_SELF) + $$ = NEW_FCALL(tAREF, $3); + else + $$ = NEW_CALL($1, tAREF, $3); + fixpos($$, $1); + /*% + $$ = dispatch2(aref, $1, escape_Qundef($3)); + %*/ + } + ; + +brace_block : '{' /* $1 is overwritten with the dyna_push() result and popped in the final action; $2 records the starting line */ + { + $1 = dyna_push(); + /*%%%*/ + $$ = ruby_sourceline; + /*% + %*/ + } + opt_block_param + compstmt '}' + { + /*%%%*/ + $$ = NEW_ITER($3,$4); + nd_set_line($$, $2); + /*% + $$ = dispatch2(brace_block, escape_Qundef($3), $4); + %*/ + dyna_pop($1); + } + | keyword_do + { + $1 = dyna_push(); + /*%%%*/ + $$ = ruby_sourceline; + /*% + %*/ + } + opt_block_param + compstmt keyword_end + { + /*%%%*/ + $$ = NEW_ITER($3,$4); + nd_set_line($$, $2); + /*% + $$ = dispatch2(do_block, escape_Qundef($3), $4); + %*/ + dyna_pop($1); + } + ; + +case_body : keyword_when args then /* the clause that follows (cases) goes into the third NEW_WHEN slot */ + compstmt + cases + { + /*%%%*/ + $$ = NEW_WHEN($2, $4, $5); + /*% + $$ = dispatch3(when, $2, $4, escape_Qundef($5)); + %*/ + } + ; + +cases : opt_else + | case_body + ; + +opt_rescue : keyword_rescue exc_list exc_var then /* when exc_var is present, an assignment from NEW_ERRINFO() is prepended to the body */ + compstmt + opt_rescue + { + /*%%%*/ + if ($3) { + $3 = node_assign($3, NEW_ERRINFO()); + $5 = block_append($3, $5); + } + $$ = NEW_RESBODY($2, $5, $6); + fixpos($$, $2?$2:$5); + /*% + $$ = dispatch4(rescue, + escape_Qundef($2), + escape_Qundef($3), + escape_Qundef($5), +
escape_Qundef($6)); + %*/ + } + | none + ; + +exc_list : arg_value /* a single value is wrapped in a one element list; for mrhs the splat_array() result is used when it is non-null */ + { + /*%%%*/ + $$ = NEW_LIST($1); + /*% + $$ = rb_ary_new3(1, $1); + %*/ + } + | mrhs + { + /*%%%*/ + if (!($$ = splat_array($1))) $$ = $1; + /*% + $$ = $1; + %*/ + } + | none + ; + +exc_var : tASSOC lhs + { + $$ = $2; + } + | none + ; + +opt_ensure : keyword_ensure compstmt + { + /*%%%*/ + $$ = $2; + /*% + $$ = dispatch1(ensure, $2); + %*/ + } + | none + ; + +literal : numeric + | symbol + { + /*%%%*/ + $$ = NEW_LIT(ID2SYM($1)); + /*% + $$ = dispatch1(symbol_literal, $1); + %*/ + } + | dsym + ; + +strings : string /* a null string node becomes NEW_STR(STR_NEW0()); any other node goes through evstr2dstr() */ + { + /*%%%*/ + NODE *node = $1; + if (!node) { + node = NEW_STR(STR_NEW0()); + } + else { + node = evstr2dstr(node); + } + $$ = node; + /*% + $$ = $1; + %*/ + } + ; + +string : tCHAR + | string1 + | string string1 /* adjacent string literals are joined with literal_concat() */ + { + /*%%%*/ + $$ = literal_concat($1, $2); + /*% + $$ = dispatch2(string_concat, $1, $2); + %*/ + } + ; + +string1 : tSTRING_BEG string_contents tSTRING_END + { + /*%%%*/ + $$ = $2; + /*% + $$ = dispatch1(string_literal, $2); + %*/ + } + ; + +xstring : tXSTRING_BEG xstring_contents tSTRING_END /* the contents are retagged as NODE_XSTR or NODE_DXSTR; other node types are wrapped in a new NODE_DXSTR */ + { + /*%%%*/ + NODE *node = $2; + if (!node) { + node = NEW_XSTR(STR_NEW0()); + } + else { + switch (nd_type(node)) { + case NODE_STR: + nd_set_type(node, NODE_XSTR); + break; + case NODE_DSTR: + nd_set_type(node, NODE_DXSTR); + break; + default: + node = NEW_NODE(NODE_DXSTR, STR_NEW0(), 1, NEW_LIST(node)); + break; + } + } + $$ = node; + /*% + $$ = dispatch1(xstring_literal, $2); + %*/ + } + ; + +regexp : tREGEXP_BEG xstring_contents tREGEXP_END /* $3 carries the regexp options; a plain NODE_STR body is compiled here with reg_compile() */ + { + /*%%%*/ + int options = $3; + NODE *node = $2; + NODE *list; + if (!node) { + node = NEW_LIT(reg_compile(STR_NEW0(), options)); + } + else switch (nd_type(node)) { + case NODE_STR: + { + VALUE src = node->nd_lit; + nd_set_type(node, NODE_LIT); + node->nd_lit = reg_compile(src, options); + } + break; + default: + node = NEW_NODE(NODE_DSTR, STR_NEW0(), 1, NEW_LIST(node)); /* fall through: the wrapped node is handled as NODE_DSTR */ + case NODE_DSTR: + if (options & RE_OPTION_ONCE) { +
nd_set_type(node, NODE_DREGX_ONCE); + } + else { + nd_set_type(node, NODE_DREGX); + } + node->nd_cflag = options & RE_OPTION_MASK; + reg_fragment_check(node->nd_lit, options); + for (list = node->nd_next; list; list = list->nd_next) { + if (nd_type(list->nd_head) == NODE_STR) { + reg_fragment_check(list->nd_head->nd_lit, options); + } + } + break; + } + $$ = node; + /*% + $$ = dispatch2(regexp_literal, $2, $3); + %*/ + } + ; + +words : tWORDS_BEG ' ' tSTRING_END /* word list literal; the first alternative is the empty list */ + { + /*%%%*/ + $$ = NEW_ZARRAY(); + /*% + $$ = dispatch0(words_new); + %*/ + } + | tWORDS_BEG word_list tSTRING_END + { + $$ = $2; + } + ; + +word_list : /* none */ + { + /*%%%*/ + $$ = 0; + /*% + $$ = dispatch0(words_new); + %*/ + } + | word_list word ' ' + { + /*%%%*/ + $$ = list_append($1, evstr2dstr($2)); + /*% + $$ = dispatch2(words_add, $1, $2); + %*/ + } + ; + +word : string_content + /*%c%*/ + /*%c + { + $$ = dispatch0(word_new); + $$ = dispatch2(word_add, $$, $1); + } + %*/ + | word string_content + { + /*%%%*/ + $$ = literal_concat($1, $2); + /*% + $$ = dispatch2(word_add, $1, $2); + %*/ + } + ; + +qwords : tQWORDS_BEG ' ' tSTRING_END /* like words, but each element is a bare tSTRING_CONTENT */ + { + /*%%%*/ + $$ = NEW_ZARRAY(); + /*% + $$ = dispatch0(qwords_new); + %*/ + } + | tQWORDS_BEG qword_list tSTRING_END + { + $$ = $2; + } + ; + +qword_list : /* none */ + { + /*%%%*/ + $$ = 0; + /*% + $$ = dispatch0(qwords_new); + %*/ + } + | qword_list tSTRING_CONTENT ' ' + { + /*%%%*/ + $$ = list_append($1, $2); + /*% + $$ = dispatch2(qwords_add, $1, $2); + %*/ + } + ; + +string_contents : /* none */ + { + /*%%%*/ + $$ = 0; + /*% + $$ = dispatch0(string_content); + %*/ + } + | string_contents string_content + { + /*%%%*/ + $$ = literal_concat($1, $2); + /*% + $$ = dispatch2(string_add, $1, $2); + %*/ + } + ; + +xstring_contents: /* none */ + { + /*%%%*/ + $$ = 0; + /*% + $$ = dispatch0(xstring_new); + %*/ + } + | xstring_contents string_content + { + /*%%%*/ + $$ = literal_concat($1, $2); + /*% + $$ = dispatch2(xstring_add, $1, $2); + %*/ + } + ; +
+string_content : tSTRING_CONTENT /* plain content, an interpolated variable (tSTRING_DVAR string_dvar) or an embedded compstmt closed by '}'; lex_strterm is saved as $2 and restored after the embedded part */ + | tSTRING_DVAR + { + $$ = lex_strterm; + lex_strterm = 0; + lex_state = EXPR_BEG; + } + string_dvar + { + /*%%%*/ + lex_strterm = $2; + $$ = NEW_EVSTR($3); + /*% + lex_strterm = $2; + $$ = dispatch1(string_dvar, $3); + %*/ + } + | tSTRING_DBEG + { + $$ = lex_strterm; + lex_strterm = 0; + lex_state = EXPR_BEG; + COND_PUSH(0); + CMDARG_PUSH(0); + } + compstmt '}' + { + lex_strterm = $2; + COND_LEXPOP(); + CMDARG_LEXPOP(); + /*%%%*/ + if ($3) $3->flags &= ~NODE_FL_NEWLINE; + $$ = new_evstr($3); + /*% + $$ = dispatch1(string_embexpr, $3); + %*/ + } + ; + +string_dvar : tGVAR + { + /*%%%*/ + $$ = NEW_GVAR($1); + /*% + $$ = dispatch1(var_ref, $1); + %*/ + } + | tIVAR + { + /*%%%*/ + $$ = NEW_IVAR($1); + /*% + $$ = dispatch1(var_ref, $1); + %*/ + } + | tCVAR + { + /*%%%*/ + $$ = NEW_CVAR($1); + /*% + $$ = dispatch1(var_ref, $1); + %*/ + } + | backref + ; + +symbol : tSYMBEG sym /* lex_state is set to EXPR_END in both builds */ + { + /*%%%*/ + lex_state = EXPR_END; + $$ = $2; + /*% + lex_state = EXPR_END; + $$ = dispatch1(symbol, $2); + %*/ + } + ; + +sym : fname + | tIVAR + | tGVAR + | tCVAR + ; + +dsym : tSYMBEG xstring_contents tSTRING_END /* quoted symbol: a NODE_STR body is interned into a literal, a NODE_DSTR body is retagged NODE_DSYM, anything else is wrapped in a NODE_DSYM */ + { + /*%%%*/ + lex_state = EXPR_END; + if (!($$ = $2)) { + $$ = NEW_LIT(ID2SYM(rb_intern(""))); + } + else { + VALUE lit; + + switch (nd_type($$)) { + case NODE_DSTR: + nd_set_type($$, NODE_DSYM); + break; + case NODE_STR: + lit = $$->nd_lit; + $$->nd_lit = ID2SYM(rb_intern_str(lit)); + nd_set_type($$, NODE_LIT); + break; + default: + $$ = NEW_NODE(NODE_DSYM, STR_NEW0(), 1, NEW_LIST($$)); + break; + } + } + /*% + lex_state = EXPR_END; + $$ = dispatch1(dyna_symbol, $2); + %*/ + } + ; + +numeric : tINTEGER + | tFLOAT + | tUMINUS_NUM tINTEGER %prec tLOWEST /* negative literal: negate_lit() is applied to the literal in the parser build */ + { + /*%%%*/ + $$ = negate_lit($2); + /*% + $$ = dispatch2(unary, ripper_intern("-@"), $2); + %*/ + } + | tUMINUS_NUM tFLOAT %prec tLOWEST + { + /*%%%*/ + $$ = negate_lit($2); + /*% + $$ = dispatch2(unary, ripper_intern("-@"), $2); + %*/ + } + ; + +variable : tIDENTIFIER + | tIVAR + | tGVAR + |
tCONSTANT + | tCVAR + | keyword_nil {ifndef_ripper($$ = keyword_nil);} + | keyword_self {ifndef_ripper($$ = keyword_self);} + | keyword_true {ifndef_ripper($$ = keyword_true);} + | keyword_false {ifndef_ripper($$ = keyword_false);} + | keyword__FILE__ {ifndef_ripper($$ = keyword__FILE__);} + | keyword__LINE__ {ifndef_ripper($$ = keyword__LINE__);} + | keyword__ENCODING__ {ifndef_ripper($$ = keyword__ENCODING__);} + ; + +var_ref : variable /* a null result from gettable() is replaced by NEW_BEGIN(0) */ + { + /*%%%*/ + if (!($$ = gettable($1))) $$ = NEW_BEGIN(0); + /*% + $$ = dispatch1(var_ref, $1); + %*/ + } + ; + +var_lhs : variable /* assignable() is called in both builds */ + { + $$ = assignable($1, 0); + /*%%%*/ + /*% + $$ = dispatch1(var_field, $$); + %*/ + } + ; + +backref : tNTH_REF + | tBACK_REF + ; + +superclass : term /* no superclass given: 0 in the parser build, Qnil in ripper */ + { + /*%%%*/ + $$ = 0; + /*% + $$ = Qnil; + %*/ + } + | '<' + { + lex_state = EXPR_BEG; + } + expr_value term + { + $$ = $3; + } + | error term + { + /*%%%*/ + yyerrok; + $$ = 0; + /*% + yyerrok; + $$ = Qnil; + %*/ + } + ; + +f_arglist : '(' f_args rparen /* after the closing parenthesis lex_state is set to EXPR_BEG and command_start to Qtrue */ + { + /*%%%*/ + $$ = $2; + lex_state = EXPR_BEG; + command_start = Qtrue; + /*% + $$ = dispatch1(paren, $2); + lex_state = EXPR_BEG; + command_start = Qtrue; + %*/ + } + | f_args term + { + $$ = $1; + } + ; + +f_args : f_arg ',' f_optarg ',' f_rest_arg opt_f_block_arg /* method parameters; new_args() receives (required, optional, rest, post-required, block) with 0 for an absent part */ + { + /*%%%*/ + $$ = new_args($1, $3, $5, 0, $6); + /*% + $$ = params_new($1, $3, $5, Qnil, escape_Qundef($6)); + %*/ + } + | f_arg ',' f_optarg ',' f_rest_arg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, $3, $5, $7, $8); + /*% + $$ = params_new($1, $3, $5, $7, escape_Qundef($8)); + %*/ + } + | f_arg ',' f_optarg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, $3, 0, 0, $4); + /*% + $$ = params_new($1, $3, Qnil, Qnil, escape_Qundef($4)); + %*/ + } + | f_arg ',' f_optarg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, $3, 0, $5, $6); + /*% + $$ = params_new($1, $3, Qnil, $5, escape_Qundef($6)); + %*/ + } + | f_arg ',' f_rest_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, 0, $3, 0, $4); + /*% + $$ =
params_new($1, Qnil, $3, Qnil, escape_Qundef($4)); + %*/ + } + | f_arg ',' f_rest_arg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, 0, $3, $5, $6); + /*% + $$ = params_new($1, Qnil, $3, $5, escape_Qundef($6)); + %*/ + } + | f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, 0, 0, 0, $2); + /*% + $$ = params_new($1, Qnil, Qnil, Qnil,escape_Qundef($2)); + %*/ + } + | f_optarg ',' f_rest_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, $1, $3, 0, $4); + /*% + $$ = params_new(Qnil, $1, $3, Qnil, escape_Qundef($4)); + %*/ + } + | f_optarg ',' f_rest_arg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, $1, $3, $5, $6); + /*% + $$ = params_new(Qnil, $1, $3, $5, escape_Qundef($6)); + %*/ + } + | f_optarg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, $1, 0, 0, $2); + /*% + $$ = params_new(Qnil, $1, Qnil, Qnil,escape_Qundef($2)); + %*/ + } + | f_optarg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, $1, 0, $3, $4); + /*% + $$ = params_new(Qnil, $1, Qnil, $3, escape_Qundef($4)); + %*/ + } + | f_rest_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, 0, $1, 0, $2); + /*% + $$ = params_new(Qnil, Qnil, $1, Qnil,escape_Qundef($2)); + %*/ + } + | f_rest_arg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, 0, $1, $3, $4); + /*% + $$ = params_new(Qnil, Qnil, $1, $3, escape_Qundef($4)); + %*/ + } + | f_block_arg + { + /*%%%*/ + $$ = new_args(0, 0, 0, 0, $1); + /*% + $$ = params_new(Qnil, Qnil, Qnil, Qnil, $1); + %*/ + } + | /* none */ + { + /*%%%*/ + $$ = new_args(0, 0, 0, 0, 0); + /*% + $$ = params_new(Qnil, Qnil, Qnil, Qnil, Qnil); + %*/ + } + ; + +f_bad_arg : tCONSTANT /* tokens that cannot name a formal argument: the parser build reports an error and yields 0, ripper dispatches param_error */ + { + /*%%%*/ + yyerror("formal argument cannot be a constant"); + $$ = 0; + /*% + $$ = dispatch1(param_error, $1); + %*/ + } + | tIVAR + { + /*%%%*/ + yyerror("formal argument cannot be an instance variable"); + $$ = 0; + /*% + $$ = dispatch1(param_error, $1); + %*/ + } + | tGVAR + { + /*%%%*/ + yyerror("formal argument cannot be a global variable"); + $$ =
0; + /*% + $$ = dispatch1(param_error, $1); + %*/ + } + | tCVAR + { + /*%%%*/ + yyerror("formal argument cannot be a class variable"); + $$ = 0; + /*% + $$ = dispatch1(param_error, $1); + %*/ + } + ; + +f_norm_arg : f_bad_arg + | tIDENTIFIER /* the identifier is passed to formal_argument() and returned unchanged */ + { + formal_argument(get_id($1)); + $$ = $1; + } + ; + +f_arg_item : f_norm_arg /* one required parameter, registered with arg_var() */ + { + arg_var(get_id($1)); + /*%%%*/ + $$ = NEW_ARGS_AUX($1, 1); + /*% + $$ = get_value($1); + %*/ + } + | tLPAREN f_margs rparen /* destructuring parameter: bound to a fresh internal id that the f_margs node reads through nd_value */ + { + ID tid = internal_id(); + arg_var(tid); + /*%%%*/ + if (dyna_in_block()) { + $2->nd_value = NEW_DVAR(tid); + } + else { + $2->nd_value = NEW_LVAR(tid); + } + $$ = NEW_ARGS_AUX(tid, 1); + $$->nd_next = $2; + /*% + $$ = dispatch1(mlhs_paren, $2); + %*/ + } + ; + +f_arg : f_arg_item + /*%c%*/ + /*%c + { + $$ = rb_ary_new3(1, $1); + } + c%*/ + | f_arg ',' f_arg_item /* parser build: $3 is merged into $1 (nd_plen incremented, nd_next chains joined) and the $3 node is recycled */ + { + /*%%%*/ + $$ = $1; + $$->nd_plen++; + $$->nd_next = block_append($$->nd_next, $3->nd_next); + rb_gc_force_recycle((VALUE)$3); + /*% + $$ = rb_ary_push($1, $3); + %*/ + } + ; + +f_opt : tIDENTIFIER '=' arg_value /* optional parameter with an arg_value default */ + { + arg_var(formal_argument(get_id($1))); + $$ = assignable($1, $3); + /*%%%*/ + $$ = NEW_OPT_ARG(0, $$); + /*% + $$ = rb_assoc_new($$, $3); + %*/ + } + ; + +f_block_opt : tIDENTIFIER '=' primary_value /* same as f_opt except that the default is a primary_value */ + { + arg_var(formal_argument(get_id($1))); + $$ = assignable($1, $3); + /*%%%*/ + $$ = NEW_OPT_ARG(0, $$); + /*% + $$ = rb_assoc_new($$, $3); + %*/ + } + ; + +f_block_optarg : f_block_opt /* parser build: each new entry is linked after the last node of the nd_next chain */ + { + /*%%%*/ + $$ = $1; + /*% + $$ = rb_ary_new3(1, $1); + %*/ + } + | f_block_optarg ',' f_block_opt + { + /*%%%*/ + NODE *opts = $1; + + while (opts->nd_next) { + opts = opts->nd_next; + } + opts->nd_next = $3; + $$ = $1; + /*% + $$ = rb_ary_push($1, $3); + %*/ + } + ; + +f_optarg : f_opt /* parser build: each new entry is linked after the last node of the nd_next chain */ + { + /*%%%*/ + $$ = $1; + /*% + $$ = rb_ary_new3(1, $1); + %*/ + } + | f_optarg ',' f_opt + { + /*%%%*/ + NODE *opts = $1; + + while (opts->nd_next) { + opts = opts->nd_next; + } + opts->nd_next = $3; + $$ = $1; + /*% + $$ = rb_ary_push($1, $3); + %*/ + } + ; +
+restarg_mark : '*' + | tSTAR + ; + +f_rest_arg : restarg_mark tIDENTIFIER + { + /*%%%*/ + if (!is_local_id($2)) + yyerror("rest argument must be local variable"); + /*% %*/ + arg_var(shadowing_lvar(get_id($2))); + /*%%%*/ + $$ = $2; + /*% + $$ = dispatch1(rest_param, $2); + %*/ + } + | restarg_mark + { + /*%%%*/ + $$ = internal_id(); + arg_var($$); + /*% + $$ = dispatch1(rest_param, Qnil); + %*/ + } + ; + +blkarg_mark : '&' + | tAMPER + ; + +f_block_arg : blkarg_mark tIDENTIFIER + { + /*%%%*/ + if (!is_local_id($2)) + yyerror("block argument must be local variable"); + else if (!dyna_in_block() && local_id($2)) + yyerror("duplicated block argument name"); + /*% %*/ + arg_var(shadowing_lvar(get_id($2))); + /*%%%*/ + $$ = $2; + /*% + $$ = dispatch1(blockarg, $2); + %*/ + } + ; + +opt_f_block_arg : ',' f_block_arg + { + $$ = $2; + } + | none + { + /*%%%*/ + $$ = 0; + /*% + $$ = Qundef; + %*/ + } + ; + +singleton : var_ref + { + /*%%%*/ + value_expr($1); + $$ = $1; + if (!$$) $$ = NEW_NIL(); + /*% + $$ = $1; + %*/ + } + | '(' {lex_state = EXPR_BEG;} expr rparen + { + /*%%%*/ + if ($3 == 0) { + yyerror("can't define singleton method for ()."); + } + else { + switch (nd_type($3)) { + case NODE_STR: + case NODE_DSTR: + case NODE_XSTR: + case NODE_DXSTR: + case NODE_DREGX: + case NODE_LIT: + case NODE_ARRAY: + case NODE_ZARRAY: + yyerror("can't define singleton method for literals"); + default: + value_expr($3); + break; + } + } + $$ = $3; + /*% + $$ = dispatch1(paren, $3); + %*/ + } + ; + +assoc_list : none + | assocs trailer + { + /*%%%*/ + $$ = $1; + /*% + $$ = dispatch1(assoclist_from_args, $1); + %*/ + } + ; + +assocs : assoc + /*%c%*/ + /*%c + { + $$ = rb_ary_new3(1, $1); + } + %*/ + | assocs ',' assoc + { + /*%%%*/ + $$ = list_concat($1, $3); + /*% + $$ = rb_ary_push($1, $3); + %*/ + } + ; + +assoc : arg_value tASSOC arg_value + { + /*%%%*/ + $$ = list_append(NEW_LIST($1), $3); + /*% + $$ = dispatch2(assoc_new, $1, $3); + %*/ + } + | tLABEL arg_value + { + /*%%%*/ 
+ $$ = list_append(NEW_LIST(NEW_LIT(ID2SYM($1))), $2); + /*% + $$ = dispatch2(assoc_new, $1, $2); + %*/ + } + ; + +operation : tIDENTIFIER + | tCONSTANT + | tFID + ; + +operation2 : tIDENTIFIER + | tCONSTANT + | tFID + | op + ; + +operation3 : tIDENTIFIER + | tFID + | op + ; + +dot_or_colon : '.' + /*%c%*/ + /*%c + { $$ = $1; } + %*/ + | tCOLON2 + /*%c%*/ + /*%c + { $$ = $1; } + %*/ + ; + +opt_terms : /* none */ + | terms + ; + +opt_nl : /* none */ + | '\n' + ; + +rparen : opt_nl ')' + ; + +rbracket : opt_nl ']' + ; + +trailer : /* none */ + | '\n' + | ',' + ; + +term : ';' {yyerrok;} + | '\n' + ; + +terms : term + | terms ';' {yyerrok;} + ; + +none : /* none */ + { + /*%%%*/ + $$ = 0; + /*% + $$ = Qundef; + %*/ + } + ; +%% +# undef parser +# undef yylex +# undef yylval +# define yylval (*((YYSTYPE*)(parser->parser_yylval))) + +static int parser_regx_options(struct parser_params*); +static int parser_tokadd_string(struct parser_params*,int,int,int,long*,rb_encoding**); +static void parser_tokaddmbc(struct parser_params *parser, int c, rb_encoding *enc); +static int parser_parse_string(struct parser_params*,NODE*); +static int parser_here_document(struct parser_params*,NODE*); + + +# define nextc() parser_nextc(parser) +# define pushback(c) parser_pushback(parser, c) +# define newtok() parser_newtok(parser) +# define tokspace(n) parser_tokspace(parser, n) +# define tokadd(c) parser_tokadd(parser, c) +# define tok_hex(numlen) parser_tok_hex(parser, numlen) +# define read_escape(flags,e) parser_read_escape(parser, flags, e) +# define tokadd_escape(e) parser_tokadd_escape(parser, e) +# define regx_options() parser_regx_options(parser) +# define tokadd_string(f,t,p,n,e) parser_tokadd_string(parser,f,t,p,n,e) +# define parse_string(n) parser_parse_string(parser,n) +# define tokaddmbc(c, enc) parser_tokaddmbc(parser, c, enc) +# define here_document(n) parser_here_document(parser,n) +# define heredoc_identifier() parser_heredoc_identifier(parser) +# define 
heredoc_restore(n) parser_heredoc_restore(parser,n) +# define whole_match_p(e,l,i) parser_whole_match_p(parser,e,l,i) + +#ifndef RIPPER +# define set_yylval_str(x) yylval.node = NEW_STR(x) +# define set_yylval_num(x) yylval.num = x +# define set_yylval_id(x) yylval.id = x +# define set_yylval_name(x) yylval.id = x +# define set_yylval_literal(x) yylval.node = NEW_LIT(x) +# define set_yylval_node(x) yylval.node = x +# define yylval_id() yylval.id +#else +static inline VALUE +ripper_yylval_id(ID x) +{ + return (VALUE)NEW_LASGN(x, ID2SYM(x)); +} +# define set_yylval_str(x) (void)(x) +# define set_yylval_num(x) (void)(x) +# define set_yylval_id(x) (void)(x) +# define set_yylval_name(x) (void)(yylval.val = ripper_yylval_id(x)) +# define set_yylval_literal(x) (void)(x) +# define set_yylval_node(x) (void)(x) +# define yylval_id() yylval.id +#endif + +#ifndef RIPPER +#define ripper_flush(p) (void)(p) +#else +#define ripper_flush(p) (p->tokp = p->parser_lex_p) + +#define yylval_rval *(TYPE(yylval.val) == T_NODE ? 
&yylval.node->nd_rval : &yylval.val) + +static int +ripper_has_scan_event(struct parser_params *parser) +{ + + if (lex_p < parser->tokp) rb_raise(rb_eRuntimeError, "lex_p < tokp"); + return lex_p > parser->tokp; +} + +static VALUE +ripper_scan_event_val(struct parser_params *parser, int t) +{ + VALUE str = STR_NEW(parser->tokp, lex_p - parser->tokp); + VALUE rval = ripper_dispatch1(parser, ripper_token2eventid(t), str); + ripper_flush(parser); + return rval; +} + +static void +ripper_dispatch_scan_event(struct parser_params *parser, int t) +{ + if (!ripper_has_scan_event(parser)) return; + yylval_rval = ripper_scan_event_val(parser, t); +} + +static void +ripper_dispatch_ignored_scan_event(struct parser_params *parser, int t) +{ + if (!ripper_has_scan_event(parser)) return; + (void)ripper_scan_event_val(parser, t); +} + +static void +ripper_dispatch_delayed_token(struct parser_params *parser, int t) +{ + int saved_line = ruby_sourceline; + const char *saved_tokp = parser->tokp; + + ruby_sourceline = parser->delayed_line; + parser->tokp = lex_pbeg + parser->delayed_col; + yylval_rval = ripper_dispatch1(parser, ripper_token2eventid(t), parser->delayed); + parser->delayed = Qnil; + ruby_sourceline = saved_line; + parser->tokp = saved_tokp; +} +#endif /* RIPPER */ + +#include "ruby/regex.h" +#include "ruby/util.h" + +/* We remove any previous definition of `SIGN_EXTEND_CHAR', + since ours (we hope) works properly with all combinations of + machines, compilers, `char' and `unsigned char' argument types. + (Per Bothner suggested the basic approach.) */ +#undef SIGN_EXTEND_CHAR +#if __STDC__ +# define SIGN_EXTEND_CHAR(c) ((signed char)(c)) +#else /* not __STDC__ */ +/* As in Harbison and Steele. 
*/ +# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128) +#endif + +#define parser_encoding_name() (parser->enc->name) +#define parser_mbclen() mbclen((lex_p-1),lex_pend,parser->enc) +#define parser_precise_mbclen() rb_enc_precise_mbclen((lex_p-1),lex_pend,parser->enc) +#define is_identchar(p,e,enc) (rb_enc_isalnum(*p,enc) || (*p) == '_' || !ISASCII(*p)) +#define parser_is_identchar() (!parser->eofp && is_identchar((lex_p-1),lex_pend,parser->enc)) + +#define parser_isascii() ISASCII(*(lex_p-1)) + +#ifndef RIPPER +static int +token_info_get_column(struct parser_params *parser, const char *token) +{ + int column = 1; + const char *p, *pend = lex_p - strlen(token); + for (p = lex_pbeg; p < pend; p++) { + if (*p == '\t') { + column = (((column - 1) / 8) + 1) * 8; + } + column++; + } + return column; +} + +static int +token_info_has_nonspaces(struct parser_params *parser, const char *token) +{ + const char *p, *pend = lex_p - strlen(token); + for (p = lex_pbeg; p < pend; p++) { + if (*p != ' ' && *p != '\t') { + return 1; + } + } + return 0; +} + +static void +token_info_push(struct parser_params *parser, const char *token) +{ + token_info *ptinfo; + + if (compile_for_eval) return; + ptinfo = ALLOC(token_info); + ptinfo->token = token; + ptinfo->linenum = ruby_sourceline; + ptinfo->column = token_info_get_column(parser, token); + ptinfo->nonspc = token_info_has_nonspaces(parser, token); + ptinfo->next = parser->parser_token_info; + + parser->parser_token_info = ptinfo; +} + +static void +token_info_pop(struct parser_params *parser, const char *token) +{ + int linenum; + token_info *ptinfo = parser->parser_token_info; + + if (!ptinfo) return; + parser->parser_token_info = ptinfo->next; + if (token_info_get_column(parser, token) == ptinfo->column) { /* OK */ + goto finish; + } + linenum = ruby_sourceline; + if (linenum == ptinfo->linenum) { /* SKIP */ + goto finish; + } + if (token_info_has_nonspaces(parser, token) || ptinfo->nonspc) { /* SKIP */ + goto 
finish; + } + rb_compile_warning(ruby_sourcefile, linenum, + "mismatched indentations at '%s' with '%s' at %d", + token, ptinfo->token, ptinfo->linenum); + + finish: + xfree(ptinfo); +} +#endif /* RIPPER */ + +static int +parser_yyerror(struct parser_params *parser, const char *msg) +{ +#ifndef RIPPER + const int max_line_margin = 30; + const char *p, *pe; + char *buf; + int len, i; + + compile_error(PARSER_ARG "%s", msg); + p = lex_p; + while (lex_pbeg <= p) { + if (*p == '\n') break; + p--; + } + p++; + + pe = lex_p; + while (pe < lex_pend) { + if (*pe == '\n') break; + pe++; + } + + len = pe - p; + if (len > 4) { + char *p2; + const char *pre = "", *post = ""; + + if (len > max_line_margin * 2 + 10) { + if (lex_p - p > max_line_margin) { + p = rb_enc_prev_char(p, lex_p - max_line_margin, pe, rb_enc_get(lex_lastline)); + pre = "..."; + } + if (pe - lex_p > max_line_margin) { + pe = rb_enc_prev_char(lex_p, lex_p + max_line_margin, pe, rb_enc_get(lex_lastline)); + post = "..."; + } + len = pe - p; + } + buf = ALLOCA_N(char, len+2); + MEMCPY(buf, p, char, len); + buf[len] = '\0'; + rb_compile_error_append("%s%s%s", pre, buf, post); + + i = lex_p - p; + p2 = buf; pe = buf + len; + + while (p2 < pe) { + if (*p2 != '\t') *p2 = ' '; + p2++; + } + buf[i] = '^'; + buf[i+1] = '\0'; + rb_compile_error_append("%s%s", pre, buf); + } +#else + dispatch1(parse_error, STR_NEW2(msg)); +#endif /* !RIPPER */ + return 0; +} + +static void parser_prepare(struct parser_params *parser); + +#ifndef RIPPER +VALUE ruby_suppress_tracing(VALUE (*func)(VALUE, int), VALUE arg, int always); + +static VALUE +debug_lines(const char *f) +{ + if (rb_const_defined_at(rb_cObject, rb_intern("SCRIPT_LINES__"))) { + VALUE hash = rb_const_get_at(rb_cObject, rb_intern("SCRIPT_LINES__")); + if (TYPE(hash) == T_HASH) { + VALUE fname = rb_str_new2(f); + VALUE lines = rb_ary_new(); + rb_hash_aset(hash, fname, lines); + return lines; + } + } + return 0; +} + +static VALUE +coverage(const char *f, int n) +{ + 
extern VALUE rb_get_coverages(void); + VALUE coverages = rb_get_coverages(); + if (RTEST(coverages) && RBASIC(coverages)->klass == 0) { + VALUE fname = rb_str_new2(f); + VALUE lines = rb_ary_new2(n); + int i; + RBASIC(lines)->klass = 0; + for (i = 0; i < n; i++) RARRAY_PTR(lines)[i] = Qnil; + RARRAY(lines)->as.heap.len = n; + rb_hash_aset(coverages, fname, lines); + return lines; + } + return 0; +} + +static int +e_option_supplied(struct parser_params *parser) +{ + if (strcmp(ruby_sourcefile, "-e") == 0) + return Qtrue; + return Qfalse; +} + +static VALUE +yycompile0(VALUE arg, int tracing) +{ + int n; + NODE *tree; + struct parser_params *parser = (struct parser_params *)arg; + + if (!compile_for_eval && rb_safe_level() == 0) { + ruby_debug_lines = debug_lines(ruby_sourcefile); + if (ruby_debug_lines && ruby_sourceline > 0) { + VALUE str = STR_NEW0(); + n = ruby_sourceline; + do { + rb_ary_push(ruby_debug_lines, str); + } while (--n); + } + + if (!e_option_supplied(parser)) { + ruby_coverage = coverage(ruby_sourcefile, ruby_sourceline); + } + } + + parser_prepare(parser); + deferred_nodes = 0; + n = yyparse((void*)parser); + ruby_debug_lines = 0; + ruby_coverage = 0; + compile_for_eval = 0; + + lex_strterm = 0; + lex_p = lex_pbeg = lex_pend = 0; + lex_lastline = lex_nextline = 0; + if (parser->nerr) { + return 0; + } + tree = ruby_eval_tree; + if (!tree) { + tree = NEW_NIL(); + } + if (ruby_eval_tree_begin) { + NODE *scope = ruby_eval_tree; + + if (scope) { + scope->nd_body = NEW_PRELUDE(ruby_eval_tree_begin, scope->nd_body); + } + tree = scope; + } + else { + tree = ruby_eval_tree; + } + return (VALUE)tree; +} + +static NODE* +yycompile(struct parser_params *parser, const char *f, int line) +{ + ruby_sourcefile = ruby_strdup(f); + ruby_sourceline = line - 1; + return (NODE *)ruby_suppress_tracing(yycompile0, (VALUE)parser, Qtrue); +} +#endif /* !RIPPER */ + +static rb_encoding * +must_be_ascii_compatible(VALUE s) +{ + rb_encoding *enc = rb_enc_get(s); + if 
(!rb_enc_asciicompat(enc)) { + rb_raise(rb_eArgError, "invalid source encoding"); + } + return enc; +} + +static VALUE +lex_get_str(struct parser_params *parser, VALUE s) +{ + char *beg, *end, *pend; + rb_encoding *enc = must_be_ascii_compatible(s); + + beg = RSTRING_PTR(s); + if (lex_gets_ptr) { + if (RSTRING_LEN(s) == lex_gets_ptr) return Qnil; + beg += lex_gets_ptr; + } + pend = RSTRING_PTR(s) + RSTRING_LEN(s); + end = beg; + while (end < pend) { + if (*end++ == '\n') break; + } + lex_gets_ptr = end - RSTRING_PTR(s); + return rb_enc_str_new(beg, end - beg, enc); +} + +static VALUE +lex_getline(struct parser_params *parser) +{ + VALUE line = (*parser->parser_lex_gets)(parser, parser->parser_lex_input); + if (NIL_P(line)) return line; + must_be_ascii_compatible(line); +#ifndef RIPPER + if (ruby_debug_lines) { + rb_ary_push(ruby_debug_lines, line); + } + if (ruby_coverage) { + rb_ary_push(ruby_coverage, Qnil); + } +#endif + return line; +} + +#ifndef RIPPER +static NODE* +parser_compile_string(volatile VALUE vparser, const char *f, VALUE s, int line) +{ + struct parser_params *parser; + NODE *node; + volatile VALUE tmp; + + Data_Get_Struct(vparser, struct parser_params, parser); + lex_gets = lex_get_str; + lex_gets_ptr = 0; + lex_input = s; + lex_pbeg = lex_p = lex_pend = 0; + compile_for_eval = rb_parse_in_eval(); + + node = yycompile(parser, f, line); + tmp = vparser; /* prohibit tail call optimization */ + + return node; +} + +NODE* +rb_compile_string(const char *f, VALUE s, int line) +{ + must_be_ascii_compatible(s); + return parser_compile_string(rb_parser_new(), f, s, line); +} + +NODE* +rb_parser_compile_string(volatile VALUE vparser, const char *f, VALUE s, int line) +{ + must_be_ascii_compatible(s); + return parser_compile_string(vparser, f, s, line); +} + +NODE* +rb_compile_cstr(const char *f, const char *s, int len, int line) +{ + VALUE str = rb_str_new(s, len); + return parser_compile_string(rb_parser_new(), f, str, line); +} + +NODE* 
+rb_parser_compile_cstr(volatile VALUE vparser, const char *f, const char *s, int len, int line) +{ + VALUE str = rb_str_new(s, len); + return parser_compile_string(vparser, f, str, line); +} + +static VALUE +lex_io_gets(struct parser_params *parser, VALUE io) +{ + return rb_io_gets(io); +} + +NODE* +rb_compile_file(const char *f, VALUE file, int start) +{ + VALUE volatile vparser = rb_parser_new(); + + return rb_parser_compile_file(vparser, f, file, start); +} + +NODE* +rb_parser_compile_file(volatile VALUE vparser, const char *f, VALUE file, int start) +{ + struct parser_params *parser; + volatile VALUE tmp; + NODE *node; + + Data_Get_Struct(vparser, struct parser_params, parser); + lex_gets = lex_io_gets; + lex_input = file; + lex_pbeg = lex_p = lex_pend = 0; + compile_for_eval = rb_parse_in_eval(); + + node = yycompile(parser, f, start); + tmp = vparser; /* prohibit tail call optimization */ + + return node; +} +#endif /* !RIPPER */ + +#define STR_FUNC_ESCAPE 0x01 +#define STR_FUNC_EXPAND 0x02 +#define STR_FUNC_REGEXP 0x04 +#define STR_FUNC_QWORDS 0x08 +#define STR_FUNC_SYMBOL 0x10 +#define STR_FUNC_INDENT 0x20 + +enum string_type { + str_squote = (0), + str_dquote = (STR_FUNC_EXPAND), + str_xquote = (STR_FUNC_EXPAND), + str_regexp = (STR_FUNC_REGEXP|STR_FUNC_ESCAPE|STR_FUNC_EXPAND), + str_sword = (STR_FUNC_QWORDS), + str_dword = (STR_FUNC_QWORDS|STR_FUNC_EXPAND), + str_ssym = (STR_FUNC_SYMBOL), + str_dsym = (STR_FUNC_SYMBOL|STR_FUNC_EXPAND) +}; + +static VALUE +parser_str_new(const char *p, long n, rb_encoding *enc, int func, rb_encoding *enc0) +{ + VALUE str; + + str = rb_enc_str_new(p, n, enc); + if (!(func & STR_FUNC_REGEXP) && rb_enc_asciicompat(enc)) { + if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) { + } + else if (enc0 == rb_usascii_encoding() && enc != rb_utf8_encoding()) { + rb_enc_associate(str, rb_ascii8bit_encoding()); + } + } + + return str; +} + +#define lex_goto_eol(parser) (parser->parser_lex_p = parser->parser_lex_pend) +#define peek(c) 
(lex_p < lex_pend && (c) == *lex_p)
+
+/*
+ * Return the next input byte (as an unsigned char value), or -1 at end
+ * of input.
+ *
+ * When the current line buffer is exhausted, the pending lex_nextline is
+ * used if there is one; otherwise a new line is fetched with
+ * lex_getline().  If no line is available, eofp is set and -1 is
+ * returned.  A "\r\n" pair is folded into a single '\n'.
+ */
+static inline int
+parser_nextc(struct parser_params *parser)
+{
+    int c;
+
+    if (lex_p == lex_pend) {
+        VALUE v = lex_nextline;
+        lex_nextline = 0;
+        if (!v) {
+            if (parser->eofp)
+                return -1;
+
+            if (!lex_input || NIL_P(v = lex_getline(parser))) {
+                parser->eofp = Qtrue;
+                lex_goto_eol(parser);
+                return -1;
+            }
+        }
+        {
+#ifdef RIPPER
+            /* keep the not-yet-dispatched tail of the old line in
+             * parser->delayed before the buffer is replaced */
+            if (parser->tokp < lex_pend) {
+                if (NIL_P(parser->delayed)) {
+                    parser->delayed = rb_str_buf_new(1024);
+                    rb_str_buf_cat(parser->delayed,
+                                   parser->tokp, lex_pend - parser->tokp);
+                    parser->delayed_line = ruby_sourceline;
+                    parser->delayed_col = parser->tokp - lex_pbeg;
+                }
+                else {
+                    rb_str_buf_cat(parser->delayed,
+                                   parser->tokp, lex_pend - parser->tokp);
+                }
+            }
+#endif
+            /* a heredoc body was consumed: resume counting from its end */
+            if (heredoc_end > 0) {
+                ruby_sourceline = heredoc_end;
+                heredoc_end = 0;
+            }
+            ruby_sourceline++;
+            parser->line_count++;
+            lex_pbeg = lex_p = RSTRING_PTR(v);
+            lex_pend = lex_p + RSTRING_LEN(v);
+            ripper_flush(parser);
+            lex_lastline = v;
+        }
+    }
+    c = (unsigned char)*lex_p++;
+    if (c == '\r' && peek('\n')) {
+        lex_p++;
+        c = '\n';
+    }
+
+    return c;
+}
+
+/*
+ * Undo one parser_nextc().  Pushing back -1 is a no-op.  If the byte
+ * stepped back onto is the '\n' of a "\r\n" pair, the '\r' is stepped
+ * over as well, mirroring the folding done by parser_nextc().
+ */
+static void
+parser_pushback(struct parser_params *parser, int c)
+{
+    if (c == -1) return;
+    lex_p--;
+    if (lex_p > lex_pbeg && lex_p[0] == '\n' && lex_p[-1] == '\r') {
+        lex_p--;
+    }
+}
+
+/* true when exactly one byte of the current line has been read */
+#define was_bol() (lex_p == lex_pbeg + 1)
+
+/* token buffer accessors; tokfix() NUL-terminates at index tokidx, so
+ * tokidx must always stay strictly below toksiz */
+#define tokfix() (tokenbuf[tokidx]='\0')
+#define tok() tokenbuf
+#define toklen() tokidx
+#define toklast() (tokidx>0?tokenbuf[tokidx-1]:0)
+
+/*
+ * Start a new token: reset the write index, allocate the 60-byte buffer
+ * on first use, and shrink it back to 60 bytes if an earlier token grew
+ * it beyond 4096 bytes.  Returns the buffer.
+ */
+static char*
+parser_newtok(struct parser_params *parser)
+{
+    tokidx = 0;
+    if (!tokenbuf) {
+        toksiz = 60;
+        tokenbuf = ALLOC_N(char, 60);
+    }
+    if (toksiz > 4096) {
+        toksiz = 60;
+        REALLOC_N(tokenbuf, char, 60);
+    }
+    return tokenbuf;
+}
+
+/*
+ * Reserve n bytes at the end of the token buffer and return a pointer to
+ * the start of the reserved region.
+ *
+ * The buffer is doubled until toksiz is strictly greater than tokidx, so
+ * that a following tokfix()/tokadd() can still store at
+ * tokenbuf[tokidx].  (Stopping at toksiz == tokidx would leave that
+ * store one byte past the allocation.)
+ */
+static char *
+parser_tokspace(struct parser_params *parser, int n)
+{
+    tokidx += n;
+
+    if (tokidx >= toksiz) {
+        do {toksiz *= 2;} while (toksiz <= tokidx);
+        REALLOC_N(tokenbuf, char, toksiz);
+    }
+    return &tokenbuf[tokidx-n];
+}
+
+static void 
+parser_tokadd(struct parser_params *parser, int c) +{ + tokenbuf[tokidx++] = (char)c; + if (tokidx >= toksiz) { + toksiz *= 2; + REALLOC_N(tokenbuf, char, toksiz); + } +} + +static int +parser_tok_hex(struct parser_params *parser, int *numlen) +{ + int c; + + c = scan_hex(lex_p, 2, numlen); + if (!*numlen) { + yyerror("invalid hex escape"); + return 0; + } + lex_p += *numlen; + return c; +} + +#define tokcopy(n) memcpy(tokspace(n), lex_p - (n), (n)) + +static int +parser_tokadd_utf8(struct parser_params *parser, rb_encoding **encp, + int string_literal, int symbol_literal, int regexp_literal) +{ + /* + * If string_literal is true, then we allow multiple codepoints + * in \u{}, and add the codepoints to the current token. + * Otherwise we're parsing a character literal and return a single + * codepoint without adding it + */ + + int codepoint; + int numlen; + + if (regexp_literal) { tokadd('\\'); tokadd('u'); } + + if (peek('{')) { /* handle \u{...} form */ + do { + if (regexp_literal) { tokadd(*lex_p); } + nextc(); + codepoint = scan_hex(lex_p, 6, &numlen); + if (numlen == 0) { + yyerror("invalid Unicode escape"); + return 0; + } + if (codepoint > 0x10ffff) { + yyerror("invalid Unicode codepoint (too large)"); + return 0; + } + lex_p += numlen; + if (regexp_literal) { + tokcopy(numlen); + } + else if (codepoint >= 0x80) { + *encp = UTF8_ENC(); + if (string_literal) tokaddmbc(codepoint, *encp); + } + else if (string_literal) { + if (codepoint == 0 && symbol_literal) { + yyerror("symbol cannot contain '\\u{0}'"); + return 0; + } + + tokadd(codepoint); + } + } while (string_literal && (peek(' ') || peek('\t'))); + + if (!peek('}')) { + yyerror("unterminated Unicode escape"); + return 0; + } + + if (regexp_literal) { tokadd('}'); } + nextc(); + } + else { /* handle \uxxxx form */ + codepoint = scan_hex(lex_p, 4, &numlen); + if (numlen < 4) { + yyerror("invalid Unicode escape"); + return 0; + } + lex_p += 4; + if (regexp_literal) { + tokcopy(4); + } + else if 
(codepoint >= 0x80) { + *encp = UTF8_ENC(); + if (string_literal) tokaddmbc(codepoint, *encp); + } + else if (string_literal) { + if (codepoint == 0 && symbol_literal) { + yyerror("symbol cannot contain '\\u0000'"); + return 0; + } + + tokadd(codepoint); + } + } + + return codepoint; +} + +#define ESCAPE_CONTROL 1 +#define ESCAPE_META 2 + +static int +parser_read_escape(struct parser_params *parser, int flags, + rb_encoding **encp) +{ + int c; + int numlen; + + switch (c = nextc()) { + case '\\': /* Backslash */ + return c; + + case 'n': /* newline */ + return '\n'; + + case 't': /* horizontal tab */ + return '\t'; + + case 'r': /* carriage-return */ + return '\r'; + + case 'f': /* form-feed */ + return '\f'; + + case 'v': /* vertical tab */ + return '\13'; + + case 'a': /* alarm(bell) */ + return '\007'; + + case 'e': /* escape */ + return 033; + + case '0': case '1': case '2': case '3': /* octal constant */ + case '4': case '5': case '6': case '7': + if (flags & (ESCAPE_CONTROL|ESCAPE_META)) goto eof; + { + int numlen; + + pushback(c); + c = scan_oct(lex_p, 3, &numlen); + lex_p += numlen; + } + return c; + + case 'x': /* hex constant */ + if (flags & (ESCAPE_CONTROL|ESCAPE_META)) goto eof; + c = tok_hex(&numlen); + if (numlen == 0) return 0; + return c; + + case 'b': /* backspace */ + return '\010'; + + case 's': /* space */ + return ' '; + + case 'M': + if (flags & ESCAPE_META) goto eof; + if ((c = nextc()) != '-') { + pushback(c); + goto eof; + } + if ((c = nextc()) == '\\') { + return read_escape(flags|ESCAPE_META, encp) | 0x80; + } + else if (c == -1 || !ISASCII(c)) goto eof; + else { + return ((c & 0xff) | 0x80); + } + + case 'C': + if ((c = nextc()) != '-') { + pushback(c); + goto eof; + } + case 'c': + if (flags & ESCAPE_CONTROL) goto eof; + if ((c = nextc())== '\\') { + c = read_escape(flags|ESCAPE_CONTROL, encp); + } + else if (c == '?') + return 0177; + else if (c == -1 || !ISASCII(c)) goto eof; + return c & 0x9f; + + eof: + case -1: + yyerror("Invalid 
escape character syntax"); + return '\0'; + + default: + return c; + } +} + +static void +parser_tokaddmbc(struct parser_params *parser, int c, rb_encoding *enc) +{ + int len = rb_enc_codelen(c, enc); + rb_enc_mbcput(c, tokspace(len), enc); +} + +static int +parser_tokadd_escape(struct parser_params *parser, rb_encoding **encp) +{ + int c; + int flags = 0; + + first: + switch (c = nextc()) { + case '\n': + return 0; /* just ignore */ + + case '0': case '1': case '2': case '3': /* octal constant */ + case '4': case '5': case '6': case '7': + if (flags & (ESCAPE_CONTROL|ESCAPE_META)) goto eof; + { + int numlen; + int oct; + + oct = scan_oct(--lex_p, 3, &numlen); + if (numlen == 0) goto eof; + lex_p += numlen; + tokcopy(numlen + 1); + } + return 0; + + case 'x': /* hex constant */ + if (flags & (ESCAPE_CONTROL|ESCAPE_META)) goto eof; + { + int numlen; + int hex; + + hex = tok_hex(&numlen); + if (numlen == 0) goto eof; + tokcopy(numlen + 2); + } + return 0; + + case 'M': + if (flags & ESCAPE_META) goto eof; + if ((c = nextc()) != '-') { + pushback(c); + goto eof; + } + tokcopy(3); + flags |= ESCAPE_META; + goto escaped; + + case 'C': + if (flags & ESCAPE_CONTROL) goto eof; + if ((c = nextc()) != '-') { + pushback(c); + goto eof; + } + tokcopy(3); + goto escaped; + + case 'c': + if (flags & ESCAPE_CONTROL) goto eof; + tokcopy(2); + flags |= ESCAPE_CONTROL; + escaped: + if ((c = nextc()) == '\\') { + goto first; + } + else if (c == -1) goto eof; + tokadd(c); + return 0; + + eof: + case -1: + yyerror("Invalid escape character syntax"); + return -1; + + default: + tokadd('\\'); + tokadd(c); + } + return 0; +} + +extern int rb_char_to_option_kcode(int c, int *option, int *kcode); + +static int +parser_regx_options(struct parser_params *parser) +{ + int kcode = 0; + int options = 0; + int c, opt, kc; + + newtok(); + while (c = nextc(), ISALPHA(c)) { + if (c == 'o') { + options |= RE_OPTION_ONCE; + } + else if (rb_char_to_option_kcode(c, &opt, &kc)) { + options |= opt; + if 
(kc >= 0) kcode = c; + } + else { + tokadd(c); + } + } + pushback(c); + if (toklen()) { + tokfix(); + compile_error(PARSER_ARG "unknown regexp option%s - %s", + toklen() > 1 ? "s" : "", tok()); + } + return options | RE_OPTION_ENCODING(kcode); +} + +static void +dispose_string(VALUE str) +{ + /* TODO: should use another API? */ + if (RBASIC(str)->flags & RSTRING_NOEMBED) + xfree(RSTRING_PTR(str)); + rb_gc_force_recycle(str); +} + +static int +parser_tokadd_mbchar(struct parser_params *parser, int c) +{ + int len = parser_precise_mbclen(); + if (!MBCLEN_CHARFOUND_P(len)) { + compile_error(PARSER_ARG "invalid multibyte char (%s)", parser_encoding_name()); + return -1; + } + tokadd(c); + lex_p += --len; + if (len > 0) tokcopy(len); + return c; +} + +#define tokadd_mbchar(c) parser_tokadd_mbchar(parser, c) + +static int +parser_tokadd_string(struct parser_params *parser, + int func, int term, int paren, long *nest, + rb_encoding **encp) +{ + int c; + int has_nonascii = 0; + rb_encoding *enc = *encp; + char *errbuf = 0; + static const char mixed_msg[] = "%s mixed within %s source"; + +#define mixed_error(enc1, enc2) if (!errbuf) { \ + int len = sizeof(mixed_msg) - 4; \ + len += strlen(rb_enc_name(enc1)); \ + len += strlen(rb_enc_name(enc2)); \ + errbuf = ALLOCA_N(char, len); \ + snprintf(errbuf, len, mixed_msg, \ + rb_enc_name(enc1), \ + rb_enc_name(enc2)); \ + yyerror(errbuf); \ + } +#define mixed_escape(beg, enc1, enc2) do { \ + const char *pos = lex_p; \ + lex_p = beg; \ + mixed_error(enc1, enc2); \ + lex_p = pos; \ + } while (0) + + while ((c = nextc()) != -1) { + if (paren && c == paren) { + ++*nest; + } + else if (c == term) { + if (!nest || !*nest) { + pushback(c); + break; + } + --*nest; + } + else if ((func & STR_FUNC_EXPAND) && c == '#' && lex_p < lex_pend) { + int c2 = *lex_p; + if (c2 == '$' || c2 == '@' || c2 == '{') { + pushback(c); + break; + } + } + else if (c == '\\') { + const char *beg = lex_p - 1; + c = nextc(); + switch (c) { + case '\n': + if (func 
& STR_FUNC_QWORDS) break; + if (func & STR_FUNC_EXPAND) continue; + tokadd('\\'); + break; + + case '\\': + if (func & STR_FUNC_ESCAPE) tokadd(c); + break; + + case 'u': + if ((func & STR_FUNC_EXPAND) == 0) { + tokadd('\\'); + break; + } + parser_tokadd_utf8(parser, &enc, 1, + func & STR_FUNC_SYMBOL, + func & STR_FUNC_REGEXP); + if (has_nonascii && enc != *encp) { + mixed_escape(beg, enc, *encp); + } + continue; + + default: + if (func & STR_FUNC_REGEXP) { + pushback(c); + if ((c = tokadd_escape(&enc)) < 0) + return -1; + if (has_nonascii && enc != *encp) { + mixed_escape(beg, enc, *encp); + } + continue; + } + else if (func & STR_FUNC_EXPAND) { + pushback(c); + if (func & STR_FUNC_ESCAPE) tokadd('\\'); + c = read_escape(0, &enc); + } + else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) { + /* ignore backslashed spaces in %w */ + } + else if (c != term && !(paren && c == paren)) { + tokadd('\\'); + } + } + } + else if (!parser_isascii()) { + has_nonascii = 1; + if (enc != *encp) { + mixed_error(enc, *encp); + continue; + } + if (tokadd_mbchar(c) == -1) return -1; + continue; + } + else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) { + pushback(c); + break; + } + if (!c && (func & STR_FUNC_SYMBOL)) { + func &= ~STR_FUNC_SYMBOL; + compile_error(PARSER_ARG "symbol cannot contain '\\0'"); + continue; + } + if (c & 0x80) { + has_nonascii = 1; + if (enc != *encp) { + mixed_error(enc, *encp); + continue; + } + } + tokadd(c); + } + *encp = enc; + return c; +} + +#define NEW_STRTERM(func, term, paren) \ + rb_node_newnode(NODE_STRTERM, (func), (term) | ((paren) << (CHAR_BIT * 2)), 0) + +static int +parser_parse_string(struct parser_params *parser, NODE *quote) +{ + int func = quote->nd_func; + int term = nd_term(quote); + int paren = nd_paren(quote); + int c, space = 0; + rb_encoding *enc = parser->enc; + + if (func == -1) return tSTRING_END; + c = nextc(); + if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) { + do {c = nextc();} while (ISSPACE(c)); + space = 1; + } + if (c == term && 
!quote->nd_nest) { + if (func & STR_FUNC_QWORDS) { + quote->nd_func = -1; + return ' '; + } + if (!(func & STR_FUNC_REGEXP)) return tSTRING_END; + set_yylval_num(regx_options()); + return tREGEXP_END; + } + if (space) { + pushback(c); + return ' '; + } + newtok(); + if ((func & STR_FUNC_EXPAND) && c == '#') { + switch (c = nextc()) { + case '$': + case '@': + pushback(c); + return tSTRING_DVAR; + case '{': + return tSTRING_DBEG; + } + tokadd('#'); + } + pushback(c); + if (tokadd_string(func, term, paren, "e->nd_nest, + &enc) == -1) { + ruby_sourceline = nd_line(quote); + if (func & STR_FUNC_REGEXP) { + if (parser->eofp) + compile_error(PARSER_ARG "unterminated regexp meets end of file"); + return tREGEXP_END; + } + else { + if (parser->eofp) + compile_error(PARSER_ARG "unterminated string meets end of file"); + return tSTRING_END; + } + } + + tokfix(); + set_yylval_str(STR_NEW3(tok(), toklen(), enc, func)); + return tSTRING_CONTENT; +} + +static int +parser_heredoc_identifier(struct parser_params *parser) +{ + int c = nextc(), term, func = 0, len; + + if (c == '-') { + c = nextc(); + func = STR_FUNC_INDENT; + } + switch (c) { + case '\'': + func |= str_squote; goto quoted; + case '"': + func |= str_dquote; goto quoted; + case '`': + func |= str_xquote; + quoted: + newtok(); + tokadd(func); + term = c; + while ((c = nextc()) != -1 && c != term) { + if (tokadd_mbchar(c) == -1) return 0; + } + if (c == -1) { + compile_error(PARSER_ARG "unterminated here document identifier"); + return 0; + } + break; + + default: + if (!parser_is_identchar()) { + pushback(c); + if (func & STR_FUNC_INDENT) { + pushback('-'); + } + return 0; + } + newtok(); + term = '"'; + tokadd(func |= str_dquote); + do { + if (tokadd_mbchar(c) == -1) return 0; + } while ((c = nextc()) != -1 && parser_is_identchar()); + pushback(c); + break; + } + + tokfix(); +#ifdef RIPPER + ripper_dispatch_scan_event(parser, tHEREDOC_BEG); +#endif + len = lex_p - lex_pbeg; + lex_goto_eol(parser); + lex_strterm = 
rb_node_newnode(NODE_HEREDOC, + STR_NEW(tok(), toklen()), /* nd_lit */ + len, /* nd_nth */ + lex_lastline); /* nd_orig */ + nd_set_line(lex_strterm, ruby_sourceline); + ripper_flush(parser); + return term == '`' ? tXSTRING_BEG : tSTRING_BEG; +} + +static void +parser_heredoc_restore(struct parser_params *parser, NODE *here) +{ + VALUE line; + + line = here->nd_orig; + lex_lastline = line; + lex_pbeg = RSTRING_PTR(line); + lex_pend = lex_pbeg + RSTRING_LEN(line); + lex_p = lex_pbeg + here->nd_nth; + heredoc_end = ruby_sourceline; + ruby_sourceline = nd_line(here); + dispose_string(here->nd_lit); + rb_gc_force_recycle((VALUE)here); + ripper_flush(parser); +} + +static int +parser_whole_match_p(struct parser_params *parser, + const char *eos, int len, int indent) +{ + const char *p = lex_pbeg; + int n; + + if (indent) { + while (*p && ISSPACE(*p)) p++; + } + n= lex_pend - (p + len); + if (n < 0 || (n > 0 && p[len] != '\n' && p[len] != '\r')) return Qfalse; + if (strncmp(eos, p, len) == 0) return Qtrue; + return Qfalse; +} + +static int +parser_here_document(struct parser_params *parser, NODE *here) +{ + int c, func, indent = 0; + const char *eos, *p, *pend; + long len; + VALUE str = 0; + rb_encoding *enc = parser->enc; + + eos = RSTRING_PTR(here->nd_lit); + len = RSTRING_LEN(here->nd_lit) - 1; + indent = (func = *eos++) & STR_FUNC_INDENT; + + if ((c = nextc()) == -1) { + error: + compile_error(PARSER_ARG "can't find string \"%s\" anywhere before EOF", eos); +#ifdef RIPPER + if (NIL_P(parser->delayed)) { + ripper_dispatch_scan_event(parser, tSTRING_CONTENT); + } + else { + if (str || + ((len = lex_p - parser->tokp) > 0 && + (str = STR_NEW3(parser->tokp, len, enc, func), 1))) { + rb_str_append(parser->delayed, str); + } + ripper_dispatch_delayed_token(parser, tSTRING_CONTENT); + } + lex_goto_eol(parser); +#endif + restore: + heredoc_restore(lex_strterm); + lex_strterm = 0; + return 0; + } + if (was_bol() && whole_match_p(eos, len, indent)) { + 
heredoc_restore(lex_strterm); + return tSTRING_END; + } + + if (!(func & STR_FUNC_EXPAND)) { + do { + p = RSTRING_PTR(lex_lastline); + pend = lex_pend; + if (pend > p) { + switch (pend[-1]) { + case '\n': + if (--pend == p || pend[-1] != '\r') { + pend++; + break; + } + case '\r': + --pend; + } + } + if (str) + rb_str_cat(str, p, pend - p); + else + str = STR_NEW(p, pend - p); + if (pend < lex_pend) rb_str_cat(str, "\n", 1); + lex_goto_eol(parser); + if (nextc() == -1) { + if (str) dispose_string(str); + goto error; + } + } while (!whole_match_p(eos, len, indent)); + } + else { + /* int mb = ENC_CODERANGE_7BIT, *mbp = &mb;*/ + newtok(); + if (c == '#') { + switch (c = nextc()) { + case '$': + case '@': + pushback(c); + return tSTRING_DVAR; + case '{': + return tSTRING_DBEG; + } + tokadd('#'); + } + do { + pushback(c); + if ((c = tokadd_string(func, '\n', 0, NULL, &enc)) == -1) { + if (parser->eofp) goto error; + goto restore; + } + if (c != '\n') { + set_yylval_str(STR_NEW3(tok(), toklen(), enc, func)); + return tSTRING_CONTENT; + } + tokadd(nextc()); + /* if (mbp && mb == ENC_CODERANGE_UNKNOWN) mbp = 0;*/ + if ((c = nextc()) == -1) goto error; + } while (!whole_match_p(eos, len, indent)); + str = STR_NEW3(tok(), toklen(), enc, func); + } +#ifdef RIPPER + if (!NIL_P(parser->delayed)) + ripper_dispatch_delayed_token(parser, tSTRING_CONTENT); + lex_goto_eol(parser); + ripper_dispatch_ignored_scan_event(parser, tHEREDOC_END); +#endif + heredoc_restore(lex_strterm); + lex_strterm = NEW_STRTERM(-1, 0, 0); + set_yylval_str(str); + return tSTRING_CONTENT; +} + +#include "lex.c" + +static void +arg_ambiguous_gen(struct parser_params *parser) +{ +#ifndef RIPPER + rb_warning0("ambiguous first argument; put parentheses or even spaces"); +#else + dispatch0(arg_ambiguous); +#endif +} +#define arg_ambiguous() arg_ambiguous_gen(parser) + +static ID +formal_argument_gen(struct parser_params *parser, ID lhs) +{ +#ifndef RIPPER + if (!is_local_id(lhs)) + yyerror("formal argument 
must be local variable"); +#endif + shadowing_lvar(lhs); + return lhs; +} + +static int +lvar_defined_gen(struct parser_params *parser, ID id) +{ + return (dyna_in_block() && dvar_defined(id)) || local_id(id); +} + +/* emacsen -*- hack */ +static int +parser_encode_length(struct parser_params *parser, const char *name, int len) +{ + int nlen; + + if (len > 5 && name[nlen = len - 5] == '-') { + if (rb_memcicmp(name + nlen + 1, "unix", 4) == 0) + return nlen; + } + if (len > 4 && name[nlen = len - 5] == '-') { + if (rb_memcicmp(name + nlen + 1, "dos", 3) == 0) + return nlen; + if (rb_memcicmp(name + nlen + 1, "mac", 3) == 0) + return nlen; + } + return len; +} + +static void +parser_set_encode(struct parser_params *parser, const char *name) +{ + int idx = rb_enc_find_index(name); + rb_encoding *enc; + VALUE excargs[3]; + + if (idx < 0) { + VALUE rb_make_backtrace(void); + VALUE rb_make_exception(int, VALUE*); + + excargs[1] = rb_sprintf("unknown encoding name: %s", name); + error: + excargs[0] = rb_eArgError; + excargs[2] = rb_make_backtrace(); + rb_ary_unshift(excargs[2], rb_sprintf("%s:%d", ruby_sourcefile, ruby_sourceline)); + rb_exc_raise(rb_make_exception(3, excargs)); + } + enc = rb_enc_from_index(idx); + if (!rb_enc_asciicompat(enc)) { + excargs[1] = rb_sprintf("%s is not ASCII compatible", rb_enc_name(enc)); + goto error; + } + parser->enc = enc; +} + +static int +comment_at_top(struct parser_params *parser) +{ + const char *p = lex_pbeg, *pend = lex_p - 1; + if (parser->line_count != (parser->has_shebang ? 
2 : 1)) return 0; + while (p < pend) { + if (!ISSPACE(*p)) return 0; + p++; + } + return 1; +} + +#ifndef RIPPER +typedef int (*rb_magic_comment_length_t)(struct parser_params *parser, const char *name, int len); +typedef void (*rb_magic_comment_setter_t)(struct parser_params *parser, const char *name, const char *val); + +static void +magic_comment_encoding(struct parser_params *parser, const char *name, const char *val) +{ + if (!comment_at_top(parser)) { + rb_warningS("encoding '%s' is ignored, valid only in the first line except for shebang line.", + val); + return; + } + parser_set_encode(parser, val); +} + +struct magic_comment { + const char *name; + rb_magic_comment_setter_t func; + rb_magic_comment_length_t length; +}; + +static const struct magic_comment magic_comments[] = { + {"coding", magic_comment_encoding, parser_encode_length}, + {"encoding", magic_comment_encoding, parser_encode_length}, +}; +#endif + +static const char * +magic_comment_marker(const char *str, int len) +{ + int i = 2; + + while (i < len) { + switch (str[i]) { + case '-': + if (str[i-1] == '*' && str[i-2] == '-') { + return str + i + 1; + } + i += 2; + break; + case '*': + if (i + 1 >= len) return 0; + if (str[i+1] != '-') { + i += 4; + } + else if (str[i-1] != '-') { + i += 2; + } + else { + return str + i + 2; + } + break; + default: + i += 3; + break; + } + } + return 0; +} + +static int +parser_magic_comment(struct parser_params *parser, const char *str, int len) +{ + VALUE name = 0, val = 0; + const char *beg, *end, *vbeg, *vend; +#define str_copy(_s, _p, _n) ((_s) \ + ? 
(rb_str_resize((_s), (_n)), \ + MEMCPY(RSTRING_PTR(_s), (_p), char, (_n)), (_s)) \ + : ((_s) = STR_NEW((_p), (_n)))) + + if (len <= 7) return Qfalse; + if (!(beg = magic_comment_marker(str, len))) return Qfalse; + if (!(end = magic_comment_marker(beg, str + len - beg))) return Qfalse; + str = beg; + len = end - beg - 3; + + /* %r"([^\\s\'\":;]+)\\s*:\\s*(\"(?:\\\\.|[^\"])*\"|[^\"\\s;]+)[\\s;]*" */ + while (len > 0) { +#ifndef RIPPER + const struct magic_comment *p = magic_comments; +#endif + char *s; + int i, n = 0; + + for (; len > 0 && *str; str++, --len) { + switch (*str) { + case '\'': case '"': case ':': case ';': + continue; + } + if (!ISSPACE(*str)) break; + } + for (beg = str; len > 0; str++, --len) { + switch (*str) { + case '\'': case '"': case ':': case ';': + break; + default: + if (ISSPACE(*str)) break; + continue; + } + break; + } + for (end = str; len > 0 && ISSPACE(*str); str++, --len); + if (!len) break; + if (*str != ':') continue; + + do str++; while (--len > 0 && ISSPACE(*str)); + if (!len) break; + if (*str == '"') { + for (vbeg = ++str; --len > 0 && *str != '"'; str++) { + if (*str == '\\') { + --len; + ++str; + } + } + vend = str; + if (len) { + --len; + ++str; + } + } + else { + for (vbeg = str; len > 0 && *str != '"' && *str != ';' && !ISSPACE(*str); --len, str++); + vend = str; + } + while (len > 0 && (*str == ';' || ISSPACE(*str))) --len, str++; + + n = end - beg; + str_copy(name, beg, n); + s = RSTRING_PTR(name); + for (i = 0; i < n; ++i) { /* normalize every '-' in the name to '_'; s must keep pointing at the start of the name */ + if (s[i] == '-') s[i] = '_'; + } +#ifndef RIPPER + do { + if (STRNCASECMP(p->name, s, n) == 0) { + n = vend - vbeg; + if (p->length) { + n = (*p->length)(parser, vbeg, n); + } + str_copy(val, vbeg, n); + (*p->func)(parser, s, RSTRING_PTR(val)); + break; + } + } while (++p < magic_comments + sizeof(magic_comments) / sizeof(*p)); +#else + str_copy(val, vbeg, vend - vbeg); dispatch2(magic_comment, name, val); /* copy the value text first; val was previously dispatched unset */ +#endif + } + + return Qtrue; +} + +static void +set_file_encoding(struct parser_params *parser, const char *str, const char *send) 
+{ + int sep = 0; + const char *beg = str; + VALUE s; + + for (;;) { + if (send - str <= 6) return; + switch (str[6]) { + case 'C': case 'c': str += 6; continue; + case 'O': case 'o': str += 5; continue; + case 'D': case 'd': str += 4; continue; + case 'I': case 'i': str += 3; continue; + case 'N': case 'n': str += 2; continue; + case 'G': case 'g': str += 1; continue; + case '=': case ':': + sep = 1; + str += 6; + break; + default: + str += 6; + if (ISSPACE(*str)) break; + continue; + } + if (STRNCASECMP(str-6, "coding", 6) == 0) break; + } + for (;;) { + do { + if (++str >= send) return; + } while (ISSPACE(*str)); + if (sep) break; + if (*str != '=' && *str != ':') return; + sep = 1; + str++; + } + beg = str; + while ((*str == '-' || *str == '_' || ISALNUM(*str)) && ++str < send); + s = rb_str_new(beg, parser_encode_length(parser, beg, str - beg)); + parser_set_encode(parser, RSTRING_PTR(s)); + rb_str_resize(s, 0); +} + +static void +parser_prepare(struct parser_params *parser) +{ + int c = nextc(); + switch (c) { + case '#': + if (peek('!')) parser->has_shebang = 1; + break; + case 0xef: /* UTF-8 BOM marker */ + if (lex_pend - lex_p >= 2 && + (unsigned char)lex_p[0] == 0xbb && + (unsigned char)lex_p[1] == 0xbf) { + parser->enc = rb_utf8_encoding(); + lex_p += 2; + lex_pbeg = lex_p; + return; + } + break; + case EOF: + return; + } + pushback(c); + parser->enc = rb_enc_get(lex_lastline); +} + +#define IS_ARG() (lex_state == EXPR_ARG || lex_state == EXPR_CMDARG) +#define IS_BEG() (lex_state == EXPR_BEG || lex_state == EXPR_MID || lex_state == EXPR_VALUE || lex_state == EXPR_CLASS) + +static int +parser_yylex(struct parser_params *parser) +{ + register int c; + int space_seen = 0; + int cmd_state; + enum lex_state_e last_state; + rb_encoding *enc; + int mb; +#ifdef RIPPER + int fallthru = Qfalse; +#endif + + if (lex_strterm) { + int token; + if (nd_type(lex_strterm) == NODE_HEREDOC) { + token = here_document(lex_strterm); + if (token == tSTRING_END) { + lex_strterm 
= 0; + lex_state = EXPR_END; + } + } + else { + token = parse_string(lex_strterm); + if (token == tSTRING_END || token == tREGEXP_END) { + rb_gc_force_recycle((VALUE)lex_strterm); + lex_strterm = 0; + lex_state = EXPR_END; + } + } + return token; + } + cmd_state = command_start; + command_start = Qfalse; + retry: + switch (c = nextc()) { + case '\0': /* NUL */ + case '\004': /* ^D */ + case '\032': /* ^Z */ + case -1: /* end of script. */ + return 0; + + /* white spaces */ + case ' ': case '\t': case '\f': case '\r': + case '\13': /* '\v' */ + space_seen++; +#ifdef RIPPER + while ((c = nextc())) { + switch (c) { + case ' ': case '\t': case '\f': case '\r': + case '\13': /* '\v' */ + break; + default: + goto outofloop; + } + } + outofloop: + pushback(c); + ripper_dispatch_scan_event(parser, tSP); +#endif + goto retry; + + case '#': /* it's a comment */ + /* no magic_comment in shebang line */ + if (!parser_magic_comment(parser, lex_p, lex_pend - lex_p)) { + if (comment_at_top(parser)) { + set_file_encoding(parser, lex_p, lex_pend); + } + } + lex_p = lex_pend; +#ifdef RIPPER + ripper_dispatch_scan_event(parser, tCOMMENT); + fallthru = Qtrue; +#endif + /* fall through */ + case '\n': + switch (lex_state) { + case EXPR_BEG: + case EXPR_FNAME: + case EXPR_DOT: + case EXPR_CLASS: + case EXPR_VALUE: +#ifdef RIPPER + if (!fallthru) { + ripper_dispatch_scan_event(parser, tIGNORED_NL); + } + fallthru = Qfalse; +#endif + goto retry; + default: + break; + } + while ((c = nextc())) { + switch (c) { + case ' ': case '\t': case '\f': case '\r': + case '\13': /* '\v' */ + space_seen++; + break; + case '.': { + if ((c = nextc()) != '.') { + pushback(c); + pushback('.'); + goto retry; + } + } + default: + --ruby_sourceline; + lex_nextline = lex_lastline; + case -1: /* EOF no decrement*/ + lex_goto_eol(parser); +#ifdef RIPPER + if (c != -1) { + parser->tokp = lex_p; + } +#endif + goto normal_newline; + } + } + normal_newline: + command_start = Qtrue; + lex_state = EXPR_BEG; + return 
'\n'; + + case '*': + if ((c = nextc()) == '*') { + if ((c = nextc()) == '=') { + set_yylval_id(tPOW); + lex_state = EXPR_BEG; + return tOP_ASGN; + } + pushback(c); + c = tPOW; + } + else { + if (c == '=') { + set_yylval_id('*'); + lex_state = EXPR_BEG; + return tOP_ASGN; + } + pushback(c); + if (IS_ARG() && space_seen && !ISSPACE(c)) { + rb_warning0("`*' interpreted as argument prefix"); + c = tSTAR; + } + else if (IS_BEG()) { + c = tSTAR; + } + else { + c = '*'; + } + } + switch (lex_state) { + case EXPR_FNAME: case EXPR_DOT: + lex_state = EXPR_ARG; break; + default: + lex_state = EXPR_BEG; break; + } + return c; + + case '!': + c = nextc(); + if (lex_state == EXPR_FNAME || lex_state == EXPR_DOT) { + lex_state = EXPR_ARG; + if (c == '@') { + return '!'; + } + } + else { + lex_state = EXPR_BEG; + } + if (c == '=') { + return tNEQ; + } + if (c == '~') { + return tNMATCH; + } + pushback(c); + return '!'; + + case '=': + if (was_bol()) { + /* skip embedded rd document */ + if (strncmp(lex_p, "begin", 5) == 0 && ISSPACE(lex_p[5])) { +#ifdef RIPPER + int first_p = Qtrue; + + lex_goto_eol(parser); + ripper_dispatch_scan_event(parser, tEMBDOC_BEG); +#endif + for (;;) { + lex_goto_eol(parser); +#ifdef RIPPER + if (!first_p) { + ripper_dispatch_scan_event(parser, tEMBDOC); + } + first_p = Qfalse; +#endif + c = nextc(); + if (c == -1) { + compile_error(PARSER_ARG "embedded document meets end of file"); + return 0; + } + if (c != '=') continue; + if (strncmp(lex_p, "end", 3) == 0 && + (lex_p + 3 == lex_pend || ISSPACE(lex_p[3]))) { + break; + } + } + lex_goto_eol(parser); +#ifdef RIPPER + ripper_dispatch_scan_event(parser, tEMBDOC_END); +#endif + goto retry; + } + } + + switch (lex_state) { + case EXPR_FNAME: case EXPR_DOT: + lex_state = EXPR_ARG; break; + default: + lex_state = EXPR_BEG; break; + } + if ((c = nextc()) == '=') { + if ((c = nextc()) == '=') { + return tEQQ; + } + pushback(c); + return tEQ; + } + if (c == '~') { + return tMATCH; + } + else if (c == '>') { + 
return tASSOC; + } + pushback(c); + return '='; + + case '<': + c = nextc(); + if (c == '<' && + lex_state != EXPR_END && + lex_state != EXPR_DOT && + lex_state != EXPR_ENDARG && + lex_state != EXPR_CLASS && + (!IS_ARG() || space_seen)) { + int token = heredoc_identifier(); + if (token) return token; + } + switch (lex_state) { + case EXPR_FNAME: case EXPR_DOT: + lex_state = EXPR_ARG; break; + default: + lex_state = EXPR_BEG; break; + } + if (c == '=') { + if ((c = nextc()) == '>') { + return tCMP; + } + pushback(c); + return tLEQ; + } + if (c == '<') { + if ((c = nextc()) == '=') { + set_yylval_id(tLSHFT); + lex_state = EXPR_BEG; + return tOP_ASGN; + } + pushback(c); + return tLSHFT; + } + pushback(c); + return '<'; + + case '>': + switch (lex_state) { + case EXPR_FNAME: case EXPR_DOT: + lex_state = EXPR_ARG; break; + default: + lex_state = EXPR_BEG; break; + } + if ((c = nextc()) == '=') { + return tGEQ; + } + if (c == '>') { + if ((c = nextc()) == '=') { + set_yylval_id(tRSHFT); + lex_state = EXPR_BEG; + return tOP_ASGN; + } + pushback(c); + return tRSHFT; + } + pushback(c); + return '>'; + + case '"': + lex_strterm = NEW_STRTERM(str_dquote, '"', 0); + return tSTRING_BEG; + + case '`': + if (lex_state == EXPR_FNAME) { + lex_state = EXPR_END; + return c; + } + if (lex_state == EXPR_DOT) { + if (cmd_state) + lex_state = EXPR_CMDARG; + else + lex_state = EXPR_ARG; + return c; + } + lex_strterm = NEW_STRTERM(str_xquote, '`', 0); + return tXSTRING_BEG; + + case '\'': + lex_strterm = NEW_STRTERM(str_squote, '\'', 0); + return tSTRING_BEG; + + case '?': + if (lex_state == EXPR_END || lex_state == EXPR_ENDARG) { + lex_state = EXPR_VALUE; + return '?'; + } + c = nextc(); + if (c == -1) { + compile_error(PARSER_ARG "incomplete character syntax"); + return 0; + } + if (rb_enc_isspace(c, parser->enc)) { + if (!IS_ARG()) { + int c2 = 0; + switch (c) { + case ' ': + c2 = 's'; + break; + case '\n': + c2 = 'n'; + break; + case '\t': + c2 = 't'; + break; + case '\v': + c2 = 'v'; 
+ break; + case '\r': + c2 = 'r'; + break; + case '\f': + c2 = 'f'; + break; + } + if (c2) { + rb_warnI("invalid character syntax; use ?\\%c", c2); + } + } + ternary: + pushback(c); + lex_state = EXPR_VALUE; + return '?'; + } + newtok(); + enc = parser->enc; + if (!parser_isascii()) { + if (tokadd_mbchar(c) == -1) return 0; + } + else if ((rb_enc_isalnum(c, parser->enc) || c == '_') && + lex_p < lex_pend && is_identchar(lex_p, lex_pend, parser->enc)) { + goto ternary; + } + else if (c == '\\') { + if (peek('u')) { + nextc(); + c = parser_tokadd_utf8(parser, &enc, 0, 0, 0); + if (0x80 <= c) { + tokaddmbc(c, enc); + } + else { + tokadd(c); + } + } + else { + c = read_escape(0, &enc); + tokadd(c); + } + } + else { + tokadd(c); + } + tokfix(); + set_yylval_str(STR_NEW3(tok(), toklen(), enc, 0)); + lex_state = EXPR_END; + return tCHAR; + + case '&': + if ((c = nextc()) == '&') { + lex_state = EXPR_BEG; + if ((c = nextc()) == '=') { + set_yylval_id(tANDOP); + lex_state = EXPR_BEG; + return tOP_ASGN; + } + pushback(c); + return tANDOP; + } + else if (c == '=') { + set_yylval_id('&'); + lex_state = EXPR_BEG; + return tOP_ASGN; + } + pushback(c); + if (IS_ARG() && space_seen && !ISSPACE(c)) { + rb_warning0("`&' interpreted as argument prefix"); + c = tAMPER; + } + else if (IS_BEG()) { + c = tAMPER; + } + else { + c = '&'; + } + switch (lex_state) { + case EXPR_FNAME: case EXPR_DOT: + lex_state = EXPR_ARG; break; + default: + lex_state = EXPR_BEG; + } + return c; + + case '|': + if ((c = nextc()) == '|') { + lex_state = EXPR_BEG; + if ((c = nextc()) == '=') { + set_yylval_id(tOROP); + lex_state = EXPR_BEG; + return tOP_ASGN; + } + pushback(c); + return tOROP; + } + if (c == '=') { + set_yylval_id('|'); + lex_state = EXPR_BEG; + return tOP_ASGN; + } + if (lex_state == EXPR_FNAME || lex_state == EXPR_DOT) { + lex_state = EXPR_ARG; + } + else { + lex_state = EXPR_BEG; + } + pushback(c); + return '|'; + + case '+': + c = nextc(); + if (lex_state == EXPR_FNAME || lex_state == 
EXPR_DOT) { + lex_state = EXPR_ARG; + if (c == '@') { + return tUPLUS; + } + pushback(c); + return '+'; + } + if (c == '=') { + set_yylval_id('+'); + lex_state = EXPR_BEG; + return tOP_ASGN; + } + if (IS_BEG() || + (IS_ARG() && space_seen && !ISSPACE(c))) { + if (IS_ARG()) arg_ambiguous(); + lex_state = EXPR_BEG; + pushback(c); + if (c != -1 && ISDIGIT(c)) { + c = '+'; + goto start_num; + } + return tUPLUS; + } + lex_state = EXPR_BEG; + pushback(c); + return '+'; + + case '-': + c = nextc(); + if (lex_state == EXPR_FNAME || lex_state == EXPR_DOT) { + lex_state = EXPR_ARG; + if (c == '@') { + return tUMINUS; + } + pushback(c); + return '-'; + } + if (c == '=') { + set_yylval_id('-'); + lex_state = EXPR_BEG; + return tOP_ASGN; + } + if (c == '>') { + lex_state = EXPR_ARG; + return tLAMBDA; + } + if (IS_BEG() || + (IS_ARG() && space_seen && !ISSPACE(c))) { + if (IS_ARG()) arg_ambiguous(); + lex_state = EXPR_BEG; + pushback(c); + if (c != -1 && ISDIGIT(c)) { + return tUMINUS_NUM; + } + return tUMINUS; + } + lex_state = EXPR_BEG; + pushback(c); + return '-'; + + case '.': + lex_state = EXPR_BEG; + if ((c = nextc()) == '.') { + if ((c = nextc()) == '.') { + return tDOT3; + } + pushback(c); + return tDOT2; + } + pushback(c); + if (c != -1 && ISDIGIT(c)) { + yyerror("no . 
floating literal anymore; put 0 before dot"); + } + lex_state = EXPR_DOT; + return '.'; + + start_num: + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + { + int is_float, seen_point, seen_e, nondigit; + + is_float = seen_point = seen_e = nondigit = 0; + lex_state = EXPR_END; + newtok(); + if (c == '-' || c == '+') { + tokadd(c); + c = nextc(); + } + if (c == '0') { + int start = toklen(); + c = nextc(); + if (c == 'x' || c == 'X') { + /* hexadecimal */ + c = nextc(); + if (c != -1 && ISXDIGIT(c)) { + do { + if (c == '_') { + if (nondigit) break; + nondigit = c; + continue; + } + if (!ISXDIGIT(c)) break; + nondigit = 0; + tokadd(c); + } while ((c = nextc()) != -1); + } + pushback(c); + tokfix(); + if (toklen() == start) { + yyerror("numeric literal without digits"); + } + else if (nondigit) goto trailing_uc; + set_yylval_literal(rb_cstr_to_inum(tok(), 16, Qfalse)); + return tINTEGER; + } + if (c == 'b' || c == 'B') { + /* binary */ + c = nextc(); + if (c == '0' || c == '1') { + do { + if (c == '_') { + if (nondigit) break; + nondigit = c; + continue; + } + if (c != '0' && c != '1') break; + nondigit = 0; + tokadd(c); + } while ((c = nextc()) != -1); + } + pushback(c); + tokfix(); + if (toklen() == start) { + yyerror("numeric literal without digits"); + } + else if (nondigit) goto trailing_uc; + set_yylval_literal(rb_cstr_to_inum(tok(), 2, Qfalse)); + return tINTEGER; + } + if (c == 'd' || c == 'D') { + /* decimal */ + c = nextc(); + if (c != -1 && ISDIGIT(c)) { + do { + if (c == '_') { + if (nondigit) break; + nondigit = c; + continue; + } + if (!ISDIGIT(c)) break; + nondigit = 0; + tokadd(c); + } while ((c = nextc()) != -1); + } + pushback(c); + tokfix(); + if (toklen() == start) { + yyerror("numeric literal without digits"); + } + else if (nondigit) goto trailing_uc; + set_yylval_literal(rb_cstr_to_inum(tok(), 10, Qfalse)); + return tINTEGER; + } + if (c == '_') { + /* 0_0 */ + goto octal_number; + } + if (c 
== 'o' || c == 'O') { + /* prefixed octal */ + c = nextc(); + if (c == -1 || c == '_' || !ISDIGIT(c)) { + yyerror("numeric literal without digits"); + } + } + if (c >= '0' && c <= '7') { + /* octal */ + octal_number: + do { + if (c == '_') { + if (nondigit) break; + nondigit = c; + continue; + } + if (c < '0' || c > '9') break; + if (c > '7') goto invalid_octal; + nondigit = 0; + tokadd(c); + } while ((c = nextc()) != -1); + if (toklen() > start) { + pushback(c); + tokfix(); + if (nondigit) goto trailing_uc; + set_yylval_literal(rb_cstr_to_inum(tok(), 8, Qfalse)); + return tINTEGER; + } + if (nondigit) { + pushback(c); + goto trailing_uc; + } + } + if (c > '7' && c <= '9') { + invalid_octal: + yyerror("Invalid octal digit"); + } + else if (c == '.' || c == 'e' || c == 'E') { + tokadd('0'); + } + else { + pushback(c); + set_yylval_literal(INT2FIX(0)); + return tINTEGER; + } + } + + for (;;) { + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + nondigit = 0; + tokadd(c); + break; + + case '.': + if (nondigit) goto trailing_uc; + if (seen_point || seen_e) { + goto decode_num; + } + else { + int c0 = nextc(); + if (c0 == -1 || !ISDIGIT(c0)) { + pushback(c0); + goto decode_num; + } + c = c0; + } + tokadd('.'); + tokadd(c); + is_float++; + seen_point++; + nondigit = 0; + break; + + case 'e': + case 'E': + if (nondigit) { + pushback(c); + c = nondigit; + goto decode_num; + } + if (seen_e) { + goto decode_num; + } + tokadd(c); + seen_e++; + is_float++; + nondigit = c; + c = nextc(); + if (c != '-' && c != '+') continue; + tokadd(c); + nondigit = c; + break; + + case '_': /* `_' in number just ignored */ + if (nondigit) goto decode_num; + nondigit = c; + break; + + default: + goto decode_num; + } + c = nextc(); + } + + decode_num: + pushback(c); + tokfix(); + if (nondigit) { + char tmp[30]; + trailing_uc: + sprintf(tmp, "trailing `%c' in number", nondigit); + yyerror(tmp); + } + if (is_float) { + double d 
= strtod(tok(), 0); + if (errno == ERANGE) { + rb_warningS("Float %s out of range", tok()); + errno = 0; + } + set_yylval_literal(DBL2NUM(d)); + return tFLOAT; + } + set_yylval_literal(rb_cstr_to_inum(tok(), 10, Qfalse)); + return tINTEGER; + } + + case ')': + case ']': + paren_nest--; + case '}': + COND_LEXPOP(); + CMDARG_LEXPOP(); + if (c == ')') + lex_state = EXPR_END; + else + lex_state = EXPR_ENDARG; + return c; + + case ':': + c = nextc(); + if (c == ':') { + if (IS_BEG() || + lex_state == EXPR_CLASS || (IS_ARG() && space_seen)) { + lex_state = EXPR_BEG; + return tCOLON3; + } + lex_state = EXPR_DOT; + return tCOLON2; + } + if (lex_state == EXPR_END || lex_state == EXPR_ENDARG || (c != -1 && ISSPACE(c))) { + pushback(c); + lex_state = EXPR_BEG; + return ':'; + } + switch (c) { + case '\'': + lex_strterm = NEW_STRTERM(str_ssym, c, 0); + break; + case '"': + lex_strterm = NEW_STRTERM(str_dsym, c, 0); + break; + default: + pushback(c); + break; + } + lex_state = EXPR_FNAME; + return tSYMBEG; + + case '/': + if (IS_BEG()) { + lex_strterm = NEW_STRTERM(str_regexp, '/', 0); + return tREGEXP_BEG; + } + if ((c = nextc()) == '=') { + set_yylval_id('/'); + lex_state = EXPR_BEG; + return tOP_ASGN; + } + pushback(c); + if (IS_ARG() && space_seen) { + if (!ISSPACE(c)) { + arg_ambiguous(); + lex_strterm = NEW_STRTERM(str_regexp, '/', 0); + return tREGEXP_BEG; + } + } + switch (lex_state) { + case EXPR_FNAME: case EXPR_DOT: + lex_state = EXPR_ARG; break; + default: + lex_state = EXPR_BEG; break; + } + return '/'; + + case '^': + if ((c = nextc()) == '=') { + set_yylval_id('^'); + lex_state = EXPR_BEG; + return tOP_ASGN; + } + switch (lex_state) { + case EXPR_FNAME: case EXPR_DOT: + lex_state = EXPR_ARG; break; + default: + lex_state = EXPR_BEG; break; + } + pushback(c); + return '^'; + + case ';': + lex_state = EXPR_BEG; + command_start = Qtrue; + return ';'; + + case ',': + lex_state = EXPR_BEG; + return ','; + + case '~': + if (lex_state == EXPR_FNAME || lex_state == 
EXPR_DOT) { + if ((c = nextc()) != '@') { + pushback(c); + } + lex_state = EXPR_ARG; + } + else { + lex_state = EXPR_BEG; + } + return '~'; + + case '(': + if (IS_BEG()) { + c = tLPAREN; + } + else if (space_seen) { + if (IS_ARG()) { + c = tLPAREN_ARG; + } + } + paren_nest++; + COND_PUSH(0); + CMDARG_PUSH(0); + lex_state = EXPR_BEG; + return c; + + case '[': + paren_nest++; + if (lex_state == EXPR_FNAME || lex_state == EXPR_DOT) { + lex_state = EXPR_ARG; + if ((c = nextc()) == ']') { + if ((c = nextc()) == '=') { + return tASET; + } + pushback(c); + return tAREF; + } + pushback(c); + return '['; + } + else if (IS_BEG()) { + c = tLBRACK; + } + else if (IS_ARG() && space_seen) { + c = tLBRACK; + } + lex_state = EXPR_BEG; + COND_PUSH(0); + CMDARG_PUSH(0); + return c; + + case '{': + if (lpar_beg && lpar_beg == paren_nest) { + lex_state = EXPR_BEG; + lpar_beg = 0; + --paren_nest; + return tLAMBEG; + } + if (IS_ARG() || lex_state == EXPR_END) + c = '{'; /* block (primary) */ + else if (lex_state == EXPR_ENDARG) + c = tLBRACE_ARG; /* block (expr) */ + else + c = tLBRACE; /* hash */ + COND_PUSH(0); + CMDARG_PUSH(0); + lex_state = EXPR_BEG; + if (c != tLBRACE) command_start = Qtrue; + return c; + + case '\\': + c = nextc(); + if (c == '\n') { + space_seen = 1; +#ifdef RIPPER + ripper_dispatch_scan_event(parser, tSP); +#endif + goto retry; /* skip \\n */ + } + pushback(c); + return '\\'; + + case '%': + if (IS_BEG()) { + int term; + int paren; + + c = nextc(); + quotation: + if (c == -1 || !ISALNUM(c)) { + term = c; + c = 'Q'; + } + else { + term = nextc(); + if (rb_enc_isalnum(term, parser->enc) || !parser_isascii()) { + yyerror("unknown type of %string"); + return 0; + } + } + if (c == -1 || term == -1) { + compile_error(PARSER_ARG "unterminated quoted string meets end of file"); + return 0; + } + paren = term; + if (term == '(') term = ')'; + else if (term == '[') term = ']'; + else if (term == '{') term = '}'; + else if (term == '<') term = '>'; + else paren = 0; + + 
switch (c) { + case 'Q': + lex_strterm = NEW_STRTERM(str_dquote, term, paren); + return tSTRING_BEG; + + case 'q': + lex_strterm = NEW_STRTERM(str_squote, term, paren); + return tSTRING_BEG; + + case 'W': + lex_strterm = NEW_STRTERM(str_dword, term, paren); + do {c = nextc();} while (ISSPACE(c)); + pushback(c); + return tWORDS_BEG; + + case 'w': + lex_strterm = NEW_STRTERM(str_sword, term, paren); + do {c = nextc();} while (ISSPACE(c)); + pushback(c); + return tQWORDS_BEG; + + case 'x': + lex_strterm = NEW_STRTERM(str_xquote, term, paren); + return tXSTRING_BEG; + + case 'r': + lex_strterm = NEW_STRTERM(str_regexp, term, paren); + return tREGEXP_BEG; + + case 's': + lex_strterm = NEW_STRTERM(str_ssym, term, paren); + lex_state = EXPR_FNAME; + return tSYMBEG; + + default: + yyerror("unknown type of %string"); + return 0; + } + } + if ((c = nextc()) == '=') { + set_yylval_id('%'); + lex_state = EXPR_BEG; + return tOP_ASGN; + } + if (IS_ARG() && space_seen && !ISSPACE(c)) { + goto quotation; + } + switch (lex_state) { + case EXPR_FNAME: case EXPR_DOT: + lex_state = EXPR_ARG; break; + default: + lex_state = EXPR_BEG; break; + } + pushback(c); + return '%'; + + case '$': + last_state = lex_state; + lex_state = EXPR_END; + newtok(); + c = nextc(); + switch (c) { + case '_': /* $_: last read line string */ + c = nextc(); + if (parser_is_identchar()) { + tokadd('$'); + tokadd('_'); + break; + } + pushback(c); + c = '_'; + /* fall through */ + case '~': /* $~: match-data */ + case '*': /* $*: argv */ + case '$': /* $$: pid */ + case '?': /* $?: last status */ + case '!': /* $!: error string */ + case '@': /* $@: error position */ + case '/': /* $/: input record separator */ + case '\\': /* $\: output record separator */ + case ';': /* $;: field separator */ + case ',': /* $,: output field separator */ + case '.': /* $.: last read line number */ + case '=': /* $=: ignorecase */ + case ':': /* $:: load path */ + case '<': /* $<: reading filename */ + case '>': /* $>: default 
output handle */ + case '\"': /* $": already loaded files */ + tokadd('$'); + tokadd(c); + tokfix(); + set_yylval_name(rb_intern(tok())); + return tGVAR; + + case '-': + tokadd('$'); + tokadd(c); + c = nextc(); + if (parser_is_identchar()) { + if (tokadd_mbchar(c) == -1) return 0; + } + else { + pushback(c); + } + gvar: + tokfix(); + set_yylval_name(rb_intern(tok())); + return tGVAR; + + case '&': /* $&: last match */ + case '`': /* $`: string before last match */ + case '\'': /* $': string after last match */ + case '+': /* $+: string matches last paren. */ + if (last_state == EXPR_FNAME) { + tokadd('$'); + tokadd(c); + goto gvar; + } + set_yylval_node(NEW_BACK_REF(c)); + return tBACK_REF; + + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + tokadd('$'); + do { + tokadd(c); + c = nextc(); + } while (c != -1 && ISDIGIT(c)); + pushback(c); + if (last_state == EXPR_FNAME) goto gvar; + tokfix(); + set_yylval_node(NEW_NTH_REF(atoi(tok()+1))); + return tNTH_REF; + + default: + if (!parser_is_identchar()) { + pushback(c); + return '$'; + } + case '0': + tokadd('$'); + } + break; + + case '@': + c = nextc(); + newtok(); + tokadd('@'); + if (c == '@') { + tokadd('@'); + c = nextc(); + } + if (c != -1 && ISDIGIT(c)) { + if (tokidx == 1) { + compile_error(PARSER_ARG "`@%c' is not allowed as an instance variable name", c); + } + else { + compile_error(PARSER_ARG "`@@%c' is not allowed as a class variable name", c); + } + return 0; + } + if (!parser_is_identchar()) { + pushback(c); + return '@'; + } + break; + + case '_': + if (was_bol() && whole_match_p("__END__", 7, 0)) { + ruby__end__seen = 1; + parser->eofp = Qtrue; +#ifndef RIPPER + return -1; +#else + lex_goto_eol(parser); + ripper_dispatch_scan_event(parser, k__END__); + return 0; +#endif + } + newtok(); + break; + + default: + if (!parser_is_identchar()) { + rb_compile_error(PARSER_ARG "Invalid char `\\x%02X' in expression", c); + goto retry; + } + + newtok(); + break; + } 
+ + mb = ENC_CODERANGE_7BIT; + do { + if (!ISASCII(c)) mb = ENC_CODERANGE_UNKNOWN; + if (tokadd_mbchar(c) == -1) return 0; + c = nextc(); + } while (parser_is_identchar()); + switch (tok()[0]) { + case '@': case '$': + pushback(c); + break; + default: + if ((c == '!' || c == '?') && !peek('=')) { + tokadd(c); + } + else { + pushback(c); + } + } + tokfix(); + + { + int result = 0; + + last_state = lex_state; + switch (tok()[0]) { + case '$': + lex_state = EXPR_END; + result = tGVAR; + break; + case '@': + lex_state = EXPR_END; + if (tok()[1] == '@') + result = tCVAR; + else + result = tIVAR; + break; + + default: + if (toklast() == '!' || toklast() == '?') { + result = tFID; + } + else { + if (lex_state == EXPR_FNAME) { + if ((c = nextc()) == '=' && !peek('~') && !peek('>') && + (!peek('=') || (lex_p + 1 < lex_pend && lex_p[1] == '>'))) { + result = tIDENTIFIER; + tokadd(c); + tokfix(); + } + else { + pushback(c); + } + } + if (result == 0 && ISUPPER(tok()[0])) { + result = tCONSTANT; + } + else { + result = tIDENTIFIER; + } + } + + if ((lex_state == EXPR_BEG && !cmd_state) || + lex_state == EXPR_ARG || + lex_state == EXPR_CMDARG) { + if (peek(':') && !(lex_p + 1 < lex_pend && lex_p[1] == ':')) { + lex_state = EXPR_BEG; + nextc(); + set_yylval_name(TOK_INTERN(!ENC_SINGLE(mb))); + return tLABEL; + } + } + if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) { + const struct kwtable *kw; + + /* See if it is a reserved word. 
*/ + kw = rb_reserved_word(tok(), toklen()); + if (kw) { + enum lex_state_e state = lex_state; + lex_state = kw->state; + if (state == EXPR_FNAME) { + set_yylval_name(rb_intern(kw->name)); + return kw->id[0]; + } + if (kw->id[0] == keyword_do) { + command_start = Qtrue; + if (lpar_beg && lpar_beg == paren_nest) { + lpar_beg = 0; + --paren_nest; + return keyword_do_LAMBDA; + } + if (COND_P()) return keyword_do_cond; + if (CMDARG_P() && state != EXPR_CMDARG) + return keyword_do_block; + if (state == EXPR_ENDARG || state == EXPR_BEG) + return keyword_do_block; + return keyword_do; + } + if (state == EXPR_BEG || state == EXPR_VALUE) + return kw->id[0]; + else { + if (kw->id[0] != kw->id[1]) + lex_state = EXPR_BEG; + return kw->id[1]; + } + } + } + + if (IS_BEG() || + lex_state == EXPR_DOT || + IS_ARG()) { + if (cmd_state) { + lex_state = EXPR_CMDARG; + } + else { + lex_state = EXPR_ARG; + } + } + else { + lex_state = EXPR_END; + } + } + { + ID ident = TOK_INTERN(!ENC_SINGLE(mb)); + + set_yylval_name(ident); + if (last_state != EXPR_DOT && is_local_id(ident) && lvar_defined(ident)) { + lex_state = EXPR_END; + } + } + return result; + } +} + +#if YYPURE +static int +yylex(void *lval, void *p) +#else +yylex(void *p) +#endif +{ + struct parser_params *parser = (struct parser_params*)p; + int t; + +#if YYPURE + parser->parser_yylval = lval; + parser->parser_yylval->val = Qundef; +#endif + t = parser_yylex(parser); +#ifdef RIPPER + if (!NIL_P(parser->delayed)) { + ripper_dispatch_delayed_token(parser, t); + } + if (t != 0) + ripper_dispatch_scan_event(parser, t); +#endif + + return t; +} + +#ifndef RIPPER +static NODE* +node_newnode(struct parser_params *parser, enum node_type type, VALUE a0, VALUE a1, VALUE a2) +{ + NODE *n = (rb_node_newnode)(type, a0, a1, a2); + nd_set_line(n, ruby_sourceline); + return n; +} + +enum node_type +nodetype(NODE *node) /* for debug */ +{ + return (enum node_type)nd_type(node); +} + +int +nodeline(NODE *node) +{ + return nd_line(node); +} + 
+static NODE* +newline_node(NODE *node) +{ + if (node) { + node = remove_begin(node); + node->flags |= NODE_FL_NEWLINE; + } + return node; +} + +static void +fixpos(NODE *node, NODE *orig) +{ + if (!node) return; + if (!orig) return; + if (orig == (NODE*)1) return; + nd_set_line(node, nd_line(orig)); +} + +static void +parser_warning(struct parser_params *parser, NODE *node, const char *mesg) +{ + rb_compile_warning(ruby_sourcefile, nd_line(node), "%s", mesg); +} +#define parser_warning(node, mesg) parser_warning(parser, node, mesg) + +static void +parser_warn(struct parser_params *parser, NODE *node, const char *mesg) +{ + rb_compile_warn(ruby_sourcefile, nd_line(node), "%s", mesg); +} +#define parser_warn(node, mesg) parser_warn(parser, node, mesg) + +static NODE* +block_append_gen(struct parser_params *parser, NODE *head, NODE *tail) +{ + NODE *end, *h = head, *nd; + + if (tail == 0) return head; + + if (h == 0) return tail; + switch (nd_type(h)) { + case NODE_LIT: + case NODE_STR: + case NODE_SELF: + case NODE_TRUE: + case NODE_FALSE: + case NODE_NIL: + parser_warning(h, "unused literal ignored"); + return tail; + default: + h = end = NEW_BLOCK(head); + end->nd_end = end; + fixpos(end, head); + head = end; + break; + case NODE_BLOCK: + end = h->nd_end; + break; + } + + nd = end->nd_head; + switch (nd_type(nd)) { + case NODE_RETURN: + case NODE_BREAK: + case NODE_NEXT: + case NODE_REDO: + case NODE_RETRY: + if (RTEST(ruby_verbose)) { + parser_warning(nd, "statement not reached"); + } + break; + + default: + break; + } + + if (nd_type(tail) != NODE_BLOCK) { + tail = NEW_BLOCK(tail); + tail->nd_end = tail; + } + end->nd_next = tail; + h->nd_end = tail->nd_end; + return head; +} + +/* append item to the list */ +static NODE* +list_append_gen(struct parser_params *parser, NODE *list, NODE *item) +{ + NODE *last; + + if (list == 0) return NEW_LIST(item); + if (list->nd_next) { + last = list->nd_next->nd_end; + } + else { + last = list; + } + + list->nd_alen += 1; + 
last->nd_next = NEW_LIST(item); + list->nd_next->nd_end = last->nd_next; + return list; +} + +/* concat two lists */ +static NODE* +list_concat_gen(struct parser_params *parser, NODE *head, NODE *tail) +{ + NODE *last; + + if (head->nd_next) { + last = head->nd_next->nd_end; + } + else { + last = head; + } + + head->nd_alen += tail->nd_alen; + last->nd_next = tail; + if (tail->nd_next) { + head->nd_next->nd_end = tail->nd_next->nd_end; + } + else { + head->nd_next->nd_end = tail; + } + + return head; +} + +static int +literal_concat0(struct parser_params *parser, VALUE head, VALUE tail) +{ + if (!rb_enc_compatible(head, tail)) { + compile_error(PARSER_ARG "string literal encodings differ (%s / %s)", + rb_enc_name(rb_enc_get(head)), + rb_enc_name(rb_enc_get(tail))); + rb_str_resize(head, 0); + rb_str_resize(tail, 0); + return 0; + } + rb_str_buf_append(head, tail); + return 1; +} + +/* concat two string literals */ +static NODE * +literal_concat_gen(struct parser_params *parser, NODE *head, NODE *tail) +{ + enum node_type htype; + + if (!head) return tail; + if (!tail) return head; + + htype = nd_type(head); + if (htype == NODE_EVSTR) { + NODE *node = NEW_DSTR(STR_NEW0()); + head = list_append(node, head); + } + switch (nd_type(tail)) { + case NODE_STR: + if (htype == NODE_STR) { + if (!literal_concat0(parser, head->nd_lit, tail->nd_lit)) { + error: + rb_gc_force_recycle((VALUE)head); + rb_gc_force_recycle((VALUE)tail); + return 0; + } + rb_gc_force_recycle((VALUE)tail); + } + else { + list_append(head, tail); + } + break; + + case NODE_DSTR: + if (htype == NODE_STR) { + if (!literal_concat0(parser, head->nd_lit, tail->nd_lit)) + goto error; + tail->nd_lit = head->nd_lit; + rb_gc_force_recycle((VALUE)head); + head = tail; + } + else if (NIL_P(tail->nd_lit)) { + list_concat(head, tail->nd_next); + rb_gc_force_recycle((VALUE)tail); + } + else { + nd_set_type(tail, NODE_ARRAY); + tail->nd_head = NEW_STR(tail->nd_lit); + list_concat(head, tail); + } + break; + + case 
NODE_EVSTR: + if (htype == NODE_STR) { + nd_set_type(head, NODE_DSTR); + head->nd_alen = 1; + } + list_append(head, tail); + break; + } + return head; +} + +static NODE * +evstr2dstr_gen(struct parser_params *parser, NODE *node) +{ + if (nd_type(node) == NODE_EVSTR) { + node = list_append(NEW_DSTR(STR_NEW0()), node); + } + return node; +} + +static NODE * +new_evstr_gen(struct parser_params *parser, NODE *node) +{ + NODE *head = node; + + if (node) { + switch (nd_type(node)) { + case NODE_STR: case NODE_DSTR: case NODE_EVSTR: + return node; + } + } + return NEW_EVSTR(head); +} + +static NODE * +call_bin_op_gen(struct parser_params *parser, NODE *recv, ID id, NODE *arg1) +{ + value_expr(recv); + value_expr(arg1); + return NEW_CALL(recv, id, NEW_LIST(arg1)); +} + +static NODE * +call_uni_op_gen(struct parser_params *parser, NODE *recv, ID id) +{ + value_expr(recv); + return NEW_CALL(recv, id, 0); +} + +static NODE* +match_op_gen(struct parser_params *parser, NODE *node1, NODE *node2) +{ + value_expr(node1); + value_expr(node2); + if (node1) { + switch (nd_type(node1)) { + case NODE_DREGX: + case NODE_DREGX_ONCE: + return NEW_MATCH2(node1, node2); + + case NODE_LIT: + if (TYPE(node1->nd_lit) == T_REGEXP) { + return NEW_MATCH2(node1, node2); + } + } + } + + if (node2) { + switch (nd_type(node2)) { + case NODE_DREGX: + case NODE_DREGX_ONCE: + return NEW_MATCH3(node2, node1); + + case NODE_LIT: + if (TYPE(node2->nd_lit) == T_REGEXP) { + return NEW_MATCH3(node2, node1); + } + } + } + + return NEW_CALL(node1, tMATCH, NEW_LIST(node2)); +} + +static NODE* +gettable_gen(struct parser_params *parser, ID id) +{ + if (id == keyword_self) { + return NEW_SELF(); + } + else if (id == keyword_nil) { + return NEW_NIL(); + } + else if (id == keyword_true) { + return NEW_TRUE(); + } + else if (id == keyword_false) { + return NEW_FALSE(); + } + else if (id == keyword__FILE__) { + return NEW_STR(rb_external_str_new_with_enc(ruby_sourcefile, strlen(ruby_sourcefile), + 
rb_filesystem_encoding())); + } + else if (id == keyword__LINE__) { + return NEW_LIT(INT2FIX(ruby_sourceline)); + } + else if (id == keyword__ENCODING__) { + return NEW_LIT(rb_enc_from_encoding(parser->enc)); + } + else if (is_local_id(id)) { + if (dyna_in_block() && dvar_defined(id)) return NEW_DVAR(id); + if (local_id(id)) return NEW_LVAR(id); + /* method call without arguments */ + return NEW_VCALL(id); + } + else if (is_global_id(id)) { + return NEW_GVAR(id); + } + else if (is_instance_id(id)) { + return NEW_IVAR(id); + } + else if (is_const_id(id)) { + return NEW_CONST(id); + } + else if (is_class_id(id)) { + return NEW_CVAR(id); + } + compile_error(PARSER_ARG "identifier %s is not valid to get", rb_id2name(id)); + return 0; +} +#endif /* !RIPPER */ + +#ifdef RIPPER +static VALUE +assignable_gen(struct parser_params *parser, VALUE lhs) +#else +static NODE* +assignable_gen(struct parser_params *parser, ID id, NODE *val) +#endif +{ +#ifdef RIPPER + ID id = get_id(lhs); +# define RETURN(x) return get_value(lhs) +# define ERROR(x) dispatch1(assign_error, lhs) +#else +# define RETURN(x) return x +# define ERROR(x) yyerror(x) +#endif + if (!id) RETURN(0); + if (id == keyword_self) { + ERROR("Can't change the value of self"); + } + else if (id == keyword_nil) { + ERROR("Can't assign to nil"); + } + else if (id == keyword_true) { + ERROR("Can't assign to true"); + } + else if (id == keyword_false) { + ERROR("Can't assign to false"); + } + else if (id == keyword__FILE__) { + ERROR("Can't assign to __FILE__"); + } + else if (id == keyword__LINE__) { + ERROR("Can't assign to __LINE__"); + } + else if (id == keyword__ENCODING__) { + ERROR("Can't assign to __ENCODING__"); + } + else if (is_local_id(id)) { + if (dyna_in_block()) { + if (dvar_curr(id)) { + RETURN(NEW_DASGN_CURR(id, val)); + } + else if (dvar_defined(id)) { + RETURN(NEW_DASGN(id, val)); + } + else if (local_id(id)) { + RETURN(NEW_LASGN(id, val)); + } + else { + dyna_var(id); + RETURN(NEW_DASGN_CURR(id, val)); 
+ } + } + else { + if (!local_id(id)) { + local_var(id); + } + RETURN(NEW_LASGN(id, val)); + } + } + else if (is_global_id(id)) { + RETURN(NEW_GASGN(id, val)); + } + else if (is_instance_id(id)) { + RETURN(NEW_IASGN(id, val)); + } + else if (is_const_id(id)) { + if (!in_def && !in_single) + RETURN(NEW_CDECL(id, val, 0)); + ERROR("dynamic constant assignment"); + } + else if (is_class_id(id)) { + RETURN(NEW_CVASGN(id, val)); + } + else { + compile_error(PARSER_ARG "identifier %s is not valid to set", rb_id2name(id)); + } + RETURN(0); +#undef RETURN +#undef ERROR +} + +static ID +shadowing_lvar_gen(struct parser_params *parser, ID name) +{ + ID uscore; + + CONST_ID(uscore, "_"); + if (uscore == name) return name; + if (dyna_in_block()) { + if (dvar_curr(name)) { + yyerror("duplicated argument name"); + } + else if (dvar_defined(name) || local_id(name)) { + rb_warningS("shadowing outer local variable - %s", rb_id2name(name)); + vtable_add(lvtbl->vars, name); + } + } + else { + if (local_id(name)) { + yyerror("duplicated argument name"); + } + } + return name; +} + +static void +new_bv_gen(struct parser_params *parser, ID name) +{ + if (!name) return; + if (!is_local_id(name)) { + compile_error(PARSER_ARG "invalid local variable - %s", + rb_id2name(name)); + return; + } + shadowing_lvar(name); + dyna_var(name); +} + +#ifndef RIPPER +static NODE * +aryset_gen(struct parser_params *parser, NODE *recv, NODE *idx) +{ + if (recv && nd_type(recv) == NODE_SELF) + recv = (NODE *)1; + return NEW_ATTRASGN(recv, tASET, idx); +} + +static void +block_dup_check_gen(struct parser_params *parser, NODE *node1, NODE *node2) +{ + if (node2 && node1 && nd_type(node1) == NODE_BLOCK_PASS) { + compile_error(PARSER_ARG "both block arg and actual block given"); + } +} + +ID +rb_id_attrset(ID id) +{ + id &= ~ID_SCOPE_MASK; + id |= ID_ATTRSET; + return id; +} + +static NODE * +attrset_gen(struct parser_params *parser, NODE *recv, ID id) +{ + if (recv && nd_type(recv) == NODE_SELF) + recv = 
(NODE *)1; + return NEW_ATTRASGN(recv, rb_id_attrset(id), 0); +} + +static void +rb_backref_error_gen(struct parser_params *parser, NODE *node) +{ + switch (nd_type(node)) { + case NODE_NTH_REF: + compile_error(PARSER_ARG "Can't set variable $%ld", node->nd_nth); + break; + case NODE_BACK_REF: + compile_error(PARSER_ARG "Can't set variable $%c", (int)node->nd_nth); + break; + } +} + +static NODE * +arg_concat_gen(struct parser_params *parser, NODE *node1, NODE *node2) +{ + if (!node2) return node1; + switch (nd_type(node1)) { + case NODE_BLOCK_PASS: + node1->nd_iter = arg_concat(node1->nd_iter, node2); + return node1; + case NODE_ARGSPUSH: + if (nd_type(node2) != NODE_ARRAY) break; + node1->nd_body = list_concat(NEW_LIST(node1->nd_body), node2); + nd_set_type(node1, NODE_ARGSCAT); + return node1; + case NODE_ARGSCAT: + if (nd_type(node2) != NODE_ARRAY || + nd_type(node1->nd_body) != NODE_ARRAY) break; + node1->nd_body = list_concat(node1->nd_body, node2); + return node1; + } + return NEW_ARGSCAT(node1, node2); +} + +static NODE * +arg_append_gen(struct parser_params *parser, NODE *node1, NODE *node2) +{ + if (!node1) return NEW_LIST(node2); + switch (nd_type(node1)) { + case NODE_ARRAY: + return list_append(node1, node2); + case NODE_BLOCK_PASS: + node1->nd_head = arg_append(node1->nd_head, node2); + return node1; + case NODE_ARGSPUSH: + node1->nd_body = list_append(NEW_LIST(node1->nd_body), node2); + nd_set_type(node1, NODE_ARGSCAT); + return node1; + } + return NEW_ARGSPUSH(node1, node2); +} + +static NODE * +splat_array(NODE* node) +{ + if (nd_type(node) == NODE_SPLAT) node = node->nd_head; + if (nd_type(node) == NODE_ARRAY) return node; + return 0; +} + +static NODE * +node_assign_gen(struct parser_params *parser, NODE *lhs, NODE *rhs) +{ + if (!lhs) return 0; + + switch (nd_type(lhs)) { + case NODE_GASGN: + case NODE_IASGN: + case NODE_IASGN2: + case NODE_LASGN: + case NODE_DASGN: + case NODE_DASGN_CURR: + case NODE_MASGN: + case NODE_CDECL: + case 
NODE_CVASGN: + lhs->nd_value = rhs; + break; + + case NODE_ATTRASGN: + case NODE_CALL: + lhs->nd_args = arg_append(lhs->nd_args, rhs); + break; + + default: + /* should not happen */ + break; + } + + return lhs; +} + +static int +value_expr_gen(struct parser_params *parser, NODE *node) +{ + int cond = 0; + + if (!node) { + rb_warning0("empty expression"); + } + while (node) { + switch (nd_type(node)) { + case NODE_DEFN: + case NODE_DEFS: + parser_warning(node, "void value expression"); + return Qfalse; + + case NODE_RETURN: + case NODE_BREAK: + case NODE_NEXT: + case NODE_REDO: + case NODE_RETRY: + if (!cond) yyerror("void value expression"); + /* or "control never reach"? */ + return Qfalse; + + case NODE_BLOCK: + while (node->nd_next) { + node = node->nd_next; + } + node = node->nd_head; + break; + + case NODE_BEGIN: + node = node->nd_body; + break; + + case NODE_IF: + if (!node->nd_body) { + node = node->nd_else; + break; + } + else if (!node->nd_else) { + node = node->nd_body; + break; + } + if (!value_expr(node->nd_body)) return Qfalse; + node = node->nd_else; + break; + + case NODE_AND: + case NODE_OR: + cond = 1; + node = node->nd_2nd; + break; + + default: + return Qtrue; + } + } + + return Qtrue; +} + +static void +void_expr_gen(struct parser_params *parser, NODE *node) +{ + const char *useless = 0; + + if (!RTEST(ruby_verbose)) return; + + if (!node) return; + switch (nd_type(node)) { + case NODE_CALL: + switch (node->nd_mid) { + case '+': + case '-': + case '*': + case '/': + case '%': + case tPOW: + case tUPLUS: + case tUMINUS: + case '|': + case '^': + case '&': + case tCMP: + case '>': + case tGEQ: + case '<': + case tLEQ: + case tEQ: + case tNEQ: + useless = rb_id2name(node->nd_mid); + break; + } + break; + + case NODE_LVAR: + case NODE_DVAR: + case NODE_GVAR: + case NODE_IVAR: + case NODE_CVAR: + case NODE_NTH_REF: + case NODE_BACK_REF: + useless = "a variable"; + break; + case NODE_CONST: + useless = "a constant"; + break; + case NODE_LIT: + case 
NODE_STR: + case NODE_DSTR: + case NODE_DREGX: + case NODE_DREGX_ONCE: + useless = "a literal"; + break; + case NODE_COLON2: + case NODE_COLON3: + useless = "::"; + break; + case NODE_DOT2: + useless = ".."; + break; + case NODE_DOT3: + useless = "..."; + break; + case NODE_SELF: + useless = "self"; + break; + case NODE_NIL: + useless = "nil"; + break; + case NODE_TRUE: + useless = "true"; + break; + case NODE_FALSE: + useless = "false"; + break; + case NODE_DEFINED: + useless = "defined?"; + break; + } + + if (useless) { + int line = ruby_sourceline; + + ruby_sourceline = nd_line(node); + rb_warnS("useless use of %s in void context", useless); + ruby_sourceline = line; + } +} + +static void +void_stmts_gen(struct parser_params *parser, NODE *node) +{ + if (!RTEST(ruby_verbose)) return; + if (!node) return; + if (nd_type(node) != NODE_BLOCK) return; + + for (;;) { + if (!node->nd_next) return; + void_expr0(node->nd_head); + node = node->nd_next; + } +} + +static NODE * +remove_begin(NODE *node) +{ + NODE **n = &node, *n1 = node; + while (n1 && nd_type(n1) == NODE_BEGIN && n1->nd_body) { + *n = n1 = n1->nd_body; + } + return node; +} + +static void +reduce_nodes_gen(struct parser_params *parser, NODE **body) +{ + NODE *node = *body; + + if (!node) { + *body = NEW_NIL(); + return; + } +#define subnodes(n1, n2) \ + ((!node->n1) ? (node->n2 ? (body = &node->n2, 1) : 0) : \ + (!node->n2) ? 
(body = &node->n1, 1) : \ + (reduce_nodes(&node->n1), body = &node->n2, 1)) + + while (node) { + int newline = node->flags & NODE_FL_NEWLINE; + switch (nd_type(node)) { + end: + case NODE_NIL: + *body = 0; + return; + case NODE_RETURN: + *body = node = node->nd_stts; + if (newline && node) node->flags |= NODE_FL_NEWLINE; + continue; + case NODE_BEGIN: + *body = node = node->nd_body; + if (newline && node) node->flags |= NODE_FL_NEWLINE; + continue; + case NODE_BLOCK: + body = &node->nd_end->nd_head; + break; + case NODE_IF: + if (subnodes(nd_body, nd_else)) break; + return; + case NODE_CASE: + body = &node->nd_body; + break; + case NODE_WHEN: + if (!subnodes(nd_body, nd_next)) goto end; + break; + case NODE_ENSURE: + if (!subnodes(nd_head, nd_resq)) goto end; + break; + case NODE_RESCUE: + if (!subnodes(nd_head, nd_resq)) goto end; + break; + default: + return; + } + node = *body; + if (newline && node) node->flags |= NODE_FL_NEWLINE; + } + +#undef subnodes +} + +static int +assign_in_cond(struct parser_params *parser, NODE *node) +{ + switch (nd_type(node)) { + case NODE_MASGN: + yyerror("multiple assignment in conditional"); + return 1; + + case NODE_LASGN: + case NODE_DASGN: + case NODE_GASGN: + case NODE_IASGN: + break; + + default: + return 0; + } + + switch (nd_type(node->nd_value)) { + case NODE_LIT: + case NODE_STR: + case NODE_NIL: + case NODE_TRUE: + case NODE_FALSE: + /* reports always */ + parser_warn(node->nd_value, "found = in conditional, should be =="); + return 1; + + case NODE_DSTR: + case NODE_XSTR: + case NODE_DXSTR: + case NODE_EVSTR: + case NODE_DREGX: + default: + break; + } + return 1; +} + +static void +warn_unless_e_option(struct parser_params *parser, NODE *node, const char *str) +{ + if (!e_option_supplied(parser)) parser_warn(node, str); +} + +static void +warning_unless_e_option(struct parser_params *parser, NODE *node, const char *str) +{ + if (!e_option_supplied(parser)) parser_warning(node, str); +} + +static void +fixup_nodes(NODE 
**rootnode) +{ + NODE *node, *next, *head; + + for (node = *rootnode; node; node = next) { + enum node_type type; + VALUE val; + + next = node->nd_next; + head = node->nd_head; + rb_gc_force_recycle((VALUE)node); + *rootnode = next; + switch (type = nd_type(head)) { + case NODE_DOT2: + case NODE_DOT3: + val = rb_range_new(head->nd_beg->nd_lit, head->nd_end->nd_lit, + type == NODE_DOT3 ? Qtrue : Qfalse); + rb_gc_force_recycle((VALUE)head->nd_beg); + rb_gc_force_recycle((VALUE)head->nd_end); + nd_set_type(head, NODE_LIT); + head->nd_lit = val; + break; + default: + break; + } + } +} + +static NODE *cond0(struct parser_params*,NODE*); + +static NODE* +range_op(struct parser_params *parser, NODE *node) +{ + enum node_type type; + + if (node == 0) return 0; + + type = nd_type(node); + value_expr(node); + if (type == NODE_LIT && FIXNUM_P(node->nd_lit)) { + warn_unless_e_option(parser, node, "integer literal in conditional range"); + return NEW_CALL(node, tEQ, NEW_LIST(NEW_GVAR(rb_intern("$.")))); + } + return cond0(parser, node); +} + +static int +literal_node(NODE *node) +{ + if (!node) return 1; /* same as NODE_NIL */ + switch (nd_type(node)) { + case NODE_LIT: + case NODE_STR: + case NODE_DSTR: + case NODE_EVSTR: + case NODE_DREGX: + case NODE_DREGX_ONCE: + case NODE_DSYM: + return 2; + case NODE_TRUE: + case NODE_FALSE: + case NODE_NIL: + return 1; + } + return 0; +} + +static NODE* +cond0(struct parser_params *parser, NODE *node) +{ + if (node == 0) return 0; + assign_in_cond(parser, node); + + switch (nd_type(node)) { + case NODE_DSTR: + case NODE_EVSTR: + case NODE_STR: + rb_warn0("string literal in condition"); + break; + + case NODE_DREGX: + case NODE_DREGX_ONCE: + warning_unless_e_option(parser, node, "regex literal in condition"); + return NEW_MATCH2(node, NEW_GVAR(rb_intern("$_"))); + + case NODE_AND: + case NODE_OR: + node->nd_1st = cond0(parser, node->nd_1st); + node->nd_2nd = cond0(parser, node->nd_2nd); + break; + + case NODE_DOT2: + case NODE_DOT3: + 
node->nd_beg = range_op(parser, node->nd_beg); + node->nd_end = range_op(parser, node->nd_end); + if (nd_type(node) == NODE_DOT2) nd_set_type(node,NODE_FLIP2); + else if (nd_type(node) == NODE_DOT3) nd_set_type(node, NODE_FLIP3); + if (!e_option_supplied(parser)) { + int b = literal_node(node->nd_beg); + int e = literal_node(node->nd_end); + if ((b == 1 && e == 1) || (b + e >= 2 && RTEST(ruby_verbose))) { + parser_warn(node, "range literal in condition"); + } + } + break; + + case NODE_DSYM: + parser_warning(node, "literal in condition"); + break; + + case NODE_LIT: + if (TYPE(node->nd_lit) == T_REGEXP) { + warn_unless_e_option(parser, node, "regex literal in condition"); + nd_set_type(node, NODE_MATCH); + } + else { + parser_warning(node, "literal in condition"); + } + default: + break; + } + return node; +} + +static NODE* +cond_gen(struct parser_params *parser, NODE *node) +{ + if (node == 0) return 0; + return cond0(parser, node); +} + +static NODE* +logop_gen(struct parser_params *parser, enum node_type type, NODE *left, NODE *right) +{ + value_expr(left); + if (left && nd_type(left) == type) { + NODE *node = left, *second; + while ((second = node->nd_2nd) != 0 && nd_type(second) == type) { + node = second; + } + node->nd_2nd = NEW_NODE(type, second, right, 0); + return left; + } + return NEW_NODE(type, left, right, 0); +} + +static void +no_blockarg(struct parser_params *parser, NODE *node) +{ + if (node && nd_type(node) == NODE_BLOCK_PASS) { + compile_error(PARSER_ARG "block argument should not be given"); + } +} + +static NODE * +ret_args_gen(struct parser_params *parser, NODE *node) +{ + if (node) { + no_blockarg(parser, node); + if (nd_type(node) == NODE_ARRAY) { + if (node->nd_next == 0) { + node = node->nd_head; + } + else { + nd_set_type(node, NODE_VALUES); + } + } + } + return node; +} + +static NODE * +new_yield_gen(struct parser_params *parser, NODE *node) +{ + long state = Qtrue; + + if (node) { + no_blockarg(parser, node); + if (node && 
nd_type(node) == NODE_SPLAT) { + state = Qtrue; + } + } + else { + state = Qfalse; + } + return NEW_YIELD(node, state); +} + +static NODE* +negate_lit(NODE *node) +{ + switch (TYPE(node->nd_lit)) { + case T_FIXNUM: + node->nd_lit = LONG2FIX(-FIX2LONG(node->nd_lit)); + break; + case T_BIGNUM: + node->nd_lit = rb_funcall(node->nd_lit,tUMINUS,0,0); + break; + case T_FLOAT: + RFLOAT(node->nd_lit)->float_value = -RFLOAT_VALUE(node->nd_lit); + break; + default: + break; + } + return node; +} + +static NODE * +arg_blk_pass(NODE *node1, NODE *node2) +{ + if (node2) { + node2->nd_head = node1; + return node2; + } + return node1; +} + +static NODE* +new_args_gen(struct parser_params *parser, NODE *m, NODE *o, ID r, NODE *p, ID b) +{ + int saved_line = ruby_sourceline; + NODE *node; + NODE *i1, *i2 = 0; + + node = NEW_ARGS(m ? m->nd_plen : 0, o); + i1 = m ? m->nd_next : 0; + node->nd_next = NEW_ARGS_AUX(r, b); + + if (p) { + i2 = p->nd_next; + node->nd_next->nd_next = NEW_ARGS_AUX(p->nd_pid, p->nd_plen); + } + else if (i1) { + node->nd_next->nd_next = NEW_ARGS_AUX(0, 0); + } + if (i1 || i2) { + node->nd_next->nd_next->nd_next = NEW_NODE(NODE_AND, i1, i2, 0); + } + ruby_sourceline = saved_line; + return node; +} +#endif /* !RIPPER */ + +static void +local_push_gen(struct parser_params *parser, int inherit_dvars) +{ + struct local_vars *local; + + local = ALLOC(struct local_vars); + local->prev = lvtbl; + local->args = vtable_alloc(0); + local->vars = vtable_alloc(inherit_dvars ? 
DVARS_INHERIT : DVARS_TOPSCOPE); + lvtbl = local; +} + +static void +local_pop_gen(struct parser_params *parser) +{ + struct local_vars *local = lvtbl->prev; + vtable_free(lvtbl->args); + vtable_free(lvtbl->vars); + xfree(lvtbl); + lvtbl = local; +} + +#ifndef RIPPER +static ID* +vtable_tblcpy(ID *buf, const struct vtable *src) +{ + int i, cnt = vtable_size(src); + + if (cnt > 0) { + buf[0] = cnt; + for (i = 0; i < cnt; i++) { + buf[i] = src->tbl[i]; + } + return buf; + } + return 0; +} + +static ID* +local_tbl_gen(struct parser_params *parser) +{ + int cnt = vtable_size(lvtbl->args) + vtable_size(lvtbl->vars); + ID *buf; + + if (cnt <= 0) return 0; + buf = ALLOC_N(ID, cnt + 1); + vtable_tblcpy(buf+1, lvtbl->args); + vtable_tblcpy(buf+vtable_size(lvtbl->args)+1, lvtbl->vars); + buf[0] = cnt; + return buf; +} +#endif + +static int +arg_var_gen(struct parser_params *parser, ID id) +{ + vtable_add(lvtbl->args, id); + return vtable_size(lvtbl->args) - 1; +} + +static int +local_var_gen(struct parser_params *parser, ID id) +{ + vtable_add(lvtbl->vars, id); + return vtable_size(lvtbl->vars) - 1; +} + +static int +local_id_gen(struct parser_params *parser, ID id) +{ + struct vtable *vars, *args; + + vars = lvtbl->vars; + args = lvtbl->args; + + while (vars && POINTER_P(vars->prev)) { + vars = vars->prev; + args = args->prev; + } + + if (vars && vars->prev == DVARS_INHERIT) { + return rb_local_defined(id); + } + else { + return (vtable_included(args, id) || + vtable_included(vars, id)); + } +} + +static const struct vtable * +dyna_push_gen(struct parser_params *parser) +{ + lvtbl->args = vtable_alloc(lvtbl->args); + lvtbl->vars = vtable_alloc(lvtbl->vars); + return lvtbl->args; +} + +static void +dyna_pop_gen(struct parser_params *parser, const struct vtable *lvargs) +{ + struct vtable *tmp; + + while (lvtbl->args != lvargs) { + local_pop(); + } + tmp = lvtbl->args; + lvtbl->args = lvtbl->args->prev; + vtable_free(tmp); + tmp = lvtbl->vars; + lvtbl->vars = 
lvtbl->vars->prev; + vtable_free(tmp); +} + +static int +dyna_in_block_gen(struct parser_params *parser) +{ + return POINTER_P(lvtbl->vars) && lvtbl->vars->prev != DVARS_TOPSCOPE; +} + +static int +dvar_defined_gen(struct parser_params *parser, ID id) +{ + struct vtable *vars, *args; + + args = lvtbl->args; + vars = lvtbl->vars; + + while (POINTER_P(vars)) { + if (vtable_included(args, id)) { + return 1; + } + if (vtable_included(vars, id)) { + return 1; + } + args = args->prev; + vars = vars->prev; + } + + if (vars == DVARS_INHERIT) { + return rb_dvar_defined(id); + } + + return 0; +} + +static int +dvar_curr_gen(struct parser_params *parser, ID id) +{ + return (vtable_included(lvtbl->args, id) || + vtable_included(lvtbl->vars, id)); +} + +#ifndef RIPPER +VALUE rb_reg_compile(VALUE str, int options); +VALUE rb_reg_check_preprocess(VALUE); + +static void +reg_fragment_setenc_gen(struct parser_params* parser, VALUE str, int options) +{ + int c = RE_OPTION_ENCODING_IDX(options); + + if (c) { + int opt, idx; + rb_char_to_option_kcode(c, &opt, &idx); + if (idx != ENCODING_GET(str) && + rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { + goto error; + } + ENCODING_SET(str, idx); + } + else if (RE_OPTION_ENCODING_NONE(options)) { + if (!ENCODING_IS_ASCII8BIT(str) && + rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { + c = 'n'; + goto error; + } + rb_enc_associate(str, rb_ascii8bit_encoding()); + } + else if (parser->enc == rb_usascii_encoding()) { + if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { + /* raise in re.c */ + rb_enc_associate(str, rb_usascii_encoding()); + } + else { + rb_enc_associate(str, rb_ascii8bit_encoding()); + } + } + return; + + error: + compile_error(PARSER_ARG + "regexp encoding option '%c' differs from source encoding '%s'", + c, rb_enc_name(rb_enc_get(str))); +} + +static void +reg_fragment_check_gen(struct parser_params* parser, VALUE str, int options) +{ + VALUE err; + reg_fragment_setenc_gen(parser, str, options); + err = 
rb_reg_check_preprocess(str); + if (err != Qnil) { + err = rb_obj_as_string(err); + compile_error(PARSER_ARG "%s", RSTRING_PTR(err)); + RB_GC_GUARD(err); + } +} + +typedef struct { + struct parser_params* parser; + rb_encoding *enc; + NODE *succ_block; + NODE *fail_block; + int num; +} reg_named_capture_assign_t; + +int rb_enc_symname2_p(const char *name, int len, rb_encoding *enc); +static int +reg_named_capture_assign_iter(const OnigUChar *name, const OnigUChar *name_end, + int back_num, int *back_refs, OnigRegex regex, void *arg0) +{ + reg_named_capture_assign_t *arg = (reg_named_capture_assign_t*)arg0; + struct parser_params* parser = arg->parser; + rb_encoding *enc = arg->enc; + int len = name_end - name; + const char *s = (const char *)name; + ID var; + + arg->num++; + + if (arg->succ_block == 0) { + arg->succ_block = NEW_BEGIN(0); + arg->fail_block = NEW_BEGIN(0); + } + + if (!len || (*name != '_' && ISASCII(*name) && !rb_enc_islower(*name, enc)) || + rb_reserved_word(s, len) || !rb_enc_symname2_p(s, len, enc)) { + return ST_CONTINUE; + } + var = rb_intern3(s, len, enc); + if (dvar_defined(var) || local_id(var)) { + rb_warningS("named capture conflicts a local variable - %s", + rb_id2name(var)); + } + arg->succ_block = block_append(arg->succ_block, + newline_node(node_assign(assignable(var,0), + NEW_CALL( + gettable(rb_intern("$~")), + idAREF, + NEW_LIST(NEW_LIT(ID2SYM(var)))) + ))); + arg->fail_block = block_append(arg->fail_block, + newline_node(node_assign(assignable(var,0), NEW_LIT(Qnil)))); + return ST_CONTINUE; +} + +static NODE * +reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match) +{ + reg_named_capture_assign_t arg; + + arg.parser = parser; + arg.enc = rb_enc_get(regexp); + arg.succ_block = 0; + arg.fail_block = 0; + arg.num = 0; + onig_foreach_name(RREGEXP(regexp)->ptr, reg_named_capture_assign_iter, (void*)&arg); + + if (arg.num == 0) + return match; + + return + block_append( + newline_node(match), + 
NEW_IF(gettable(rb_intern("$~")), + block_append( + newline_node(arg.succ_block), + newline_node( + NEW_CALL( + gettable(rb_intern("$~")), + rb_intern("begin"), + NEW_LIST(NEW_LIT(INT2FIX(0)))))), + block_append( + newline_node(arg.fail_block), + newline_node( + NEW_LIT(Qnil))))); +} + +static VALUE +reg_compile_gen(struct parser_params* parser, VALUE str, int options) +{ + VALUE re; + VALUE err; + + reg_fragment_setenc(str, options); + err = rb_errinfo(); + re = rb_reg_compile(str, options & RE_OPTION_MASK); + if (NIL_P(re)) { + ID mesg = rb_intern("mesg"); + VALUE m = rb_attr_get(rb_errinfo(), mesg); + rb_set_errinfo(err); + if (!NIL_P(err)) { + rb_str_append(rb_str_cat(rb_attr_get(err, mesg), "\n", 1), m); + } + else { + compile_error(PARSER_ARG "%s", RSTRING_PTR(m)); + } + return Qnil; + } + return re; +} + +void +rb_gc_mark_parser(void) +{ +} + +NODE* +rb_parser_append_print(VALUE vparser, NODE *node) +{ + NODE *prelude = 0; + NODE *scope = node; + struct parser_params *parser; + + if (!node) return node; + + Data_Get_Struct(vparser, struct parser_params, parser); + + node = node->nd_body; + + if (nd_type(node) == NODE_PRELUDE) { + prelude = node; + node = node->nd_body; + } + + node = block_append(node, + NEW_FCALL(rb_intern("print"), + NEW_ARRAY(NEW_GVAR(rb_intern("$_"))))); + if (prelude) { + prelude->nd_body = node; + scope->nd_body = prelude; + } + else { + scope->nd_body = node; + } + + return scope; +} + +NODE * +rb_parser_while_loop(VALUE vparser, NODE *node, int chop, int split) +{ + NODE *prelude = 0; + NODE *scope = node; + struct parser_params *parser; + + if (!node) return node; + + Data_Get_Struct(vparser, struct parser_params, parser); + + node = node->nd_body; + + if (nd_type(node) == NODE_PRELUDE) { + prelude = node; + node = node->nd_body; + } + if (split) { + node = block_append(NEW_GASGN(rb_intern("$F"), + NEW_CALL(NEW_GVAR(rb_intern("$_")), + rb_intern("split"), 0)), + node); + } + if (chop) { + node = 
block_append(NEW_CALL(NEW_GVAR(rb_intern("$_")), + rb_intern("chop!"), 0), node); + } + + node = NEW_OPT_N(node); + + if (prelude) { + prelude->nd_body = node; + scope->nd_body = prelude; + } + else { + scope->nd_body = node; + } + + return scope; +} + +static const struct { + ID token; + const char *name; +} op_tbl[] = { + {tDOT2, ".."}, + {tDOT3, "..."}, + {'+', "+(binary)"}, + {'-', "-(binary)"}, + {tPOW, "**"}, + {tUPLUS, "+@"}, + {tUMINUS, "-@"}, + {tCMP, "<=>"}, + {tGEQ, ">="}, + {tLEQ, "<="}, + {tEQ, "=="}, + {tEQQ, "==="}, + {tNEQ, "!="}, + {tMATCH, "=~"}, + {tNMATCH, "!~"}, + {tAREF, "[]"}, + {tASET, "[]="}, + {tLSHFT, "<<"}, + {tRSHFT, ">>"}, + {tCOLON2, "::"}, +}; + +#define op_tbl_count (sizeof(op_tbl) / sizeof(op_tbl[0])) + +static struct symbols { + ID last_id; + st_table *sym_id; + st_table *id_str; + st_table *ivar2_id; + st_table *id_ivar2; + VALUE op_sym[tLAST_TOKEN]; +} global_symbols = {tLAST_ID}; + +static const struct st_hash_type symhash = { + rb_str_hash_cmp, + rb_str_hash, +}; + +struct ivar2_key { + ID id; + VALUE klass; +}; + +static int +ivar2_cmp(struct ivar2_key *key1, struct ivar2_key *key2) +{ + if (key1->id == key2->id && key1->klass == key2->klass) { + return 0; + } + return 1; +} + +static int +ivar2_hash(struct ivar2_key *key) +{ + return (key->id << 8) ^ (key->klass >> 2); +} + +static const struct st_hash_type ivar2_hash_type = { + ivar2_cmp, + ivar2_hash, +}; + +void +Init_sym(void) +{ + global_symbols.sym_id = st_init_table_with_size(&symhash, 1000); + global_symbols.id_str = st_init_numtable_with_size(1000); + global_symbols.ivar2_id = st_init_table_with_size(&ivar2_hash_type, 1000); + global_symbols.id_ivar2 = st_init_numtable_with_size(1000); + + Init_id(); +} + +void +rb_gc_mark_symbols(void) +{ + rb_mark_tbl(global_symbols.id_str); + rb_gc_mark_locations(global_symbols.op_sym, + global_symbols.op_sym + tLAST_TOKEN); +} +#endif /* !RIPPER */ + +static ID +internal_id_gen(struct parser_params *parser) +{ + ID id = 
(ID)vtable_size(lvtbl->args) + (ID)vtable_size(lvtbl->vars); + id += ((tLAST_TOKEN - ID_INTERNAL) >> ID_SCOPE_SHIFT) + 1; + return ID_INTERNAL | (id << ID_SCOPE_SHIFT); +} + +#ifndef RIPPER +static int +is_special_global_name(const char *m, const char *e, rb_encoding *enc) +{ + int mb = 0; + + if (m >= e) return 0; + switch (*m) { + case '~': case '*': case '$': case '?': case '!': case '@': + case '/': case '\\': case ';': case ',': case '.': case '=': + case ':': case '<': case '>': case '\"': + case '&': case '`': case '\'': case '+': + case '0': + ++m; + break; + case '-': + ++m; + if (m < e && is_identchar(m, e, enc)) { + if (!ISASCII(*m)) mb = 1; + m += rb_enc_mbclen(m, e, enc); + } + break; + default: + if (!rb_enc_isdigit(*m, enc)) return 0; + do { + if (!ISASCII(*m)) mb = 1; + ++m; + } while (rb_enc_isdigit(*m, enc)); + } + return m == e ? mb + 1 : 0; +} + +int +rb_symname_p(const char *name) +{ + return rb_enc_symname_p(name, rb_ascii8bit_encoding()); +} + +int +rb_enc_symname_p(const char *name, rb_encoding *enc) +{ + return rb_enc_symname2_p(name, strlen(name), enc); +} + +int +rb_enc_symname2_p(const char *name, int len, rb_encoding *enc) +{ + const char *m = name; + const char *e = m + len; + int localid = Qfalse; + + if (!m) return Qfalse; + switch (*m) { + case '\0': + return Qfalse; + + case '$': + if (is_special_global_name(++m, e, enc)) return Qtrue; + goto id; + + case '@': + if (*++m == '@') ++m; + goto id; + + case '<': + switch (*++m) { + case '<': ++m; break; + case '=': if (*++m == '>') ++m; break; + default: break; + } + break; + + case '>': + switch (*++m) { + case '>': case '=': ++m; break; + } + break; + + case '=': + switch (*++m) { + case '~': ++m; break; + case '=': if (*++m == '=') ++m; break; + default: return Qfalse; + } + break; + + case '*': + if (*++m == '*') ++m; + break; + + case '+': case '-': + if (*++m == '@') ++m; + break; + + case '|': case '^': case '&': case '/': case '%': case '~': case '`': + ++m; + break; + + case 
'[': + if (*++m != ']') return Qfalse; + if (*++m == '=') ++m; + break; + + case '!': + switch (*++m) { + case '\0': return Qtrue; + case '=': case '~': ++m; break; + default: return Qfalse; + } + break; + + default: + localid = !rb_enc_isupper(*m, enc); + id: + if (m >= e || (*m != '_' && !rb_enc_isalpha(*m, enc) && ISASCII(*m))) + return Qfalse; + while (m < e && is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc); + if (localid) { + switch (*m) { + case '!': case '?': case '=': ++m; + } + } + break; + } + return m == e; +} + +static ID +register_symid(ID id, const char *name, long len, rb_encoding *enc) +{ + VALUE str = rb_enc_str_new(name, len, enc); + OBJ_FREEZE(str); + st_add_direct(global_symbols.sym_id, (st_data_t)str, id); + st_add_direct(global_symbols.id_str, id, (st_data_t)str); + return id; +} + +ID +rb_intern3(const char *name, long len, rb_encoding *enc) +{ + const char *m = name; + const char *e = m + len; + unsigned char c; + VALUE str; + ID id; + int last; + int mb; + struct RString fake_str; + fake_str.basic.flags = T_STRING|RSTRING_NOEMBED|FL_FREEZE; + fake_str.basic.klass = rb_cString; + fake_str.as.heap.len = len; + fake_str.as.heap.ptr = (char *)name; + fake_str.as.heap.aux.capa = len; + str = (VALUE)&fake_str; + rb_enc_associate(str, enc); + + if (st_lookup(global_symbols.sym_id, str, (st_data_t *)&id)) + return id; + + if (rb_cString && !rb_enc_asciicompat(enc)) { + id = ID_JUNK; + goto new_id; + } + last = len-1; + id = 0; + switch (*m) { + case '$': + id |= ID_GLOBAL; + if ((mb = is_special_global_name(++m, e, enc)) != 0) { + if (!--mb) enc = rb_ascii8bit_encoding(); + goto new_id; + } + break; + case '@': + if (m[1] == '@') { + m++; + id |= ID_CLASS; + } + else { + id |= ID_INSTANCE; + } + m++; + break; + default: + c = m[0]; + if (c != '_' && rb_enc_isascii(c, enc) && rb_enc_ispunct(c, enc)) { + /* operators */ + int i; + + if (len == 1) { + id = c; + goto id_register; + } + for (i = 0; i < op_tbl_count; i++) { + if (*op_tbl[i].name 
== *m && + strcmp(op_tbl[i].name, m) == 0) { + id = op_tbl[i].token; + goto id_register; + } + } + } + + if (m[last] == '=') { + /* attribute assignment */ + id = rb_intern3(name, last, enc); + if (id > tLAST_TOKEN && !is_attrset_id(id)) { + enc = rb_enc_get(rb_id2str(id)); + id = rb_id_attrset(id); + goto id_register; + } + id = ID_ATTRSET; + } + else if (rb_enc_isupper(m[0], enc)) { + id = ID_CONST; + } + else { + id = ID_LOCAL; + } + break; + } + mb = 0; + if (!rb_enc_isdigit(*m, enc)) { + while (m <= name + last && is_identchar(m, e, enc)) { + if (ISASCII(*m)) { + m++; + } + else { + mb = 1; + m += rb_enc_mbclen(m, e, enc); + } + } + } + if (m - name < len) id = ID_JUNK; + if (enc != rb_usascii_encoding()) { + /* + * this clause makes sense only when called from other than + * rb_intern_str() taking care of code-range. + */ + if (!mb) { + for (; m <= name + len; ++m) { + if (!ISASCII(*m)) goto mbstr; + } + enc = rb_usascii_encoding(); + } + mbstr:; + } + new_id: + if (!(global_symbols.last_id << (ID_SCOPE_SHIFT+RUBY_SPECIAL_SHIFT))) { + if (len > 20) { + rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %.20s...)", + name); + } + else { + rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %.*s)", + (int)len, name); + } + } + id |= ++global_symbols.last_id << ID_SCOPE_SHIFT; + id_register: + return register_symid(id, name, len, enc); +} + +ID +rb_intern2(const char *name, long len) +{ + return rb_intern3(name, len, rb_usascii_encoding()); +} + +#undef rb_intern +ID +rb_intern(const char *name) +{ + return rb_intern2(name, strlen(name)); +} + +ID +rb_intern_str(VALUE str) +{ + rb_encoding *enc; + ID id; + + if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) { + enc = rb_usascii_encoding(); + } + else { + enc = rb_enc_get(str); + } + id = rb_intern3(RSTRING_PTR(str), RSTRING_LEN(str), enc); + RB_GC_GUARD(str); + return id; +} + +VALUE +rb_id2str(ID id) +{ + st_data_t data; + + if (id < tLAST_TOKEN) { + int i = 0; + + if (rb_ispunct(id)) { + VALUE 
str = global_symbols.op_sym[i = (int)id]; + if (!str) { + char name[2]; + name[0] = (char)id; + name[1] = 0; + str = rb_usascii_str_new(name, 1); + OBJ_FREEZE(str); + global_symbols.op_sym[i] = str; + } + return str; + } + for (i = 0; i < op_tbl_count; i++) { + if (op_tbl[i].token == id) { + VALUE str = global_symbols.op_sym[i]; + if (!str) { + str = rb_usascii_str_new2(op_tbl[i].name); + OBJ_FREEZE(str); + global_symbols.op_sym[i] = str; + } + return str; + } + } + } + + if (st_lookup(global_symbols.id_str, id, &data)) { + VALUE str = (VALUE)data; + if (RBASIC(str)->klass == 0) + RBASIC(str)->klass = rb_cString; + return str; + } + + if (is_attrset_id(id)) { + ID id2 = (id & ~ID_SCOPE_MASK) | ID_LOCAL; + VALUE str; + + while (!(str = rb_id2str(id2))) { + if (!is_local_id(id2)) return 0; + id2 = (id & ~ID_SCOPE_MASK) | ID_CONST; + } + str = rb_str_dup(str); + rb_str_cat(str, "=", 1); + rb_intern_str(str); + if (st_lookup(global_symbols.id_str, id, &data)) { + VALUE str = (VALUE)data; + if (RBASIC(str)->klass == 0) + RBASIC(str)->klass = rb_cString; + return str; + } + } + return 0; +} + +const char * +rb_id2name(ID id) +{ + VALUE str = rb_id2str(id); + + if (!str) return 0; + return RSTRING_PTR(str); +} + +static int +symbols_i(VALUE sym, ID value, VALUE ary) +{ + rb_ary_push(ary, ID2SYM(value)); + return ST_CONTINUE; +} + +/* + * call-seq: + * Symbol.all_symbols => array + * + * Returns an array of all the symbols currently in Ruby's symbol + * table. 
+ * + * Symbol.all_symbols.size #=> 903 + * Symbol.all_symbols[1,20] #=> [:floor, :ARGV, :Binding, :symlink, + * :chown, :EOFError, :$;, :String, + * :LOCK_SH, :"setuid?", :$<, + * :default_proc, :compact, :extend, + * :Tms, :getwd, :$=, :ThreadGroup, + * :wait2, :$>] + */ + +VALUE +rb_sym_all_symbols(void) +{ + VALUE ary = rb_ary_new2(global_symbols.sym_id->num_entries); + + st_foreach(global_symbols.sym_id, symbols_i, ary); + return ary; +} + +int +rb_is_const_id(ID id) +{ + if (is_const_id(id)) return Qtrue; + return Qfalse; +} + +int +rb_is_class_id(ID id) +{ + if (is_class_id(id)) return Qtrue; + return Qfalse; +} + +int +rb_is_instance_id(ID id) +{ + if (is_instance_id(id)) return Qtrue; + return Qfalse; +} + +int +rb_is_local_id(ID id) +{ + if (is_local_id(id)) return Qtrue; + return Qfalse; +} + +int +rb_is_junk_id(ID id) +{ + if (is_junk_id(id)) return Qtrue; + return Qfalse; +} + +#endif /* !RIPPER */ + +static void +parser_initialize(struct parser_params *parser) +{ + parser->eofp = Qfalse; + + parser->parser_lex_strterm = 0; + parser->parser_cond_stack = 0; + parser->parser_cmdarg_stack = 0; + parser->parser_class_nest = 0; + parser->parser_paren_nest = 0; + parser->parser_lpar_beg = 0; + parser->parser_in_single = 0; + parser->parser_in_def = 0; + parser->parser_in_defined = 0; + parser->parser_compile_for_eval = 0; + parser->parser_cur_mid = 0; + parser->parser_tokenbuf = NULL; + parser->parser_tokidx = 0; + parser->parser_toksiz = 0; + parser->parser_heredoc_end = 0; + parser->parser_command_start = Qtrue; + parser->parser_deferred_nodes = 0; + parser->parser_lex_pbeg = 0; + parser->parser_lex_p = 0; + parser->parser_lex_pend = 0; + parser->parser_lvtbl = 0; + parser->parser_ruby__end__seen = 0; + parser->parser_ruby_sourcefile = 0; +#ifndef RIPPER + parser->is_ripper = 0; + parser->parser_eval_tree_begin = 0; + parser->parser_eval_tree = 0; +#else + parser->is_ripper = 1; + parser->parser_ruby_sourcefile_string = Qnil; + parser->delayed = Qnil; + + 
parser->result = Qnil; + parser->parsing_thread = Qnil; + parser->toplevel_p = Qtrue; +#endif +#ifdef YYMALLOC + parser->heap = NULL; +#endif + parser->enc = rb_usascii_encoding(); +} + +#ifdef RIPPER +#define parser_mark ripper_parser_mark +#define parser_free ripper_parser_free +#endif + +static void +parser_mark(void *ptr) +{ + struct parser_params *p = (struct parser_params*)ptr; + + rb_gc_mark((VALUE)p->parser_lex_strterm); + rb_gc_mark((VALUE)p->parser_deferred_nodes); + rb_gc_mark(p->parser_lex_input); + rb_gc_mark(p->parser_lex_lastline); + rb_gc_mark(p->parser_lex_nextline); +#ifndef RIPPER + rb_gc_mark((VALUE)p->parser_eval_tree_begin) ; + rb_gc_mark((VALUE)p->parser_eval_tree) ; + rb_gc_mark(p->debug_lines); +#else + rb_gc_mark(p->parser_ruby_sourcefile_string); + rb_gc_mark(p->delayed); + rb_gc_mark(p->value); + rb_gc_mark(p->result); + rb_gc_mark(p->parsing_thread); +#endif +#ifdef YYMALLOC + rb_gc_mark((VALUE)p->heap); +#endif +} + +static void +parser_free(void *ptr) +{ + struct parser_params *p = (struct parser_params*)ptr; + struct local_vars *local, *prev; + + if (p->parser_tokenbuf) { + xfree(p->parser_tokenbuf); + } + for (local = p->parser_lvtbl; local; local = prev) { + if (local->vars) xfree(local->vars); + prev = local->prev; + xfree(local); + } +#ifndef RIPPER + xfree(p->parser_ruby_sourcefile); +#endif + xfree(p); +} + +VALUE rb_parser_get_yydebug(VALUE); +VALUE rb_parser_set_yydebug(VALUE, VALUE); + +#ifndef RIPPER +static struct parser_params * +parser_new(void) +{ + struct parser_params *p; + + p = ALLOC_N(struct parser_params, 1); + MEMZERO(p, struct parser_params, 1); + parser_initialize(p); + return p; +} + +VALUE +rb_parser_new(void) +{ + struct parser_params *p = parser_new(); + + return Data_Wrap_Struct(0, parser_mark, parser_free, p); +} + +/* + * call-seq: + * ripper#end_seen? -> Boolean + * + * Return true if parsed source ended by +\_\_END\_\_+, + * false otherwise. 
+ */ +VALUE +rb_parser_end_seen_p(VALUE vparser) +{ + struct parser_params *parser; + + Data_Get_Struct(vparser, struct parser_params, parser); + return ruby__end__seen ? Qtrue : Qfalse; +} + +/* + * call-seq: + * ripper#encoding -> encoding + * + * Return encoding of the source. + */ +VALUE +rb_parser_encoding(VALUE vparser) +{ + struct parser_params *parser; + + Data_Get_Struct(vparser, struct parser_params, parser); + return rb_enc_from_encoding(parser->enc); +} + +/* + * call-seq: + * ripper.yydebug -> true or false + * + * Get yydebug. + */ +VALUE +rb_parser_get_yydebug(VALUE self) +{ + struct parser_params *parser; + + Data_Get_Struct(self, struct parser_params, parser); + return yydebug ? Qtrue : Qfalse; +} + +/* + * call-seq: + * ripper.yydebug = flag + * + * Set yydebug. + */ +VALUE +rb_parser_set_yydebug(VALUE self, VALUE flag) +{ + struct parser_params *parser; + + Data_Get_Struct(self, struct parser_params, parser); + yydebug = RTEST(flag); + return flag; +} + +#ifdef YYMALLOC +#define HEAPCNT(n, size) ((n) * (size) / sizeof(YYSTYPE)) +#define NEWHEAP() rb_node_newnode(NODE_ALLOCA, 0, (VALUE)parser->heap, 0) +#define ADD2HEAP(n, c, p) ((parser->heap = (n))->u1.node = (p), \ + (n)->u3.cnt = (c), (p)) + +void * +rb_parser_malloc(struct parser_params *parser, size_t size) +{ + size_t cnt = HEAPCNT(1, size); + NODE *n = NEWHEAP(); + void *ptr = xmalloc(size); + + return ADD2HEAP(n, cnt, ptr); +} + +void * +rb_parser_calloc(struct parser_params *parser, size_t nelem, size_t size) +{ + size_t cnt = HEAPCNT(nelem, size); + NODE *n = NEWHEAP(); + void *ptr = xcalloc(nelem, size); + + return ADD2HEAP(n, cnt, ptr); +} + +void * +rb_parser_realloc(struct parser_params *parser, void *ptr, size_t size) +{ + NODE *n; + size_t cnt = HEAPCNT(1, size); + + if (ptr && (n = parser->heap) != NULL) { + do { + if (n->u1.node == ptr) { + n->u1.node = ptr = xrealloc(ptr, size); + if (n->u3.cnt) n->u3.cnt = cnt; + return ptr; + } + } while ((n = n->u2.node) != NULL); + } + n = 
NEWHEAP(); + ptr = xrealloc(ptr, size); + return ADD2HEAP(n, cnt, ptr); +} + +void +rb_parser_free(struct parser_params *parser, void *ptr) +{ + NODE **prev = &parser->heap, *n; + + while ((n = *prev) != NULL) { + if (n->u1.node == ptr) { + *prev = n->u2.node; + rb_gc_force_recycle((VALUE)n); + break; + } + prev = &n->u2.node; + } + xfree(ptr); +} +#endif +#endif + +#ifdef RIPPER +#ifdef RIPPER_DEBUG +extern int rb_is_pointer_to_heap(VALUE); + +/* :nodoc: */ +static VALUE +ripper_validate_object(VALUE self, VALUE x) +{ + if (x == Qfalse) return x; + if (x == Qtrue) return x; + if (x == Qnil) return x; + if (x == Qundef) + rb_raise(rb_eArgError, "Qundef given"); + if (FIXNUM_P(x)) return x; + if (SYMBOL_P(x)) return x; + if (!rb_is_pointer_to_heap(x)) + rb_raise(rb_eArgError, "invalid pointer: %p", x); + switch (TYPE(x)) { + case T_STRING: + case T_OBJECT: + case T_ARRAY: + case T_BIGNUM: + case T_FLOAT: + return x; + case T_NODE: + if (nd_type(x) != NODE_LASGN) { + rb_raise(rb_eArgError, "NODE given: %p", x); + } + return ((NODE *)x)->nd_rval; + default: + rb_raise(rb_eArgError, "wrong type of ruby object: %p (%s)", + x, rb_obj_classname(x)); + } + return x; +} +#endif + +#define validate(x) (x = get_value(x)) + +static VALUE +ripper_dispatch0(struct parser_params *parser, ID mid) +{ + return rb_funcall(parser->value, mid, 0); +} + +static VALUE +ripper_dispatch1(struct parser_params *parser, ID mid, VALUE a) +{ + validate(a); + return rb_funcall(parser->value, mid, 1, a); +} + +static VALUE +ripper_dispatch2(struct parser_params *parser, ID mid, VALUE a, VALUE b) +{ + validate(a); + validate(b); + return rb_funcall(parser->value, mid, 2, a, b); +} + +static VALUE +ripper_dispatch3(struct parser_params *parser, ID mid, VALUE a, VALUE b, VALUE c) +{ + validate(a); + validate(b); + validate(c); + return rb_funcall(parser->value, mid, 3, a, b, c); +} + +static VALUE +ripper_dispatch4(struct parser_params *parser, ID mid, VALUE a, VALUE b, VALUE c, VALUE d) +{ + 
validate(a); + validate(b); + validate(c); + validate(d); + return rb_funcall(parser->value, mid, 4, a, b, c, d); +} + +static VALUE +ripper_dispatch5(struct parser_params *parser, ID mid, VALUE a, VALUE b, VALUE c, VALUE d, VALUE e) +{ + validate(a); + validate(b); + validate(c); + validate(d); + validate(e); + return rb_funcall(parser->value, mid, 5, a, b, c, d, e); +} + +static const struct kw_assoc { + ID id; + const char *name; +} keyword_to_name[] = { + {keyword_class, "class"}, + {keyword_module, "module"}, + {keyword_def, "def"}, + {keyword_undef, "undef"}, + {keyword_begin, "begin"}, + {keyword_rescue, "rescue"}, + {keyword_ensure, "ensure"}, + {keyword_end, "end"}, + {keyword_if, "if"}, + {keyword_unless, "unless"}, + {keyword_then, "then"}, + {keyword_elsif, "elsif"}, + {keyword_else, "else"}, + {keyword_case, "case"}, + {keyword_when, "when"}, + {keyword_while, "while"}, + {keyword_until, "until"}, + {keyword_for, "for"}, + {keyword_break, "break"}, + {keyword_next, "next"}, + {keyword_redo, "redo"}, + {keyword_retry, "retry"}, + {keyword_in, "in"}, + {keyword_do, "do"}, + {keyword_do_cond, "do"}, + {keyword_do_block, "do"}, + {keyword_return, "return"}, + {keyword_yield, "yield"}, + {keyword_super, "super"}, + {keyword_self, "self"}, + {keyword_nil, "nil"}, + {keyword_true, "true"}, + {keyword_false, "false"}, + {keyword_and, "and"}, + {keyword_or, "or"}, + {keyword_not, "not"}, + {modifier_if, "if"}, + {modifier_unless, "unless"}, + {modifier_while, "while"}, + {modifier_until, "until"}, + {modifier_rescue, "rescue"}, + {keyword_alias, "alias"}, + {keyword_defined, "defined?"}, + {keyword_BEGIN, "BEGIN"}, + {keyword_END, "END"}, + {keyword__LINE__, "__LINE__"}, + {keyword__FILE__, "__FILE__"}, + {keyword__ENCODING__, "__ENCODING__"}, + {0, NULL} +}; + +static const char* +keyword_id_to_str(ID id) +{ + const struct kw_assoc *a; + + for (a = keyword_to_name; a->id; a++) { + if (a->id == id) + return a->name; + } + return NULL; +} + +#undef ripper_id2sym 
+static VALUE +ripper_id2sym(ID id) +{ /* Convert a parser ID (character token, keyword, operator or interned name) to a Symbol. */ + const char *name; + char buf[8]; + + if (id <= 256) { /* small IDs are interned as a one-character name */ + buf[0] = id; + buf[1] = '\0'; + return ID2SYM(rb_intern(buf)); + } + if ((name = keyword_id_to_str(id))) { + return ID2SYM(rb_intern(name)); + } + switch (id) { + case tOROP: + name = "||"; + break; + case tANDOP: + name = "&&"; + break; + default: + name = rb_id2name(id); + if (!name) { + rb_bug("cannot convert ID to string: %lu", (unsigned long)id); /* %lu matches the unsigned long argument */ + } + break; + } + return ID2SYM(rb_intern(name)); +} + +static VALUE +ripper_intern(const char *s) +{ + return ID2SYM(rb_intern(s)); +} + +static ID +ripper_get_id(VALUE v) +{ + NODE *nd; + if (TYPE(v) != T_NODE) return 0; + nd = (NODE *)v; + if (nd_type(nd) != NODE_LASGN) return 0; + return nd->nd_vid; +} + +static VALUE +ripper_get_value(VALUE v) +{ + NODE *nd; + if (TYPE(v) != T_NODE) return v; + nd = (NODE *)v; + if (nd_type(nd) != NODE_LASGN) return Qnil; + return nd->nd_rval; +} + +static void +ripper_compile_error(struct parser_params *parser, const char *fmt, ...) 
+{ + VALUE str; + va_list args; + + va_start(args, fmt); + str = rb_vsprintf(fmt, args); + va_end(args); + rb_funcall(parser->value, rb_intern("compile_error"), 1, str); +} + +static void +ripper_warn0(struct parser_params *parser, const char *fmt) +{ + rb_funcall(parser->value, rb_intern("warn"), 1, STR_NEW2(fmt)); +} + +static void +ripper_warnI(struct parser_params *parser, const char *fmt, int a) +{ + rb_funcall(parser->value, rb_intern("warn"), 2, + STR_NEW2(fmt), INT2NUM(a)); +} + +#if 0 +static void +ripper_warnS(struct parser_params *parser, const char *fmt, const char *str) +{ + rb_funcall(parser->value, rb_intern("warn"), 2, + STR_NEW2(fmt), STR_NEW2(str)); +} +#endif + +static void +ripper_warning0(struct parser_params *parser, const char *fmt) +{ + rb_funcall(parser->value, rb_intern("warning"), 1, STR_NEW2(fmt)); +} + +static void +ripper_warningS(struct parser_params *parser, const char *fmt, const char *str) +{ + rb_funcall(parser->value, rb_intern("warning"), 2, + STR_NEW2(fmt), STR_NEW2(str)); +} + +static VALUE +ripper_lex_get_generic(struct parser_params *parser, VALUE src) +{ + return rb_funcall(src, ripper_id_gets, 0); +} + +static VALUE +ripper_s_allocate(VALUE klass) +{ + struct parser_params *p; + VALUE self; + + p = ALLOC_N(struct parser_params, 1); + MEMZERO(p, struct parser_params, 1); + self = Data_Wrap_Struct(klass, parser_mark, parser_free, p); + p->value = self; + return self; +} + +#define ripper_initialized_p(r) ((r)->parser_lex_input != 0) + +/* + * call-seq: + * Ripper.new(src, filename="(ripper)", lineno=1) -> ripper + * + * Create a new Ripper object. + * _src_ must be a String, an IO, or an Object which has a #gets method. + * + * This method does not start parsing. + * See also Ripper#parse and Ripper.parse. 
+ */ +static VALUE +ripper_initialize(int argc, VALUE *argv, VALUE self) +{ + struct parser_params *parser; + VALUE src, fname, lineno; + + Data_Get_Struct(self, struct parser_params, parser); + rb_scan_args(argc, argv, "12", &src, &fname, &lineno); + if (rb_obj_respond_to(src, ripper_id_gets, 0)) { + parser->parser_lex_gets = ripper_lex_get_generic; + } + else { + StringValue(src); + parser->parser_lex_gets = lex_get_str; + } + parser->parser_lex_input = src; + parser->eofp = Qfalse; + if (NIL_P(fname)) { + fname = STR_NEW2("(ripper)"); + } + else { + StringValue(fname); + } + parser_initialize(parser); + + parser->parser_ruby_sourcefile_string = fname; + parser->parser_ruby_sourcefile = RSTRING_PTR(fname); + parser->parser_ruby_sourceline = NIL_P(lineno) ? 0 : NUM2INT(lineno) - 1; + + return Qnil; +} + +extern VALUE rb_thread_pass(void); + +struct ripper_args { + struct parser_params *parser; + int argc; + VALUE *argv; +}; + +static VALUE +ripper_parse0(VALUE parser_v) +{ + struct parser_params *parser; + + Data_Get_Struct(parser_v, struct parser_params, parser); + parser_prepare(parser); + ripper_yyparse((void*)parser); + return parser->result; +} + +static VALUE +ripper_ensure(VALUE parser_v) +{ + struct parser_params *parser; + + Data_Get_Struct(parser_v, struct parser_params, parser); + parser->parsing_thread = Qnil; + return Qnil; +} + +/* + * call-seq: + * ripper#parse + * + * Start parsing and returns the value of the root action. 
+ */ +static VALUE +ripper_parse(VALUE self) +{ + struct parser_params *parser; + + Data_Get_Struct(self, struct parser_params, parser); + if (!ripper_initialized_p(parser)) { + rb_raise(rb_eArgError, "method called for uninitialized object"); + } + if (!NIL_P(parser->parsing_thread)) { + if (parser->parsing_thread == rb_thread_current()) + rb_raise(rb_eArgError, "Ripper#parse is not reentrant"); + else + rb_raise(rb_eArgError, "Ripper#parse is not multithread-safe"); + } + parser->parsing_thread = rb_thread_current(); + rb_ensure(ripper_parse0, self, ripper_ensure, self); + + return parser->result; +} + +/* + * call-seq: + * ripper#column -> Integer + * + * Return column number of current parsing line. + * This number starts from 0. + */ +static VALUE +ripper_column(VALUE self) +{ + struct parser_params *parser; + long col; + + Data_Get_Struct(self, struct parser_params, parser); + if (!ripper_initialized_p(parser)) { + rb_raise(rb_eArgError, "method called for uninitialized object"); + } + if (NIL_P(parser->parsing_thread)) return Qnil; + col = parser->tokp - parser->parser_lex_pbeg; + return LONG2NUM(col); +} + +/* + * call-seq: + * ripper#lineno -> Integer + * + * Return line number of current parsing line. + * This number starts from 1. 
+ */ +static VALUE +ripper_lineno(VALUE self) +{ + struct parser_params *parser; + + Data_Get_Struct(self, struct parser_params, parser); + if (!ripper_initialized_p(parser)) { + rb_raise(rb_eArgError, "method called for uninitialized object"); + } + if (NIL_P(parser->parsing_thread)) return Qnil; + return INT2NUM(parser->parser_ruby_sourceline); +} + +#ifdef RIPPER_DEBUG +/* :nodoc: */ +static VALUE +ripper_assert_Qundef(VALUE self, VALUE obj, VALUE msg) +{ + StringValue(msg); + if (obj == Qundef) { + rb_raise(rb_eArgError, "%s", RSTRING_PTR(msg)); + } + return Qnil; +} + +/* :nodoc: */ +static VALUE +ripper_value(VALUE self, VALUE obj) +{ + return ULONG2NUM(obj); +} +#endif + +void +Init_ripper(void) +{ + VALUE Ripper; + + Ripper = rb_define_class("Ripper", rb_cObject); + rb_define_const(Ripper, "Version", rb_usascii_str_new2(RIPPER_VERSION)); + rb_define_alloc_func(Ripper, ripper_s_allocate); + rb_define_method(Ripper, "initialize", ripper_initialize, -1); + rb_define_method(Ripper, "parse", ripper_parse, 0); + rb_define_method(Ripper, "column", ripper_column, 0); + rb_define_method(Ripper, "lineno", ripper_lineno, 0); + rb_define_method(Ripper, "end_seen?", rb_parser_end_seen_p, 0); + rb_define_method(Ripper, "encoding", rb_parser_encoding, 0); + rb_define_method(Ripper, "yydebug", rb_parser_get_yydebug, 0); + rb_define_method(Ripper, "yydebug=", rb_parser_set_yydebug, 1); +#ifdef RIPPER_DEBUG + rb_define_method(rb_mKernel, "assert_Qundef", ripper_assert_Qundef, 2); + rb_define_method(rb_mKernel, "rawVALUE", ripper_value, 1); + rb_define_method(rb_mKernel, "validate_object", ripper_validate_object, 1); +#endif + + ripper_id_gets = rb_intern("gets"); + ripper_init_eventids1(Ripper); + ripper_init_eventids2(Ripper); + /* ensure existing in symbol table */ + rb_intern("||"); + rb_intern("&&"); +} +#endif /* RIPPER */ diff --git a/prelude.rb b/prelude.rb new file mode 100644 index 0000000..fdf401b --- /dev/null +++ b/prelude.rb @@ -0,0 +1,35 @@ + +# Mutex + +class 
Mutex + def synchronize + self.lock + begin + yield + ensure + self.unlock rescue nil + end + end +end + +# Thread + +class Thread + MUTEX_FOR_THREAD_EXCLUSIVE = Mutex.new + def self.exclusive + MUTEX_FOR_THREAD_EXCLUSIVE.synchronize{ + yield + } + end +end + +def require_relative(relative_feature) + c = caller.first + e = c.rindex(/:\d+:in /) + file = $` + if /\A\((.*)\)/ =~ file # eval, etc. + raise LoadError, "require_relative is called in #{$1}" + end + absolute_feature = File.expand_path(File.join(File.dirname(file), relative_feature)) + require absolute_feature +end diff --git a/proc.c b/proc.c new file mode 100644 index 0000000..6c47fe8 --- /dev/null +++ b/proc.c @@ -0,0 +1,1951 @@ +/********************************************************************** + + proc.c - Proc, Binding, Env + + $Author: yugui $ + created at: Wed Jan 17 12:13:14 2007 + + Copyright (C) 2004-2007 Koichi Sasada + +**********************************************************************/ + +#include "eval_intern.h" +#include "gc.h" + +struct METHOD { + VALUE oclass; /* class that holds the method */ + VALUE rclass; /* class of the receiver */ + VALUE recv; + ID id, oid; + NODE *body; +}; + +VALUE rb_cUnboundMethod; +VALUE rb_cMethod; +VALUE rb_cBinding; +VALUE rb_cProc; + +static VALUE bmcall(VALUE, VALUE); +static int method_arity(VALUE); +static VALUE rb_obj_is_method(VALUE m); + +/* Proc */ + +static void +proc_free(void *ptr) +{ + RUBY_FREE_ENTER("proc"); + if (ptr) { + ruby_xfree(ptr); + } + RUBY_FREE_LEAVE("proc"); +} + +static void +proc_mark(void *ptr) +{ + rb_proc_t *proc; + RUBY_MARK_ENTER("proc"); + if (ptr) { + proc = ptr; + RUBY_MARK_UNLESS_NULL(proc->envval); + RUBY_MARK_UNLESS_NULL(proc->blockprocval); + RUBY_MARK_UNLESS_NULL(proc->block.proc); + RUBY_MARK_UNLESS_NULL(proc->block.self); + if (proc->block.iseq && RUBY_VM_IFUNC_P(proc->block.iseq)) { + RUBY_MARK_UNLESS_NULL((VALUE)(proc->block.iseq)); + } + } + RUBY_MARK_LEAVE("proc"); +} + +VALUE +rb_proc_alloc(VALUE klass) 
+{ + VALUE obj; + rb_proc_t *proc; + obj = Data_Make_Struct(klass, rb_proc_t, proc_mark, proc_free, proc); + MEMZERO(proc, rb_proc_t, 1); + return obj; +} + +VALUE +rb_obj_is_proc(VALUE proc) +{ + if (TYPE(proc) == T_DATA && + RDATA(proc)->dfree == (RUBY_DATA_FUNC) proc_free) { + return Qtrue; + } + else { + return Qfalse; + } +} + +static VALUE +proc_dup(VALUE self) +{ + VALUE procval = rb_proc_alloc(rb_cProc); + rb_proc_t *src, *dst; + GetProcPtr(self, src); + GetProcPtr(procval, dst); + + dst->block = src->block; + dst->block.proc = procval; + dst->envval = src->envval; + dst->safe_level = src->safe_level; + dst->is_lambda = src->is_lambda; + + return procval; +} + +static VALUE +proc_clone(VALUE self) +{ + VALUE procval = proc_dup(self); + CLONESETUP(procval, self); + return procval; +} + +/* + * call-seq: + * prc.lambda? => true or false + * + * Returns true for a Proc object whose argument handling is rigid. + * Such procs are typically generated by lambda. + * + * A Proc object generated by proc ignores extra arguments. + * + * proc {|a,b| [a,b] }.call(1,2,3) => [1,2] + * + * It provides nil for missing arguments. + * + * proc {|a,b| [a,b] }.call(1) => [1,nil] + * + * It expands a single-array argument. + * + * proc {|a,b| [a,b] }.call([1,2]) => [1,2] + * + * A Proc object generated by lambda doesn't have such tricks. + * + * lambda {|a,b| [a,b] }.call(1,2,3) => ArgumentError + * lambda {|a,b| [a,b] }.call(1) => ArgumentError + * lambda {|a,b| [a,b] }.call([1,2]) => ArgumentError + * + * Proc#lambda? is a predicate for the tricks. + * It returns true if no tricks apply. + * + * lambda {}.lambda? => true + * proc {}.lambda? => false + * + * Proc.new is the same as proc. + * + * Proc.new {}.lambda? => false + * + * lambda, proc and Proc.new preserve the tricks of + * a Proc object given by & argument. + * + * lambda(&lambda {}).lambda? => true + * proc(&lambda {}).lambda? => true + * Proc.new(&lambda {}).lambda? => true + * + * lambda(&proc {}).lambda? 
=> false + * proc(&proc {}).lambda? => false + * Proc.new(&proc {}).lambda? => false + * + * A Proc object generated by & argument has the tricks. + * + * def n(&b) b.lambda? end + * n {} => false + * + * The & argument preserves the tricks if a Proc object is given + * by & argument. + * + * n(&lambda {}) => true + * n(&proc {}) => false + * n(&Proc.new {}) => false + * + * A Proc object converted from a method has no tricks. + * + * def m() end + * method(:m).to_proc.lambda? => true + * + * n(&method(:m)) => true + * n(&method(:m).to_proc) => true + * + * define_method is treated the same as method definition. + * The defined method has no tricks. + * + * class C + * define_method(:d) {} + * end + * C.new.d(1,2) => ArgumentError + * C.new.method(:d).to_proc.lambda? => true + * + * define_method always defines a method without the tricks, + * even if a non-lambda Proc object is given. + * This is the only exception for which the tricks are not preserved. + * + * class C + * define_method(:e, &proc {}) + * end + * C.new.e(1,2) => ArgumentError + * C.new.method(:e).to_proc.lambda? => true + * + * This exception is for a wrapper of define_method. + * It eases defining a method-defining method that defines a usual method which has no tricks. + * + * class << C + * def def2(name, &body) + * define_method(name, &body) + * end + * end + * class C + * def2(:f) {} + * end + * C.new.f(1,2) => ArgumentError + * + * The wrapper, def2, defines a method which has no tricks. + * + */ + +static VALUE +proc_lambda_p(VALUE procval) +{ + rb_proc_t *proc; + GetProcPtr(procval, proc); + + return proc->is_lambda ? 
Qtrue : Qfalse; +} + +/* Binding */ + +static void +binding_free(void *ptr) +{ + rb_binding_t *bind; + RUBY_FREE_ENTER("binding"); + if (ptr) { + bind = ptr; + ruby_xfree(ptr); + } + RUBY_FREE_LEAVE("binding"); +} + +static void +binding_mark(void *ptr) +{ + rb_binding_t *bind; + RUBY_MARK_ENTER("binding"); + if (ptr) { + bind = ptr; + RUBY_MARK_UNLESS_NULL(bind->env); + } + RUBY_MARK_LEAVE("binding"); +} + +static VALUE +binding_alloc(VALUE klass) +{ + VALUE obj; + rb_binding_t *bind; + obj = Data_Make_Struct(klass, rb_binding_t, binding_mark, binding_free, bind); + return obj; +} + +static VALUE +binding_dup(VALUE self) +{ + VALUE bindval = binding_alloc(rb_cBinding); + rb_binding_t *src, *dst; + GetBindingPtr(self, src); + GetBindingPtr(bindval, dst); + dst->env = src->env; + return bindval; +} + +static VALUE +binding_clone(VALUE self) +{ + VALUE bindval = binding_dup(self); + CLONESETUP(bindval, self); + return bindval; +} + +VALUE +rb_binding_new(void) +{ + rb_thread_t *th = GET_THREAD(); + rb_control_frame_t *cfp = rb_vm_get_ruby_level_next_cfp(th, th->cfp); + VALUE bindval = binding_alloc(rb_cBinding); + rb_binding_t *bind; + + if (cfp == 0) { + rb_raise(rb_eRuntimeError, "Can't create Binding Object on top of Fiber."); + } + + GetBindingPtr(bindval, bind); + bind->env = rb_vm_make_env_object(th, cfp); + return bindval; +} + +/* + * call-seq: + * binding -> a_binding + * + * Returns a +Binding+ object, describing the variable and + * method bindings at the point of call. This object can be used when + * calling +eval+ to execute the evaluated command in this + * environment. Also see the description of class +Binding+. 
+ * + * def getBinding(param) + * return binding + * end + * b = getBinding("hello") + * eval("param", b) #=> "hello" + */ + +static VALUE +rb_f_binding(VALUE self) +{ + return rb_binding_new(); +} + +/* + * call-seq: + * binding.eval(string [, filename [,lineno]]) => obj + * + * Evaluates the Ruby expression(s) in string, in the + * binding's context. If the optional filename and + * lineno parameters are present, they will be used when + * reporting syntax errors. + * + * def getBinding(param) + * return binding + * end + * b = getBinding("hello") + * b.eval("param") #=> "hello" + */ + +static VALUE +bind_eval(int argc, VALUE *argv, VALUE bindval) +{ + VALUE args[4]; + + rb_scan_args(argc, argv, "12", &args[0], &args[2], &args[3]); + args[1] = bindval; + return rb_f_eval(argc+1, args, Qnil /* self will be searched in eval */); +} + +static VALUE +proc_new(VALUE klass, int is_lambda) +{ + VALUE procval = Qnil; + rb_thread_t *th = GET_THREAD(); + rb_control_frame_t *cfp = th->cfp; + rb_block_t *block; + + if ((GC_GUARDED_PTR_REF(cfp->lfp[0])) != 0 && + !RUBY_VM_CLASS_SPECIAL_P(cfp->lfp[0])) { + + block = GC_GUARDED_PTR_REF(cfp->lfp[0]); + } + else { + cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp); + + if ((GC_GUARDED_PTR_REF(cfp->lfp[0])) != 0 && + !RUBY_VM_CLASS_SPECIAL_P(cfp->lfp[0])) { + + block = GC_GUARDED_PTR_REF(cfp->lfp[0]); + + if (is_lambda) { + rb_warn("tried to create Proc object without a block"); + } + } + else { + rb_raise(rb_eArgError, + "tried to create Proc object without a block"); + } + } + + procval = block->proc; + + if (procval) { + if (RBASIC(procval)->klass == klass) { + return procval; + } + else { + VALUE newprocval = proc_dup(procval); + RBASIC(newprocval)->klass = klass; + return newprocval; + } + } + + procval = rb_vm_make_proc(th, block, klass); + + if (is_lambda) { + rb_proc_t *proc; + GetProcPtr(procval, proc); + proc->is_lambda = Qtrue; + } + return procval; +} + +/* + * call-seq: + * Proc.new {|...| block } => a_proc + * Proc.new => 
a_proc + * + * Creates a new Proc object, bound to the current + * context. Proc::new may be called without a block only + * within a method with an attached block, in which case that block is + * converted to the Proc object. + * + * def proc_from + * Proc.new + * end + * proc = proc_from { "hello" } + * proc.call #=> "hello" + */ + +static VALUE +rb_proc_s_new(int argc, VALUE *argv, VALUE klass) +{ + VALUE block = proc_new(klass, Qfalse); + + rb_obj_call_init(block, argc, argv); + return block; +} + +/* + * call-seq: + * proc { |...| block } => a_proc + * + * Equivalent to Proc.new. + */ + +VALUE +rb_block_proc(void) +{ + return proc_new(rb_cProc, Qfalse); +} + +VALUE +rb_block_lambda(void) +{ + return proc_new(rb_cProc, Qtrue); +} + +VALUE +rb_f_lambda(void) +{ + rb_warn("rb_f_lambda() is deprecated; use rb_block_proc() instead"); + return rb_block_lambda(); +} + +/* + * call-seq: + * lambda { |...| block } => a_proc + * + * Equivalent to Proc.new, except the resulting Proc objects + * check the number of parameters passed when called. + */ + +static VALUE +proc_lambda(void) +{ + return rb_block_lambda(); +} + +/* CHECKME: are the argument checking semantics correct? */ + +/* + * call-seq: + * prc.call(params,...) => obj + * prc[params,...] => obj + * prc.(params,...) => obj + * + * Invokes the block, setting the block's parameters to the values in + * params using something close to method calling semantics. + * Generates a warning if multiple values are passed to a proc that + * expects just one (previously this silently converted the parameters + * to an array). Note that prc.() invokes prc.call() with the parameters + * given. It's a syntax sugar to hide "call". + * + * For procs created using Kernel.proc, generates an + * error if the wrong number of parameters + * are passed to a proc with multiple parameters. For procs created using + * Proc.new, extra parameters are silently discarded. 
+ * + * Returns the value of the last expression evaluated in the block. See + * also Proc#yield. + * + * a_proc = Proc.new {|a, *b| b.collect {|i| i*a }} + * a_proc.call(9, 1, 2, 3) #=> [9, 18, 27] + * a_proc[9, 1, 2, 3] #=> [9, 18, 27] + * a_proc = Proc.new {|a,b| a} + * a_proc.call(1,2,3) + * + * produces: + * + * prog.rb:5: wrong number of arguments (3 for 2) (ArgumentError) + * from prog.rb:4:in `call' + * from prog.rb:5 + */ + +/* + * call-seq: + * prc === obj => obj + * + * Invokes the block, with obj as the block's parameter. It is + * to allow a proc object to be a target of when clause in the case statement. + */ + +static VALUE +proc_call(int argc, VALUE *argv, VALUE procval) +{ + rb_proc_t *proc; + rb_block_t *blockptr = 0; + rb_iseq_t *iseq; + GetProcPtr(procval, proc); + + iseq = proc->block.iseq; + if (BUILTIN_TYPE(iseq) == T_NODE || iseq->arg_block != -1) { + if (rb_block_given_p()) { + rb_proc_t *proc; + VALUE procval; + procval = rb_block_proc(); + GetProcPtr(procval, proc); + blockptr = &proc->block; + } + } + + return rb_vm_invoke_proc(GET_THREAD(), proc, proc->block.self, + argc, argv, blockptr); +} + +#if SIZEOF_LONG > SIZEOF_INT +static inline int +check_argc(long argc) +{ + if (argc > INT_MAX || argc < 0) { + rb_raise(rb_eArgError, "too many arguments (%lu)", + (unsigned long)argc); + } + return (int)argc; +} +#else +#define check_argc(argc) (argc) +#endif + +VALUE +rb_proc_call(VALUE self, VALUE args) +{ + rb_proc_t *proc; + GetProcPtr(self, proc); + return rb_vm_invoke_proc(GET_THREAD(), proc, proc->block.self, + check_argc(RARRAY_LEN(args)), RARRAY_PTR(args), 0); +} + +VALUE +rb_proc_call_with_block(VALUE self, int argc, VALUE *argv, VALUE pass_procval) +{ + rb_proc_t *proc; + rb_block_t *block = 0; + GetProcPtr(self, proc); + + if (!NIL_P(pass_procval)) { + rb_proc_t *pass_proc; + GetProcPtr(pass_procval, pass_proc); + block = &pass_proc->block; + } + + return rb_vm_invoke_proc(GET_THREAD(), proc, proc->block.self, + argc, argv, block); 
+} + +/* + * call-seq: + * prc.arity -> fixnum + * + * Returns the number of arguments that would not be ignored. If the block + * is declared to take no arguments, returns 0. If the block is known + * to take exactly n arguments, returns n. If the block has optional + * arguments, return -n-1, where n is the number of mandatory + * arguments. A proc with no argument declarations + * is the same a block declaring || as its arguments. + * + * Proc.new {}.arity #=> 0 + * Proc.new {||}.arity #=> 0 + * Proc.new {|a|}.arity #=> 1 + * Proc.new {|a,b|}.arity #=> 2 + * Proc.new {|a,b,c|}.arity #=> 3 + * Proc.new {|*a|}.arity #=> -1 + * Proc.new {|a,*b|}.arity #=> -2 + * Proc.new {|a,*b, c|}.arity #=> -3 + */ + +static VALUE +proc_arity(VALUE self) +{ + rb_proc_t *proc; + rb_iseq_t *iseq; + GetProcPtr(self, proc); + iseq = proc->block.iseq; + if (iseq) { + if (BUILTIN_TYPE(iseq) != T_NODE) { + if (iseq->arg_rest < 0) { + return INT2FIX(iseq->argc); + } + else { + return INT2FIX(-(iseq->argc + 1 + iseq->arg_post_len)); + } + } + else { + NODE *node = (NODE *)iseq; + if (nd_type(node) == NODE_IFUNC && node->nd_cfnc == bmcall) { + /* method(:foo).to_proc.arity */ + return INT2FIX(method_arity(node->nd_tval)); + } + } + } + return INT2FIX(-1); +} + +int +rb_proc_arity(VALUE proc) +{ + return FIX2INT(proc_arity(proc)); +} + +static rb_iseq_t * +get_proc_iseq(VALUE self) +{ + rb_proc_t *proc; + rb_iseq_t *iseq; + + GetProcPtr(self, proc); + iseq = proc->block.iseq; + if (!RUBY_VM_NORMAL_ISEQ_P(iseq)) + return 0; + return iseq; +} + +static VALUE +iseq_location(rb_iseq_t *iseq) +{ + VALUE loc[2]; + + if (!iseq) return Qnil; + loc[0] = iseq->filename; + if (iseq->insn_info_table) { + loc[1] = INT2FIX(rb_iseq_first_lineno(iseq)); + } + else { + loc[1] = Qnil; + } + return rb_ary_new4(2, loc); +} + +/* + * call-seq: + * prc.source_location => [String, Fixnum] + * + * returns the ruby source filename and line number containing this proc + * or nil if this proc was not defined in ruby 
(i.e. native) + */ + +VALUE +rb_proc_location(VALUE self) +{ + return iseq_location(get_proc_iseq(self)); +} + +/* + * call-seq: + * prc == other_proc => true or false + * + * Return true if prc is the same object as + * other_proc, or if they are both procs with the same body. + */ + +static VALUE +proc_eq(VALUE self, VALUE other) +{ + if (self == other) { + return Qtrue; + } + else { + if (TYPE(other) == T_DATA && + RDATA(other)->dmark == proc_mark) { + rb_proc_t *p1, *p2; + GetProcPtr(self, p1); + GetProcPtr(other, p2); + if (p1->envval == p2->envval && + p1->block.iseq->iseq_size == p2->block.iseq->iseq_size && + p1->block.iseq->local_size == p2->block.iseq->local_size && + MEMCMP(p1->block.iseq->iseq, p2->block.iseq->iseq, VALUE, + p1->block.iseq->iseq_size) == 0) { + return Qtrue; + } + } + } + return Qfalse; +} + +/* + * call-seq: + * prc.hash => integer + * + * Return hash value corresponding to proc body. + */ + +static VALUE +proc_hash(VALUE self) +{ + int hash; + rb_proc_t *proc; + GetProcPtr(self, proc); + hash = (long)proc->block.iseq; + hash ^= (long)proc->envval; + hash ^= (long)proc->block.lfp >> 16; + return INT2FIX(hash); +} + +/* + * call-seq: + * prc.to_s => string + * + * Shows the unique identifier for this proc, along with + * an indication of where the proc was defined. + */ + +static VALUE +proc_to_s(VALUE self) +{ + VALUE str = 0; + rb_proc_t *proc; + const char *cname = rb_obj_classname(self); + rb_iseq_t *iseq; + const char *is_lambda; + + GetProcPtr(self, proc); + iseq = proc->block.iseq; + is_lambda = proc->is_lambda ? 
" (lambda)" : ""; + + if (RUBY_VM_NORMAL_ISEQ_P(iseq)) { + int line_no = 0; + + if (iseq->insn_info_table) { + line_no = rb_iseq_first_lineno(iseq); + } + str = rb_sprintf("#<%s:%p@%s:%d%s>", cname, (void *)self, + RSTRING_PTR(iseq->filename), + line_no, is_lambda); + } + else { + str = rb_sprintf("#<%s:%p%s>", cname, (void *)proc->block.iseq, + is_lambda); + } + + if (OBJ_TAINTED(self)) { + OBJ_TAINT(str); + } + return str; +} + +/* + * call-seq: + * prc.to_proc -> prc + * + * Part of the protocol for converting objects to Proc + * objects. Instances of class Proc simply return + * themselves. + */ + +static VALUE +proc_to_proc(VALUE self) +{ + return self; +} + +static void +bm_mark(struct METHOD *data) +{ + rb_gc_mark(data->rclass); + rb_gc_mark(data->oclass); + rb_gc_mark(data->recv); + rb_gc_mark((VALUE)data->body); +} + +NODE * +rb_method_body(VALUE method) +{ + struct METHOD *data; + + if (TYPE(method) == T_DATA && + RDATA(method)->dmark == (RUBY_DATA_FUNC) bm_mark) { + Data_Get_Struct(method, struct METHOD, data); + return data->body; + } + else { + return 0; + } +} + +NODE *rb_get_method_body(VALUE klass, ID id, ID *idp); + +static VALUE +mnew(VALUE klass, VALUE obj, ID id, VALUE mclass, int scope) +{ + VALUE method; + NODE *body; + struct METHOD *data; + VALUE rclass = klass; + ID oid = id; + + again: + if ((body = rb_get_method_body(klass, id, 0)) == 0) { + rb_print_undef(rclass, oid, 0); + } + if (scope && (body->nd_noex & NOEX_MASK) != NOEX_PUBLIC) { + rb_print_undef(rclass, oid, (int)(body->nd_noex & NOEX_MASK)); + } + + klass = body->nd_clss; + body = body->nd_body; + + if (nd_type(body) == NODE_ZSUPER) { + klass = RCLASS_SUPER(klass); + goto again; + } + + while (rclass != klass && + (FL_TEST(rclass, FL_SINGLETON) || TYPE(rclass) == T_ICLASS)) { + rclass = RCLASS_SUPER(rclass); + } + if (TYPE(klass) == T_ICLASS) + klass = RBASIC(klass)->klass; + method = Data_Make_Struct(mclass, struct METHOD, bm_mark, -1, data); + data->oclass = klass; + data->recv 
= obj; + + data->id = id; + data->body = body; + data->rclass = rclass; + data->oid = oid; + OBJ_INFECT(method, klass); + + return method; +} + + +/********************************************************************** + * + * Document-class : Method + * + * Method objects are created by Object#method, and are + * associated with a particular object (not just with a class). They + * may be used to invoke the method within the object, and as a block + * associated with an iterator. They may also be unbound from one + * object (creating an UnboundMethod) and bound to + * another. + * + * class Thing + * def square(n) + * n*n + * end + * end + * thing = Thing.new + * meth = thing.method(:square) + * + * meth.call(9) #=> 81 + * [ 1, 2, 3 ].collect(&meth) #=> [1, 4, 9] + * + */ + +/* + * call-seq: + * meth == other_meth => true or false + * + * Two method objects are equal if that are bound to the same + * object and contain the same body. + */ + + +static VALUE +method_eq(VALUE method, VALUE other) +{ + struct METHOD *m1, *m2; + + if (TYPE(other) != T_DATA + || RDATA(other)->dmark != (RUBY_DATA_FUNC) bm_mark) + return Qfalse; + if (CLASS_OF(method) != CLASS_OF(other)) + return Qfalse; + + Data_Get_Struct(method, struct METHOD, m1); + Data_Get_Struct(other, struct METHOD, m2); + + if (m1->oclass != m2->oclass || m1->rclass != m2->rclass || + m1->recv != m2->recv || m1->body != m2->body) + return Qfalse; + + return Qtrue; +} + +/* + * call-seq: + * meth.hash => integer + * + * Return a hash value corresponding to the method object. + */ + +static VALUE +method_hash(VALUE method) +{ + struct METHOD *m; + long hash; + + Data_Get_Struct(method, struct METHOD, m); + hash = (long)m->oclass; + hash ^= (long)m->rclass; + hash ^= (long)m->recv; + hash ^= (long)m->body; + + return INT2FIX(hash); +} + +/* + * call-seq: + * meth.unbind => unbound_method + * + * Dissociates meth from it's current receiver. 
The resulting + * UnboundMethod can subsequently be bound to a new object + * of the same class (see UnboundMethod). + */ + +static VALUE +method_unbind(VALUE obj) +{ + VALUE method; + struct METHOD *orig, *data; + + Data_Get_Struct(obj, struct METHOD, orig); + method = + Data_Make_Struct(rb_cUnboundMethod, struct METHOD, bm_mark, -1, data); + data->oclass = orig->oclass; + data->recv = Qundef; + data->id = orig->id; + data->body = orig->body; + data->rclass = orig->rclass; + data->oid = orig->oid; + OBJ_INFECT(method, obj); + + return method; +} + +/* + * call-seq: + * meth.receiver => object + * + * Returns the bound receiver of the method object. + */ + +static VALUE +method_receiver(VALUE obj) +{ + struct METHOD *data; + + Data_Get_Struct(obj, struct METHOD, data); + return data->recv; +} + +/* + * call-seq: + * meth.name => symbol + * + * Returns the name of the method. + */ + +static VALUE +method_name(VALUE obj) +{ + struct METHOD *data; + + Data_Get_Struct(obj, struct METHOD, data); + return ID2SYM(data->id); +} + +/* + * call-seq: + * meth.owner => class_or_module + * + * Returns the class or module that defines the method. + */ + +static VALUE +method_owner(VALUE obj) +{ + struct METHOD *data; + + Data_Get_Struct(obj, struct METHOD, data); + return data->oclass; +} + +/* + * call-seq: + * obj.method(sym) => method + * + * Looks up the named method as a receiver in obj, returning a + * Method object (or raising NameError). The + * Method object acts as a closure in obj's object + * instance, so instance variables and the value of self + * remain available. 
+ * + * class Demo + * def initialize(n) + * @iv = n + * end + * def hello() + * "Hello, @iv = #{@iv}" + * end + * end + * + * k = Demo.new(99) + * m = k.method(:hello) + * m.call #=> "Hello, @iv = 99" + * + * l = Demo.new('Fred') + * m = l.method("hello") + * m.call #=> "Hello, @iv = Fred" + */ + +VALUE +rb_obj_method(VALUE obj, VALUE vid) +{ + return mnew(CLASS_OF(obj), obj, rb_to_id(vid), rb_cMethod, Qfalse); +} + +VALUE +rb_obj_public_method(VALUE obj, VALUE vid) +{ + return mnew(CLASS_OF(obj), obj, rb_to_id(vid), rb_cMethod, Qtrue); +} + +/* + * call-seq: + * mod.instance_method(symbol) => unbound_method + * + * Returns an +UnboundMethod+ representing the given + * instance method in _mod_. + * + * class Interpreter + * def do_a() print "there, "; end + * def do_d() print "Hello "; end + * def do_e() print "!\n"; end + * def do_v() print "Dave"; end + * Dispatcher = { + * ?a => instance_method(:do_a), + * ?d => instance_method(:do_d), + * ?e => instance_method(:do_e), + * ?v => instance_method(:do_v) + * } + * def interpret(string) + * string.each_byte {|b| Dispatcher[b].bind(self).call } + * end + * end + * + * + * interpreter = Interpreter.new + * interpreter.interpret('dave') + * + * produces: + * + * Hello there, Dave! + */ + +static VALUE +rb_mod_instance_method(VALUE mod, VALUE vid) +{ + return mnew(mod, Qundef, rb_to_id(vid), rb_cUnboundMethod, Qfalse); +} + +static VALUE +rb_mod_public_instance_method(VALUE mod, VALUE vid) +{ + return mnew(mod, Qundef, rb_to_id(vid), rb_cUnboundMethod, Qtrue); +} + +/* + * call-seq: + * define_method(symbol, method) => new_method + * define_method(symbol) { block } => proc + * + * Defines an instance method in the receiver. The _method_ + * parameter can be a +Proc+, a +Method+ or an +UnboundMethod+ object. + * If a block is specified, it is used as the method body. This block + * is evaluated using instance_eval, a point that is + * tricky to demonstrate because define_method is private. 
+ * (This is why we resort to the +send+ hack in this example.) + * + * class A + * def fred + * puts "In Fred" + * end + * def create_method(name, &block) + * self.class.send(:define_method, name, &block) + * end + * define_method(:wilma) { puts "Charge it!" } + * end + * class B < A + * define_method(:barney, instance_method(:fred)) + * end + * a = B.new + * a.barney + * a.wilma + * a.create_method(:betty) { p self } + * a.betty + * + * produces: + * + * In Fred + * Charge it! + * # + */ + +static VALUE +rb_mod_define_method(int argc, VALUE *argv, VALUE mod) +{ + ID id; + VALUE body; + NODE *node; + int noex = NOEX_PUBLIC; + + if (argc == 1) { + id = rb_to_id(argv[0]); + body = rb_block_lambda(); + } + else if (argc == 2) { + id = rb_to_id(argv[0]); + body = argv[1]; + if (!rb_obj_is_method(body) && !rb_obj_is_proc(body)) { + rb_raise(rb_eTypeError, + "wrong argument type %s (expected Proc/Method)", + rb_obj_classname(body)); + } + } + else { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc); + } + + if (RDATA(body)->dmark == (RUBY_DATA_FUNC) bm_mark) { + struct METHOD *method = (struct METHOD *)DATA_PTR(body); + VALUE rclass = method->rclass; + if (rclass != mod) { + if (FL_TEST(rclass, FL_SINGLETON)) { + rb_raise(rb_eTypeError, + "can't bind singleton method to a different class"); + } + if (!RTEST(rb_class_inherited_p(mod, rclass))) { + rb_raise(rb_eTypeError, + "bind argument must be a subclass of %s", + rb_class2name(rclass)); + } + } + node = method->body; + } + else if (rb_obj_is_proc(body)) { + rb_proc_t *proc; + body = proc_dup(body); + GetProcPtr(body, proc); + if (BUILTIN_TYPE(proc->block.iseq) != T_NODE) { + proc->block.iseq->defined_method_id = id; + proc->block.iseq->klass = mod; + proc->is_lambda = Qtrue; + proc->is_from_method = Qtrue; + } + node = NEW_BMETHOD(body); + } + else { + /* type error */ + rb_raise(rb_eTypeError, "wrong argument type (expected Proc/Method)"); + } + + /* TODO: visibility */ + + rb_add_method(mod, id, 
node, noex); + return body; +} + +static VALUE +rb_obj_define_method(int argc, VALUE *argv, VALUE obj) +{ + VALUE klass = rb_singleton_class(obj); + + return rb_mod_define_method(argc, argv, klass); +} + + +/* + * MISSING: documentation + */ + +static VALUE +method_clone(VALUE self) +{ + VALUE clone; + struct METHOD *orig, *data; + + Data_Get_Struct(self, struct METHOD, orig); + clone = Data_Make_Struct(CLASS_OF(self), struct METHOD, bm_mark, -1, data); + CLONESETUP(clone, self); + *data = *orig; + + return clone; +} + +/* + * call-seq: + * meth.call(args, ...) => obj + * meth[args, ...] => obj + * + * Invokes the meth with the specified arguments, returning the + * method's return value. + * + * m = 12.method("+") + * m.call(3) #=> 15 + * m.call(20) #=> 32 + */ + +VALUE +rb_method_call(int argc, VALUE *argv, VALUE method) +{ + VALUE result = Qnil; /* OK */ + struct METHOD *data; + int state; + volatile int safe = -1; + + Data_Get_Struct(method, struct METHOD, data); + if (data->recv == Qundef) { + rb_raise(rb_eTypeError, "can't call unbound method; bind first"); + } + PUSH_TAG(); + if (OBJ_TAINTED(method)) { + safe = rb_safe_level(); + if (rb_safe_level() < 4) { + rb_set_safe_level_force(4); + } + } + if ((state = EXEC_TAG()) == 0) { + rb_thread_t *th = GET_THREAD(); + VALUE rb_vm_call(rb_thread_t * th, VALUE klass, VALUE recv, VALUE id, ID oid, + int argc, const VALUE *argv, const NODE *body, int nosuper); + + PASS_PASSED_BLOCK_TH(th); + result = rb_vm_call(th, data->oclass, data->recv, data->id, data->oid, + argc, argv, data->body, 0); + } + POP_TAG(); + if (safe >= 0) + rb_set_safe_level_force(safe); + if (state) + JUMP_TAG(state); + return result; +} + +/********************************************************************** + * + * Document-class: UnboundMethod + * + * Ruby supports two forms of objectified methods. Class + * Method is used to represent methods that are associated + * with a particular object: these method objects are bound to that + * object. 
Bound method objects for an object can be created using + * Object#method. + * + * Ruby also supports unbound methods; method objects that are not + * associated with a particular object. These can be created either by + * calling Module#instance_method or by calling + * unbind on a bound method object. The result of both of + * these is an UnboundMethod object. + * + * Unbound methods can only be called after they are bound to an + * object. That object must be a kind_of? the method's original + * class. + * + * class Square + * def area + * @side * @side + * end + * def initialize(side) + * @side = side + * end + * end + * + * area_un = Square.instance_method(:area) + * + * s = Square.new(12) + * area = area_un.bind(s) + * area.call #=> 144 + * + * Unbound methods are a reference to the method at the time it was + * objectified: subsequent changes to the underlying class will not + * affect the unbound method. + * + * class Test + * def test + * :original + * end + * end + * um = Test.instance_method(:test) + * class Test + * def test + * :modified + * end + * end + * t = Test.new + * t.test #=> :modified + * um.bind(t).call #=> :original + * + */ + +/* + * call-seq: + * umeth.bind(obj) -> method + * + * Bind umeth to obj. If Klass was the class + * from which umeth was obtained, + * obj.kind_of?(Klass) must be true. 
+ * + * class A + * def test + * puts "In test, class = #{self.class}" + * end + * end + * class B < A + * end + * class C < B + * end + * + * + * um = B.instance_method(:test) + * bm = um.bind(C.new) + * bm.call + * bm = um.bind(B.new) + * bm.call + * bm = um.bind(A.new) + * bm.call + * + * produces: + * + * In test, class = C + * In test, class = B + * prog.rb:16:in `bind': bind argument must be an instance of B (TypeError) + * from prog.rb:16 + */ + +static VALUE +umethod_bind(VALUE method, VALUE recv) +{ + struct METHOD *data, *bound; + + Data_Get_Struct(method, struct METHOD, data); + if (data->rclass != CLASS_OF(recv)) { + if (FL_TEST(data->rclass, FL_SINGLETON)) { + rb_raise(rb_eTypeError, + "singleton method called for a different object"); + } + if (!rb_obj_is_kind_of(recv, data->rclass)) { + rb_raise(rb_eTypeError, "bind argument must be an instance of %s", + rb_class2name(data->rclass)); + } + } + + method = Data_Make_Struct(rb_cMethod, struct METHOD, bm_mark, -1, bound); + *bound = *data; + bound->recv = recv; + bound->rclass = CLASS_OF(recv); + + return method; +} + +int +rb_node_arity(NODE* body) +{ + switch (nd_type(body)) { + case NODE_CFUNC: + if (body->nd_argc < 0) + return -1; + return check_argc(body->nd_argc); + case NODE_ZSUPER: + return -1; + case NODE_ATTRSET: + return 1; + case NODE_IVAR: + return 0; + case NODE_BMETHOD: + return rb_proc_arity(body->nd_cval); + case RUBY_VM_METHOD_NODE: + { + rb_iseq_t *iseq; + GetISeqPtr((VALUE)body->nd_body, iseq); + if (iseq->arg_rest == -1 && iseq->arg_opts == 0) { + return iseq->argc; + } + else { + return -(iseq->argc + 1 + iseq->arg_post_len); + } + } + default: + rb_raise(rb_eArgError, "invalid node 0x%x", nd_type(body)); + } +} + +/* + * call-seq: + * meth.arity => fixnum + * + * Returns an indication of the number of arguments accepted by a + * method. Returns a nonnegative integer for methods that take a fixed + * number of arguments. 
For Ruby methods that take a variable number of + * arguments, returns -n-1, where n is the number of required + * arguments. For methods written in C, returns -1 if the call takes a + * variable number of arguments. + * + * class C + * def one; end + * def two(a); end + * def three(*a); end + * def four(a, b); end + * def five(a, b, *c); end + * def six(a, b, *c, &d); end + * end + * c = C.new + * c.method(:one).arity #=> 0 + * c.method(:two).arity #=> 1 + * c.method(:three).arity #=> -1 + * c.method(:four).arity #=> 2 + * c.method(:five).arity #=> -3 + * c.method(:six).arity #=> -3 + * + * "cat".method(:size).arity #=> 0 + * "cat".method(:replace).arity #=> 1 + * "cat".method(:squeeze).arity #=> -1 + * "cat".method(:count).arity #=> -1 + */ + +static VALUE +method_arity_m(VALUE method) +{ + int n = method_arity(method); + return INT2FIX(n); +} + +static int +method_arity(VALUE method) +{ + struct METHOD *data; + + Data_Get_Struct(method, struct METHOD, data); + return rb_node_arity(data->body); +} + +int +rb_mod_method_arity(VALUE mod, ID id) +{ + NODE *node = rb_method_node(mod, id); + return rb_node_arity(node); +} + +int +rb_obj_method_arity(VALUE obj, ID id) +{ + return rb_mod_method_arity(CLASS_OF(obj), id); +} + +static rb_iseq_t * +get_method_iseq(VALUE method) +{ + struct METHOD *data; + NODE *body; + rb_iseq_t *iseq; + + Data_Get_Struct(method, struct METHOD, data); + body = data->body; + switch (nd_type(body)) { + case RUBY_VM_METHOD_NODE: + GetISeqPtr((VALUE)body->nd_body, iseq); + if (RUBY_VM_NORMAL_ISEQ_P(iseq)) break; + default: + return 0; + } + return iseq; +} + +/* + * call-seq: + * meth.source_location => [String, Fixnum] + * + * returns the ruby source filename and line number containing this method + * or nil if this method was not defined in ruby (i.e. 
native) + */ + +VALUE +rb_method_location(VALUE method) +{ + return iseq_location(get_method_iseq(method)); +} + +/* + * call-seq: + * meth.to_s => string + * meth.inspect => string + * + * Show the name of the underlying method. + * + * "cat".method(:count).inspect #=> "#" + */ + +static VALUE +method_inspect(VALUE method) +{ + struct METHOD *data; + VALUE str; + const char *s; + const char *sharp = "#"; + + Data_Get_Struct(method, struct METHOD, data); + str = rb_str_buf_new2("#<"); + s = rb_obj_classname(method); + rb_str_buf_cat2(str, s); + rb_str_buf_cat2(str, ": "); + + if (FL_TEST(data->oclass, FL_SINGLETON)) { + VALUE v = rb_iv_get(data->oclass, "__attached__"); + + if (data->recv == Qundef) { + rb_str_buf_append(str, rb_inspect(data->oclass)); + } + else if (data->recv == v) { + rb_str_buf_append(str, rb_inspect(v)); + sharp = "."; + } + else { + rb_str_buf_append(str, rb_inspect(data->recv)); + rb_str_buf_cat2(str, "("); + rb_str_buf_append(str, rb_inspect(v)); + rb_str_buf_cat2(str, ")"); + sharp = "."; + } + } + else { + rb_str_buf_cat2(str, rb_class2name(data->rclass)); + if (data->rclass != data->oclass) { + rb_str_buf_cat2(str, "("); + rb_str_buf_cat2(str, rb_class2name(data->oclass)); + rb_str_buf_cat2(str, ")"); + } + } + rb_str_buf_cat2(str, sharp); + rb_str_append(str, rb_id2str(data->oid)); + rb_str_buf_cat2(str, ">"); + + return str; +} + +static VALUE +mproc(VALUE method) +{ + return rb_funcall(Qnil, rb_intern("proc"), 0); +} + +static VALUE +mlambda(VALUE method) +{ + return rb_funcall(Qnil, rb_intern("lambda"), 0); +} + +static VALUE +bmcall(VALUE args, VALUE method) +{ + volatile VALUE a; + VALUE ret; + int argc; + + if (CLASS_OF(args) != rb_cArray) { + args = rb_ary_new3(1, args); + argc = 1; + } + else { + argc = check_argc(RARRAY_LEN(args)); + } + ret = rb_method_call(argc, RARRAY_PTR(args), method); + RB_GC_GUARD(a) = args; + return ret; +} + +VALUE +rb_proc_new( + VALUE (*func)(ANYARGS), /* VALUE yieldarg[, VALUE procarg] */ + VALUE 
val) +{ + VALUE procval = rb_iterate(mproc, 0, func, val); + return procval; +} + +/* + * call-seq: + * meth.to_proc => prc + * + * Returns a Proc object corresponding to this method. + */ + +static VALUE +method_proc(VALUE method) +{ + VALUE procval; + rb_proc_t *proc; + /* + * class Method + * def to_proc + * proc{|*args| + * self.call(*args) + * } + * end + * end + */ + procval = rb_iterate(mlambda, 0, bmcall, method); + GetProcPtr(procval, proc); + proc->is_from_method = 1; + return procval; +} + +static VALUE +rb_obj_is_method(VALUE m) +{ + if (TYPE(m) == T_DATA && RDATA(m)->dmark == (RUBY_DATA_FUNC) bm_mark) { + return Qtrue; + } + return Qfalse; +} + +/* + * call_seq: + * local_jump_error.exit_value => obj + * + * Returns the exit value associated with this +LocalJumpError+. + */ +static VALUE +localjump_xvalue(VALUE exc) +{ + return rb_iv_get(exc, "@exit_value"); +} + +/* + * call-seq: + * local_jump_error.reason => symbol + * + * The reason this block was terminated: + * :break, :redo, :retry, :next, :return, or :noreason. + */ + +static VALUE +localjump_reason(VALUE exc) +{ + return rb_iv_get(exc, "@reason"); +} + +/* + * call-seq: + * prc.binding => binding + * + * Returns the binding associated with prc. Note that + * Kernel#eval accepts either a Proc or a + * Binding object as its second parameter. 
+ * + * def fred(param) + * proc {} + * end + * + * b = fred(99) + * eval("param", b.binding) #=> 99 + */ +static VALUE +proc_binding(VALUE self) +{ + rb_proc_t *proc; + VALUE bindval = binding_alloc(rb_cBinding); + rb_binding_t *bind; + + GetProcPtr(self, proc); + GetBindingPtr(bindval, bind); + + if (TYPE(proc->block.iseq) == T_NODE) { + rb_raise(rb_eArgError, "Can't create Binding from C level Proc"); + } + + bind->env = proc->envval; + return bindval; +} + +static VALUE curry(VALUE dummy, VALUE args, int argc, VALUE *argv, VALUE passed_proc); + +static VALUE +make_curry_proc(VALUE proc, VALUE passed, VALUE arity) +{ + VALUE args = rb_ary_new3(3, proc, passed, arity); + rb_proc_t *procp; + int is_lambda; + + GetProcPtr(proc, procp); + is_lambda = procp->is_lambda; + rb_ary_freeze(passed); + rb_ary_freeze(args); + proc = rb_proc_new(curry, args); + GetProcPtr(proc, procp); + procp->is_lambda = is_lambda; + return proc; +} + +static VALUE +curry(VALUE dummy, VALUE args, int argc, VALUE *argv, VALUE passed_proc) +{ + VALUE proc, passed, arity; + proc = RARRAY_PTR(args)[0]; + passed = RARRAY_PTR(args)[1]; + arity = RARRAY_PTR(args)[2]; + + passed = rb_ary_plus(passed, rb_ary_new4(argc, argv)); + rb_ary_freeze(passed); + + if (RARRAY_LEN(passed) < FIX2INT(arity)) { + if (!NIL_P(passed_proc)) { + rb_warn("given block not used"); + } + arity = make_curry_proc(proc, passed, arity); + return arity; + } + else { + return rb_proc_call_with_block(proc, check_argc(RARRAY_LEN(passed)), + RARRAY_PTR(passed), passed_proc); + } +} + + /* + * call-seq: + * prc.curry => a_proc + * prc.curry(arity) => a_proc + * + * Returns a curried proc. If the optional arity argument is given, + * it determines the number of arguments. + * A curried proc receives some arguments. If a sufficient number of + * arguments are supplied, it passes the supplied arguments to the original + * proc and returns the result. Otherwise, returns another curried proc that + * takes the rest of arguments. 
+ * + * b = proc {|x, y, z| (x||0) + (y||0) + (z||0) } + * p b.curry[1][2][3] #=> 6 + * p b.curry[1, 2][3, 4] #=> 6 + * p b.curry(5)[1][2][3][4][5] #=> 6 + * p b.curry(5)[1, 2][3, 4][5] #=> 6 + * p b.curry(1)[1] #=> 1 + * + * b = proc {|x, y, z, *w| (x||0) + (y||0) + (z||0) + w.inject(0, &:+) } + * p b.curry[1][2][3] #=> 6 + * p b.curry[1, 2][3, 4] #=> 10 + * p b.curry(5)[1][2][3][4][5] #=> 15 + * p b.curry(5)[1, 2][3, 4][5] #=> 15 + * p b.curry(1)[1] #=> 1 + * + * b = lambda {|x, y, z| (x||0) + (y||0) + (z||0) } + * p b.curry[1][2][3] #=> 6 + * p b.curry[1, 2][3, 4] #=> wrong number of arguments (4 or 3) + * p b.curry(5) #=> wrong number of arguments (5 or 3) + * p b.curry(1) #=> wrong number of arguments (1 or 3) + * + * b = lambda {|x, y, z, *w| (x||0) + (y||0) + (z||0) + w.inject(0, &:+) } + * p b.curry[1][2][3] #=> 6 + * p b.curry[1, 2][3, 4] #=> 10 + * p b.curry(5)[1][2][3][4][5] #=> 15 + * p b.curry(5)[1, 2][3, 4][5] #=> 15 + * p b.curry(1) #=> wrong number of arguments (1 or 3) + * + * b = proc { :foo } + * p b.curry[] #=> :foo + */ +static VALUE +proc_curry(int argc, VALUE *argv, VALUE self) +{ + int sarity, marity = FIX2INT(proc_arity(self)); + VALUE arity, opt = Qfalse; + + if (marity < 0) { + marity = -marity - 1; + opt = Qtrue; + } + + rb_scan_args(argc, argv, "01", &arity); + if (NIL_P(arity)) { + arity = INT2FIX(marity); + } + else { + sarity = FIX2INT(arity); + if (proc_lambda_p(self) && (sarity < marity || (sarity > marity && !opt))) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", sarity, marity); + } + } + + return make_curry_proc(self, rb_ary_new(), arity); +} + +/* + * Proc objects are blocks of code that have been bound to + * a set of local variables. Once bound, the code may be called in + * different contexts and still access those variables. 
+ * + * def gen_times(factor) + * return Proc.new {|n| n*factor } + * end + * + * times3 = gen_times(3) + * times5 = gen_times(5) + * + * times3.call(12) #=> 36 + * times5.call(5) #=> 25 + * times3.call(times5.call(4)) #=> 60 + * + */ + +void +Init_Proc(void) +{ + /* Proc */ + rb_cProc = rb_define_class("Proc", rb_cObject); + rb_undef_alloc_func(rb_cProc); + rb_define_singleton_method(rb_cProc, "new", rb_proc_s_new, -1); + rb_define_method(rb_cProc, "call", proc_call, -1); + rb_define_method(rb_cProc, "[]", proc_call, -1); + rb_define_method(rb_cProc, "===", proc_call, -1); + rb_define_method(rb_cProc, "yield", proc_call, -1); + rb_define_method(rb_cProc, "to_proc", proc_to_proc, 0); + rb_define_method(rb_cProc, "arity", proc_arity, 0); + rb_define_method(rb_cProc, "clone", proc_clone, 0); + rb_define_method(rb_cProc, "dup", proc_dup, 0); + rb_define_method(rb_cProc, "==", proc_eq, 1); + rb_define_method(rb_cProc, "eql?", proc_eq, 1); + rb_define_method(rb_cProc, "hash", proc_hash, 0); + rb_define_method(rb_cProc, "to_s", proc_to_s, 0); + rb_define_method(rb_cProc, "lambda?", proc_lambda_p, 0); + rb_define_method(rb_cProc, "binding", proc_binding, 0); + rb_define_method(rb_cProc, "curry", proc_curry, -1); + rb_define_method(rb_cProc, "source_location", rb_proc_location, 0); + + /* Exceptions */ + rb_eLocalJumpError = rb_define_class("LocalJumpError", rb_eStandardError); + rb_define_method(rb_eLocalJumpError, "exit_value", localjump_xvalue, 0); + rb_define_method(rb_eLocalJumpError, "reason", localjump_reason, 0); + + rb_eSysStackError = rb_define_class("SystemStackError", rb_eException); + sysstack_error = rb_exc_new3(rb_eSysStackError, + rb_obj_freeze(rb_str_new2("stack level too deep"))); + OBJ_TAINT(sysstack_error); + OBJ_FREEZE(sysstack_error); + + /* utility functions */ + rb_define_global_function("proc", rb_block_proc, 0); + rb_define_global_function("lambda", proc_lambda, 0); + + /* Method */ + rb_cMethod = rb_define_class("Method", rb_cObject); + 
rb_undef_alloc_func(rb_cMethod); + rb_undef_method(CLASS_OF(rb_cMethod), "new"); + rb_define_method(rb_cMethod, "==", method_eq, 1); + rb_define_method(rb_cMethod, "eql?", method_eq, 1); + rb_define_method(rb_cMethod, "hash", method_hash, 0); + rb_define_method(rb_cMethod, "clone", method_clone, 0); + rb_define_method(rb_cMethod, "call", rb_method_call, -1); + rb_define_method(rb_cMethod, "[]", rb_method_call, -1); + rb_define_method(rb_cMethod, "arity", method_arity_m, 0); + rb_define_method(rb_cMethod, "inspect", method_inspect, 0); + rb_define_method(rb_cMethod, "to_s", method_inspect, 0); + rb_define_method(rb_cMethod, "to_proc", method_proc, 0); + rb_define_method(rb_cMethod, "receiver", method_receiver, 0); + rb_define_method(rb_cMethod, "name", method_name, 0); + rb_define_method(rb_cMethod, "owner", method_owner, 0); + rb_define_method(rb_cMethod, "unbind", method_unbind, 0); + rb_define_method(rb_cMethod, "source_location", rb_method_location, 0); + rb_define_method(rb_mKernel, "method", rb_obj_method, 1); + rb_define_method(rb_mKernel, "public_method", rb_obj_public_method, 1); + + /* UnboundMethod */ + rb_cUnboundMethod = rb_define_class("UnboundMethod", rb_cObject); + rb_undef_alloc_func(rb_cUnboundMethod); + rb_undef_method(CLASS_OF(rb_cUnboundMethod), "new"); + rb_define_method(rb_cUnboundMethod, "==", method_eq, 1); + rb_define_method(rb_cUnboundMethod, "eql?", method_eq, 1); + rb_define_method(rb_cUnboundMethod, "hash", method_hash, 0); + rb_define_method(rb_cUnboundMethod, "clone", method_clone, 0); + rb_define_method(rb_cUnboundMethod, "arity", method_arity_m, 0); + rb_define_method(rb_cUnboundMethod, "inspect", method_inspect, 0); + rb_define_method(rb_cUnboundMethod, "to_s", method_inspect, 0); + rb_define_method(rb_cUnboundMethod, "name", method_name, 0); + rb_define_method(rb_cUnboundMethod, "owner", method_owner, 0); + rb_define_method(rb_cUnboundMethod, "bind", umethod_bind, 1); + rb_define_method(rb_cUnboundMethod, "source_location", 
rb_method_location, 0); + + /* Module#*_method */ + rb_define_method(rb_cModule, "instance_method", rb_mod_instance_method, 1); + rb_define_method(rb_cModule, "public_instance_method", rb_mod_public_instance_method, 1); + rb_define_private_method(rb_cModule, "define_method", rb_mod_define_method, -1); + + /* Kernel */ + rb_define_method(rb_mKernel, "define_singleton_method", rb_obj_define_method, -1); +} + +/* + * Objects of class Binding encapsulate the execution + * context at some particular place in the code and retain this context + * for future use. The variables, methods, value of self, + * and possibly an iterator block that can be accessed in this context + * are all retained. Binding objects can be created using + * Kernel#binding, and are made available to the callback + * of Kernel#set_trace_func. + * + * These binding objects can be passed as the second argument of the + * Kernel#eval method, establishing an environment for the + * evaluation. + * + * class Demo + * def initialize(n) + * @secret = n + * end + * def getBinding + * return binding() + * end + * end + * + * k1 = Demo.new(99) + * b1 = k1.getBinding + * k2 = Demo.new(-3) + * b2 = k2.getBinding + * + * eval("@secret", b1) #=> 99 + * eval("@secret", b2) #=> -3 + * eval("@secret") #=> nil + * + * Binding objects have no class-specific methods. 
+ * + */ + +void +Init_Binding(void) +{ + rb_cBinding = rb_define_class("Binding", rb_cObject); + rb_undef_alloc_func(rb_cBinding); + rb_undef_method(CLASS_OF(rb_cBinding), "new"); + rb_define_method(rb_cBinding, "clone", binding_clone, 0); + rb_define_method(rb_cBinding, "dup", binding_dup, 0); + rb_define_method(rb_cBinding, "eval", bind_eval, -1); + rb_define_global_function("binding", rb_f_binding, 0); +} + diff --git a/process.c b/process.c new file mode 100644 index 0000000..909546a --- /dev/null +++ b/process.c @@ -0,0 +1,5243 @@ +/********************************************************************** + + process.c - + + $Author: yugui $ + created at: Tue Aug 10 14:30:50 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. + Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/io.h" +#include "ruby/util.h" +#include "vm_core.h" + +#include +#include +#include +#ifdef HAVE_STDLIB_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_FCNTL_H +#include +#endif + +#include +#include + +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#endif +#ifndef EXIT_FAILURE +#define EXIT_FAILURE 1 +#endif + +struct timeval rb_time_interval(VALUE); + +#ifdef HAVE_SYS_WAIT_H +# include +#endif +#ifdef HAVE_SYS_RESOURCE_H +# include +#endif +#ifdef HAVE_SYS_PARAM_H +# include +#endif +#ifndef MAXPATHLEN +# define MAXPATHLEN 1024 +#endif +#include "ruby/st.h" + +#ifdef __EMX__ +#undef HAVE_GETPGRP +#endif + +#include + +#ifdef HAVE_SYS_TIMES_H +#include +#endif + +#ifdef HAVE_GRP_H +#include +#endif + +#if defined(HAVE_TIMES) || defined(_WIN32) +static VALUE rb_cProcessTms; +#endif + +#ifndef WIFEXITED +#define WIFEXITED(w) (((w) & 0xff) == 0) +#endif +#ifndef WIFSIGNALED +#define WIFSIGNALED(w) (((w) & 0x7f) > 0 && (((w) & 0x7f) < 0x7f)) +#endif +#ifndef 
WIFSTOPPED +#define WIFSTOPPED(w) (((w) & 0xff) == 0x7f) +#endif +#ifndef WEXITSTATUS +#define WEXITSTATUS(w) (((w) >> 8) & 0xff) +#endif +#ifndef WTERMSIG +#define WTERMSIG(w) ((w) & 0x7f) +#endif +#ifndef WSTOPSIG +#define WSTOPSIG WEXITSTATUS +#endif + +#if defined(__APPLE__) && ( defined(__MACH__) || defined(__DARWIN__) ) && !defined(__MacOS_X__) +#define __MacOS_X__ 1 +#endif + +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) +#define HAVE_44BSD_SETUID 1 +#define HAVE_44BSD_SETGID 1 +#endif + +#ifdef __NetBSD__ +#undef HAVE_SETRUID +#undef HAVE_SETRGID +#endif + +#ifdef BROKEN_SETREUID +#define setreuid ruby_setreuid +#endif +#ifdef BROKEN_SETREGID +#define setregid ruby_setregid +#endif + +#if defined(HAVE_44BSD_SETUID) || defined(__MacOS_X__) +#if !defined(USE_SETREUID) && !defined(BROKEN_SETREUID) +#define OBSOLETE_SETREUID 1 +#endif +#if !defined(USE_SETREGID) && !defined(BROKEN_SETREGID) +#define OBSOLETE_SETREGID 1 +#endif +#endif + +#if SIZEOF_RLIM_T == SIZEOF_INT +# define RLIM2NUM(v) UINT2NUM(v) +# define NUM2RLIM(v) NUM2UINT(v) +#elif SIZEOF_RLIM_T == SIZEOF_LONG +# define RLIM2NUM(v) ULONG2NUM(v) +# define NUM2RLIM(v) NUM2ULONG(v) +#elif SIZEOF_RLIM_T == SIZEOF_LONG_LONG +# define RLIM2NUM(v) ULL2NUM(v) +# define NUM2RLIM(v) NUM2ULL(v) +#endif + +#define preserving_errno(stmts) \ + do {int saved_errno = errno; stmts; errno = saved_errno;} while (0) + + +/* + * call-seq: + * Process.pid => fixnum + * + * Returns the process id of this process. Not available on all + * platforms. + * + * Process.pid #=> 27415 + */ + +static VALUE +get_pid(void) +{ + rb_secure(2); + return PIDT2NUM(getpid()); +} + + +/* + * call-seq: + * Process.ppid => fixnum + * + * Returns the process id of the parent of this process. Returns + * untrustworthy value on Win32/64. Not available on all platforms. 
+ * + * puts "I am #{Process.pid}" + * Process.fork { puts "Dad is #{Process.ppid}" } + * + * produces: + * + * I am 27417 + * Dad is 27417 + */ + +static VALUE +get_ppid(void) +{ + rb_secure(2); + return PIDT2NUM(getppid()); +} + + +/********************************************************************* + * + * Document-class: Process::Status + * + * Process::Status encapsulates the information on the + * status of a running or terminated system process. The built-in + * variable $? is either +nil+ or a + * Process::Status object. + * + * fork { exit 99 } #=> 26557 + * Process.wait #=> 26557 + * $?.class #=> Process::Status + * $?.to_i #=> 25344 + * $? >> 8 #=> 99 + * $?.stopped? #=> false + * $?.exited? #=> true + * $?.exitstatus #=> 99 + * + * Posix systems record information on processes using a 16-bit + * integer. The lower bits record the process status (stopped, + * exited, signaled) and the upper bits possibly contain additional + * information (for example the program's return code in the case of + * exited processes). Pre Ruby 1.8, these bits were exposed directly + * to the Ruby program. Ruby now encapsulates these in a + * Process::Status object. To maximize compatibility, + * however, these objects retain a bit-oriented interface. In the + * descriptions that follow, when we talk about the integer value of + * _stat_, we're referring to this 16 bit value. + */ + +static VALUE rb_cProcessStatus; + +VALUE +rb_last_status_get(void) +{ + return GET_THREAD()->last_status; +} + +void +rb_last_status_set(int status, rb_pid_t pid) +{ + rb_thread_t *th = GET_THREAD(); + th->last_status = rb_obj_alloc(rb_cProcessStatus); + rb_iv_set(th->last_status, "status", INT2FIX(status)); + rb_iv_set(th->last_status, "pid", PIDT2NUM(pid)); +} + +static void +rb_last_status_clear(void) +{ + GET_THREAD()->last_status = Qnil; +} + +/* + * call-seq: + * stat.to_i => fixnum + * stat.to_int => fixnum + * + * Returns the bits in _stat_ as a Fixnum. 
Poking + * around in these bits is platform dependent. + * + * fork { exit 0xab } #=> 26566 + * Process.wait #=> 26566 + * sprintf('%04x', $?.to_i) #=> "ab00" + */ + +static VALUE +pst_to_i(VALUE st) +{ + return rb_iv_get(st, "status"); +} + +#define PST2INT(st) NUM2INT(pst_to_i(st)) + +/* + * call-seq: + * stat.pid => fixnum + * + * Returns the process ID that this status object represents. + * + * fork { exit } #=> 26569 + * Process.wait #=> 26569 + * $?.pid #=> 26569 + */ + +static VALUE +pst_pid(VALUE st) +{ + return rb_attr_get(st, rb_intern("pid")); +} + +static void +pst_message(VALUE str, rb_pid_t pid, int status) +{ + rb_str_catf(str, "pid %ld", (long)pid); + if (WIFSTOPPED(status)) { + int stopsig = WSTOPSIG(status); + const char *signame = ruby_signal_name(stopsig); + if (signame) { + rb_str_catf(str, " stopped SIG%s (signal %d)", signame, stopsig); + } + else { + rb_str_catf(str, " stopped signal %d", stopsig); + } + } + if (WIFSIGNALED(status)) { + int termsig = WTERMSIG(status); + const char *signame = ruby_signal_name(termsig); + if (signame) { + rb_str_catf(str, " SIG%s (signal %d)", signame, termsig); + } + else { + rb_str_catf(str, " signal %d", termsig); + } + } + if (WIFEXITED(status)) { + rb_str_catf(str, " exit %d", WEXITSTATUS(status)); + } +#ifdef WCOREDUMP + if (WCOREDUMP(status)) { + rb_str_cat2(str, " (core dumped)"); + } +#endif +} + + +/* + * call-seq: + * stat.to_s => string + * + * Show pid and exit status as a string. + */ + +static VALUE +pst_to_s(VALUE st) +{ + rb_pid_t pid; + int status; + VALUE str; + + pid = NUM2LONG(pst_pid(st)); + status = PST2INT(st); + + str = rb_str_buf_new(0); + pst_message(str, pid, status); + return str; +} + + +/* + * call-seq: + * stat.inspect => string + * + * Override the inspection method. 
+ */ + +static VALUE +pst_inspect(VALUE st) +{ + rb_pid_t pid; + int status; + VALUE vpid, str; + + vpid = pst_pid(st); + if (NIL_P(vpid)) { + return rb_sprintf("#<%s: uninitialized>", rb_class2name(CLASS_OF(st))); + } + pid = NUM2LONG(vpid); + status = PST2INT(st); + + str = rb_sprintf("#<%s: ", rb_class2name(CLASS_OF(st))); + pst_message(str, pid, status); + rb_str_cat2(str, ">"); + return str; +} + + +/* + * call-seq: + * stat == other => true or false + * + * Returns +true+ if the integer value of _stat_ + * equals other. + */ + +static VALUE +pst_equal(VALUE st1, VALUE st2) +{ + if (st1 == st2) return Qtrue; + return rb_equal(pst_to_i(st1), st2); +} + + +/* + * call-seq: + * stat & num => fixnum + * + * Logical AND of the bits in _stat_ with num. + * + * fork { exit 0x37 } + * Process.wait + * sprintf('%04x', $?.to_i) #=> "3700" + * sprintf('%04x', $? & 0x1e00) #=> "1600" + */ + +static VALUE +pst_bitand(VALUE st1, VALUE st2) +{ + int status = PST2INT(st1) & NUM2INT(st2); + + return INT2NUM(status); +} + + +/* + * call-seq: + * stat >> num => fixnum + * + * Shift the bits in _stat_ right num places. + * + * fork { exit 99 } #=> 26563 + * Process.wait #=> 26563 + * $?.to_i #=> 25344 + * $? >> 8 #=> 99 + */ + +static VALUE +pst_rshift(VALUE st1, VALUE st2) +{ + int status = PST2INT(st1) >> NUM2INT(st2); + + return INT2NUM(status); +} + + +/* + * call-seq: + * stat.stopped? => true or false + * + * Returns +true+ if this process is stopped. This is only + * returned if the corresponding wait call had the + * WUNTRACED flag set. + */ + +static VALUE +pst_wifstopped(VALUE st) +{ + int status = PST2INT(st); + + if (WIFSTOPPED(status)) + return Qtrue; + else + return Qfalse; +} + + +/* + * call-seq: + * stat.stopsig => fixnum or nil + * + * Returns the number of the signal that caused _stat_ to stop + * (or +nil+ if self is not stopped). 
+ */ + +static VALUE +pst_wstopsig(VALUE st) +{ + int status = PST2INT(st); + + if (WIFSTOPPED(status)) + return INT2NUM(WSTOPSIG(status)); + return Qnil; +} + + +/* + * call-seq: + * stat.signaled? => true or false + * + * Returns +true+ if _stat_ terminated because of + * an uncaught signal. + */ + +static VALUE +pst_wifsignaled(VALUE st) +{ + int status = PST2INT(st); + + if (WIFSIGNALED(status)) + return Qtrue; + else + return Qfalse; +} + + +/* + * call-seq: + * stat.termsig => fixnum or nil + * + * Returns the number of the signal that caused _stat_ to + * terminate (or +nil+ if self was not terminated by an + * uncaught signal). + */ + +static VALUE +pst_wtermsig(VALUE st) +{ + int status = PST2INT(st); + + if (WIFSIGNALED(status)) + return INT2NUM(WTERMSIG(status)); + return Qnil; +} + + +/* + * call-seq: + * stat.exited? => true or false + * + * Returns +true+ if _stat_ exited normally (for + * example using an exit() call or finishing the + * program). + */ + +static VALUE +pst_wifexited(VALUE st) +{ + int status = PST2INT(st); + + if (WIFEXITED(status)) + return Qtrue; + else + return Qfalse; +} + + +/* + * call-seq: + * stat.exitstatus => fixnum or nil + * + * Returns the least significant eight bits of the return code of + * _stat_. Only available if exited? is + * +true+. + * + * fork { } #=> 26572 + * Process.wait #=> 26572 + * $?.exited? #=> true + * $?.exitstatus #=> 0 + * + * fork { exit 99 } #=> 26573 + * Process.wait #=> 26573 + * $?.exited? #=> true + * $?.exitstatus #=> 99 + */ + +static VALUE +pst_wexitstatus(VALUE st) +{ + int status = PST2INT(st); + + if (WIFEXITED(status)) + return INT2NUM(WEXITSTATUS(status)); + return Qnil; +} + + +/* + * call-seq: + * stat.success? => true, false or nil + * + * Returns +true+ if _stat_ is successful, +false+ if not. + * Returns +nil+ if exited? is not +true+. 
+ */ + +static VALUE +pst_success_p(VALUE st) +{ + int status = PST2INT(st); + + if (!WIFEXITED(status)) + return Qnil; + return WEXITSTATUS(status) == EXIT_SUCCESS ? Qtrue : Qfalse; +} + + +/* + * call-seq: + * stat.coredump? => true or false + * + * Returns +true+ if _stat_ generated a coredump + * when it terminated. Not available on all platforms. + */ + +static VALUE +pst_wcoredump(VALUE st) +{ +#ifdef WCOREDUMP + int status = PST2INT(st); + + if (WCOREDUMP(status)) + return Qtrue; + else + return Qfalse; +#else + return Qfalse; +#endif +} + +#if !defined(HAVE_WAITPID) && !defined(HAVE_WAIT4) +#define NO_WAITPID +static st_table *pid_tbl; + +struct wait_data { + rb_pid_t pid; + int status; +}; + +static int +wait_each(rb_pid_t pid, int status, struct wait_data *data) +{ + if (data->status != -1) return ST_STOP; + + data->pid = pid; + data->status = status; + return ST_DELETE; +} + +static int +waitall_each(rb_pid_t pid, int status, VALUE ary) +{ + rb_last_status_set(status, pid); + rb_ary_push(ary, rb_assoc_new(PIDT2NUM(pid), rb_last_status_get())); + return ST_DELETE; +} +#else +struct waitpid_arg { + rb_pid_t pid; + int *st; + int flags; +}; +#endif + +static VALUE +rb_waitpid_blocking(void *data) +{ + rb_pid_t result; +#ifndef NO_WAITPID + struct waitpid_arg *arg = data; +#endif + +#if defined NO_WAITPID + result = wait(data); +#elif defined HAVE_WAITPID + result = waitpid(arg->pid, arg->st, arg->flags); +#else /* HAVE_WAIT4 */ + result = wait4(arg->pid, arg->st, arg->flags, NULL); +#endif + + return (VALUE)result; +} + +rb_pid_t +rb_waitpid(rb_pid_t pid, int *st, int flags) +{ + rb_pid_t result; +#ifndef NO_WAITPID + struct waitpid_arg arg; + + retry: + arg.pid = pid; + arg.st = st; + arg.flags = flags; + result = (rb_pid_t)rb_thread_blocking_region(rb_waitpid_blocking, &arg, + RUBY_UBF_PROCESS, 0); + if (result < 0) { + if (errno == EINTR) { + RUBY_VM_CHECK_INTS(); + goto retry; + } + return (rb_pid_t)-1; + } +#else /* NO_WAITPID */ + if (pid_tbl) { + 
st_data_t status, piddata = (st_data_t)pid; + if (pid == (rb_pid_t)-1) { + struct wait_data data; + data.pid = (rb_pid_t)-1; + data.status = -1; + st_foreach(pid_tbl, wait_each, (st_data_t)&data); + if (data.status != -1) { + rb_last_status_set(data.status, data.pid); + return data.pid; + } + } + else if (st_delete(pid_tbl, &piddata, &status)) { + rb_last_status_set(*st = (int)status, pid); + return pid; + } + } + + if (flags) { + rb_raise(rb_eArgError, "can't do waitpid with flags"); + } + + for (;;) { + result = (rb_pid_t)rb_thread_blocking_region(rb_waitpid_blocking, + st, RUBY_UBF_PROCESS, 0); + if (result < 0) { + if (errno == EINTR) { + rb_thread_schedule(); + continue; + } + return (rb_pid_t)-1; + } + if (result == pid || pid == (rb_pid_t)-1) { + break; + } + if (!pid_tbl) + pid_tbl = st_init_numtable(); + st_insert(pid_tbl, pid, (st_data_t)st); + if (!rb_thread_alone()) rb_thread_schedule(); + } +#endif + if (result > 0) { + rb_last_status_set(*st, result); + } + return result; +} + + +/* [MG]:FIXME: I wasn't sure how this should be done, since ::wait() + has historically been documented as if it didn't take any arguments + despite the fact that it's just an alias for ::waitpid(). The way I + have it below is more truthful, but a little confusing. + + I also took the liberty of putting in the pid values, as they're + pretty useful, and it looked as if the original 'ri' output was + supposed to contain them after "[...]depending on the value of + aPid:". + + The 'ansi' and 'bs' formats of the ri output don't display the + definition list for some reason, but the plain text one does. + */ + +/* + * call-seq: + * Process.wait() => fixnum + * Process.wait(pid=-1, flags=0) => fixnum + * Process.waitpid(pid=-1, flags=0) => fixnum + * + * Waits for a child process to exit, returns its process id, and + * sets $? to a Process::Status object + * containing information on that process. 
Which child it waits on + * depends on the value of _pid_: + * + * > 0:: Waits for the child whose process ID equals _pid_. + * + * 0:: Waits for any child whose process group ID equals that of the + * calling process. + * + * -1:: Waits for any child process (the default if no _pid_ is + * given). + * + * < -1:: Waits for any child whose process group ID equals the absolute + * value of _pid_. + * + * The _flags_ argument may be a logical or of the flag values + * Process::WNOHANG (do not block if no child available) + * or Process::WUNTRACED (return stopped children that + * haven't been reported). Not all flags are available on all + * platforms, but a flag value of zero will work on all platforms. + * + * Calling this method raises a SystemError if there are + * no child processes. Not available on all platforms. + * + * include Process + * fork { exit 99 } #=> 27429 + * wait #=> 27429 + * $?.exitstatus #=> 99 + * + * pid = fork { sleep 3 } #=> 27440 + * Time.now #=> 2008-03-08 19:56:16 +0900 + * waitpid(pid, Process::WNOHANG) #=> nil + * Time.now #=> 2008-03-08 19:56:16 +0900 + * waitpid(pid, 0) #=> 27440 + * Time.now #=> 2008-03-08 19:56:19 +0900 + */ + +static VALUE +proc_wait(int argc, VALUE *argv) +{ + VALUE vpid, vflags; + rb_pid_t pid; + int flags, status; + + rb_secure(2); + flags = 0; + if (argc == 0) { + pid = -1; + } + else { + rb_scan_args(argc, argv, "02", &vpid, &vflags); + pid = NUM2PIDT(vpid); + if (argc == 2 && !NIL_P(vflags)) { + flags = NUM2UINT(vflags); + } + } + if ((pid = rb_waitpid(pid, &status, flags)) < 0) + rb_sys_fail(0); + if (pid == 0) { + rb_last_status_clear(); + return Qnil; + } + return PIDT2NUM(pid); +} + + +/* + * call-seq: + * Process.wait2(pid=-1, flags=0) => [pid, status] + * Process.waitpid2(pid=-1, flags=0) => [pid, status] + * + * Waits for a child process to exit (see Process::waitpid for exact + * semantics) and returns an array containing the process id and the + * exit status (a Process::Status object) of that + * 
child. Raises a SystemError if there are no child + * processes. + * + * Process.fork { exit 99 } #=> 27437 + * pid, status = Process.wait2 + * pid #=> 27437 + * status.exitstatus #=> 99 + */ + +static VALUE +proc_wait2(int argc, VALUE *argv) +{ + VALUE pid = proc_wait(argc, argv); + if (NIL_P(pid)) return Qnil; + return rb_assoc_new(pid, rb_last_status_get()); +} + + +/* + * call-seq: + * Process.waitall => [ [pid1,status1], ...] + * + * Waits for all children, returning an array of + * _pid_/_status_ pairs (where _status_ is a + * Process::Status object). + * + * fork { sleep 0.2; exit 2 } #=> 27432 + * fork { sleep 0.1; exit 1 } #=> 27433 + * fork { exit 0 } #=> 27434 + * p Process.waitall + * + * produces: + * + * [[27434, #], + * [27433, #], + * [27432, #]] + */ + +static VALUE +proc_waitall(void) +{ + VALUE result; + rb_pid_t pid; + int status; + + rb_secure(2); + result = rb_ary_new(); +#ifdef NO_WAITPID + if (pid_tbl) { + st_foreach(pid_tbl, waitall_each, result); + } +#else + rb_last_status_clear(); +#endif + + for (pid = -1;;) { +#ifdef NO_WAITPID + pid = wait(&status); +#else + pid = rb_waitpid(-1, &status, 0); +#endif + if (pid == -1) { + if (errno == ECHILD) + break; +#ifdef NO_WAITPID + if (errno == EINTR) { + rb_thread_schedule(); + continue; + } +#endif + rb_sys_fail(0); + } +#ifdef NO_WAITPID + rb_last_status_set(status, pid); +#endif + rb_ary_push(result, rb_assoc_new(PIDT2NUM(pid), rb_last_status_get())); + } + return result; +} + +static inline ID +id_pid(void) +{ + ID pid; + CONST_ID(pid, "pid"); + return pid; +} + +static VALUE +detach_process_pid(VALUE thread) +{ + return rb_thread_local_aref(thread, id_pid()); +} + +static VALUE +detach_process_watcher(void *arg) +{ + rb_pid_t cpid, pid = (rb_pid_t)(VALUE)arg; + int status; + + while ((cpid = rb_waitpid(pid, &status, 0)) == 0) { + /* wait while alive */ + } + return rb_last_status_get(); +} + +VALUE +rb_detach_process(rb_pid_t pid) +{ + VALUE watcher = 
rb_thread_create(detach_process_watcher, (void*)(VALUE)pid); + rb_thread_local_aset(watcher, id_pid(), PIDT2NUM(pid)); + rb_define_singleton_method(watcher, "pid", detach_process_pid, 0); + return watcher; +} + + +/* + * call-seq: + * Process.detach(pid) => thread + * + * Some operating systems retain the status of terminated child + * processes until the parent collects that status (normally using + * some variant of wait(). If the parent never collects + * this status, the child stays around as a zombie process. + * Process::detach prevents this by setting up a + * separate Ruby thread whose sole job is to reap the status of the + * process _pid_ when it terminates. Use detach + * only when you do not intent to explicitly wait for the child to + * terminate. + * + * The waiting thread returns the exit status of the detached process + * when it terminates, so you can use Thread#join to + * know the result. If specified _pid_ is not a valid child process + * ID, the thread returns +nil+ immediately. + * + * In this first example, we don't reap the first child process, so + * it appears as a zombie in the process status display. + * + * p1 = fork { sleep 0.1 } + * p2 = fork { sleep 0.2 } + * Process.waitpid(p2) + * sleep 2 + * system("ps -ho pid,state -p #{p1}") + * + * produces: + * + * 27389 Z + * + * In the next example, Process::detach is used to reap + * the child automatically. + * + * p1 = fork { sleep 0.1 } + * p2 = fork { sleep 0.2 } + * Process.detach(p1) + * Process.waitpid(p2) + * sleep 2 + * system("ps -ho pid,state -p #{p1}") + * + * (produces no output) + */ + +static VALUE +proc_detach(VALUE obj, VALUE pid) +{ + rb_secure(2); + return rb_detach_process(NUM2PIDT(pid)); +} + +#ifndef HAVE_STRING_H +char *strtok(); +#endif + +void rb_thread_stop_timer_thread(void); +void rb_thread_start_timer_thread(void); +void rb_thread_reset_timer_thread(void); + +static int forked_child = 0; + +#define before_exec() \ + (rb_enable_interrupt(), (forked_child ? 
0 : (rb_thread_stop_timer_thread(), 1))) +#define after_exec() \ + (rb_thread_reset_timer_thread(), rb_thread_start_timer_thread(), forked_child = 0, rb_disable_interrupt()) +#define before_fork() before_exec() +#define after_fork() (GET_THREAD()->thrown_errinfo = 0, after_exec()) + +#include "dln.h" + +static void +security(const char *str) +{ + if (rb_env_path_tainted()) { + if (rb_safe_level() > 0) { + rb_raise(rb_eSecurityError, "Insecure PATH - %s", str); + } + } +} + +static int +proc_exec_v(char **argv, const char *prog) +{ + char fbuf[MAXPATHLEN]; + + if (!prog) + prog = argv[0]; + prog = dln_find_exe_r(prog, 0, fbuf, sizeof(fbuf)); + if (!prog) { + errno = ENOENT; + return -1; + } + +#if defined(__EMX__) || defined(OS2) + { +#define COMMAND "cmd.exe" + char *extension; + + if ((extension = strrchr(prog, '.')) != NULL && STRCASECMP(extension, ".bat") == 0) { + char **new_argv; + char *p; + int n; + + for (n = 0; argv[n]; n++) + /* no-op */; + new_argv = ALLOCA_N(char*, n + 2); + for (; n > 0; n--) + new_argv[n + 1] = argv[n]; + new_argv[1] = strcpy(ALLOCA_N(char, strlen(argv[0]) + 1), argv[0]); + for (p = new_argv[1]; *p != '\0'; p++) + if (*p == '/') + *p = '\\'; + new_argv[0] = COMMAND; + argv = new_argv; + prog = dln_find_exe_r(argv[0], 0, fbuf, sizeof(fbuf)); + if (!prog) { + errno = ENOENT; + return -1; + } + } + } +#endif /* __EMX__ */ + before_exec(); + execv(prog, argv); + preserving_errno(after_exec()); + return -1; +} + +int +rb_proc_exec_n(int argc, VALUE *argv, const char *prog) +{ + char **args; + int i; + + args = ALLOCA_N(char*, argc+1); + for (i=0; i()~&|\\$;'`\"\n",*s)) { +#if defined(__CYGWIN32__) || defined(__EMX__) + char fbuf[MAXPATHLEN]; + char *shell = dln_find_exe_r("sh", 0, fbuf, sizeof(fbuf)); + int status = -1; + before_exec(); + if (shell) + execl(shell, "sh", "-c", str, (char *) NULL); + else + status = system(str); + after_exec(); + if (status != -1) + exit(status); +#else + before_exec(); + execl("/bin/sh", "sh", "-c", str, 
(char *)NULL); + preserving_errno(after_exec()); +#endif + return -1; + } + } + a = argv = ALLOCA_N(char*, (s-str)/2+2); + ss = ALLOCA_N(char, s-str+1); + memcpy(ss, str, s-str); + ss[s-str] = '\0'; + if ((*a++ = strtok(ss, " \t")) != 0) { + while ((t = strtok(NULL, " \t")) != 0) { + *a++ = t; + } + *a = NULL; + } + if (argv[0]) { + return proc_exec_v(argv, 0); + } + errno = ENOENT; +#endif /* _WIN32 */ + return -1; +} + +#if defined(_WIN32) +#define HAVE_SPAWNV 1 +#endif + +#if !defined(HAVE_FORK) && defined(HAVE_SPAWNV) +#if defined(_WIN32) +#define proc_spawn_v(argv, prog) rb_w32_aspawn(P_NOWAIT, prog, argv) +#else +static rb_pid_t +proc_spawn_v(char **argv, char *prog) +{ + char fbuf[MAXPATHLEN]; + rb_pid_t status; + + if (!prog) + prog = argv[0]; + security(prog); + prog = dln_find_exe_r(prog, 0, fbuf, sizeof(fbuf)); + if (!prog) + return -1; + + before_exec(); + status = spawnv(P_WAIT, prog, argv); + rb_last_status_set(status == -1 ? 127 : status, 0); + after_exec(); + return status; +} +#endif + +static rb_pid_t +proc_spawn_n(int argc, VALUE *argv, VALUE prog) +{ + char **args; + int i; + + args = ALLOCA_N(char*, argc + 1); + for (i = 0; i < argc; i++) { + args[i] = RSTRING_PTR(argv[i]); + } + args[i] = (char*) 0; + if (args[0]) + return proc_spawn_v(args, prog ? RSTRING_PTR(prog) : 0); + return -1; +} + +#if defined(_WIN32) +#define proc_spawn(str) rb_w32_spawn(P_NOWAIT, str, 0) +#else +static rb_pid_t +proc_spawn(char *str) +{ + char fbuf[MAXPATHLEN]; + char *s, *t; + char **argv, **a; + rb_pid_t status; + + for (s = str; *s; s++) { + if (*s != ' ' && !ISALPHA(*s) && strchr("*?{}[]<>()~&|\\$;'`\"\n",*s)) { + char *shell = dln_find_exe_r("sh", 0, fbuf, sizeof(fbuf)); + before_exec(); + status = shell?spawnl(P_WAIT,shell,"sh","-c",str,(char*)NULL):system(str); + rb_last_status_set(status == -1 ? 
127 : status, 0); + after_exec(); + return status; + } + } + a = argv = ALLOCA_N(char*, (s - str) / 2 + 2); + s = ALLOCA_N(char, s - str + 1); + strcpy(s, str); + if (*a++ = strtok(s, " \t")) { + while (t = strtok(NULL, " \t")) + *a++ = t; + *a = NULL; + } + return argv[0] ? proc_spawn_v(argv, 0) : -1; +} +#endif +#endif + +static VALUE +hide_obj(VALUE obj) +{ + RBASIC(obj)->klass = 0; + return obj; +} + +enum { + EXEC_OPTION_PGROUP, + EXEC_OPTION_RLIMIT, + EXEC_OPTION_UNSETENV_OTHERS, + EXEC_OPTION_ENV, + EXEC_OPTION_CHDIR, + EXEC_OPTION_UMASK, + EXEC_OPTION_DUP2, + EXEC_OPTION_CLOSE, + EXEC_OPTION_OPEN, + EXEC_OPTION_CLOSE_OTHERS +}; + +static VALUE +check_exec_redirect_fd(VALUE v) +{ + VALUE tmp; + int fd; + if (FIXNUM_P(v)) { + fd = FIX2INT(v); + } + else if (!NIL_P(tmp = rb_check_convert_type(v, T_FILE, "IO", "to_io"))) { + rb_io_t *fptr; + GetOpenFile(tmp, fptr); + if (fptr->tied_io_for_writing) + rb_raise(rb_eArgError, "duplex IO redirection"); + fd = fptr->fd; + } + else { + rb_raise(rb_eArgError, "wrong exec redirect"); + } + if (fd < 0) { + rb_raise(rb_eArgError, "negative file descriptor"); + } + return INT2FIX(fd); +} + +static void +check_exec_redirect(VALUE key, VALUE val, VALUE options) +{ + int index; + VALUE ary, param; + VALUE path, flags, perm; + ID id; + + switch (TYPE(val)) { + case T_SYMBOL: + id = SYM2ID(val); + if (id == rb_intern("close")) { + index = EXEC_OPTION_CLOSE; + param = Qnil; + } + else { + rb_raise(rb_eArgError, "wrong exec redirect symbol: %s", + rb_id2name(id)); + } + break; + + case T_FILE: + val = check_exec_redirect_fd(val); + /* fall through */ + case T_FIXNUM: + index = EXEC_OPTION_DUP2; + param = val; + break; + + case T_ARRAY: + index = EXEC_OPTION_OPEN; + path = rb_ary_entry(val, 0); + FilePathValue(path); + flags = rb_ary_entry(val, 1); + if (NIL_P(flags)) + flags = INT2NUM(O_RDONLY); + else if (TYPE(flags) == T_STRING) + flags = INT2NUM(rb_io_modestr_oflags(StringValueCStr(flags))); + else + flags = rb_to_int(flags); 
+ perm = rb_ary_entry(val, 2); + perm = NIL_P(perm) ? INT2FIX(0644) : rb_to_int(perm); + param = hide_obj(rb_ary_new3(3, hide_obj(rb_str_dup(path)), + flags, perm)); + break; + + case T_STRING: + index = EXEC_OPTION_OPEN; + path = val; + FilePathValue(path); + if (TYPE(key) == T_FILE) + key = check_exec_redirect_fd(key); + if (FIXNUM_P(key) && (FIX2INT(key) == 1 || FIX2INT(key) == 2)) + flags = INT2NUM(O_WRONLY|O_CREAT|O_TRUNC); + else + flags = INT2NUM(O_RDONLY); + perm = INT2FIX(0644); + param = hide_obj(rb_ary_new3(3, hide_obj(rb_str_dup(path)), + flags, perm)); + break; + + default: + rb_raise(rb_eArgError, "wrong exec redirect action"); + } + + ary = rb_ary_entry(options, index); + if (NIL_P(ary)) { + ary = hide_obj(rb_ary_new()); + rb_ary_store(options, index, ary); + } + if (TYPE(key) != T_ARRAY) { + VALUE fd = check_exec_redirect_fd(key); + rb_ary_push(ary, hide_obj(rb_assoc_new(fd, param))); + } + else { + int i, n=0; + for (i = 0 ; i < RARRAY_LEN(key); i++) { + VALUE v = RARRAY_PTR(key)[i]; + VALUE fd = check_exec_redirect_fd(v); + rb_ary_push(ary, hide_obj(rb_assoc_new(fd, param))); + n++; + } + } +} + +#ifdef RLIM2NUM +static int rlimit_type_by_lname(const char *name); +#endif + +int +rb_exec_arg_addopt(struct rb_exec_arg *e, VALUE key, VALUE val) +{ + VALUE options = e->options; + ID id; +#ifdef RLIM2NUM + int rtype; +#endif + + rb_secure(2); + + switch (TYPE(key)) { + case T_SYMBOL: + id = SYM2ID(key); +#ifdef HAVE_SETPGID + if (id == rb_intern("pgroup")) { + if (!NIL_P(rb_ary_entry(options, EXEC_OPTION_PGROUP))) { + rb_raise(rb_eArgError, "pgroup option specified twice"); + } + if (!RTEST(val)) + val = Qfalse; + else if (val == Qtrue) + val = INT2FIX(0); + else { + pid_t pgroup = NUM2PIDT(val); + if (pgroup < 0) { + rb_raise(rb_eArgError, "negative process group ID : %ld", (long)pgroup); + } + val = PIDT2NUM(pgroup); + } + rb_ary_store(options, EXEC_OPTION_PGROUP, val); + } + else +#endif +#ifdef RLIM2NUM + if (strncmp("rlimit_", rb_id2name(id), 7) 
== 0 && + (rtype = rlimit_type_by_lname(rb_id2name(id)+7)) != -1) { + VALUE ary = rb_ary_entry(options, EXEC_OPTION_RLIMIT); + VALUE tmp, softlim, hardlim; + if (NIL_P(ary)) { + ary = hide_obj(rb_ary_new()); + rb_ary_store(options, EXEC_OPTION_RLIMIT, ary); + } + tmp = rb_check_array_type(val); + if (!NIL_P(tmp)) { + if (RARRAY_LEN(tmp) == 1) + softlim = hardlim = rb_to_int(rb_ary_entry(tmp, 0)); + else if (RARRAY_LEN(tmp) == 2) { + softlim = rb_to_int(rb_ary_entry(tmp, 0)); + hardlim = rb_to_int(rb_ary_entry(tmp, 1)); + } + else { + rb_raise(rb_eArgError, "wrong exec rlimit option"); + } + } + else { + softlim = hardlim = rb_to_int(val); + } + tmp = hide_obj(rb_ary_new3(3, INT2NUM(rtype), softlim, hardlim)); + rb_ary_push(ary, tmp); + } + else +#endif + if (id == rb_intern("unsetenv_others")) { + if (!NIL_P(rb_ary_entry(options, EXEC_OPTION_UNSETENV_OTHERS))) { + rb_raise(rb_eArgError, "unsetenv_others option specified twice"); + } + val = RTEST(val) ? Qtrue : Qfalse; + rb_ary_store(options, EXEC_OPTION_UNSETENV_OTHERS, val); + } + else if (id == rb_intern("chdir")) { + if (!NIL_P(rb_ary_entry(options, EXEC_OPTION_CHDIR))) { + rb_raise(rb_eArgError, "chdir option specified twice"); + } + FilePathValue(val); + rb_ary_store(options, EXEC_OPTION_CHDIR, + hide_obj(rb_str_dup(val))); + } + else if (id == rb_intern("umask")) { + mode_t cmask = NUM2LONG(val); + if (!NIL_P(rb_ary_entry(options, EXEC_OPTION_UMASK))) { + rb_raise(rb_eArgError, "umask option specified twice"); + } + rb_ary_store(options, EXEC_OPTION_UMASK, LONG2NUM(cmask)); + } + else if (id == rb_intern("close_others")) { + if (!NIL_P(rb_ary_entry(options, EXEC_OPTION_CLOSE_OTHERS))) { + rb_raise(rb_eArgError, "close_others option specified twice"); + } + val = RTEST(val) ? 
Qtrue : Qfalse; + rb_ary_store(options, EXEC_OPTION_CLOSE_OTHERS, val); + } + else if (id == rb_intern("in")) { + key = INT2FIX(0); + goto redirect; + } + else if (id == rb_intern("out")) { + key = INT2FIX(1); + goto redirect; + } + else if (id == rb_intern("err")) { + key = INT2FIX(2); + goto redirect; + } + else { + rb_raise(rb_eArgError, "wrong exec option symbol: %s", + rb_id2name(id)); + } + break; + + case T_FIXNUM: + case T_FILE: + case T_ARRAY: +redirect: + check_exec_redirect(key, val, options); + break; + + default: + rb_raise(rb_eArgError, "wrong exec option"); + } + + return ST_CONTINUE; +} + +static int +check_exec_options_i(st_data_t st_key, st_data_t st_val, st_data_t arg) +{ + VALUE key = (VALUE)st_key; + VALUE val = (VALUE)st_val; + struct rb_exec_arg *e = (struct rb_exec_arg *)arg; + return rb_exec_arg_addopt(e, key, val); +} + +static VALUE +check_exec_fds(VALUE options) +{ + VALUE h = rb_hash_new(); + VALUE ary; + int index, i; + int maxhint = -1; + + for (index = EXEC_OPTION_DUP2; index <= EXEC_OPTION_OPEN; index++) { + ary = rb_ary_entry(options, index); + if (NIL_P(ary)) + continue; + for (i = 0; i < RARRAY_LEN(ary); i++) { + VALUE elt = RARRAY_PTR(ary)[i]; + int fd = FIX2INT(RARRAY_PTR(elt)[0]); + if (RTEST(rb_hash_lookup(h, INT2FIX(fd)))) { + rb_raise(rb_eArgError, "fd %d specified twice", fd); + } + rb_hash_aset(h, INT2FIX(fd), Qtrue); + if (maxhint < fd) + maxhint = fd; + if (index == EXEC_OPTION_DUP2) { + fd = FIX2INT(RARRAY_PTR(elt)[1]); + if (maxhint < fd) + maxhint = fd; + } + } + } + if (rb_ary_entry(options, EXEC_OPTION_CLOSE_OTHERS) != Qfalse) { + rb_ary_store(options, EXEC_OPTION_CLOSE_OTHERS, INT2FIX(maxhint)); + } + return h; +} + +static void +rb_check_exec_options(VALUE opthash, struct rb_exec_arg *e) +{ + if (RHASH_EMPTY_P(opthash)) + return; + st_foreach(RHASH_TBL(opthash), check_exec_options_i, (st_data_t)e); +} + +static int +check_exec_env_i(st_data_t st_key, st_data_t st_val, st_data_t arg) +{ + VALUE key = 
(VALUE)st_key; + VALUE val = (VALUE)st_val; + VALUE env = (VALUE)arg; + char *k; + + k = StringValueCStr(key); + if (strchr(k, '=')) + rb_raise(rb_eArgError, "environment name contains a equal : %s", k); + + if (!NIL_P(val)) + StringValueCStr(val); + + rb_ary_push(env, hide_obj(rb_assoc_new(key, val))); + + return ST_CONTINUE; +} + +static VALUE +rb_check_exec_env(VALUE hash) +{ + VALUE env; + + env = hide_obj(rb_ary_new()); + st_foreach(RHASH_TBL(hash), check_exec_env_i, (st_data_t)env); + + return env; +} + +static VALUE +rb_check_argv(int argc, VALUE *argv) +{ + VALUE tmp, prog; + int i; + const char *name = 0; + + if (argc == 0) { + rb_raise(rb_eArgError, "wrong number of arguments"); + } + + prog = 0; + tmp = rb_check_array_type(argv[0]); + if (!NIL_P(tmp)) { + if (RARRAY_LEN(tmp) != 2) { + rb_raise(rb_eArgError, "wrong first argument"); + } + prog = RARRAY_PTR(tmp)[0]; + argv[0] = RARRAY_PTR(tmp)[1]; + SafeStringValue(prog); + StringValueCStr(prog); + prog = rb_str_new4(prog); + name = RSTRING_PTR(prog); + } + for (i = 0; i < argc; i++) { + SafeStringValue(argv[i]); + argv[i] = rb_str_new4(argv[i]); + StringValueCStr(argv[i]); + } + security(name ? 
name : RSTRING_PTR(argv[0])); + return prog; +} + +static VALUE +rb_exec_getargs(int *argc_p, VALUE **argv_p, int accept_shell, VALUE *env_ret, VALUE *opthash_ret, struct rb_exec_arg *e) +{ + VALUE hash, prog; + + if (0 < *argc_p) { + hash = rb_check_convert_type((*argv_p)[*argc_p-1], T_HASH, "Hash", "to_hash"); + if (!NIL_P(hash)) { + *opthash_ret = hash; + (*argc_p)--; + } + } + + if (0 < *argc_p) { + hash = rb_check_convert_type((*argv_p)[0], T_HASH, "Hash", "to_hash"); + if (!NIL_P(hash)) { + *env_ret = hash; + (*argc_p)--; + (*argv_p)++; + } + } + prog = rb_check_argv(*argc_p, *argv_p); + if (!prog) { + prog = (*argv_p)[0]; + if (accept_shell && *argc_p == 1) { + *argc_p = 0; + *argv_p = 0; + } + } + return prog; +} + +static void +rb_exec_fillarg(VALUE prog, int argc, VALUE *argv, VALUE env, VALUE opthash, struct rb_exec_arg *e) +{ + VALUE options; + MEMZERO(e, struct rb_exec_arg, 1); + options = hide_obj(rb_ary_new()); + e->options = options; + + if (!NIL_P(opthash)) { + rb_check_exec_options(opthash, e); + } + if (!NIL_P(env)) { + env = rb_check_exec_env(env); + rb_ary_store(options, EXEC_OPTION_ENV, env); + } + + e->argc = argc; + e->argv = argv; + e->prog = prog ? RSTRING_PTR(prog) : 0; +} + +VALUE +rb_exec_arg_init(int argc, VALUE *argv, int accept_shell, struct rb_exec_arg *e) +{ + VALUE prog; + VALUE env = Qnil, opthash = Qnil; + prog = rb_exec_getargs(&argc, &argv, accept_shell, &env, &opthash, e); + rb_exec_fillarg(prog, argc, argv, env, opthash, e); + return prog; +} + +void +rb_exec_arg_fixup(struct rb_exec_arg *e) +{ + e->redirect_fds = check_exec_fds(e->options); +} + +/* + * call-seq: + * exec([env,] command [, arg, ...] [,options]) + * + * Replaces the current process by running the given external _command_. + * If optional arguments, sequence of +arg+, are not given, that argument is + * taken as a line that is subject to shell expansion before being + * executed. 
If one or more +arg+ given, they + * are passed as parameters to _command_ with no shell + * expansion. If +command+ is a two-element array, the first + * element is the command to be executed, and the second argument is + * used as the argv[0] value, which may show up in process + * listings. In order to execute the command, one of the exec(2) + * system calls is used, so the running command may inherit some of the environment + * of the original program (including open file descriptors). + * + * The hash arguments, env and options, are same as + * system and spawn. + * See spawn for details. + * + * Raises SystemCallError if the _command_ couldn't execute (typically + * Errno::ENOENT when it was not found). + * + * exec "echo *" # echoes list of files in current directory + * # never get here + * + * + * exec "echo", "*" # echoes an asterisk + * # never get here + */ + +VALUE +rb_f_exec(int argc, VALUE *argv) +{ + struct rb_exec_arg earg; + + rb_exec_arg_init(argc, argv, Qtrue, &earg); + if (NIL_P(rb_ary_entry(earg.options, EXEC_OPTION_CLOSE_OTHERS))) + rb_exec_arg_addopt(&earg, ID2SYM(rb_intern("close_others")), Qfalse); + rb_exec_arg_fixup(&earg); + + rb_exec(&earg); + rb_sys_fail(earg.prog); + return Qnil; /* dummy */ +} + +/*#define DEBUG_REDIRECT*/ +#if defined(DEBUG_REDIRECT) + +#include + +static void +ttyprintf(const char *fmt, ...) 
+{ + va_list ap; + FILE *tty; + int save = errno; + tty = fopen("/dev/tty", "w"); + if (!tty) + return; + + va_start(ap, fmt); + vfprintf(tty, fmt, ap); + va_end(ap); + fclose(tty); + errno = save; +} + +static int +redirect_dup(int oldfd) +{ + int ret; + ret = dup(oldfd); + ttyprintf("dup(%d) => %d\n", oldfd, ret); + return ret; +} + +static int +redirect_dup2(int oldfd, int newfd) +{ + int ret; + ret = dup2(oldfd, newfd); + ttyprintf("dup2(%d, %d)\n", oldfd, newfd); + return ret; +} + +static int +redirect_close(int fd) +{ + int ret; + ret = close(fd); + ttyprintf("close(%d)\n", fd); + return ret; +} + +static int +redirect_open(const char *pathname, int flags, mode_t perm) +{ + int ret; + ret = open(pathname, flags, perm); + ttyprintf("open(\"%s\", 0x%x, 0%o) => %d\n", pathname, flags, perm, ret); + return ret; +} + +#else +#define redirect_dup(oldfd) dup(oldfd) +#define redirect_dup2(oldfd, newfd) dup2(oldfd, newfd) +#define redirect_close(fd) close(fd) +#define redirect_open(pathname, flags, perm) open(pathname, flags, perm) +#endif + +static int +save_redirect_fd(int fd, VALUE save) +{ + if (!NIL_P(save)) { + VALUE newary; + int save_fd = redirect_dup(fd); + if (save_fd == -1) return -1; + newary = rb_ary_entry(save, EXEC_OPTION_DUP2); + if (NIL_P(newary)) { + newary = hide_obj(rb_ary_new()); + rb_ary_store(save, EXEC_OPTION_DUP2, newary); + } + rb_ary_push(newary, + hide_obj(rb_assoc_new(INT2FIX(fd), INT2FIX(save_fd)))); + + newary = rb_ary_entry(save, EXEC_OPTION_CLOSE); + if (NIL_P(newary)) { + newary = hide_obj(rb_ary_new()); + rb_ary_store(save, EXEC_OPTION_CLOSE, newary); + } + rb_ary_push(newary, hide_obj(rb_assoc_new(INT2FIX(save_fd), Qnil))); + } + + return 0; +} + +static VALUE +save_env_i(VALUE i, VALUE ary, int argc, VALUE *argv) +{ + rb_ary_push(ary, hide_obj(rb_ary_dup(argv[0]))); + return Qnil; +} + +static void +save_env(VALUE save) +{ + if (!NIL_P(save) && NIL_P(rb_ary_entry(save, EXEC_OPTION_ENV))) { + VALUE env = rb_const_get(rb_cObject, 
rb_intern("ENV")); + if (RTEST(env)) { + VALUE ary = hide_obj(rb_ary_new()); + rb_block_call(env, rb_intern("each"), 0, 0, save_env_i, + (VALUE)ary); + rb_ary_store(save, EXEC_OPTION_ENV, ary); + } + rb_ary_store(save, EXEC_OPTION_UNSETENV_OTHERS, Qtrue); + } +} + +static int +intcmp(const void *a, const void *b) +{ + return *(int*)a - *(int*)b; +} + +static int +run_exec_dup2(VALUE ary, VALUE save) +{ + int n, i; + int ret; + int extra_fd = -1; + struct fd_pair { + int oldfd; + int newfd; + int older_index; + int num_newer; + } *pairs = 0; + + n = RARRAY_LEN(ary); + pairs = ALLOC_N(struct fd_pair, n); + + /* initialize oldfd and newfd: O(n) */ + for (i = 0; i < n; i++) { + VALUE elt = RARRAY_PTR(ary)[i]; + pairs[i].oldfd = FIX2INT(RARRAY_PTR(elt)[1]); + pairs[i].newfd = FIX2INT(RARRAY_PTR(elt)[0]); /* unique */ + pairs[i].older_index = -1; + } + + /* sort the table by oldfd: O(n log n) */ + qsort(pairs, n, sizeof(struct fd_pair), intcmp); + + /* initialize older_index and num_newer: O(n log n) */ + for (i = 0; i < n; i++) { + int newfd = pairs[i].newfd; + struct fd_pair key, *found; + key.oldfd = newfd; + found = bsearch(&key, pairs, n, sizeof(struct fd_pair), intcmp); + pairs[i].num_newer = 0; + if (found) { + while (pairs < found && (found-1)->oldfd == newfd) + found--; + while (found < pairs+n && found->oldfd == newfd) { + pairs[i].num_newer++; + found->older_index = i; + found++; + } + } + } + + /* non-cyclic redirection: O(n) */ + for (i = 0; i < n; i++) { + int j = i; + while (j != -1 && pairs[j].oldfd != -1 && pairs[j].num_newer == 0) { + if (save_redirect_fd(pairs[j].newfd, save) < 0) + goto fail; + ret = redirect_dup2(pairs[j].oldfd, pairs[j].newfd); + if (ret == -1) + goto fail; + pairs[j].oldfd = -1; + j = pairs[j].older_index; + if (j != -1) + pairs[j].num_newer--; + } + } + + /* cyclic redirection: O(n) */ + for (i = 0; i < n; i++) { + int j; + if (pairs[i].oldfd == -1) + continue; + if (pairs[i].oldfd == pairs[i].newfd) { /* self cycle */ +#ifdef 
F_GETFD + int fd = pairs[i].oldfd; + ret = fcntl(fd, F_GETFD); + if (ret == -1) + goto fail; + if (ret & FD_CLOEXEC) { + ret &= ~FD_CLOEXEC; + ret = fcntl(fd, F_SETFD, ret); + if (ret == -1) + goto fail; + } +#endif + pairs[i].oldfd = -1; + continue; + } + if (extra_fd == -1) { + extra_fd = redirect_dup(pairs[i].oldfd); + if (extra_fd == -1) + goto fail; + } + else { + ret = redirect_dup2(pairs[i].oldfd, extra_fd); + if (ret == -1) + goto fail; + } + pairs[i].oldfd = extra_fd; + j = pairs[i].older_index; + pairs[i].older_index = -1; + while (j != -1) { + ret = redirect_dup2(pairs[j].oldfd, pairs[j].newfd); + if (ret == -1) + goto fail; + pairs[j].oldfd = -1; + j = pairs[j].older_index; + } + } + if (extra_fd != -1) { + ret = redirect_close(extra_fd); + if (ret == -1) + goto fail; + } + + xfree(pairs); + return 0; + + fail: + xfree(pairs); + return -1; +} + +static int +run_exec_close(VALUE ary) +{ + int i, ret; + + for (i = 0; i < RARRAY_LEN(ary); i++) { + VALUE elt = RARRAY_PTR(ary)[i]; + int fd = FIX2INT(RARRAY_PTR(elt)[0]); + ret = redirect_close(fd); + if (ret == -1) + return -1; + } + return 0; +} + +static int +run_exec_open(VALUE ary, VALUE save) +{ + int i, ret; + + for (i = 0; i < RARRAY_LEN(ary);) { + VALUE elt = RARRAY_PTR(ary)[i]; + int fd = FIX2INT(RARRAY_PTR(elt)[0]); + VALUE param = RARRAY_PTR(elt)[1]; + char *path = RSTRING_PTR(RARRAY_PTR(param)[0]); + int flags = NUM2INT(RARRAY_PTR(param)[1]); + int perm = NUM2INT(RARRAY_PTR(param)[2]); + int need_close = 1; + int fd2 = redirect_open(path, flags, perm); + if (fd2 == -1) return -1; + while (i < RARRAY_LEN(ary) && + (elt = RARRAY_PTR(ary)[i], RARRAY_PTR(elt)[1] == param)) { + fd = FIX2INT(RARRAY_PTR(elt)[0]); + if (fd == fd2) { + need_close = 0; + } + else { + if (save_redirect_fd(fd, save) < 0) + return -1; + ret = redirect_dup2(fd2, fd); + if (ret == -1) return -1; + } + i++; + } + if (need_close) { + ret = redirect_close(fd2); + if (ret == -1) return -1; + } + } + return 0; +} + +#ifdef 
HAVE_SETPGID +static int +run_exec_pgroup(VALUE obj, VALUE save) +{ + /* + * If FD_CLOEXEC is available, rb_fork waits the child's execve. + * So setpgid is done in the child when rb_fork is returned in the parent. + * No race condition, even without setpgid from the parent. + * (Is there an environment which has setpgid but FD_CLOEXEC?) + */ + pid_t pgroup; + if (!NIL_P(save)) { + /* maybe meaningless with no fork environment... */ + rb_ary_store(save, EXEC_OPTION_PGROUP, PIDT2NUM(getpgrp())); + } + pgroup = NUM2PIDT(obj); + if (pgroup == 0) { + pgroup = getpid(); + } + return setpgid(getpid(), pgroup); +} +#endif + +#ifdef RLIM2NUM +static int +run_exec_rlimit(VALUE ary, VALUE save) +{ + int i; + for (i = 0; i < RARRAY_LEN(ary); i++) { + VALUE elt = RARRAY_PTR(ary)[i]; + int rtype = NUM2INT(RARRAY_PTR(elt)[0]); + struct rlimit rlim; + if (!NIL_P(save)) { + VALUE tmp, newary; + if (getrlimit(rtype, &rlim) == -1) + return -1; + tmp = hide_obj(rb_ary_new3(3, RARRAY_PTR(elt)[0], + RLIM2NUM(rlim.rlim_cur), + RLIM2NUM(rlim.rlim_max))); + newary = rb_ary_entry(save, EXEC_OPTION_RLIMIT); + if (NIL_P(newary)) { + newary = hide_obj(rb_ary_new()); + rb_ary_store(save, EXEC_OPTION_RLIMIT, newary); + } + rb_ary_push(newary, tmp); + } + rlim.rlim_cur = NUM2RLIM(RARRAY_PTR(elt)[1]); + rlim.rlim_max = NUM2RLIM(RARRAY_PTR(elt)[2]); + if (setrlimit(rtype, &rlim) == -1) + return -1; + } + return 0; +} +#endif + +int +rb_run_exec_options(const struct rb_exec_arg *e, struct rb_exec_arg *s) +{ + VALUE options = e->options; + VALUE soptions = Qnil; + VALUE obj; + + if (!RTEST(options)) + return 0; + + if (s) { + s->argc = 0; + s->argv = NULL; + s->prog = NULL; + s->options = soptions = hide_obj(rb_ary_new()); + s->redirect_fds = Qnil; + } + +#ifdef HAVE_SETPGID + obj = rb_ary_entry(options, EXEC_OPTION_PGROUP); + if (RTEST(obj)) { + if (run_exec_pgroup(obj, soptions) == -1) + return -1; + } +#endif + +#ifdef RLIM2NUM + obj = rb_ary_entry(options, EXEC_OPTION_RLIMIT); + if (!NIL_P(obj)) 
{ + if (run_exec_rlimit(obj, soptions) == -1) + return -1; + } +#endif + + obj = rb_ary_entry(options, EXEC_OPTION_UNSETENV_OTHERS); + if (RTEST(obj)) { + save_env(soptions); + rb_env_clear(); + } + + obj = rb_ary_entry(options, EXEC_OPTION_ENV); + if (!NIL_P(obj)) { + int i; + save_env(soptions); + for (i = 0; i < RARRAY_LEN(obj); i++) { + VALUE pair = RARRAY_PTR(obj)[i]; + VALUE key = RARRAY_PTR(pair)[0]; + VALUE val = RARRAY_PTR(pair)[1]; + if (NIL_P(val)) + ruby_setenv(StringValueCStr(key), 0); + else + ruby_setenv(StringValueCStr(key), StringValueCStr(val)); + } + } + + obj = rb_ary_entry(options, EXEC_OPTION_CHDIR); + if (!NIL_P(obj)) { + if (!NIL_P(soptions)) { + char *cwd = my_getcwd(); + rb_ary_store(soptions, EXEC_OPTION_CHDIR, + hide_obj(rb_str_new2(cwd))); + xfree(cwd); + } + if (chdir(RSTRING_PTR(obj)) == -1) + return -1; + } + + obj = rb_ary_entry(options, EXEC_OPTION_UMASK); + if (!NIL_P(obj)) { + mode_t mask = NUM2LONG(obj); + mode_t oldmask = umask(mask); /* never fail */ + if (!NIL_P(soptions)) + rb_ary_store(soptions, EXEC_OPTION_UMASK, LONG2NUM(oldmask)); + } + + obj = rb_ary_entry(options, EXEC_OPTION_DUP2); + if (!NIL_P(obj)) { + if (run_exec_dup2(obj, soptions) == -1) + return -1; + } + + obj = rb_ary_entry(options, EXEC_OPTION_CLOSE); + if (!NIL_P(obj)) { + if (!NIL_P(soptions)) + rb_warn("cannot close fd before spawn"); + else { + if (run_exec_close(obj) == -1) + return -1; + } + } + +#ifdef HAVE_FORK + obj = rb_ary_entry(options, EXEC_OPTION_CLOSE_OTHERS); + if (obj != Qfalse) { + rb_close_before_exec(3, FIX2LONG(obj), e->redirect_fds); + } +#endif + + obj = rb_ary_entry(options, EXEC_OPTION_OPEN); + if (!NIL_P(obj)) { + if (run_exec_open(obj, soptions) == -1) + return -1; + } + + return 0; +} + +int +rb_exec(const struct rb_exec_arg *e) +{ + int argc = e->argc; + VALUE *argv = e->argv; + const char *prog = e->prog; + + if (rb_run_exec_options(e, NULL) < 0) { + return -1; + } + + if (argc == 0) { + rb_proc_exec(prog); + } + else { + 
rb_proc_exec_n(argc, argv, prog); + } +#ifndef FD_CLOEXEC + preserving_errno({ + fprintf(stderr, "%s:%d: command not found: %s\n", + rb_sourcefile(), rb_sourceline(), prog); + }); +#endif + return -1; +} + +#ifdef HAVE_FORK +static int +rb_exec_atfork(void* arg) +{ + rb_thread_atfork_before_exec(); + return rb_exec(arg); +} +#endif + +#ifdef HAVE_FORK +#ifdef FD_CLOEXEC +#if SIZEOF_INT == SIZEOF_LONG +#define proc_syswait (VALUE (*)(VALUE))rb_syswait +#else +static VALUE +proc_syswait(VALUE pid) +{ + rb_syswait((int)pid); + return Qnil; +} +#endif +#endif + +static int +move_fds_to_avoid_crash(int *fdp, int n, VALUE fds) +{ + long min = 0; + int i; + for (i = 0; i < n; i++) { + int ret; + while (RTEST(rb_hash_lookup(fds, INT2FIX(fdp[i])))) { + if (min <= fdp[i]) + min = fdp[i]+1; + while (RTEST(rb_hash_lookup(fds, INT2FIX(min)))) + min++; + ret = fcntl(fdp[i], F_DUPFD, min); + if (ret == -1) + return -1; + close(fdp[i]); + fdp[i] = ret; + } + } + return 0; +} + +static int +pipe_nocrash(int filedes[2], VALUE fds) +{ + int ret; + ret = rb_pipe(filedes); + if (ret == -1) + return -1; + if (RTEST(fds)) { + int save = errno; + if (move_fds_to_avoid_crash(filedes, 2, fds) == -1) { + close(filedes[0]); + close(filedes[1]); + return -1; + } + errno = save; + } + return ret; +} + +/* + * Forks child process, and returns the process ID in the parent + * process. + * + * If +status+ is given, protects from any exceptions and sets the + * jump status to it. + * + * In the child process, just returns 0 if +chfunc+ is +NULL+. + * Otherwise +chfunc+ will be called with +charg+, and then the child + * process exits with +EXIT_SUCCESS+ when it returned zero. + * + * In the case of the function is called and returns non-zero value, + * the child process exits with non-+EXIT_SUCCESS+ value (normally + * 127). And, on the platforms where +FD_CLOEXEC+ is available, + * +errno+ is propagated to the parent process, and this function + * returns -1 in the parent process. 
On the other platforms, just + * returns pid. + * + * If fds is not Qnil, internal pipe for the errno propagation is + * arranged to avoid conflicts of the hash keys in +fds+. + * + * +chfunc+ must not raise any exceptions. + */ +rb_pid_t +rb_fork(int *status, int (*chfunc)(void*), void *charg, VALUE fds) +{ + rb_pid_t pid; + int err, state = 0; +#ifdef FD_CLOEXEC + int ep[2]; +#endif + +#define prefork() ( \ + rb_io_flush(rb_stdout), \ + rb_io_flush(rb_stderr) \ + ) + prefork(); + +#ifdef FD_CLOEXEC + if (chfunc) { + if (pipe_nocrash(ep, fds)) return -1; + if (fcntl(ep[1], F_SETFD, FD_CLOEXEC)) { + preserving_errno((close(ep[0]), close(ep[1]))); + return -1; + } + } +#endif + for (; before_fork(), (pid = fork()) < 0; prefork()) { + after_fork(); + switch (errno) { + case EAGAIN: +#if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN + case EWOULDBLOCK: +#endif + if (!status && !chfunc) { + rb_thread_sleep(1); + continue; + } + else { + rb_protect((VALUE (*)())rb_thread_sleep, 1, &state); + if (status) *status = state; + if (!state) continue; + } + default: +#ifdef FD_CLOEXEC + if (chfunc) { + preserving_errno((close(ep[0]), close(ep[1]))); + } +#endif + if (state && !status) rb_jump_tag(state); + return -1; + } + } + if (!pid) { + forked_child = 1; + if (chfunc) { +#ifdef FD_CLOEXEC + close(ep[0]); +#endif + if (!(*chfunc)(charg)) _exit(EXIT_SUCCESS); +#ifdef FD_CLOEXEC + err = errno; + write(ep[1], &err, sizeof(err)); +#endif +#if EXIT_SUCCESS == 127 + _exit(EXIT_FAILURE); +#else + _exit(127); +#endif + } + } + after_fork(); +#ifdef FD_CLOEXEC + if (pid && chfunc) { + close(ep[1]); + if ((state = read(ep[0], &err, sizeof(err))) < 0) { + err = errno; + } + close(ep[0]); + if (state) { + if (status) { + rb_protect(proc_syswait, (VALUE)pid, status); + } + else { + rb_syswait(pid); + } + errno = err; + return -1; + } + } +#endif + return pid; +} +#endif + +/* + * call-seq: + * Kernel.fork [{ block }] => fixnum or nil + * Process.fork [{ block }] => fixnum or nil + * + * 
Creates a subprocess. If a block is specified, that block is run + * in the subprocess, and the subprocess terminates with a status of + * zero. Otherwise, the +fork+ call returns twice, once in + * the parent, returning the process ID of the child, and once in + * the child, returning _nil_. The child process can exit using + * Kernel.exit! to avoid running any + * at_exit functions. The parent process should + * use Process.wait to collect the termination statuses + * of its children or use Process.detach to register + * disinterest in their status; otherwise, the operating system + * may accumulate zombie processes. + * + * The thread calling fork is the only thread in the created child process. + * fork doesn't copy other threads. + */ + +static VALUE +rb_f_fork(VALUE obj) +{ +#if defined(HAVE_FORK) && !defined(CANNOT_FORK_WITH_PTHREAD) + rb_pid_t pid; + + rb_secure(2); + + switch (pid = rb_fork(0, 0, 0, Qnil)) { + case 0: +#ifdef linux + after_exec(); +#endif + rb_thread_atfork(); + if (rb_block_given_p()) { + int status; + + rb_protect(rb_yield, Qundef, &status); + ruby_stop(status); + } + return Qnil; + + case -1: + rb_sys_fail("fork(2)"); + return Qnil; + + default: + return PIDT2NUM(pid); + } +#else + rb_notimplement(); +#endif +} + + +/* + * call-seq: + * Process.exit!(fixnum=-1) + * + * Exits the process immediately. No exit handlers are + * run. fixnum is returned to the underlying system as the + * exit status. 
+ * + * Process.exit!(0) + */ + +static VALUE +rb_f_exit_bang(int argc, VALUE *argv, VALUE obj) +{ + VALUE status; + int istatus; + + rb_secure(4); + if (argc > 0 && rb_scan_args(argc, argv, "01", &status) == 1) { + switch (status) { + case Qtrue: + istatus = EXIT_SUCCESS; + break; + case Qfalse: + istatus = EXIT_FAILURE; + break; + default: + istatus = NUM2INT(status); + break; + } + } + else { + istatus = EXIT_FAILURE; + } + _exit(istatus); + + return Qnil; /* not reached */ +} + +void +rb_exit(int status) +{ + if (GET_THREAD()->tag) { + VALUE args[2]; + + args[0] = INT2NUM(status); + args[1] = rb_str_new2("exit"); + rb_exc_raise(rb_class_new_instance(2, args, rb_eSystemExit)); + } + ruby_finalize(); + exit(status); +} + + +/* + * call-seq: + * exit(integer=0) + * Kernel::exit(integer=0) + * Process::exit(integer=0) + * + * Initiates the termination of the Ruby script by raising the + * SystemExit exception. This exception may be caught. The + * optional parameter is used to return a status code to the invoking + * environment. + * + * begin + * exit + * puts "never get here" + * rescue SystemExit + * puts "rescued a SystemExit exception" + * end + * puts "after begin block" + * + * produces: + * + * rescued a SystemExit exception + * after begin block + * + * Just prior to termination, Ruby executes any at_exit functions + * (see Kernel::at_exit) and runs any object finalizers (see + * ObjectSpace::define_finalizer). 
+ * + * at_exit { puts "at_exit function" } + * ObjectSpace.define_finalizer("string", proc { puts "in finalizer" }) + * exit + * + * produces: + * + * at_exit function + * in finalizer + */ + +VALUE +rb_f_exit(int argc, VALUE *argv) +{ + VALUE status; + int istatus; + + rb_secure(4); + if (argc > 0 && rb_scan_args(argc, argv, "01", &status) == 1) { + switch (status) { + case Qtrue: + istatus = EXIT_SUCCESS; + break; + case Qfalse: + istatus = EXIT_FAILURE; + break; + default: + istatus = NUM2INT(status); +#if EXIT_SUCCESS != 0 + if (istatus == 0) + istatus = EXIT_SUCCESS; +#endif + break; + } + } + else { + istatus = EXIT_SUCCESS; + } + rb_exit(istatus); + return Qnil; /* not reached */ +} + + +/* + * call-seq: + * abort + * Kernel::abort + * Process::abort + * + * Terminate execution immediately, effectively by calling + * Kernel.exit(1). If _msg_ is given, it is written + * to STDERR prior to terminating. + */ + +VALUE +rb_f_abort(int argc, VALUE *argv) +{ + extern void ruby_error_print(void); + + rb_secure(4); + if (argc == 0) { + if (!NIL_P(GET_THREAD()->errinfo)) { + ruby_error_print(); + } + rb_exit(EXIT_FAILURE); + } + else { + VALUE args[2]; + + rb_scan_args(argc, argv, "1", &args[1]); + StringValue(argv[0]); + rb_io_puts(argc, argv, rb_stderr); + args[0] = INT2NUM(EXIT_FAILURE); + rb_exc_raise(rb_class_new_instance(2, args, rb_eSystemExit)); + } + return Qnil; /* not reached */ +} + + +#if defined(POSIX_SIGNAL) +# define signal(a,b) posix_signal(a,b) +#endif + +void +rb_syswait(rb_pid_t pid) +{ + static int overriding; +#ifdef SIGHUP + RETSIGTYPE (*hfunc)(int) = 0; +#endif +#ifdef SIGQUIT + RETSIGTYPE (*qfunc)(int) = 0; +#endif + RETSIGTYPE (*ifunc)(int) = 0; + int status; + int i, hooked = Qfalse; + + if (!overriding) { +#ifdef SIGHUP + hfunc = signal(SIGHUP, SIG_IGN); +#endif +#ifdef SIGQUIT + qfunc = signal(SIGQUIT, SIG_IGN); +#endif + ifunc = signal(SIGINT, SIG_IGN); + overriding = Qtrue; + hooked = Qtrue; + } + + do { + i = rb_waitpid(pid, &status, 
0); + } while (i == -1 && errno == EINTR); + + if (hooked) { +#ifdef SIGHUP + signal(SIGHUP, hfunc); +#endif +#ifdef SIGQUIT + signal(SIGQUIT, qfunc); +#endif + signal(SIGINT, ifunc); + overriding = Qfalse; + } +} + +static rb_pid_t +rb_spawn_internal(int argc, VALUE *argv, int default_close_others) +{ + rb_pid_t status; + VALUE prog; + struct rb_exec_arg earg; +#if !defined HAVE_FORK + struct rb_exec_arg sarg; +#endif + + prog = rb_exec_arg_init(argc, argv, Qtrue, &earg); + if (NIL_P(rb_ary_entry(earg.options, EXEC_OPTION_CLOSE_OTHERS))) { + VALUE v = default_close_others ? Qtrue : Qfalse; + rb_exec_arg_addopt(&earg, ID2SYM(rb_intern("close_others")), v); + } + rb_exec_arg_fixup(&earg); + +#if defined HAVE_FORK + status = rb_fork(&status, rb_exec_atfork, &earg, earg.redirect_fds); + if (prog && earg.argc) earg.argv[0] = prog; +#else + if (rb_run_exec_options(&earg, &sarg) < 0) { + return -1; + } + + argc = earg.argc; + argv = earg.argv; + if (prog && argc) argv[0] = prog; +# if defined HAVE_SPAWNV + if (!argc) { + status = proc_spawn(RSTRING_PTR(prog)); + } + else { + status = proc_spawn_n(argc, argv, prog); + } +# if defined(_WIN32) + if (status == -1) + rb_last_status_set(0x7f << 8, 0); +# endif +# else + if (argc) prog = rb_ary_join(rb_ary_new4(argc, argv), rb_str_new2(" ")); + status = system(StringValuePtr(prog)); + rb_last_status_set((status & 0xff) << 8, 0); +# endif + + rb_run_exec_options(&sarg, NULL); +#endif + return status; +} + +rb_pid_t +rb_spawn(int argc, VALUE *argv) +{ + return rb_spawn_internal(argc, argv, Qtrue); +} + +/* + * call-seq: + * system([env,] cmd [, arg, ...] [,options]) => true, false or nil + * + * Executes _cmd_ in a subshell, returning +true+ if the command + * gives zero exit status, +false+ for non zero exit status. Returns + * +nil+ if command execution fails. An error status is available in + * $?. The arguments are processed in the same way as + * for Kernel::exec. 
+ * + * The hash arguments, env and options, are same as + * exec and spawn. + * See spawn for details. + * + * system("echo *") + * system("echo", "*") + * + * produces: + * + * config.h main.rb + * * + */ + +static VALUE +rb_f_system(int argc, VALUE *argv) +{ + int status; + +#if defined(SIGCLD) && !defined(SIGCHLD) +# define SIGCHLD SIGCLD +#endif + +#ifdef SIGCHLD + RETSIGTYPE (*chfunc)(int); + + chfunc = signal(SIGCHLD, SIG_DFL); +#endif + status = rb_spawn_internal(argc, argv, Qfalse); +#if defined(HAVE_FORK) || defined(HAVE_SPAWNV) + if (status > 0) { + rb_syswait(status); + } +#endif +#ifdef SIGCHLD + signal(SIGCHLD, chfunc); +#endif + if (status < 0) { + return Qnil; + } + status = PST2INT(rb_last_status_get()); + if (status == EXIT_SUCCESS) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * spawn([env,] cmd [, arg, ...] [,options]) => pid + * + * Similar to Kernel::system except for not waiting for + * end of _cmd_, but returns its pid. + * + * If a hash is given as +env+, the environment is + * updated by +env+ before exec(2) in the child process. + * If a pair in +env+ has nil as the value, the variable is deleted. + * + * # set FOO as BAR and unset BAZ. + * pid = spawn({"FOO"=>"BAR", "BAZ"=>nil}, command) + * + * If a hash is given as +options+, + * it specifies + * process group, + * resource limit, + * current directory, + * umask and + * redirects for the child process. + * Also, it can be specified to clear environment variables. + * + * The :unsetenv_others key in +options+ specifies + * to clear environment variables, other than specified by +env+. + * + * pid = spawn(command, :unsetenv_others=>true) # no environment variable + * pid = spawn({"FOO"=>"BAR"}, command, :unsetenv_others=>true) # FOO only + * + * The :pgroup key in +options+ specifies a process group. + * The corresponding value should be true, zero or positive integer. + * true and zero means the process should be a process leader. 
 *  Other values specify the process group the child should belong to.
 *
 *    pid = spawn(command, :pgroup=>true) # process leader
 *    pid = spawn(command, :pgroup=>10) # belongs to the process group 10
 *
 *  The :rlimit_foo key specifies a resource limit.
 *  foo should be one of the resource types, such as core.
 *  The corresponding value should be an integer or an array which has one or
 *  two integers: same as cur_limit and max_limit arguments for
 *  Process.setrlimit.
 *
 *    pid = spawn(command, :rlimit_core=>0) # never dump core.
 *    cur, max = Process.getrlimit(:CORE)
 *    pid = spawn(command, :rlimit_core=>[0,max]) # disable core temporarily.
 *    pid = spawn(command, :rlimit_core=>max) # enable core dump
 *
 *  The :chdir key in +options+ specifies the current directory.
 *
 *    pid = spawn(command, :chdir=>"/var/tmp")
 *
 *  The :umask key in +options+ specifies the umask.
 *
 *    pid = spawn(command, :umask=>077)
 *
 *  The :in, :out, :err, a fixnum, an IO and an array key specify a redirect.
 *  The redirection maps a file descriptor in the child process.
 *
 *  For example, stderr can be merged into stdout:
 *
 *    pid = spawn(command, :err=>:out)
 *    pid = spawn(command, STDERR=>STDOUT)
 *    pid = spawn(command, 2=>1)
 *
 *  The hash keys specify a file descriptor
 *  in the child process started by spawn.
 *  :err, STDERR and 2 specify the standard error stream.
 *
 *  The hash values specify a file descriptor
 *  in the parent process which invokes spawn.
 *  :out, STDOUT and 1 specify the standard output stream.
 *
 *  The standard output in the child process is not specified.
 *  So it is inherited from the parent process.
 *
 *  The standard input stream can be specified by :in, STDIN and 0.
 *
 *  A filename can be specified as a hash value.
 *
 *    pid = spawn(command, STDIN=>"/dev/null") # read mode
 *    pid = spawn(command, STDOUT=>"/dev/null") # write mode
 *    pid = spawn(command, STDERR=>"log") # write mode
 *    pid = spawn(command, 3=>"/dev/null") # read mode
 *
 *  For standard output and standard error,
 *  it is opened in write mode.
 *  Otherwise read mode is used.
 *
 *  For specifying flags and permission of file creation explicitly,
 *  an array is used instead.
 *
 *    pid = spawn(command, STDIN=>["file"]) # read mode is assumed
 *    pid = spawn(command, STDIN=>["file", "r"])
 *    pid = spawn(command, STDOUT=>["log", "w"]) # 0644 assumed
 *    pid = spawn(command, STDOUT=>["log", "w", 0600])
 *    pid = spawn(command, STDOUT=>["log", File::WRONLY|File::EXCL|File::CREAT, 0600])
 *
 *  The array specifies a filename, flags and permission.
 *  The flags can be a string or an integer.
 *  If the flags are omitted or nil, File::RDONLY is assumed.
 *  The permission should be an integer.
 *  If the permission is omitted or nil, 0644 is assumed.
 *
 *  If an array of IOs and integers is specified as a hash key,
 *  all the elements are redirected.
 *
 *    # standard output and standard error are redirected to a log file.
 *    pid = spawn(command, [STDOUT, STDERR]=>["log", "w"])
 *
 *  spawn closes all non-standard unspecified descriptors by default.
 *  The "standard" descriptors are 0, 1 and 2.
 *  This behavior is specified by the :close_others option.
 *  :close_others doesn't affect the standard descriptors, which are
 *  closed only if :close is specified explicitly.
 *
 *    pid = spawn(command, :close_others=>true)  # close 3,4,5,... (default)
 *    pid = spawn(command, :close_others=>false) # don't close 3,4,5,...
 *
 *  :close_others is true by default for spawn and IO.popen.
 *
 *  So IO.pipe and spawn can be used as IO.popen.
 *
 *    # similar to r = IO.popen(command)
 *    r, w = IO.pipe
 *    pid = spawn(command, STDOUT=>w)   # r, w is closed in the child process.
+ * w.close + * + * :close is specified as a hash value to close a fd individualy. + * + * f = open(foo) + * system(command, f=>:close) # don't inherit f. + * + * It is also possible to exchange file descriptors. + * + * pid = spawn(command, STDOUT=>STDERR, STDERR=>STDOUT) + * + * The hash keys specify file descriptors in the child process. + * The hash values specifies file descriptors in the parent process. + * So the above specifies exchanging STDOUT and STDERR. + * Internally, +spawn+ uses an extra file descriptor to resolve such cyclic + * file descriptor mapping. + * + */ + +static VALUE +rb_f_spawn(int argc, VALUE *argv) +{ + rb_pid_t pid; + + pid = rb_spawn(argc, argv); + if (pid == -1) rb_sys_fail(RSTRING_PTR(argv[0])); +#if defined(HAVE_FORK) || defined(HAVE_SPAWNV) + return PIDT2NUM(pid); +#else + return Qnil; +#endif +} + +/* + * call-seq: + * sleep([duration]) => fixnum + * + * Suspends the current thread for _duration_ seconds (which may be any number, + * including a +Float+ with fractional seconds). Returns the actual number of + * seconds slept (rounded), which may be less than that asked for if another + * thread calls Thread#run. Zero arguments causes +sleep+ to sleep + * forever. + * + * Time.new #=> 2008-03-08 19:56:19 +0900 + * sleep 1.2 #=> 1 + * Time.new #=> 2008-03-08 19:56:20 +0900 + * sleep 1.9 #=> 2 + * Time.new #=> 2008-03-08 19:56:22 +0900 + */ + +static VALUE +rb_f_sleep(int argc, VALUE *argv) +{ + time_t beg, end; + + beg = time(0); + if (argc == 0) { + rb_thread_sleep_forever(); + } + else if (argc == 1) { + rb_thread_wait_for(rb_time_interval(argv[0])); + } + else { + rb_raise(rb_eArgError, "wrong number of arguments"); + } + + end = time(0) - beg; + + return INT2FIX(end); +} + + +/* + * call-seq: + * Process.getpgrp => integer + * + * Returns the process group ID for this process. Not available on + * all platforms. 
+ * + * Process.getpgid(0) #=> 25527 + * Process.getpgrp #=> 25527 + */ + +static VALUE +proc_getpgrp(void) +{ +#if defined(HAVE_GETPGRP) && defined(GETPGRP_VOID) || defined(HAVE_GETPGID) + rb_pid_t pgrp; +#endif + + rb_secure(2); +#if defined(HAVE_GETPGRP) && defined(GETPGRP_VOID) + pgrp = getpgrp(); + if (pgrp < 0) rb_sys_fail(0); + return PIDT2NUM(pgrp); +#else +# ifdef HAVE_GETPGID + pgrp = getpgid(0); + if (pgrp < 0) rb_sys_fail(0); + return PIDT2NUM(pgrp); +# else + rb_notimplement(); +# endif +#endif +} + + +/* + * call-seq: + * Process.setpgrp => 0 + * + * Equivalent to setpgid(0,0). Not available on all + * platforms. + */ + +static VALUE +proc_setpgrp(void) +{ + rb_secure(2); + /* check for posix setpgid() first; this matches the posix */ + /* getpgrp() above. It appears that configure will set SETPGRP_VOID */ + /* even though setpgrp(0,0) would be preferred. The posix call avoids */ + /* this confusion. */ +#ifdef HAVE_SETPGID + if (setpgid(0,0) < 0) rb_sys_fail(0); +#elif defined(HAVE_SETPGRP) && defined(SETPGRP_VOID) + if (setpgrp() < 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return INT2FIX(0); +} + + +/* + * call-seq: + * Process.getpgid(pid) => integer + * + * Returns the process group ID for the given process id. Not + * available on all platforms. + * + * Process.getpgid(Process.ppid()) #=> 25527 + */ + +static VALUE +proc_getpgid(VALUE obj, VALUE pid) +{ +#if defined(HAVE_GETPGID) && !defined(__CHECKER__) + rb_pid_t i; + + rb_secure(2); + i = getpgid(NUM2PIDT(pid)); + if (i < 0) rb_sys_fail(0); + return PIDT2NUM(i); +#else + rb_notimplement(); +#endif +} + + +/* + * call-seq: + * Process.setpgid(pid, integer) => 0 + * + * Sets the process group ID of _pid_ (0 indicates this + * process) to integer. Not available on all platforms. 
+ */ + +static VALUE +proc_setpgid(VALUE obj, VALUE pid, VALUE pgrp) +{ +#ifdef HAVE_SETPGID + rb_pid_t ipid, ipgrp; + + rb_secure(2); + ipid = NUM2PIDT(pid); + ipgrp = NUM2PIDT(pgrp); + + if (setpgid(ipid, ipgrp) < 0) rb_sys_fail(0); + return INT2FIX(0); +#else + rb_notimplement(); +#endif +} + + +/* + * call-seq: + * Process.setsid => fixnum + * + * Establishes this process as a new session and process group + * leader, with no controlling tty. Returns the session id. Not + * available on all platforms. + * + * Process.setsid #=> 27422 + */ + +static VALUE +proc_setsid(void) +{ +#if defined(HAVE_SETSID) + rb_pid_t pid; + + rb_secure(2); + pid = setsid(); + if (pid < 0) rb_sys_fail(0); + return PIDT2NUM(pid); +#elif defined(HAVE_SETPGRP) && defined(TIOCNOTTY) + rb_pid_t pid; + int ret; + + rb_secure(2); + pid = getpid(); +#if defined(SETPGRP_VOID) + ret = setpgrp(); + /* If `pid_t setpgrp(void)' is equivalent to setsid(), + `ret' will be the same value as `pid', and following open() will fail. + In Linux, `int setpgrp(void)' is equivalent to setpgid(0, 0). */ +#else + ret = setpgrp(0, pid); +#endif + if (ret == -1) rb_sys_fail(0); + + if ((fd = open("/dev/tty", O_RDWR)) >= 0) { + ioctl(fd, TIOCNOTTY, NULL); + close(fd); + } + return PIDT2NUM(pid); +#else + rb_notimplement(); +#endif +} + + +/* + * call-seq: + * Process.getpriority(kind, integer) => fixnum + * + * Gets the scheduling priority for specified process, process group, + * or user. kind indicates the kind of entity to find: one + * of Process::PRIO_PGRP, + * Process::PRIO_USER, or + * Process::PRIO_PROCESS. _integer_ is an id + * indicating the particular process, process group, or user (an id + * of 0 means _current_). Lower priorities are more favorable + * for scheduling. Not available on all platforms. 
+ * + * Process.getpriority(Process::PRIO_USER, 0) #=> 19 + * Process.getpriority(Process::PRIO_PROCESS, 0) #=> 19 + */ + +static VALUE +proc_getpriority(VALUE obj, VALUE which, VALUE who) +{ +#ifdef HAVE_GETPRIORITY + int prio, iwhich, iwho; + + rb_secure(2); + iwhich = NUM2INT(which); + iwho = NUM2INT(who); + + errno = 0; + prio = getpriority(iwhich, iwho); + if (errno) rb_sys_fail(0); + return INT2FIX(prio); +#else + rb_notimplement(); +#endif +} + + +/* + * call-seq: + * Process.setpriority(kind, integer, priority) => 0 + * + * See Process#getpriority. + * + * Process.setpriority(Process::PRIO_USER, 0, 19) #=> 0 + * Process.setpriority(Process::PRIO_PROCESS, 0, 19) #=> 0 + * Process.getpriority(Process::PRIO_USER, 0) #=> 19 + * Process.getpriority(Process::PRIO_PROCESS, 0) #=> 19 + */ + +static VALUE +proc_setpriority(VALUE obj, VALUE which, VALUE who, VALUE prio) +{ +#ifdef HAVE_GETPRIORITY + int iwhich, iwho, iprio; + + rb_secure(2); + iwhich = NUM2INT(which); + iwho = NUM2INT(who); + iprio = NUM2INT(prio); + + if (setpriority(iwhich, iwho, iprio) < 0) + rb_sys_fail(0); + return INT2FIX(0); +#else + rb_notimplement(); +#endif +} + +#if defined(RLIM2NUM) +static int +rlimit_resource_name2int(const char *name, int casetype) +{ + size_t len = strlen(name); + if (16 < len) return -1; + if (casetype == 1) { + int i; + char *name2 = ALLOCA_N(char, len+1); + for (i = 0; i < len; i++) { + if (!ISLOWER(name[i])) + return -1; + name2[i] = TOUPPER(name[i]); + } + name2[len] = '\0'; + name = name2; + } + + switch (*name) { + case 'A': +#ifdef RLIMIT_AS + if (strcmp(name, "AS") == 0) return RLIMIT_AS; +#endif + break; + + case 'C': +#ifdef RLIMIT_CORE + if (strcmp(name, "CORE") == 0) return RLIMIT_CORE; +#endif +#ifdef RLIMIT_CPU + if (strcmp(name, "CPU") == 0) return RLIMIT_CPU; +#endif + break; + + case 'D': +#ifdef RLIMIT_DATA + if (strcmp(name, "DATA") == 0) return RLIMIT_DATA; +#endif + break; + + case 'F': +#ifdef RLIMIT_FSIZE + if (strcmp(name, "FSIZE") == 0) 
return RLIMIT_FSIZE; +#endif + break; + + case 'M': +#ifdef RLIMIT_MEMLOCK + if (strcmp(name, "MEMLOCK") == 0) return RLIMIT_MEMLOCK; +#endif + break; + + case 'N': +#ifdef RLIMIT_NOFILE + if (strcmp(name, "NOFILE") == 0) return RLIMIT_NOFILE; +#endif +#ifdef RLIMIT_NPROC + if (strcmp(name, "NPROC") == 0) return RLIMIT_NPROC; +#endif + break; + + case 'R': +#ifdef RLIMIT_RSS + if (strcmp(name, "RSS") == 0) return RLIMIT_RSS; +#endif + break; + + case 'S': +#ifdef RLIMIT_STACK + if (strcmp(name, "STACK") == 0) return RLIMIT_STACK; +#endif +#ifdef RLIMIT_SBSIZE + if (strcmp(name, "SBSIZE") == 0) return RLIMIT_SBSIZE; +#endif + break; + } + return -1; +} + +static int +rlimit_type_by_hname(const char *name) +{ + return rlimit_resource_name2int(name, 0); +} + +static int +rlimit_type_by_lname(const char *name) +{ + return rlimit_resource_name2int(name, 1); +} + +static int +rlimit_resource_type(VALUE rtype) +{ + const char *name; + VALUE v; + int r; + + switch (TYPE(rtype)) { + case T_SYMBOL: + name = rb_id2name(SYM2ID(rtype)); + break; + + default: + v = rb_check_string_type(rtype); + if (!NIL_P(v)) { + rtype = v; + case T_STRING: + name = StringValueCStr(rtype); + break; + } + /* fall through */ + + case T_FIXNUM: + case T_BIGNUM: + return NUM2INT(rtype); + } + + r = rlimit_type_by_hname(name); + if (r != -1) + return r; + + rb_raise(rb_eArgError, "invalid resource name: %s", name); +} + +static rlim_t +rlimit_resource_value(VALUE rval) +{ + const char *name; + VALUE v; + + switch (TYPE(rval)) { + case T_SYMBOL: + name = rb_id2name(SYM2ID(rval)); + break; + + default: + v = rb_check_string_type(rval); + if (!NIL_P(v)) { + rval = v; + case T_STRING: + name = StringValueCStr(rval); + break; + } + /* fall through */ + + case T_FIXNUM: + case T_BIGNUM: + return NUM2RLIM(rval); + } + +#ifdef RLIM_INFINITY + if (strcmp(name, "INFINITY") == 0) return RLIM_INFINITY; +#endif +#ifdef RLIM_SAVED_MAX + if (strcmp(name, "SAVED_MAX") == 0) return RLIM_SAVED_MAX; +#endif +#ifdef 
RLIM_SAVED_CUR + if (strcmp(name, "SAVED_CUR") == 0) return RLIM_SAVED_CUR; +#endif + rb_raise(rb_eArgError, "invalid resource value: %s", name); +} +#endif + +/* + * call-seq: + * Process.getrlimit(resource) => [cur_limit, max_limit] + * + * Gets the resource limit of the process. + * _cur_limit_ means current (soft) limit and + * _max_limit_ means maximum (hard) limit. + * + * _resource_ indicates the kind of resource to limit. + * It is specified as a symbol such as :CORE, + * a string such as "CORE" or + * a constant such as Process::RLIMIT_CORE. + * See Process.setrlimit for details. + * + * _cur_limit_ and _max_limit_ may be Process::RLIM_INFINITY, + * Process::RLIM_SAVED_MAX or + * Process::RLIM_SAVED_CUR. + * See Process.setrlimit and the system getrlimit(2) manual for details. + */ + +static VALUE +proc_getrlimit(VALUE obj, VALUE resource) +{ +#if defined(HAVE_GETRLIMIT) && defined(RLIM2NUM) + struct rlimit rlim; + + rb_secure(2); + + if (getrlimit(rlimit_resource_type(resource), &rlim) < 0) { + rb_sys_fail("getrlimit"); + } + return rb_assoc_new(RLIM2NUM(rlim.rlim_cur), RLIM2NUM(rlim.rlim_max)); +#else + rb_notimplement(); +#endif +} + +/* + * call-seq: + * Process.setrlimit(resource, cur_limit, max_limit) => nil + * Process.setrlimit(resource, cur_limit) => nil + * + * Sets the resource limit of the process. + * _cur_limit_ means current (soft) limit and + * _max_limit_ means maximum (hard) limit. + * + * If _max_limit_ is not given, _cur_limit_ is used. + * + * _resource_ indicates the kind of resource to limit. + * It should be a symbol such as :CORE, + * a string such as "CORE" or + * a constant such as Process::RLIMIT_CORE. + * The available resources are OS dependent. + * Ruby may support following resources. 
 *
 *  [CORE] core size (bytes) (SUSv3)
 *  [CPU] CPU time (seconds) (SUSv3)
 *  [DATA] data segment (bytes) (SUSv3)
 *  [FSIZE] file size (bytes) (SUSv3)
 *  [NOFILE] file descriptors (number) (SUSv3)
 *  [STACK] stack size (bytes) (SUSv3)
 *  [AS] total available memory (bytes) (SUSv3, NetBSD, FreeBSD, OpenBSD but 4.4BSD-Lite)
 *  [MEMLOCK] total size for mlock(2) (bytes) (4.4BSD, GNU/Linux)
 *  [NPROC] number of processes for the user (number) (4.4BSD, GNU/Linux)
 *  [RSS] resident memory size (bytes) (4.2BSD, GNU/Linux)
 *  [SBSIZE] all socket buffers (bytes) (NetBSD, FreeBSD)
 *
 *  _cur_limit_ and _max_limit_ may be
 *  :INFINITY, "INFINITY" or
 *  Process::RLIM_INFINITY,
 *  which means that the resource is not limited.
 *  They may be Process::RLIM_SAVED_MAX,
 *  Process::RLIM_SAVED_CUR and
 *  corresponding symbols and strings too.
 *  See the system setrlimit(2) manual for details.
 *
 *  The following example raises the soft limit of core size to
 *  the hard limit to try to make core dump possible.
+ * + * Process.setrlimit(:CORE, Process.getrlimit(:CORE)[1]) + * + */ + +static VALUE +proc_setrlimit(int argc, VALUE *argv, VALUE obj) +{ +#if defined(HAVE_SETRLIMIT) && defined(NUM2RLIM) + VALUE resource, rlim_cur, rlim_max; + struct rlimit rlim; + + rb_secure(2); + + rb_scan_args(argc, argv, "21", &resource, &rlim_cur, &rlim_max); + if (rlim_max == Qnil) + rlim_max = rlim_cur; + + rlim.rlim_cur = rlimit_resource_value(rlim_cur); + rlim.rlim_max = rlimit_resource_value(rlim_max); + + if (setrlimit(rlimit_resource_type(resource), &rlim) < 0) { + rb_sys_fail("setrlimit"); + } + return Qnil; +#else + rb_notimplement(); +#endif +} + +static int under_uid_switch = 0; +static void +check_uid_switch(void) +{ + rb_secure(2); + if (under_uid_switch) { + rb_raise(rb_eRuntimeError, "can't handle UID while evaluating block given to Process::UID.switch method"); + } +} + +static int under_gid_switch = 0; +static void +check_gid_switch(void) +{ + rb_secure(2); + if (under_gid_switch) { + rb_raise(rb_eRuntimeError, "can't handle GID while evaluating block given to Process::UID.switch method"); + } +} + + +/********************************************************************* + * Document-class: Process::Sys + * + * The Process::Sys module contains UID and GID + * functions which provide direct bindings to the system calls of the + * same names instead of the more-portable versions of the same + * functionality found in the Process, + * Process::UID, and Process::GID modules. + */ + + +/* + * call-seq: + * Process::Sys.setuid(integer) => nil + * + * Set the user ID of the current process to _integer_. Not + * available on all platforms. + * + */ + +static VALUE +p_sys_setuid(VALUE obj, VALUE id) +{ +#if defined HAVE_SETUID + check_uid_switch(); + if (setuid(NUM2UIDT(id)) != 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return Qnil; +} + + + +/* + * call-seq: + * Process::Sys.setruid(integer) => nil + * + * Set the real user ID of the calling process to _integer_. 
+ * Not available on all platforms. + * + */ + +static VALUE +p_sys_setruid(VALUE obj, VALUE id) +{ +#if defined HAVE_SETRUID + check_uid_switch(); + if (setruid(NUM2UIDT(id)) != 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return Qnil; +} + + +/* + * call-seq: + * Process::Sys.seteuid(integer) => nil + * + * Set the effective user ID of the calling process to + * _integer_. Not available on all platforms. + * + */ + +static VALUE +p_sys_seteuid(VALUE obj, VALUE id) +{ +#if defined HAVE_SETEUID + check_uid_switch(); + if (seteuid(NUM2UIDT(id)) != 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return Qnil; +} + + +/* + * call-seq: + * Process::Sys.setreuid(rid, eid) => nil + * + * Sets the (integer) real and/or effective user IDs of the current + * process to _rid_ and _eid_, respectively. A value of + * -1 for either means to leave that ID unchanged. Not + * available on all platforms. + * + */ + +static VALUE +p_sys_setreuid(VALUE obj, VALUE rid, VALUE eid) +{ +#if defined HAVE_SETREUID + check_uid_switch(); + if (setreuid(NUM2UIDT(rid),NUM2UIDT(eid)) != 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return Qnil; +} + + +/* + * call-seq: + * Process::Sys.setresuid(rid, eid, sid) => nil + * + * Sets the (integer) real, effective, and saved user IDs of the + * current process to _rid_, _eid_, and _sid_ respectively. A + * value of -1 for any value means to + * leave that ID unchanged. Not available on all platforms. + * + */ + +static VALUE +p_sys_setresuid(VALUE obj, VALUE rid, VALUE eid, VALUE sid) +{ +#if defined HAVE_SETRESUID + check_uid_switch(); + if (setresuid(NUM2UIDT(rid),NUM2UIDT(eid),NUM2UIDT(sid)) != 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return Qnil; +} + + +/* + * call-seq: + * Process.uid => fixnum + * Process::UID.rid => fixnum + * Process::Sys.getuid => fixnum + * + * Returns the (real) user ID of this process. 
+ * + * Process.uid #=> 501 + */ + +static VALUE +proc_getuid(VALUE obj) +{ + rb_uid_t uid = getuid(); + return UIDT2NUM(uid); +} + + +/* + * call-seq: + * Process.uid= integer => numeric + * + * Sets the (integer) user ID for this process. Not available on all + * platforms. + */ + +static VALUE +proc_setuid(VALUE obj, VALUE id) +{ + rb_uid_t uid; + + check_uid_switch(); + + uid = NUM2UIDT(id); +#if defined(HAVE_SETRESUID) && !defined(__CHECKER__) + if (setresuid(uid, -1, -1) < 0) rb_sys_fail(0); +#elif defined HAVE_SETREUID + if (setreuid(uid, -1) < 0) rb_sys_fail(0); +#elif defined HAVE_SETRUID + if (setruid(uid) < 0) rb_sys_fail(0); +#elif defined HAVE_SETUID + { + if (geteuid() == uid) { + if (setuid(uid) < 0) rb_sys_fail(0); + } + else { + rb_notimplement(); + } + } +#else + rb_notimplement(); +#endif + return id; +} + + +/******************************************************************** + * + * Document-class: Process::UID + * + * The Process::UID module contains a collection of + * module functions which can be used to portably get, set, and + * switch the current process's real, effective, and saved user IDs. + * + */ + +static rb_uid_t SAVED_USER_ID = -1; + +#ifdef BROKEN_SETREUID +int +setreuid(rb_uid_t ruid, rb_uid_t euid) +{ + if (ruid != -1 && ruid != getuid()) { + if (euid == -1) euid = geteuid(); + if (setuid(ruid) < 0) return -1; + } + if (euid != -1 && euid != geteuid()) { + if (seteuid(euid) < 0) return -1; + } + return 0; +} +#endif + +/* + * call-seq: + * Process::UID.change_privilege(integer) => fixnum + * + * Change the current process's real and effective user ID to that + * specified by _integer_. Returns the new user ID. Not + * available on all platforms. 
+ * + * [Process.uid, Process.euid] #=> [0, 0] + * Process::UID.change_privilege(31) #=> 31 + * [Process.uid, Process.euid] #=> [31, 31] + */ + +static VALUE +p_uid_change_privilege(VALUE obj, VALUE id) +{ + rb_uid_t uid; + + check_uid_switch(); + + uid = NUM2UIDT(id); + + if (geteuid() == 0) { /* root-user */ +#if defined(HAVE_SETRESUID) + if (setresuid(uid, uid, uid) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; +#elif defined(HAVE_SETUID) + if (setuid(uid) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; +#elif defined(HAVE_SETREUID) && !defined(OBSOLETE_SETREUID) + if (getuid() == uid) { + if (SAVED_USER_ID == uid) { + if (setreuid(-1, uid) < 0) rb_sys_fail(0); + } else { + if (uid == 0) { /* (r,e,s) == (root, root, x) */ + if (setreuid(-1, SAVED_USER_ID) < 0) rb_sys_fail(0); + if (setreuid(SAVED_USER_ID, 0) < 0) rb_sys_fail(0); + SAVED_USER_ID = 0; /* (r,e,s) == (x, root, root) */ + if (setreuid(uid, uid) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; + } else { + if (setreuid(0, -1) < 0) rb_sys_fail(0); + SAVED_USER_ID = 0; + if (setreuid(uid, uid) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; + } + } + } else { + if (setreuid(uid, uid) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; + } +#elif defined(HAVE_SETRUID) && defined(HAVE_SETEUID) + if (getuid() == uid) { + if (SAVED_USER_ID == uid) { + if (seteuid(uid) < 0) rb_sys_fail(0); + } else { + if (uid == 0) { + if (setruid(SAVED_USER_ID) < 0) rb_sys_fail(0); + SAVED_USER_ID = 0; + if (setruid(0) < 0) rb_sys_fail(0); + } else { + if (setruid(0) < 0) rb_sys_fail(0); + SAVED_USER_ID = 0; + if (seteuid(uid) < 0) rb_sys_fail(0); + if (setruid(uid) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; + } + } + } else { + if (seteuid(uid) < 0) rb_sys_fail(0); + if (setruid(uid) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; + } +#else + rb_notimplement(); +#endif + } else { /* unprivileged user */ +#if defined(HAVE_SETRESUID) + if (setresuid((getuid() == uid)? -1: uid, + (geteuid() == uid)? -1: uid, + (SAVED_USER_ID == uid)? 
-1: uid) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; +#elif defined(HAVE_SETREUID) && !defined(OBSOLETE_SETREUID) + if (SAVED_USER_ID == uid) { + if (setreuid((getuid() == uid)? -1: uid, + (geteuid() == uid)? -1: uid) < 0) rb_sys_fail(0); + } else if (getuid() != uid) { + if (setreuid(uid, (geteuid() == uid)? -1: uid) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; + } else if (/* getuid() == uid && */ geteuid() != uid) { + if (setreuid(geteuid(), uid) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; + if (setreuid(uid, -1) < 0) rb_sys_fail(0); + } else { /* getuid() == uid && geteuid() == uid */ + if (setreuid(-1, SAVED_USER_ID) < 0) rb_sys_fail(0); + if (setreuid(SAVED_USER_ID, uid) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; + if (setreuid(uid, -1) < 0) rb_sys_fail(0); + } +#elif defined(HAVE_SETRUID) && defined(HAVE_SETEUID) + if (SAVED_USER_ID == uid) { + if (geteuid() != uid && seteuid(uid) < 0) rb_sys_fail(0); + if (getuid() != uid && setruid(uid) < 0) rb_sys_fail(0); + } else if (/* SAVED_USER_ID != uid && */ geteuid() == uid) { + if (getuid() != uid) { + if (setruid(uid) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; + } else { + if (setruid(SAVED_USER_ID) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; + if (setruid(uid) < 0) rb_sys_fail(0); + } + } else if (/* geteuid() != uid && */ getuid() == uid) { + if (seteuid(uid) < 0) rb_sys_fail(0); + if (setruid(SAVED_USER_ID) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; + if (setruid(uid) < 0) rb_sys_fail(0); + } else { + errno = EPERM; + rb_sys_fail(0); + } +#elif defined HAVE_44BSD_SETUID + if (getuid() == uid) { + /* (r,e,s)==(uid,?,?) 
==> (uid,uid,uid) */ + if (setuid(uid) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; + } else { + errno = EPERM; + rb_sys_fail(0); + } +#elif defined HAVE_SETEUID + if (getuid() == uid && SAVED_USER_ID == uid) { + if (seteuid(uid) < 0) rb_sys_fail(0); + } else { + errno = EPERM; + rb_sys_fail(0); + } +#elif defined HAVE_SETUID + if (getuid() == uid && SAVED_USER_ID == uid) { + if (setuid(uid) < 0) rb_sys_fail(0); + } else { + errno = EPERM; + rb_sys_fail(0); + } +#else + rb_notimplement(); +#endif + } + return id; +} + + + +/* + * call-seq: + * Process::Sys.setgid(integer) => nil + * + * Set the group ID of the current process to _integer_. Not + * available on all platforms. + * + */ + +static VALUE +p_sys_setgid(VALUE obj, VALUE id) +{ +#if defined HAVE_SETGID + check_gid_switch(); + if (setgid(NUM2GIDT(id)) != 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return Qnil; +} + + +/* + * call-seq: + * Process::Sys.setrgid(integer) => nil + * + * Set the real group ID of the calling process to _integer_. + * Not available on all platforms. + * + */ + +static VALUE +p_sys_setrgid(VALUE obj, VALUE id) +{ +#if defined HAVE_SETRGID + check_gid_switch(); + if (setrgid(NUM2GIDT(id)) != 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return Qnil; +} + + + +/* + * call-seq: + * Process::Sys.setegid(integer) => nil + * + * Set the effective group ID of the calling process to + * _integer_. Not available on all platforms. + * + */ + +static VALUE +p_sys_setegid(VALUE obj, VALUE id) +{ +#if defined HAVE_SETEGID + check_gid_switch(); + if (setegid(NUM2GIDT(id)) != 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return Qnil; +} + + +/* + * call-seq: + * Process::Sys.setregid(rid, eid) => nil + * + * Sets the (integer) real and/or effective group IDs of the current + * process to rid and eid, respectively. A value of + * -1 for either means to leave that ID unchanged. Not + * available on all platforms. 
+ * + */ + +static VALUE +p_sys_setregid(VALUE obj, VALUE rid, VALUE eid) +{ +#if defined HAVE_SETREGID + check_gid_switch(); + if (setregid(NUM2GIDT(rid),NUM2GIDT(eid)) != 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return Qnil; +} + +/* + * call-seq: + * Process::Sys.setresgid(rid, eid, sid) => nil + * + * Sets the (integer) real, effective, and saved user IDs of the + * current process to rid, eid, and sid + * respectively. A value of -1 for any value means to + * leave that ID unchanged. Not available on all platforms. + * + */ + +static VALUE +p_sys_setresgid(VALUE obj, VALUE rid, VALUE eid, VALUE sid) +{ +#if defined HAVE_SETRESGID + check_gid_switch(); + if (setresgid(NUM2GIDT(rid),NUM2GIDT(eid),NUM2GIDT(sid)) != 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return Qnil; +} + + +/* + * call-seq: + * Process::Sys.issetugid => true or false + * + * Returns +true+ if the process was created as a result + * of an execve(2) system call which had either of the setuid or + * setgid bits set (and extra privileges were given as a result) or + * if it has changed any of its real, effective or saved user or + * group IDs since it began execution. + * + */ + +static VALUE +p_sys_issetugid(VALUE obj) +{ +#if defined HAVE_ISSETUGID + rb_secure(2); + if (issetugid()) { + return Qtrue; + } else { + return Qfalse; + } +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + + +/* + * call-seq: + * Process.gid => fixnum + * Process::GID.rid => fixnum + * Process::Sys.getgid => fixnum + * + * Returns the (real) group ID for this process. + * + * Process.gid #=> 500 + */ + +static VALUE +proc_getgid(VALUE obj) +{ + rb_gid_t gid = getgid(); + return GIDT2NUM(gid); +} + + +/* + * call-seq: + * Process.gid= fixnum => fixnum + * + * Sets the group ID for this process. 
+ */ + +static VALUE +proc_setgid(VALUE obj, VALUE id) +{ + rb_gid_t gid; + + check_gid_switch(); + + gid = NUM2GIDT(id); +#if defined(HAVE_SETRESGID) && !defined(__CHECKER__) + if (setresgid(gid, -1, -1) < 0) rb_sys_fail(0); +#elif defined HAVE_SETREGID + if (setregid(gid, -1) < 0) rb_sys_fail(0); +#elif defined HAVE_SETRGID + if (setrgid(gid) < 0) rb_sys_fail(0); +#elif defined HAVE_SETGID + { + if (getegid() == gid) { + if (setgid(gid) < 0) rb_sys_fail(0); + } + else { + rb_notimplement(); + } + } +#else + rb_notimplement(); +#endif + return GIDT2NUM(gid); +} + + +static size_t maxgroups = 32; + + +/* + * call-seq: + * Process.groups => array + * + * Get an Array of the gids of groups in the + * supplemental group access list for this process. + * + * Process.groups #=> [27, 6, 10, 11] + * + */ + +static VALUE +proc_getgroups(VALUE obj) +{ +#ifdef HAVE_GETGROUPS + VALUE ary; + size_t ngroups; + rb_gid_t *groups; + int i; + + groups = ALLOCA_N(rb_gid_t, maxgroups); + + ngroups = getgroups(maxgroups, groups); + if (ngroups == -1) + rb_sys_fail(0); + + ary = rb_ary_new(); + for (i = 0; i < ngroups; i++) + rb_ary_push(ary, GIDT2NUM(groups[i])); + + return ary; +#else + rb_notimplement(); + return Qnil; +#endif +} + + +/* + * call-seq: + * Process.groups= array => array + * + * Set the supplemental group access list to the given + * Array of group IDs. 
+ * + * Process.groups #=> [0, 1, 2, 3, 4, 6, 10, 11, 20, 26, 27] + * Process.groups = [27, 6, 10, 11] #=> [27, 6, 10, 11] + * Process.groups #=> [27, 6, 10, 11] + * + */ + +static VALUE +proc_setgroups(VALUE obj, VALUE ary) +{ +#ifdef HAVE_SETGROUPS + size_t ngroups; + rb_gid_t *groups; + int i; + struct group *gr; + + Check_Type(ary, T_ARRAY); + + ngroups = RARRAY_LEN(ary); + if (ngroups > maxgroups) + rb_raise(rb_eArgError, "too many groups, %lu max", (unsigned long)maxgroups); + + groups = ALLOCA_N(rb_gid_t, ngroups); + + for (i = 0; i < ngroups && i < RARRAY_LEN(ary); i++) { + VALUE g = RARRAY_PTR(ary)[i]; + + if (FIXNUM_P(g)) { + groups[i] = NUM2GIDT(g); + } + else { + VALUE tmp = rb_check_string_type(g); + + if (NIL_P(tmp)) { + groups[i] = NUM2GIDT(g); + } + else { + gr = getgrnam(RSTRING_PTR(tmp)); + if (gr == NULL) + rb_raise(rb_eArgError, + "can't find group for %s", RSTRING_PTR(tmp)); + groups[i] = gr->gr_gid; + } + } + } + + i = setgroups(ngroups, groups); + if (i == -1) + rb_sys_fail(0); + + return proc_getgroups(obj); +#else + rb_notimplement(); + return Qnil; +#endif +} + + +/* + * call-seq: + * Process.initgroups(username, gid) => array + * + * Initializes the supplemental group access list by reading the + * system group database and using all groups of which the given user + * is a member. The group with the specified gid is also + * added to the list. Returns the resulting Array of the + * gids of all the groups in the supplementary group access list. Not + * available on all platforms. 
+ * + * Process.groups #=> [0, 1, 2, 3, 4, 6, 10, 11, 20, 26, 27] + * Process.initgroups( "mgranger", 30 ) #=> [30, 6, 10, 11] + * Process.groups #=> [30, 6, 10, 11] + * + */ + +static VALUE +proc_initgroups(VALUE obj, VALUE uname, VALUE base_grp) +{ +#ifdef HAVE_INITGROUPS + if (initgroups(StringValuePtr(uname), NUM2GIDT(base_grp)) != 0) { + rb_sys_fail(0); + } + return proc_getgroups(obj); +#else + rb_notimplement(); + return Qnil; +#endif +} + + +/* + * call-seq: + * Process.maxgroups => fixnum + * + * Returns the maximum number of gids allowed in the supplemental + * group access list. + * + * Process.maxgroups #=> 32 + */ + +static VALUE +proc_getmaxgroups(VALUE obj) +{ + return INT2FIX(maxgroups); +} + + +/* + * call-seq: + * Process.maxgroups= fixnum => fixnum + * + * Sets the maximum number of gids allowed in the supplemental group + * access list. + */ + +static VALUE +proc_setmaxgroups(VALUE obj, VALUE val) +{ + size_t ngroups = FIX2INT(val); + + if (ngroups > 4096) + ngroups = 4096; + + maxgroups = ngroups; + + return INT2FIX(maxgroups); +} + +/* + * call-seq: + * Process.daemon() => fixnum + * Process.daemon(nochdir=nil,noclose=nil) => fixnum + * + * Detach the process from controlling terminal and run in + * the background as system daemon. Unless the argument + * nochdir is true (i.e. non false), it changes the current + * working directory to the root ("/"). Unless the argument + * noclose is true, daemon() will redirect standard input, + * standard output and standard error to /dev/null. 
+ */ + +static VALUE +proc_daemon(int argc, VALUE *argv) +{ + VALUE nochdir, noclose; +#if defined(HAVE_DAEMON) || defined(HAVE_FORK) + int n; +#endif + + rb_secure(2); + rb_scan_args(argc, argv, "02", &nochdir, &noclose); + +#if defined(HAVE_DAEMON) + prefork(); + before_fork(); + n = daemon(RTEST(nochdir), RTEST(noclose)); + after_fork(); + if (n < 0) rb_sys_fail("daemon"); + return INT2FIX(n); +#elif defined(HAVE_FORK) + switch (rb_fork(0, 0, 0, Qnil)) { + case -1: + return INT2FIX(-1); + case 0: + break; + default: + _exit(0); + } + + proc_setsid(); + + /* must not be process-leader */ + switch (rb_fork(0, 0, 0, Qnil)) { + case -1: + return INT2FIX(-1); + case 0: + break; + default: + _exit(0); + } + + if (!RTEST(nochdir)) + (void)chdir("/"); + + if (!RTEST(noclose) && (n = open("/dev/null", O_RDWR, 0)) != -1) { + (void)dup2(n, 0); + (void)dup2(n, 1); + (void)dup2(n, 2); + if (n > 2) + (void)close (n); + } + return INT2FIX(0); +#else + rb_notimplement(); +#endif +} + +/******************************************************************** + * + * Document-class: Process::GID + * + * The Process::GID module contains a collection of + * module functions which can be used to portably get, set, and + * switch the current process's real, effective, and saved group IDs. + * + */ + +static int SAVED_GROUP_ID = -1; + +#ifdef BROKEN_SETREGID +int +setregid(rb_gid_t rgid, rb_gid_t egid) +{ + if (rgid != -1 && rgid != getgid()) { + if (egid == -1) egid = getegid(); + if (setgid(rgid) < 0) return -1; + } + if (egid != -1 && egid != getegid()) { + if (setegid(egid) < 0) return -1; + } + return 0; +} +#endif + +/* + * call-seq: + * Process::GID.change_privilege(integer) => fixnum + * + * Change the current process's real and effective group ID to that + * specified by _integer_. Returns the new group ID. Not + * available on all platforms. 
 *
 *     [Process.gid, Process.egid]          #=> [0, 0]
 *     Process::GID.change_privilege(33)    #=> 33
 *     [Process.gid, Process.egid]          #=> [33, 33]
 */

static VALUE
p_gid_change_privilege(VALUE obj, VALUE id)
{
    rb_gid_t gid;

    check_gid_switch();

    gid = NUM2GIDT(id);

    /*
     * Every branch below raises through rb_sys_fail() as soon as a
     * system call fails, and records the new value in SAVED_GROUP_ID
     * after the call that changes it.  The (r,e,s) triples in the
     * comments presumably stand for (real, effective, saved) group ID.
     */
    if (geteuid() == 0) { /* root-user */
#if defined(HAVE_SETRESGID)
        if (setresgid(gid, gid, gid) < 0) rb_sys_fail(0);
        SAVED_GROUP_ID = gid;
#elif defined HAVE_SETGID
        if (setgid(gid) < 0) rb_sys_fail(0);
        SAVED_GROUP_ID = gid;
#elif defined(HAVE_SETREGID) && !defined(OBSOLETE_SETREGID)
        if (getgid() == gid) {
            if (SAVED_GROUP_ID == gid) {
                if (setregid(-1, gid) < 0) rb_sys_fail(0);
            } else {
                if (gid == 0) { /* (r,e,s) == (root, y, x) */
                    if (setregid(-1, SAVED_GROUP_ID) < 0) rb_sys_fail(0);
                    if (setregid(SAVED_GROUP_ID, 0) < 0) rb_sys_fail(0);
                    SAVED_GROUP_ID = 0; /* (r,e,s) == (x, root, root) */
                    if (setregid(gid, gid) < 0) rb_sys_fail(0);
                    SAVED_GROUP_ID = gid;
                } else { /* (r,e,s) == (z, y, x) */
                    if (setregid(0, 0) < 0) rb_sys_fail(0);
                    SAVED_GROUP_ID = 0;
                    if (setregid(gid, gid) < 0) rb_sys_fail(0);
                    SAVED_GROUP_ID = gid;
                }
            }
        } else {
            if (setregid(gid, gid) < 0) rb_sys_fail(0);
            SAVED_GROUP_ID = gid;
        }
#elif defined(HAVE_SETRGID) && defined (HAVE_SETEGID)
        if (getgid() == gid) {
            if (SAVED_GROUP_ID == gid) {
                if (setegid(gid) < 0) rb_sys_fail(0);
            } else {
                if (gid == 0) {
                    if (setegid(gid) < 0) rb_sys_fail(0);
                    if (setrgid(SAVED_GROUP_ID) < 0) rb_sys_fail(0);
                    SAVED_GROUP_ID = 0;
                    if (setrgid(0) < 0) rb_sys_fail(0);
                } else {
                    if (setrgid(0) < 0) rb_sys_fail(0);
                    SAVED_GROUP_ID = 0;
                    if (setegid(gid) < 0) rb_sys_fail(0);
                    if (setrgid(gid) < 0) rb_sys_fail(0);
                    SAVED_GROUP_ID = gid;
                }
            }
        } else {
            if (setegid(gid) < 0) rb_sys_fail(0);
            if (setrgid(gid) < 0) rb_sys_fail(0);
            SAVED_GROUP_ID = gid;
        }
#else
        rb_notimplement();
#endif
    } else { /* unprivileged user */
#if defined(HAVE_SETRESGID)
        /* -1 leaves an ID that already equals gid untouched */
        if (setresgid((getgid() == gid)? -1: gid,
                      (getegid() == gid)? -1: gid,
                      (SAVED_GROUP_ID == gid)? -1: gid) < 0) rb_sys_fail(0);
        SAVED_GROUP_ID = gid;
#elif defined(HAVE_SETREGID) && !defined(OBSOLETE_SETREGID)
        if (SAVED_GROUP_ID == gid) {
            if (setregid((getgid() == gid)? -1: gid,
                         (getegid() == gid)? -1: gid) < 0) rb_sys_fail(0);
        } else if (getgid() != gid) {
            if (setregid(gid, (getegid() == gid)? -1: gid) < 0) rb_sys_fail(0);
            SAVED_GROUP_ID = gid;
        } else if (/* getgid() == gid && */ getegid() != gid) {
            if (setregid(getegid(), gid) < 0) rb_sys_fail(0);
            SAVED_GROUP_ID = gid;
            if (setregid(gid, -1) < 0) rb_sys_fail(0);
        } else { /* getgid() == gid && getegid() == gid */
            if (setregid(-1, SAVED_GROUP_ID) < 0) rb_sys_fail(0);
            if (setregid(SAVED_GROUP_ID, gid) < 0) rb_sys_fail(0);
            SAVED_GROUP_ID = gid;
            if (setregid(gid, -1) < 0) rb_sys_fail(0);
        }
#elif defined(HAVE_SETRGID) && defined(HAVE_SETEGID)
        if (SAVED_GROUP_ID == gid) {
            if (getegid() != gid && setegid(gid) < 0) rb_sys_fail(0);
            if (getgid() != gid && setrgid(gid) < 0) rb_sys_fail(0);
        } else if (/* SAVED_GROUP_ID != gid && */ getegid() == gid) {
            if (getgid() != gid) {
                if (setrgid(gid) < 0) rb_sys_fail(0);
                SAVED_GROUP_ID = gid;
            } else {
                if (setrgid(SAVED_GROUP_ID) < 0) rb_sys_fail(0);
                SAVED_GROUP_ID = gid;
                if (setrgid(gid) < 0) rb_sys_fail(0);
            }
        } else if (/* getegid() != gid && */ getgid() == gid) {
            if (setegid(gid) < 0) rb_sys_fail(0);
            if (setrgid(SAVED_GROUP_ID) < 0) rb_sys_fail(0);
            SAVED_GROUP_ID = gid;
            if (setrgid(gid) < 0) rb_sys_fail(0);
        } else {
            /* gid matches none of the real, effective or saved IDs */
            errno = EPERM;
            rb_sys_fail(0);
        }
#elif defined HAVE_44BSD_SETGID
        if (getgid() == gid) {
            /* (r,e,s)==(gid,?,?) ==> (gid,gid,gid) */
            if (setgid(gid) < 0) rb_sys_fail(0);
            SAVED_GROUP_ID = gid;
        } else {
            errno = EPERM;
            rb_sys_fail(0);
        }
#elif defined HAVE_SETEGID
        if (getgid() == gid && SAVED_GROUP_ID == gid) {
            if (setegid(gid) < 0) rb_sys_fail(0);
        } else {
            errno = EPERM;
            rb_sys_fail(0);
        }
#elif defined HAVE_SETGID
        if (getgid() == gid && SAVED_GROUP_ID == gid) {
            if (setgid(gid) < 0) rb_sys_fail(0);
        } else {
            errno = EPERM;
            rb_sys_fail(0);
        }
#else
        rb_notimplement();
#endif
    }
    /* the caller's own object is handed back, not a re-read of the gid */
    return id;
}


/*
 *  call-seq:
 *     Process.euid           => fixnum
 *     Process::UID.eid       => fixnum
 *     Process::Sys.geteuid   => fixnum
 *
 *  Returns the effective user ID for this process.
 *
 *     Process.euid   #=> 501
 */

static VALUE
proc_geteuid(VALUE obj)
{
    rb_uid_t euid = geteuid();
    return UIDT2NUM(euid);
}


/*
 *  call-seq:
 *     Process.euid= integer
 *
 *  Sets the effective user ID for this process. Not available on all
 *  platforms.
 */

static VALUE
proc_seteuid(VALUE obj, VALUE euid)
{
    rb_uid_t uid;

    check_uid_switch();

    uid = NUM2UIDT(euid);
    /* use the first available call that changes only the effective ID */
#if defined(HAVE_SETRESUID) && !defined(__CHECKER__)
    if (setresuid(-1, uid, -1) < 0) rb_sys_fail(0);
#elif defined HAVE_SETREUID
    if (setreuid(-1, uid) < 0) rb_sys_fail(0);
#elif defined HAVE_SETEUID
    if (seteuid(uid) < 0) rb_sys_fail(0);
#elif defined HAVE_SETUID
    /* with only setuid(), just the real ID can be requested */
    if (uid == getuid()) {
        if (setuid(uid) < 0) rb_sys_fail(0);
    }
    else {
        rb_notimplement();
    }
#else
    rb_notimplement();
#endif
    return euid;
}

/*
 * Sets the effective user ID to euid and returns euid.  In the
 * setresuid() and setreuid() branches SAVED_USER_ID is also updated
 * when the real user ID differs from euid.  Raises through
 * rb_sys_fail() when a system call fails.
 */
static rb_uid_t
rb_seteuid_core(rb_uid_t euid)
{
    rb_uid_t uid;

    check_uid_switch();

    uid = getuid();

#if defined(HAVE_SETRESUID) && !defined(__CHECKER__)
    if (uid != euid) {
        if (setresuid(-1,euid,euid) < 0) rb_sys_fail(0);
        SAVED_USER_ID = euid;
    } else {
        if (setresuid(-1,euid,-1) < 0) rb_sys_fail(0);
    }
#elif defined(HAVE_SETREUID) && !defined(OBSOLETE_SETREUID)
    if (setreuid(-1, euid) < 0) rb_sys_fail(0);
    if (uid != euid) {
        if (setreuid(euid,uid) < 0) rb_sys_fail(0);
        if (setreuid(uid,euid) < 0) rb_sys_fail(0);
        SAVED_USER_ID = euid;
    }
#elif defined HAVE_SETEUID
    if (seteuid(euid) < 0) rb_sys_fail(0);
#elif defined HAVE_SETUID
    /* NOTE(review): refuses outright when the effective user is root,
     * presumably because setuid() as root would change every ID;
     * errno is not set before this rb_sys_fail(0) -- confirm intent. */
    if (geteuid() == 0) rb_sys_fail(0);
    if (setuid(euid) < 0) rb_sys_fail(0);
#else
    rb_notimplement();
#endif
    return euid;
}


/*
 *  call-seq:
 *     Process::UID.grant_privilege(integer)   => fixnum
 *     Process::UID.eid= integer               => fixnum
 *
 *  Set the effective user ID, and if possible, the saved user ID of
 *  the process to the given _integer_. Returns the new
 *  effective user ID. Not available on all platforms.
 *
 *     [Process.uid, Process.euid]          #=> [0, 0]
 *     Process::UID.grant_privilege(31)     #=> 31
 *     [Process.uid, Process.euid]          #=> [0, 31]
 */

static VALUE
p_uid_grant_privilege(VALUE obj, VALUE id)
{
    rb_seteuid_core(NUM2UIDT(id));
    return id;
}


/*
 *  call-seq:
 *     Process.egid          => fixnum
 *     Process::GID.eid      => fixnum
 *     Process::Sys.getegid  => fixnum
 *
 *  Returns the effective group ID for this process. Not available on
 *  all platforms.
 *
 *     Process.egid   #=> 500
 */

static VALUE
proc_getegid(VALUE obj)
{
    rb_gid_t egid = getegid();

    return GIDT2NUM(egid);
}


/*
 *  call-seq:
 *     Process.egid = fixnum   => fixnum
 *
 *  Sets the effective group ID for this process. Not available on all
 *  platforms.
 */

static VALUE
proc_setegid(VALUE obj, VALUE egid)
{
    rb_gid_t gid;

    check_gid_switch();

    gid = NUM2GIDT(egid);
    /* use the first available call that changes only the effective ID */
#if defined(HAVE_SETRESGID) && !defined(__CHECKER__)
    if (setresgid(-1, gid, -1) < 0) rb_sys_fail(0);
#elif defined HAVE_SETREGID
    if (setregid(-1, gid) < 0) rb_sys_fail(0);
#elif defined HAVE_SETEGID
    if (setegid(gid) < 0) rb_sys_fail(0);
#elif defined HAVE_SETGID
    /* with only setgid(), just the real ID can be requested */
    if (gid == getgid()) {
        if (setgid(gid) < 0) rb_sys_fail(0);
    }
    else {
        rb_notimplement();
    }
#else
    rb_notimplement();
#endif
    return egid;
}

/*
 * Sets the effective group ID to egid and returns egid.  In the
 * setresgid() and setregid() branches SAVED_GROUP_ID is also updated
 * when the real group ID differs from egid.  Raises through
 * rb_sys_fail() when a system call fails.
 */
static rb_gid_t
rb_setegid_core(rb_gid_t egid)
{
    rb_gid_t gid;

    check_gid_switch();

    gid = getgid();

#if defined(HAVE_SETRESGID) && !defined(__CHECKER__)
    if (gid != egid) {
        if (setresgid(-1,egid,egid) < 0) rb_sys_fail(0);
        SAVED_GROUP_ID = egid;
    } else {
        if (setresgid(-1,egid,-1) < 0) rb_sys_fail(0);
    }
#elif defined(HAVE_SETREGID) && !defined(OBSOLETE_SETREGID)
    if (setregid(-1, egid) < 0) rb_sys_fail(0);
    if (gid != egid) {
        if (setregid(egid,gid) < 0) rb_sys_fail(0);
        if (setregid(gid,egid) < 0) rb_sys_fail(0);
        SAVED_GROUP_ID = egid;
    }
#elif defined HAVE_SETEGID
    if (setegid(egid) < 0) rb_sys_fail(0);
#elif defined HAVE_SETGID
    /* NOTE(review): errno is not set before this rb_sys_fail(0) --
     * confirm the intended error. */
    if (geteuid() == 0 /* root user */) rb_sys_fail(0);
    if (setgid(egid) < 0) rb_sys_fail(0);
#else
    rb_notimplement();
#endif
    return egid;
}


/*
 *  call-seq:
 *     Process::GID.grant_privilege(integer)    => fixnum
 *     Process::GID.eid = integer               => fixnum
 *
 *  Set the effective group ID, and if possible, the saved group ID of
 *  the process to the given _integer_. Returns the new
 *  effective group ID. Not available on all platforms.
 *
 *     [Process.gid, Process.egid]          #=> [0, 0]
 *     Process::GID.grant_privilege(31)     #=> 31
 *     [Process.gid, Process.egid]          #=> [0, 31]
 */

static VALUE
p_gid_grant_privilege(VALUE obj, VALUE id)
{
    rb_setegid_core(NUM2GIDT(id));
    return id;
}


/*
 *  call-seq:
 *     Process::UID.re_exchangeable?
=> true or false + * + * Returns +true+ if the real and effective user IDs of a + * process may be exchanged on the current platform. + * + */ + +static VALUE +p_uid_exchangeable(void) +{ +#if defined(HAVE_SETRESUID) && !defined(__CHECKER__) + return Qtrue; +#elif defined(HAVE_SETREUID) && !defined(OBSOLETE_SETREUID) + return Qtrue; +#else + return Qfalse; +#endif +} + + +/* + * call-seq: + * Process::UID.re_exchange => fixnum + * + * Exchange real and effective user IDs and return the new effective + * user ID. Not available on all platforms. + * + * [Process.uid, Process.euid] #=> [0, 31] + * Process::UID.re_exchange #=> 0 + * [Process.uid, Process.euid] #=> [31, 0] + */ + +static VALUE +p_uid_exchange(VALUE obj) +{ + rb_uid_t uid, euid; + + check_uid_switch(); + + uid = getuid(); + euid = geteuid(); + +#if defined(HAVE_SETRESUID) && !defined(__CHECKER__) + if (setresuid(euid, uid, uid) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; +#elif defined(HAVE_SETREUID) && !defined(OBSOLETE_SETREUID) + if (setreuid(euid,uid) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; +#else + rb_notimplement(); +#endif + return UIDT2NUM(uid); +} + + +/* + * call-seq: + * Process::GID.re_exchangeable? => true or false + * + * Returns +true+ if the real and effective group IDs of a + * process may be exchanged on the current platform. + * + */ + +static VALUE +p_gid_exchangeable(void) +{ +#if defined(HAVE_SETRESGID) && !defined(__CHECKER__) + return Qtrue; +#elif defined(HAVE_SETREGID) && !defined(OBSOLETE_SETREGID) + return Qtrue; +#else + return Qfalse; +#endif +} + + +/* + * call-seq: + * Process::GID.re_exchange => fixnum + * + * Exchange real and effective group IDs and return the new effective + * group ID. Not available on all platforms. 
+ * + * [Process.gid, Process.egid] #=> [0, 33] + * Process::GID.re_exchange #=> 0 + * [Process.gid, Process.egid] #=> [33, 0] + */ + +static VALUE +p_gid_exchange(VALUE obj) +{ + rb_gid_t gid, egid; + + check_gid_switch(); + + gid = getgid(); + egid = getegid(); + +#if defined(HAVE_SETRESGID) && !defined(__CHECKER__) + if (setresgid(egid, gid, gid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; +#elif defined(HAVE_SETREGID) && !defined(OBSOLETE_SETREGID) + if (setregid(egid,gid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; +#else + rb_notimplement(); +#endif + return GIDT2NUM(gid); +} + +/* [MG] :FIXME: Is this correct? I'm not sure how to phrase this. */ + +/* + * call-seq: + * Process::UID.sid_available? => true or false + * + * Returns +true+ if the current platform has saved user + * ID functionality. + * + */ + +static VALUE +p_uid_have_saved_id(void) +{ +#if defined(HAVE_SETRESUID) || defined(HAVE_SETEUID) || defined(_POSIX_SAVED_IDS) + return Qtrue; +#else + return Qfalse; +#endif +} + + +#if defined(HAVE_SETRESUID) || defined(HAVE_SETEUID) || defined(_POSIX_SAVED_IDS) +static VALUE +p_uid_sw_ensure(rb_uid_t id) +{ + under_uid_switch = 0; + id = rb_seteuid_core(id); + return UIDT2NUM(id); +} + + +/* + * call-seq: + * Process::UID.switch => fixnum + * Process::UID.switch {|| block} => object + * + * Switch the effective and real user IDs of the current process. If + * a block is given, the user IDs will be switched back + * after the block is executed. Returns the new effective user ID if + * called without a block, and the return value of the block if one + * is given. 
+ * + */ + +static VALUE +p_uid_switch(VALUE obj) +{ + rb_uid_t uid, euid; + + check_uid_switch(); + + uid = getuid(); + euid = geteuid(); + + if (uid != euid) { + proc_seteuid(obj, UIDT2NUM(uid)); + if (rb_block_given_p()) { + under_uid_switch = 1; + return rb_ensure(rb_yield, Qnil, p_uid_sw_ensure, SAVED_USER_ID); + } else { + return UIDT2NUM(euid); + } + } else if (euid != SAVED_USER_ID) { + proc_seteuid(obj, UIDT2NUM(SAVED_USER_ID)); + if (rb_block_given_p()) { + under_uid_switch = 1; + return rb_ensure(rb_yield, Qnil, p_uid_sw_ensure, euid); + } else { + return UIDT2NUM(uid); + } + } else { + errno = EPERM; + rb_sys_fail(0); + } +} +#else +static VALUE +p_uid_sw_ensure(VALUE obj) +{ + under_uid_switch = 0; + return p_uid_exchange(obj); +} + +static VALUE +p_uid_switch(VALUE obj) +{ + rb_uid_t uid, euid; + + check_uid_switch(); + + uid = getuid(); + euid = geteuid(); + + if (uid == euid) { + errno = EPERM; + rb_sys_fail(0); + } + p_uid_exchange(obj); + if (rb_block_given_p()) { + under_uid_switch = 1; + return rb_ensure(rb_yield, Qnil, p_uid_sw_ensure, obj); + } else { + return UIDT2NUM(euid); + } +} +#endif + + +/* [MG] :FIXME: Is this correct? I'm not sure how to phrase this. */ + +/* + * call-seq: + * Process::GID.sid_available? => true or false + * + * Returns +true+ if the current platform has saved group + * ID functionality. + * + */ + +static VALUE +p_gid_have_saved_id(void) +{ +#if defined(HAVE_SETRESGID) || defined(HAVE_SETEGID) || defined(_POSIX_SAVED_IDS) + return Qtrue; +#else + return Qfalse; +#endif +} + +#if defined(HAVE_SETRESGID) || defined(HAVE_SETEGID) || defined(_POSIX_SAVED_IDS) +static VALUE +p_gid_sw_ensure(rb_gid_t id) +{ + under_gid_switch = 0; + id = rb_setegid_core(id); + return GIDT2NUM(id); +} + + +/* + * call-seq: + * Process::GID.switch => fixnum + * Process::GID.switch {|| block} => object + * + * Switch the effective and real group IDs of the current process. 
If + * a block is given, the group IDs will be switched back + * after the block is executed. Returns the new effective group ID if + * called without a block, and the return value of the block if one + * is given. + * + */ + +static VALUE +p_gid_switch(VALUE obj) +{ + int gid, egid; + + check_gid_switch(); + + gid = getgid(); + egid = getegid(); + + if (gid != egid) { + proc_setegid(obj, GIDT2NUM(gid)); + if (rb_block_given_p()) { + under_gid_switch = 1; + return rb_ensure(rb_yield, Qnil, p_gid_sw_ensure, SAVED_GROUP_ID); + } else { + return GIDT2NUM(egid); + } + } else if (egid != SAVED_GROUP_ID) { + proc_setegid(obj, GIDT2NUM(SAVED_GROUP_ID)); + if (rb_block_given_p()) { + under_gid_switch = 1; + return rb_ensure(rb_yield, Qnil, p_gid_sw_ensure, egid); + } else { + return GIDT2NUM(gid); + } + } else { + errno = EPERM; + rb_sys_fail(0); + } +} +#else +static VALUE +p_gid_sw_ensure(VALUE obj) +{ + under_gid_switch = 0; + return p_gid_exchange(obj); +} + +static VALUE +p_gid_switch(VALUE obj) +{ + rb_gid_t gid, egid; + + check_gid_switch(); + + gid = getgid(); + egid = getegid(); + + if (gid == egid) { + errno = EPERM; + rb_sys_fail(0); + } + p_gid_exchange(obj); + if (rb_block_given_p()) { + under_gid_switch = 1; + return rb_ensure(rb_yield, Qnil, p_gid_sw_ensure, obj); + } else { + return GIDT2NUM(egid); + } +} +#endif + + +/* + * call-seq: + * Process.times => aStructTms + * + * Returns a Tms structure (see Struct::Tms + * on page 388) that contains user and system CPU times for this + * process. 
 *
 *     t = Process.times
 *     [ t.utime, t.stime ]   #=> [0.0, 0.02]
 */

VALUE
rb_proc_times(VALUE obj)
{
#if defined(HAVE_TIMES) && !defined(__CHECKER__)
    /* clock ticks per second, used to turn tick counts into seconds */
    const double hertz =
#ifdef HAVE__SC_CLK_TCK
        (double)sysconf(_SC_CLK_TCK);
#else
#ifndef HZ
# ifdef CLK_TCK
#   define HZ CLK_TCK
# else
#   define HZ 60
# endif
#endif /* HZ */
        HZ;
#endif
    struct tms buf;
    volatile VALUE utime, stime, cutime, sctime;

    /* NOTE(review): the return value of times() is not checked */
    times(&buf);
    return rb_struct_new(rb_cProcessTms,
                         utime = DBL2NUM(buf.tms_utime / hertz),
                         stime = DBL2NUM(buf.tms_stime / hertz),
                         cutime = DBL2NUM(buf.tms_cutime / hertz),
                         sctime = DBL2NUM(buf.tms_cstime / hertz));
#else
    rb_notimplement();
#endif
}

VALUE rb_mProcess;
VALUE rb_mProcUID;
VALUE rb_mProcGID;
VALUE rb_mProcID_Syscall;


/*
 *  The Process module is a collection of methods used to
 *  manipulate processes.
 */

void
Init_process(void)
{
    /* global variables and Kernel-level functions */
    rb_define_virtual_variable("$?", rb_last_status_get, 0);
    rb_define_virtual_variable("$$", get_pid, 0);
    rb_define_global_function("exec", rb_f_exec, -1);
    rb_define_global_function("fork", rb_f_fork, 0);
    rb_define_global_function("exit!", rb_f_exit_bang, -1);
    rb_define_global_function("system", rb_f_system, -1);
    rb_define_global_function("spawn", rb_f_spawn, -1);
    rb_define_global_function("sleep", rb_f_sleep, -1);
    rb_define_global_function("exit", rb_f_exit, -1);
    rb_define_global_function("abort", rb_f_abort, -1);

    rb_mProcess = rb_define_module("Process");

    /* wait flags fall back to 0 where the platform does not define them */
#ifdef WNOHANG
    rb_define_const(rb_mProcess, "WNOHANG", INT2FIX(WNOHANG));
#else
    rb_define_const(rb_mProcess, "WNOHANG", INT2FIX(0));
#endif
#ifdef WUNTRACED
    rb_define_const(rb_mProcess, "WUNTRACED", INT2FIX(WUNTRACED));
#else
    rb_define_const(rb_mProcess, "WUNTRACED", INT2FIX(0));
#endif

    rb_define_singleton_method(rb_mProcess, "exec", rb_f_exec, -1);
    rb_define_singleton_method(rb_mProcess, "fork", rb_f_fork, 0);
    rb_define_singleton_method(rb_mProcess, "spawn", rb_f_spawn, -1);
    rb_define_singleton_method(rb_mProcess, "exit!", rb_f_exit_bang, -1);
    rb_define_singleton_method(rb_mProcess, "exit", rb_f_exit, -1);
    rb_define_singleton_method(rb_mProcess, "abort", rb_f_abort, -1);

    rb_define_module_function(rb_mProcess, "kill", rb_f_kill, -1); /* in signal.c */
    rb_define_module_function(rb_mProcess, "wait", proc_wait, -1);
    rb_define_module_function(rb_mProcess, "wait2", proc_wait2, -1);
    rb_define_module_function(rb_mProcess, "waitpid", proc_wait, -1);
    rb_define_module_function(rb_mProcess, "waitpid2", proc_wait2, -1);
    rb_define_module_function(rb_mProcess, "waitall", proc_waitall, 0);
    rb_define_module_function(rb_mProcess, "detach", proc_detach, 1);

    /* Process::Status: no public constructor */
    rb_cProcessStatus = rb_define_class_under(rb_mProcess, "Status", rb_cObject);
    rb_undef_method(CLASS_OF(rb_cProcessStatus), "new");

    rb_define_method(rb_cProcessStatus, "==", pst_equal, 1);
    rb_define_method(rb_cProcessStatus, "&", pst_bitand, 1);
    rb_define_method(rb_cProcessStatus, ">>", pst_rshift, 1);
    rb_define_method(rb_cProcessStatus, "to_i", pst_to_i, 0);
    rb_define_method(rb_cProcessStatus, "to_s", pst_to_s, 0);
    rb_define_method(rb_cProcessStatus, "inspect", pst_inspect, 0);

    rb_define_method(rb_cProcessStatus, "pid", pst_pid, 0);

    rb_define_method(rb_cProcessStatus, "stopped?", pst_wifstopped, 0);
    rb_define_method(rb_cProcessStatus, "stopsig", pst_wstopsig, 0);
    rb_define_method(rb_cProcessStatus, "signaled?", pst_wifsignaled, 0);
    rb_define_method(rb_cProcessStatus, "termsig", pst_wtermsig, 0);
    rb_define_method(rb_cProcessStatus, "exited?", pst_wifexited, 0);
    rb_define_method(rb_cProcessStatus, "exitstatus", pst_wexitstatus, 0);
    rb_define_method(rb_cProcessStatus, "success?", pst_success_p, 0);
    rb_define_method(rb_cProcessStatus, "coredump?", pst_wcoredump, 0);

    rb_define_module_function(rb_mProcess, "pid", get_pid, 0);
    rb_define_module_function(rb_mProcess, "ppid", get_ppid, 0);

    rb_define_module_function(rb_mProcess, "getpgrp", proc_getpgrp, 0);
    rb_define_module_function(rb_mProcess, "setpgrp", proc_setpgrp, 0);
    rb_define_module_function(rb_mProcess, "getpgid", proc_getpgid, 1);
    rb_define_module_function(rb_mProcess, "setpgid", proc_setpgid, 2);

    rb_define_module_function(rb_mProcess, "setsid", proc_setsid, 0);

    rb_define_module_function(rb_mProcess, "getpriority", proc_getpriority, 2);
    rb_define_module_function(rb_mProcess, "setpriority", proc_setpriority, 3);

#ifdef HAVE_GETPRIORITY
    rb_define_const(rb_mProcess, "PRIO_PROCESS", INT2FIX(PRIO_PROCESS));
    rb_define_const(rb_mProcess, "PRIO_PGRP", INT2FIX(PRIO_PGRP));
    rb_define_const(rb_mProcess, "PRIO_USER", INT2FIX(PRIO_USER));
#endif

    rb_define_module_function(rb_mProcess, "getrlimit", proc_getrlimit, 1);
    rb_define_module_function(rb_mProcess, "setrlimit", proc_setrlimit, -1);
#ifdef RLIM2NUM
    {
        VALUE inf = RLIM2NUM(RLIM_INFINITY);
        /* NOTE(review): v is declared only under RLIM_SAVED_MAX but is
         * also assigned under RLIM_SAVED_CUR below; this compiles only
         * when both macros are defined together -- confirm. */
#ifdef RLIM_SAVED_MAX
        VALUE v = RLIM_INFINITY == RLIM_SAVED_MAX ? inf : RLIM2NUM(RLIM_SAVED_MAX);
        rb_define_const(rb_mProcess, "RLIM_SAVED_MAX", v);
#endif
        rb_define_const(rb_mProcess, "RLIM_INFINITY", inf);
#ifdef RLIM_SAVED_CUR
        v = RLIM_INFINITY == RLIM_SAVED_CUR ? inf : RLIM2NUM(RLIM_SAVED_CUR);
        rb_define_const(rb_mProcess, "RLIM_SAVED_CUR", v);
#endif
    }
    /* each resource constant is exported only where the platform has it */
#ifdef RLIMIT_CORE
    rb_define_const(rb_mProcess, "RLIMIT_CORE", INT2FIX(RLIMIT_CORE));
#endif
#ifdef RLIMIT_CPU
    rb_define_const(rb_mProcess, "RLIMIT_CPU", INT2FIX(RLIMIT_CPU));
#endif
#ifdef RLIMIT_DATA
    rb_define_const(rb_mProcess, "RLIMIT_DATA", INT2FIX(RLIMIT_DATA));
#endif
#ifdef RLIMIT_FSIZE
    rb_define_const(rb_mProcess, "RLIMIT_FSIZE", INT2FIX(RLIMIT_FSIZE));
#endif
#ifdef RLIMIT_NOFILE
    rb_define_const(rb_mProcess, "RLIMIT_NOFILE", INT2FIX(RLIMIT_NOFILE));
#endif
#ifdef RLIMIT_STACK
    rb_define_const(rb_mProcess, "RLIMIT_STACK", INT2FIX(RLIMIT_STACK));
#endif
#ifdef RLIMIT_AS
    rb_define_const(rb_mProcess, "RLIMIT_AS", INT2FIX(RLIMIT_AS));
#endif
#ifdef RLIMIT_MEMLOCK
    rb_define_const(rb_mProcess, "RLIMIT_MEMLOCK", INT2FIX(RLIMIT_MEMLOCK));
#endif
#ifdef RLIMIT_NPROC
    rb_define_const(rb_mProcess, "RLIMIT_NPROC", INT2FIX(RLIMIT_NPROC));
#endif
#ifdef RLIMIT_RSS
    rb_define_const(rb_mProcess, "RLIMIT_RSS", INT2FIX(RLIMIT_RSS));
#endif
#ifdef RLIMIT_SBSIZE
    rb_define_const(rb_mProcess, "RLIMIT_SBSIZE", INT2FIX(RLIMIT_SBSIZE));
#endif
#endif

    rb_define_module_function(rb_mProcess, "uid", proc_getuid, 0);
    rb_define_module_function(rb_mProcess, "uid=", proc_setuid, 1);
    rb_define_module_function(rb_mProcess, "gid", proc_getgid, 0);
    rb_define_module_function(rb_mProcess, "gid=", proc_setgid, 1);
    rb_define_module_function(rb_mProcess, "euid", proc_geteuid, 0);
    rb_define_module_function(rb_mProcess, "euid=", proc_seteuid, 1);
    rb_define_module_function(rb_mProcess, "egid", proc_getegid, 0);
    rb_define_module_function(rb_mProcess, "egid=", proc_setegid, 1);
    rb_define_module_function(rb_mProcess, "initgroups", proc_initgroups, 2);
    rb_define_module_function(rb_mProcess, "groups", proc_getgroups, 0);
    rb_define_module_function(rb_mProcess, "groups=", proc_setgroups, 1);
    rb_define_module_function(rb_mProcess, "maxgroups", proc_getmaxgroups, 0);
    rb_define_module_function(rb_mProcess, "maxgroups=", proc_setmaxgroups, 1);

    rb_define_module_function(rb_mProcess, "daemon", proc_daemon, -1);

    rb_define_module_function(rb_mProcess, "times", rb_proc_times, 0);

#if defined(HAVE_TIMES) || defined(_WIN32)
    rb_cProcessTms = rb_struct_define("Tms", "utime", "stime", "cutime", "cstime", NULL);
#endif

    /* seed the file's own record of the saved IDs with the effective IDs at start-up */
    SAVED_USER_ID = geteuid();
    SAVED_GROUP_ID = getegid();

    rb_mProcUID = rb_define_module_under(rb_mProcess, "UID");
    rb_mProcGID = rb_define_module_under(rb_mProcess, "GID");

    rb_define_module_function(rb_mProcUID, "rid", proc_getuid, 0);
    rb_define_module_function(rb_mProcGID, "rid", proc_getgid, 0);
    rb_define_module_function(rb_mProcUID, "eid", proc_geteuid, 0);
    rb_define_module_function(rb_mProcGID, "eid", proc_getegid, 0);
    rb_define_module_function(rb_mProcUID, "change_privilege", p_uid_change_privilege, 1);
    rb_define_module_function(rb_mProcGID, "change_privilege", p_gid_change_privilege, 1);
    rb_define_module_function(rb_mProcUID, "grant_privilege", p_uid_grant_privilege, 1);
    rb_define_module_function(rb_mProcGID, "grant_privilege", p_gid_grant_privilege, 1);
    rb_define_alias(rb_singleton_class(rb_mProcUID), "eid=", "grant_privilege");
    rb_define_alias(rb_singleton_class(rb_mProcGID), "eid=", "grant_privilege");
    rb_define_module_function(rb_mProcUID, "re_exchange", p_uid_exchange, 0);
    rb_define_module_function(rb_mProcGID, "re_exchange", p_gid_exchange, 0);
    rb_define_module_function(rb_mProcUID, "re_exchangeable?", p_uid_exchangeable, 0);
    rb_define_module_function(rb_mProcGID, "re_exchangeable?", p_gid_exchangeable, 0);
    rb_define_module_function(rb_mProcUID, "sid_available?", p_uid_have_saved_id, 0);
    rb_define_module_function(rb_mProcGID, "sid_available?", p_gid_have_saved_id, 0);
    rb_define_module_function(rb_mProcUID, "switch", p_uid_switch, 0);
    rb_define_module_function(rb_mProcGID, "switch", p_gid_switch, 0);

    /* Process::Sys: thin wrappers named after the underlying system calls */
    rb_mProcID_Syscall = rb_define_module_under(rb_mProcess, "Sys");

    rb_define_module_function(rb_mProcID_Syscall, "getuid", proc_getuid, 0);
    rb_define_module_function(rb_mProcID_Syscall, "geteuid", proc_geteuid, 0);
    rb_define_module_function(rb_mProcID_Syscall, "getgid", proc_getgid, 0);
    rb_define_module_function(rb_mProcID_Syscall, "getegid", proc_getegid, 0);

    rb_define_module_function(rb_mProcID_Syscall, "setuid", p_sys_setuid, 1);
    rb_define_module_function(rb_mProcID_Syscall, "setgid", p_sys_setgid, 1);

    rb_define_module_function(rb_mProcID_Syscall, "setruid", p_sys_setruid, 1);
    rb_define_module_function(rb_mProcID_Syscall, "setrgid", p_sys_setrgid, 1);

    rb_define_module_function(rb_mProcID_Syscall, "seteuid", p_sys_seteuid, 1);
    rb_define_module_function(rb_mProcID_Syscall, "setegid", p_sys_setegid, 1);

    rb_define_module_function(rb_mProcID_Syscall, "setreuid", p_sys_setreuid, 2);
    rb_define_module_function(rb_mProcID_Syscall, "setregid", p_sys_setregid, 2);

    rb_define_module_function(rb_mProcID_Syscall, "setresuid", p_sys_setresuid, 3);
    rb_define_module_function(rb_mProcID_Syscall, "setresgid", p_sys_setresgid, 3);
    rb_define_module_function(rb_mProcID_Syscall, "issetugid", p_sys_issetugid, 0);
}
diff --git a/random.c b/random.c
new file mode 100644
index 0000000..d833c81
--- /dev/null
+++ b/random.c
@@ -0,0 +1,560 @@
/**********************************************************************

  random.c -

  $Author: yugui $
  created at: Fri Dec 24 16:39:21 JST 1993

  Copyright (C) 1993-2007 Yukihiro Matsumoto

**********************************************************************/

/*
This is based on trimmed version of MT19937.  To get the original version,
contact .

The original copyright notice follows.

   A C-program for MT19937, with initialization improved 2002/2/10.
   Coded by Takuji Nishimura and Makoto Matsumoto.
+ This is a faster version by taking Shawn Cokus's optimization, + Matthe Bellew's simplification, Isaku Wada's real version. + + Before using, initialize the state by using init_genrand(mt, seed) + or init_by_array(mt, init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. 
+ http://www.math.keio.ac.jp/matumoto/emt.html + email: matumoto@math.keio.ac.jp +*/ + +/* Period parameters */ +#define N 624 +#define M 397 +#define MATRIX_A 0x9908b0dfUL /* constant vector a */ +#define UMASK 0x80000000UL /* most significant w-r bits */ +#define LMASK 0x7fffffffUL /* least significant r bits */ +#define MIXBITS(u,v) ( ((u) & UMASK) | ((v) & LMASK) ) +#define TWIST(u,v) ((MIXBITS(u,v) >> 1) ^ ((v)&1UL ? MATRIX_A : 0UL)) + +struct MT { + unsigned long state[N]; /* the array for the state vector */ + unsigned long *next; + int left; +}; + +#define genrand_initialized(mt) ((mt)->next != 0) +#define uninit_genrand(mt) ((mt)->next = 0) + +/* initializes state[N] with a seed */ +static void +init_genrand(struct MT *mt, unsigned long s) +{ + int j; + mt->state[0] = s & 0xffffffffUL; + for (j=1; jstate[j] = (1812433253UL * (mt->state[j-1] ^ (mt->state[j-1] >> 30)) + j); + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array state[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + mt->state[j] &= 0xffffffffUL; /* for >32 bit machines */ + } + mt->left = 1; + mt->next = mt->state + N - 1; +} + +/* initialize by an array with array-length */ +/* init_key is the array for initializing keys */ +/* key_length is its length */ +/* slight change for C++, 2004/2/26 */ +static void +init_by_array(struct MT *mt, unsigned long init_key[], int key_length) +{ + int i, j, k; + init_genrand(mt, 19650218UL); + i=1; j=0; + k = (N>key_length ? 
N : key_length); + for (; k; k--) { + mt->state[i] = (mt->state[i] ^ ((mt->state[i-1] ^ (mt->state[i-1] >> 30)) * 1664525UL)) + + init_key[j] + j; /* non linear */ + mt->state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ + i++; j++; + if (i>=N) { mt->state[0] = mt->state[N-1]; i=1; } + if (j>=key_length) j=0; + } + for (k=N-1; k; k--) { + mt->state[i] = (mt->state[i] ^ ((mt->state[i-1] ^ (mt->state[i-1] >> 30)) * 1566083941UL)) + - i; /* non linear */ + mt->state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ + i++; + if (i>=N) { mt->state[0] = mt->state[N-1]; i=1; } + } + + mt->state[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ +} + +static void +next_state(struct MT *mt) +{ + unsigned long *p = mt->state; + int j; + + /* if init_genrand() has not been called, */ + /* a default initial seed is used */ + if (!genrand_initialized(mt)) init_genrand(mt, 5489UL); + + mt->left = N; + mt->next = mt->state; + + for (j=N-M+1; --j; p++) + *p = p[M] ^ TWIST(p[0], p[1]); + + for (j=M; --j; p++) + *p = p[M-N] ^ TWIST(p[0], p[1]); + + *p = p[M-N] ^ TWIST(p[0], mt->state[0]); +} + +/* generates a random number on [0,0xffffffff]-interval */ +static unsigned long +genrand_int32(struct MT *mt) +{ + unsigned long y; + + if (--mt->left <= 0) next_state(mt); + y = *mt->next++; + + /* Tempering */ + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680UL; + y ^= (y << 15) & 0xefc60000UL; + y ^= (y >> 18); + + return y; +} + +/* generates a random number on [0,1) with 53-bit resolution*/ +static double +genrand_real(struct MT *mt) +{ + unsigned long a=genrand_int32(mt)>>5, b=genrand_int32(mt)>>6; + return(a*67108864.0+b)*(1.0/9007199254740992.0); +} +/* These real versions are due to Isaku Wada, 2002/01/09 added */ + +#undef N +#undef M + +/* These real versions are due to Isaku Wada, 2002/01/09 added */ + +#include "ruby/ruby.h" + +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#include +#ifdef HAVE_FCNTL_H +#include +#endif + +#define 
DEFAULT_SEED_CNT 4 + +struct RandSeed { + VALUE value; + unsigned long initial[DEFAULT_SEED_CNT]; +}; + +struct Random { + struct MT mt; + struct RandSeed seed; +}; + +static struct Random default_mt; + +unsigned long +rb_genrand_int32(void) +{ + return genrand_int32(&default_mt.mt); +} + +double +rb_genrand_real(void) +{ + return genrand_real(&default_mt.mt); +} + +static VALUE +rand_init(struct MT *mt, VALUE vseed) +{ + volatile VALUE seed; + long len; + unsigned long *buf; + + seed = rb_to_int(vseed); + switch (TYPE(seed)) { + case T_FIXNUM: + len = sizeof(VALUE); + break; + case T_BIGNUM: + len = RBIGNUM_LEN(seed) * SIZEOF_BDIGITS; + if (len == 0) + len = 4; + break; + default: + rb_raise(rb_eTypeError, "failed to convert %s into Integer", + rb_obj_classname(vseed)); + } + len = (len + 3) / 4; /* number of 32bit words */ + buf = ALLOC_N(unsigned long, len); /* allocate longs for init_by_array */ + memset(buf, 0, len * sizeof(long)); + if (FIXNUM_P(seed)) { + buf[0] = FIX2ULONG(seed) & 0xffffffff; +#if SIZEOF_LONG > 4 + buf[1] = FIX2ULONG(seed) >> 32; +#endif + } + else { + int i, j; + for (i = RBIGNUM_LEN(seed)-1; 0 <= i; i--) { + j = i * SIZEOF_BDIGITS / 4; +#if SIZEOF_BDIGITS < 4 + buf[j] <<= SIZEOF_BDIGITS * 8; +#endif + buf[j] |= RBIGNUM_DIGITS(seed)[i]; + } + } + while (1 < len && buf[len-1] == 0) { + len--; + } + if (len <= 1) { + init_genrand(mt, buf[0]); + } + else { + if (buf[len-1] == 1) /* remove leading-zero-guard */ + len--; + init_by_array(mt, buf, len); + } + xfree(buf); + return seed; +} + +#define DEFAULT_SEED_LEN (DEFAULT_SEED_CNT * sizeof(long)) + +static void +fill_random_seed(unsigned long seed[DEFAULT_SEED_CNT]) +{ + static int n = 0; + struct timeval tv; + int fd; + struct stat statbuf; + + memset(seed, 0, DEFAULT_SEED_LEN); + +#ifdef S_ISCHR + if ((fd = open("/dev/urandom", O_RDONLY +#ifdef O_NONBLOCK + |O_NONBLOCK +#endif +#ifdef O_NOCTTY + |O_NOCTTY +#endif +#ifdef O_NOFOLLOW + |O_NOFOLLOW +#endif + )) >= 0) { + if (fstat(fd, &statbuf) 
== 0 && S_ISCHR(statbuf.st_mode)) { + read(fd, seed, DEFAULT_SEED_LEN); + } + close(fd); + } +#endif + + gettimeofday(&tv, 0); + seed[0] ^= tv.tv_usec; + seed[1] ^= tv.tv_sec; + seed[2] ^= getpid() ^ (n++ << 16); + seed[3] ^= (unsigned long)&seed; +} + +static VALUE +make_seed_value(const void *ptr) +{ + BDIGIT *digits; + NEWOBJ(big, struct RBignum); + OBJSETUP(big, rb_cBignum, T_BIGNUM); + + RBIGNUM_SET_SIGN(big, 1); + rb_big_resize((VALUE)big, DEFAULT_SEED_LEN / SIZEOF_BDIGITS + 1); + digits = RBIGNUM_DIGITS(big); + + MEMCPY((char *)RBIGNUM_DIGITS(big), ptr, char, DEFAULT_SEED_LEN); + + /* set leading-zero-guard if need. */ + digits[RBIGNUM_LEN(big)-1] = digits[RBIGNUM_LEN(big)-2] <= 1 ? 1 : 0; + + return rb_big_norm((VALUE)big); +} + +static VALUE +random_seed(void) +{ + unsigned long buf[DEFAULT_SEED_CNT]; + fill_random_seed(buf); + return make_seed_value(buf); +} + +/* + * call-seq: + * srand(number=0) => old_seed + * + * Seeds the pseudorandom number generator to the value of + * number. If number is omitted + * or zero, seeds the generator using a combination of the time, the + * process id, and a sequence number. (This is also the behavior if + * Kernel::rand is called without previously calling + * srand, but without the sequence.) By setting the seed + * to a known value, scripts can be made deterministic during testing. + * The previous seed value is returned. Also see Kernel::rand. 
+ */ + +static VALUE +rb_f_srand(int argc, VALUE *argv, VALUE obj) +{ + VALUE seed, old; + + rb_secure(4); + if (argc == 0) { + seed = random_seed(); + } + else { + rb_scan_args(argc, argv, "01", &seed); + } + old = default_mt.seed.value; + default_mt.seed.value = rand_init(&default_mt.mt, seed); + + return old; +} + +static unsigned long +make_mask(unsigned long x) +{ + x = x | x >> 1; + x = x | x >> 2; + x = x | x >> 4; + x = x | x >> 8; + x = x | x >> 16; +#if 4 < SIZEOF_LONG + x = x | x >> 32; +#endif + return x; +} + +static unsigned long +limited_rand(struct MT *mt, unsigned long limit) +{ + unsigned long mask = make_mask(limit); + int i; + unsigned long val; + + retry: + val = 0; + for (i = SIZEOF_LONG/4-1; 0 <= i; i--) { + if (mask >> (i * 32)) { + val |= genrand_int32(mt) << (i * 32); + val &= mask; + if (limit < val) + goto retry; + } + } + return val; +} + +static VALUE +limited_big_rand(struct MT *mt, struct RBignum *limit) +{ + unsigned long mask, lim, rnd; + struct RBignum *val; + int i, len, boundary; + + len = (RBIGNUM_LEN(limit) * SIZEOF_BDIGITS + 3) / 4; + val = (struct RBignum *)rb_big_clone((VALUE)limit); + RBIGNUM_SET_SIGN(val, 1); +#if SIZEOF_BDIGITS == 2 +# define BIG_GET32(big,i) \ + (RBIGNUM_DIGITS(big)[(i)*2] | \ + ((i)*2+1 < RBIGNUM_LEN(big) ? \ + (RBIGNUM_DIGITS(big)[(i)*2+1] << 16) : \ + 0)) +# define BIG_SET32(big,i,d) \ + ((RBIGNUM_DIGITS(big)[(i)*2] = (d) & 0xffff), \ + ((i)*2+1 < RBIGNUM_LEN(big) ? \ + (RBIGNUM_DIGITS(big)[(i)*2+1] = (d) >> 16) : \ + 0)) +#else + /* SIZEOF_BDIGITS == 4 */ +# define BIG_GET32(big,i) (RBIGNUM_DIGITS(big)[i]) +# define BIG_SET32(big,i,d) (RBIGNUM_DIGITS(big)[i] = (d)) +#endif + retry: + mask = 0; + boundary = 1; + for (i = len-1; 0 <= i; i--) { + lim = BIG_GET32(limit, i); + mask = mask ? 
0xffffffff : make_mask(lim); + if (mask) { + rnd = genrand_int32(mt) & mask; + if (boundary) { + if (lim < rnd) + goto retry; + if (rnd < lim) + boundary = 0; + } + } + else { + rnd = 0; + } + BIG_SET32(val, i, rnd); + } + return rb_big_norm((VALUE)val); +} + +/* + * call-seq: + * rand(max=0) => number + * + * Converts max to an integer using max1 = + * max.to_i.abs. If the result is zero, returns a + * pseudorandom floating point number greater than or equal to 0.0 and + * less than 1.0. Otherwise, returns a pseudorandom integer greater + * than or equal to zero and less than max1. Kernel::srand + * may be used to ensure repeatable sequences of random numbers between + * different runs of the program. Ruby currently uses a modified + * Mersenne Twister with a period of 2**19937-1. + * + * srand 1234 #=> 0 + * [ rand, rand ] #=> [0.191519450163469, 0.49766366626136] + * [ rand(10), rand(1000) ] #=> [6, 817] + * srand 1234 #=> 1234 + * [ rand, rand ] #=> [0.191519450163469, 0.49766366626136] + */ + +static VALUE +rb_f_rand(int argc, VALUE *argv, VALUE obj) +{ + VALUE vmax; + long val, max; + struct MT *mt = &default_mt.mt; + + rb_scan_args(argc, argv, "01", &vmax); + if (!genrand_initialized(mt)) { + rand_init(mt, random_seed()); + } + switch (TYPE(vmax)) { + case T_FLOAT: + if (RFLOAT_VALUE(vmax) <= LONG_MAX && RFLOAT_VALUE(vmax) >= LONG_MIN) { + max = (long)RFLOAT_VALUE(vmax); + break; + } + if (RFLOAT_VALUE(vmax) < 0) + vmax = rb_dbl2big(-RFLOAT_VALUE(vmax)); + else + vmax = rb_dbl2big(RFLOAT_VALUE(vmax)); + /* fall through */ + case T_BIGNUM: + bignum: + { + struct RBignum *limit = (struct RBignum *)vmax; + if (!RBIGNUM_SIGN(limit)) { + limit = (struct RBignum *)rb_big_clone(vmax); + RBIGNUM_SET_SIGN(limit, 1); + } + limit = (struct RBignum *)rb_big_minus((VALUE)limit, INT2FIX(1)); + if (FIXNUM_P((VALUE)limit)) { + if (FIX2LONG((VALUE)limit) == -1) + return DBL2NUM(genrand_real(mt)); + return LONG2NUM(limited_rand(mt, FIX2LONG((VALUE)limit))); + } + return 
limited_big_rand(mt, limit); + } + case T_NIL: + max = 0; + break; + default: + vmax = rb_Integer(vmax); + if (TYPE(vmax) == T_BIGNUM) goto bignum; + case T_FIXNUM: + max = FIX2LONG(vmax); + break; + } + + if (max == 0) { + return DBL2NUM(genrand_real(mt)); + } + if (max < 0) max = -max; + val = limited_rand(mt, max-1); + return LONG2NUM(val); +} + +void +Init_RandomSeed(void) +{ + fill_random_seed(default_mt.seed.initial); + init_by_array(&default_mt.mt, default_mt.seed.initial, DEFAULT_SEED_CNT); +} + +static void +Init_RandomSeed2(void) +{ + default_mt.seed.value = make_seed_value(default_mt.seed.initial); + memset(default_mt.seed.initial, 0, DEFAULT_SEED_LEN); +} + +void +rb_reset_random_seed(void) +{ + uninit_genrand(&default_mt.mt); + default_mt.seed.value = INT2FIX(0); +} + +void +Init_Random(void) +{ + Init_RandomSeed2(); + rb_define_global_function("srand", rb_f_srand, -1); + rb_define_global_function("rand", rb_f_rand, -1); + rb_global_variable(&default_mt.seed.value); +} diff --git a/range.c b/range.c new file mode 100644 index 0000000..5f41894 --- /dev/null +++ b/range.c @@ -0,0 +1,964 @@ +/********************************************************************** + + range.c - + + $Author: yugui $ + created at: Thu Aug 19 17:46:47 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/encoding.h" + +VALUE rb_cRange; +static ID id_cmp, id_succ, id_beg, id_end, id_excl; + +extern VALUE rb_struct_init_copy(VALUE copy, VALUE s); + +#define RANGE_BEG(r) (RSTRUCT(r)->as.ary[0]) +#define RANGE_END(r) (RSTRUCT(r)->as.ary[1]) +#define RANGE_EXCL(r) (RSTRUCT(r)->as.ary[2]) + +#define EXCL(r) RTEST(RANGE_EXCL(r)) +#define SET_EXCL(r,v) (RSTRUCT(r)->as.ary[2] = (v) ? 
Qtrue : Qfalse) + +static VALUE +range_failed(void) +{ + rb_raise(rb_eArgError, "bad value for range"); + return Qnil; /* dummy */ +} + +static VALUE +range_check(VALUE *args) +{ + return rb_funcall(args[0], id_cmp, 1, args[1]); +} + +static void +range_init(VALUE range, VALUE beg, VALUE end, int exclude_end) +{ + VALUE args[2]; + + args[0] = beg; + args[1] = end; + + if (!FIXNUM_P(beg) || !FIXNUM_P(end)) { + VALUE v; + + v = rb_rescue(range_check, (VALUE)args, range_failed, 0); + if (NIL_P(v)) + range_failed(); + } + + SET_EXCL(range, exclude_end); + RSTRUCT(range)->as.ary[0] = beg; + RSTRUCT(range)->as.ary[1] = end; +} + +VALUE +rb_range_new(VALUE beg, VALUE end, int exclude_end) +{ + VALUE range = rb_obj_alloc(rb_cRange); + + range_init(range, beg, end, exclude_end); + return range; +} + +/* + * call-seq: + * Range.new(start, end, exclusive=false) => range + * + * Constructs a range using the given start and end. If the third + * parameter is omitted or is false, the range will include + * the end object; otherwise, it will be excluded. + */ + +static VALUE +range_initialize(int argc, VALUE *argv, VALUE range) +{ + VALUE beg, end, flags; + + rb_scan_args(argc, argv, "21", &beg, &end, &flags); + /* Ranges are immutable, so that they should be initialized only once. */ + if (RANGE_EXCL(range) != Qnil) { + rb_name_error(rb_intern("initialize"), "`initialize' called twice"); + } + range_init(range, beg, end, RTEST(flags)); + return Qnil; +} + +#define range_initialize_copy rb_struct_init_copy /* :nodoc: */ + +/* + * call-seq: + * rng.exclude_end? => true or false + * + * Returns true if rng excludes its end value. + */ + +static VALUE +range_exclude_end_p(VALUE range) +{ + return EXCL(range) ? Qtrue : Qfalse; +} + +static VALUE +recursive_equal(VALUE range, VALUE obj, int recur) +{ + if (recur) return Qtrue; /* Subtle! 
*/ + if (!rb_equal(RANGE_BEG(range), RANGE_BEG(obj))) + return Qfalse; + if (!rb_equal(RANGE_END(range), RANGE_END(obj))) + return Qfalse; + + if (EXCL(range) != EXCL(obj)) + return Qfalse; + return Qtrue; +} + + +/* + * call-seq: + * rng == obj => true or false + * + * Returns true only if obj is a Range, has equivalent + * beginning and end items (by comparing them with ==), and has + * the same exclude_end? setting as rng. + * + * (0..2) == (0..2) #=> true + * (0..2) == Range.new(0,2) #=> true + * (0..2) == (0...2) #=> false + * + */ + +static VALUE +range_eq(VALUE range, VALUE obj) +{ + if (range == obj) + return Qtrue; + if (!rb_obj_is_kind_of(obj, rb_cRange)) + return Qfalse; + + return rb_exec_recursive_paired(recursive_equal, range, obj, obj); +} + +static int +r_lt(VALUE a, VALUE b) +{ + VALUE r = rb_funcall(a, id_cmp, 1, b); + + if (NIL_P(r)) + return Qfalse; + if (rb_cmpint(r, a, b) < 0) + return Qtrue; + return Qfalse; +} + +static int +r_le(VALUE a, VALUE b) +{ + int c; + VALUE r = rb_funcall(a, id_cmp, 1, b); + + if (NIL_P(r)) + return Qfalse; + c = rb_cmpint(r, a, b); + if (c == 0) + return INT2FIX(0); + if (c < 0) + return Qtrue; + return Qfalse; +} + + +static VALUE +recursive_eql(VALUE range, VALUE obj, int recur) +{ + if (recur) return Qtrue; /* Subtle! */ + if (!rb_eql(RANGE_BEG(range), RANGE_BEG(obj))) + return Qfalse; + if (!rb_eql(RANGE_END(range), RANGE_END(obj))) + return Qfalse; + + if (EXCL(range) != EXCL(obj)) + return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * rng.eql?(obj) => true or false + * + * Returns true only if obj is a Range, has equivalent + * beginning and end items (by comparing them with #eql?), and has the same + * #exclude_end? setting as rng. 
+ * + * (0..2) == (0..2) #=> true + * (0..2) == Range.new(0,2) #=> true + * (0..2) == (0...2) #=> false + * + */ + +static VALUE +range_eql(VALUE range, VALUE obj) +{ + if (range == obj) + return Qtrue; + if (!rb_obj_is_kind_of(obj, rb_cRange)) + return Qfalse; + return rb_exec_recursive_paired(recursive_eql, range, obj, obj); +} + +/* + * call-seq: + * rng.hash => fixnum + * + * Generate a hash value such that two ranges with the same start and + * end points, and the same value for the "exclude end" flag, generate + * the same hash value. + */ + +static VALUE +range_hash(VALUE range) +{ + long hash = EXCL(range); + VALUE v; + + v = rb_hash(RANGE_BEG(range)); + hash ^= v << 1; + v = rb_hash(RANGE_END(range)); + hash ^= v << 9; + hash ^= EXCL(range) << 24; + + return LONG2FIX(hash); +} + +static void +range_each_func(VALUE range, VALUE (*func) (VALUE, void *), void *arg) +{ + int c; + VALUE b = RANGE_BEG(range); + VALUE e = RANGE_END(range); + VALUE v = b; + + if (EXCL(range)) { + while (r_lt(v, e)) { + (*func) (v, arg); + v = rb_funcall(v, id_succ, 0, 0); + } + } + else { + while (RTEST(c = r_le(v, e))) { + (*func) (v, arg); + if (c == INT2FIX(0)) + break; + v = rb_funcall(v, id_succ, 0, 0); + } + } +} + +static VALUE +step_i(VALUE i, void *arg) +{ + VALUE *iter = arg; + + if (FIXNUM_P(iter[0])) { + iter[0] -= INT2FIX(1) & ~FIXNUM_FLAG; + } + else { + iter[0] = rb_funcall(iter[0], '-', 1, INT2FIX(1)); + } + if (iter[0] == INT2FIX(0)) { + rb_yield(i); + iter[0] = iter[1]; + } + return Qnil; +} + +extern int ruby_float_step(VALUE from, VALUE to, VALUE step, int excl); + +/* + * call-seq: + * rng.step(n=1) {| obj | block } => rng + * + * Iterates over rng, passing each nth element to the block. If + * the range contains numbers, n is added for each iteration. Otherwise + * step invokes succ to iterate through range + * elements. The following code uses class Xs, which is defined + * in the class-level documentation. 
+ * + * range = Xs.new(1)..Xs.new(10) + * range.step(2) {|x| puts x} + * range.step(3) {|x| puts x} + * + * produces: + * + * 1 x + * 3 xxx + * 5 xxxxx + * 7 xxxxxxx + * 9 xxxxxxxxx + * 1 x + * 4 xxxx + * 7 xxxxxxx + * 10 xxxxxxxxxx + */ + + +static VALUE +range_step(int argc, VALUE *argv, VALUE range) +{ + VALUE b, e, step, tmp; + + RETURN_ENUMERATOR(range, argc, argv); + + b = RANGE_BEG(range); + e = RANGE_END(range); + if (argc == 0) { + step = INT2FIX(1); + } + else { + rb_scan_args(argc, argv, "01", &step); + if (!rb_obj_is_kind_of(step, rb_cNumeric)) { + step = rb_to_int(step); + } + if (rb_funcall(step, '<', 1, INT2FIX(0))) { + rb_raise(rb_eArgError, "step can't be negative"); + } + else if (!rb_funcall(step, '>', 1, INT2FIX(0))) { + rb_raise(rb_eArgError, "step can't be 0"); + } + } + + if (FIXNUM_P(b) && FIXNUM_P(e) && FIXNUM_P(step)) { /* fixnums are special */ + long end = FIX2LONG(e); + long i, unit = FIX2LONG(step); + + if (!EXCL(range)) + end += 1; + i = FIX2LONG(b); + while (i < end) { + rb_yield(LONG2NUM(i)); + if (i + unit < i) break; + i += unit; + } + + } + else if (ruby_float_step(b, e, step, EXCL(range))) { + /* done */ + } + else if (rb_obj_is_kind_of(b, rb_cNumeric) || + !NIL_P(rb_check_to_integer(b, "to_int")) || + !NIL_P(rb_check_to_integer(e, "to_int"))) { + ID op = EXCL(range) ? '<' : rb_intern("<="); + + while (RTEST(rb_funcall(b, op, 1, e))) { + rb_yield(b); + b = rb_funcall(b, '+', 1, step); + } + } + else { + tmp = rb_check_string_type(b); + + if (!NIL_P(tmp)) { + VALUE args[2], iter[2]; + + b = tmp; + args[0] = e; + args[1] = EXCL(range) ? 
Qtrue : Qfalse; + iter[0] = INT2FIX(1); + iter[1] = step; + rb_block_call(b, rb_intern("upto"), 2, args, step_i, (VALUE)iter); + } + else { + VALUE args[2]; + + if (!rb_respond_to(b, id_succ)) { + rb_raise(rb_eTypeError, "can't iterate from %s", + rb_obj_classname(b)); + } + args[0] = INT2FIX(1); + args[1] = step; + range_each_func(range, step_i, args); + } + } + return range; +} + +static VALUE +each_i(VALUE v, void *arg) +{ + rb_yield(v); + return Qnil; +} + +/* + * call-seq: + * rng.each {| i | block } => rng + * + * Iterates over the elements rng, passing each in turn to the + * block. You can only iterate if the start object of the range + * supports the +succ+ method (which means that you can't iterate over + * ranges of +Float+ objects). + * + * (10..15).each do |n| + * print n, ' ' + * end + * + * produces: + * + * 10 11 12 13 14 15 + */ + +static VALUE +range_each(VALUE range) +{ + VALUE beg, end; + + RETURN_ENUMERATOR(range, 0, 0); + + beg = RANGE_BEG(range); + end = RANGE_END(range); + + if (!rb_respond_to(beg, id_succ)) { + rb_raise(rb_eTypeError, "can't iterate from %s", + rb_obj_classname(beg)); + } + if (FIXNUM_P(beg) && FIXNUM_P(end)) { /* fixnums are special */ + long lim = FIX2LONG(end); + long i; + + if (!EXCL(range)) + lim += 1; + for (i = FIX2LONG(beg); i < lim; i++) { + rb_yield(LONG2FIX(i)); + } + } + else if (TYPE(beg) == T_STRING) { + VALUE args[2]; + + args[0] = end; + args[1] = EXCL(range) ? Qtrue : Qfalse; + rb_block_call(beg, rb_intern("upto"), 2, args, rb_yield, 0); + } + else { + range_each_func(range, each_i, NULL); + } + return range; +} + +/* + * call-seq: + * rng.begin => obj + * + * Returns the first object in rng. + */ + +static VALUE +range_begin(VALUE range) +{ + return RANGE_BEG(range); +} + + +/* + * call-seq: + * rng.end => obj + * + * Returns the object that defines the end of rng. 
+ * + * (1..10).end #=> 10 + * (1...10).end #=> 10 + */ + + +static VALUE +range_end(VALUE range) +{ + return RANGE_END(range); +} + + +static VALUE +first_i(VALUE i, VALUE *ary) +{ + long n = NUM2LONG(ary[0]); + + if (n <= 0) { + rb_iter_break(); + } + rb_ary_push(ary[1], i); + n--; + ary[0] = INT2NUM(n); + return Qnil; +} + +/* + * call-seq: + * rng.first => obj + * rng.first(n) => an_array + * + * Returns the first object in rng, or the first +n+ elements. + */ + +static VALUE +range_first(int argc, VALUE *argv, VALUE range) +{ + VALUE n, ary[2]; + + if (argc == 0) return RANGE_BEG(range); + + rb_scan_args(argc, argv, "1", &n); + ary[0] = n; + ary[1] = rb_ary_new2(NUM2LONG(n)); + rb_block_call(range, rb_intern("each"), 0, 0, first_i, (VALUE)ary); + + return ary[1]; +} + + +/* + * call-seq: + * rng.last => obj + * rng.last(n) => an_array + * + * Returns the last object in rng, or the last +n+ elements. + */ + +static VALUE +range_last(int argc, VALUE *argv, VALUE range) +{ + VALUE rb_ary_last(int, VALUE *, VALUE); + + if (argc == 0) return RANGE_END(range); + return rb_ary_last(argc, argv, rb_Array(range)); +} + + +/* + * call-seq: + * rng.min => obj + * rng.min {| a,b | block } => obj + * + * Returns the minimum value in rng. The second uses + * the block to compare values. Returns nil if the first + * value in range is larger than the last value. + * + */ + + +static VALUE +range_min(VALUE range) +{ + if (rb_block_given_p()) { + return rb_call_super(0, 0); + } + else { + VALUE b = RANGE_BEG(range); + VALUE e = RANGE_END(range); + int c = rb_cmpint(rb_funcall(b, id_cmp, 1, e), b, e); + + if (c > 0 || (c == 0 && EXCL(range))) + return Qnil; + return b; + } +} + +/* + * call-seq: + * rng.max => obj + * rng.max {| a,b | block } => obj + * + * Returns the maximum value in rng. The second uses + * the block to compare values. Returns nil if the first + * value in range is larger than the last value. 
+ * + */ + + +static VALUE +range_max(VALUE range) +{ + VALUE e = RANGE_END(range); + int ip = FIXNUM_P(e) || rb_obj_is_kind_of(e, rb_cInteger); + + if (rb_block_given_p() || (EXCL(range) && !ip)) { + return rb_call_super(0, 0); + } + else { + VALUE b = RANGE_BEG(range); + int c = rb_cmpint(rb_funcall(b, id_cmp, 1, e), b, e); + + if (c > 0) + return Qnil; + if (EXCL(range)) { + if (c == 0) return Qnil; + if (FIXNUM_P(e)) { + return LONG2NUM(FIX2LONG(e) - 1); + } + return rb_funcall(e, '-', 1, INT2FIX(1)); + } + return e; + } +} + +VALUE +rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err) +{ + VALUE b, e; + long beg, end, excl; + + if (rb_obj_is_kind_of(range, rb_cRange)) { + b = RANGE_BEG(range); + e = RANGE_END(range); + excl = EXCL(range); + } + else { + if (!rb_respond_to(range, id_beg)) return Qfalse; + if (!rb_respond_to(range, id_end)) return Qfalse; + b = rb_funcall(range, id_beg, 0); + e = rb_funcall(range, id_end, 0); + excl = RTEST(rb_funcall(range, rb_intern("exclude_end?"), 0)); + } + beg = NUM2LONG(b); + end = NUM2LONG(e); + + if (beg < 0) { + beg += len; + if (beg < 0) + goto out_of_range; + } + if (err == 0 || err == 2) { + if (beg > len) + goto out_of_range; + if (end > len) + end = len; + } + if (end < 0) + end += len; + if (!excl) + end++; /* include end point */ + len = end - beg; + if (len < 0) + len = 0; + + *begp = beg; + *lenp = len; + return Qtrue; + + out_of_range: + if (err) { + rb_raise(rb_eRangeError, "%ld..%s%ld out of range", + NUM2LONG(b), excl ? "." : "", NUM2LONG(e)); + } + return Qnil; +} + +/* + * call-seq: + * rng.to_s => string + * + * Convert this range object to a printable form. + */ + +static VALUE +range_to_s(VALUE range) +{ + VALUE str, str2; + + str = rb_obj_as_string(RANGE_BEG(range)); + str2 = rb_obj_as_string(RANGE_END(range)); + str = rb_str_dup(str); + rb_str_cat(str, "...", EXCL(range) ? 
3 : 2); + rb_str_append(str, str2); + OBJ_INFECT(str, str2); + + return str; +} + +static VALUE +inspect_range(VALUE range, VALUE dummy, int recur) +{ + VALUE str, str2; + + if (recur) { + return rb_str_new2(EXCL(range) ? "(... ... ...)" : "(... .. ...)"); + } + str = rb_inspect(RANGE_BEG(range)); + str2 = rb_inspect(RANGE_END(range)); + str = rb_str_dup(str); + rb_str_cat(str, "...", EXCL(range) ? 3 : 2); + rb_str_append(str, str2); + OBJ_INFECT(str, str2); + + return str; +} + +/* + * call-seq: + * rng.inspect => string + * + * Convert this range object to a printable form (using + * inspect to convert the start and end + * objects). + */ + + +static VALUE +range_inspect(VALUE range) +{ + return rb_exec_recursive(inspect_range, range, 0); +} + +/* + * call-seq: + * rng === obj => true or false + * + * Returns true if obj is an element of + * rng, false otherwise. Conveniently, + * === is the comparison operator used by + * case statements. + * + * case 79 + * when 1..50 then print "low\n" + * when 51..75 then print "medium\n" + * when 76..100 then print "high\n" + * end + * + * produces: + * + * high + */ + +static VALUE +range_eqq(VALUE range, VALUE val) +{ + return rb_funcall(range, rb_intern("include?"), 1, val); +} + + +/* + * call-seq: + * rng.member?(val) => true or false + * rng.include?(val) => true or false + * + * Returns true if obj is an element of + * rng, false otherwise. If beg and end are + * numeric, comparison is done according magnitude of values. 
+ * + * ("a".."z").include?("g") # => true + * ("a".."z").include?("A") # => false + */ + +static VALUE +range_include(VALUE range, VALUE val) +{ + VALUE beg = RANGE_BEG(range); + VALUE end = RANGE_END(range); + int nv = FIXNUM_P(beg) || FIXNUM_P(end) || + rb_obj_is_kind_of(beg, rb_cNumeric) || + rb_obj_is_kind_of(end, rb_cNumeric); + + if (nv || + !NIL_P(rb_check_to_integer(beg, "to_int")) || + !NIL_P(rb_check_to_integer(end, "to_int"))) { + if (r_le(beg, val)) { + if (EXCL(range)) { + if (r_lt(val, end)) + return Qtrue; + } + else { + if (r_le(val, end)) + return Qtrue; + } + } + return Qfalse; + } + else if (TYPE(beg) == T_STRING && TYPE(end) == T_STRING && + RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) { + if (NIL_P(val)) return Qfalse; + if (TYPE(val) == T_STRING) { + if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1) + return Qfalse; + else { + char b = RSTRING_PTR(beg)[0]; + char e = RSTRING_PTR(end)[0]; + char v = RSTRING_PTR(val)[0]; + + if (ISASCII(b) && ISASCII(e) && ISASCII(v)) { + if (b <= v && v < e) return Qtrue; + if (!EXCL(range) && v == e) return Qtrue; + return Qfalse; + } + } + } + } + /* TODO: ruby_frame->this_func = rb_intern("include?"); */ + return rb_call_super(1, &val); +} + + +/* + * call-seq: + * rng.cover?(val) => true or false + * + * Returns true if obj is between beg and end, + * i.e beg <= obj <= end (or end exclusive when + * exclude_end? is true). 
+ * + * ("a".."z").cover?("c") #=> true + * ("a".."z").cover?("5") #=> false + */ + +static VALUE +range_cover(VALUE range, VALUE val) +{ + VALUE beg, end; + + beg = RANGE_BEG(range); + end = RANGE_END(range); + if (r_le(beg, val)) { + if (EXCL(range)) { + if (r_lt(val, end)) + return Qtrue; + } + else { + if (r_le(val, end)) + return Qtrue; + } + } + return Qfalse; +} + +static VALUE +range_dumper(VALUE range) +{ + VALUE v; + NEWOBJ(m, struct RObject); + OBJSETUP(m, rb_cObject, T_OBJECT); + + v = (VALUE)m; + + rb_ivar_set(v, id_excl, RANGE_EXCL(range)); + rb_ivar_set(v, id_beg, RANGE_BEG(range)); + rb_ivar_set(v, id_end, RANGE_END(range)); + return v; +} + +static VALUE +range_loader(VALUE range, VALUE obj) +{ + if (TYPE(obj) != T_OBJECT || RBASIC(obj)->klass != rb_cObject) { + rb_raise(rb_eTypeError, "not a dumped range object"); + } + + RSTRUCT(range)->as.ary[0] = rb_ivar_get(obj, id_beg); + RSTRUCT(range)->as.ary[1] = rb_ivar_get(obj, id_end); + RSTRUCT(range)->as.ary[2] = rb_ivar_get(obj, id_excl); + return range; +} + +static VALUE +range_alloc(VALUE klass) +{ + /* rb_struct_alloc_noinit itself should not be used because + * rb_marshal_define_compat uses equality of allocaiton function */ + return rb_struct_alloc_noinit(klass); +} + +/* A Range represents an interval---a set of values with a + * start and an end. Ranges may be constructed using the + * s..e and + * s...e literals, or with + * Range::new. Ranges constructed using .. + * run from the start to the end inclusively. Those created using + * ... exclude the end value. When used as an iterator, + * ranges return each value in the sequence. 
+ * + * (-1..-5).to_a #=> [] + * (-5..-1).to_a #=> [-5, -4, -3, -2, -1] + * ('a'..'e').to_a #=> ["a", "b", "c", "d", "e"] + * ('a'...'e').to_a #=> ["a", "b", "c", "d"] + * + * Ranges can be constructed using objects of any type, as long as the + * objects can be compared using their <=> operator and + * they support the succ method to return the next object + * in sequence. + * + * class Xs # represent a string of 'x's + * include Comparable + * attr :length + * def initialize(n) + * @length = n + * end + * def succ + * Xs.new(@length + 1) + * end + * def <=>(other) + * @length <=> other.length + * end + * def to_s + * sprintf "%2d #{inspect}", @length + * end + * def inspect + * 'x' * @length + * end + * end + * + * r = Xs.new(3)..Xs.new(6) #=> xxx..xxxxxx + * r.to_a #=> [xxx, xxxx, xxxxx, xxxxxx] + * r.member?(Xs.new(5)) #=> true + * + * In the previous code example, class Xs includes the + * Comparable module. This is because + * Enumerable#member? checks for equality using + * ==. Including Comparable ensures that the + * == method is defined in terms of the <=> + * method implemented in Xs. 
+ * + */ + +void +Init_Range(void) +{ +#undef rb_intern +#define rb_intern(str) rb_intern_const(str) + + id_cmp = rb_intern("<=>"); + id_succ = rb_intern("succ"); + id_beg = rb_intern("begin"); + id_end = rb_intern("end"); + id_excl = rb_intern("excl"); + + rb_cRange = rb_struct_define_without_accessor( + "Range", rb_cObject, range_alloc, + "begin", "end", "excl", NULL); + + rb_include_module(rb_cRange, rb_mEnumerable); + rb_marshal_define_compat(rb_cRange, rb_cObject, range_dumper, range_loader); + rb_define_method(rb_cRange, "initialize", range_initialize, -1); + rb_define_method(rb_cRange, "initialize_copy", range_initialize_copy, 1); + rb_define_method(rb_cRange, "==", range_eq, 1); + rb_define_method(rb_cRange, "===", range_eqq, 1); + rb_define_method(rb_cRange, "eql?", range_eql, 1); + rb_define_method(rb_cRange, "hash", range_hash, 0); + rb_define_method(rb_cRange, "each", range_each, 0); + rb_define_method(rb_cRange, "step", range_step, -1); + rb_define_method(rb_cRange, "begin", range_begin, 0); + rb_define_method(rb_cRange, "end", range_end, 0); + rb_define_method(rb_cRange, "first", range_first, -1); + rb_define_method(rb_cRange, "last", range_last, -1); + rb_define_method(rb_cRange, "min", range_min, 0); + rb_define_method(rb_cRange, "max", range_max, 0); + rb_define_method(rb_cRange, "to_s", range_to_s, 0); + rb_define_method(rb_cRange, "inspect", range_inspect, 0); + + rb_define_method(rb_cRange, "exclude_end?", range_exclude_end_p, 0); + + rb_define_method(rb_cRange, "member?", range_include, 1); + rb_define_method(rb_cRange, "include?", range_include, 1); + rb_define_method(rb_cRange, "cover?", range_cover, 1); +} diff --git a/rational.c b/rational.c new file mode 100644 index 0000000..70f7f16 --- /dev/null +++ b/rational.c @@ -0,0 +1,1668 @@ +/* + rational.c: Coded by Tadayoshi Funaba 2008 + + This implementation is based on Keiju Ishitsuka's Rational library + which is written in ruby. 
+*/ + +#include "ruby.h" +#include +#include + +#ifdef HAVE_IEEEFP_H +#include +#endif + +#define NDEBUG +#include + +#define ZERO INT2FIX(0) +#define ONE INT2FIX(1) +#define TWO INT2FIX(2) + +VALUE rb_cRational; + +static ID id_abs, id_cmp, id_convert, id_equal_p, id_expt, id_floor, + id_hash, id_idiv, id_inspect, id_integer_p, id_negate, id_to_f, + id_to_i, id_to_s, id_truncate; + +#define f_boolcast(x) ((x) ? Qtrue : Qfalse) + +#define binop(n,op) \ +inline static VALUE \ +f_##n(VALUE x, VALUE y)\ +{\ + return rb_funcall(x, op, 1, y);\ +} + +#define fun1(n) \ +inline static VALUE \ +f_##n(VALUE x)\ +{\ + return rb_funcall(x, id_##n, 0);\ +} + +#define fun2(n) \ +inline static VALUE \ +f_##n(VALUE x, VALUE y)\ +{\ + return rb_funcall(x, id_##n, 1, y);\ +} + +inline static VALUE +f_add(VALUE x, VALUE y) +{ + if (FIXNUM_P(y) && FIX2LONG(y) == 0) + return x; + else if (FIXNUM_P(x) && FIX2LONG(x) == 0) + return y; + return rb_funcall(x, '+', 1, y); +} + +inline static VALUE +f_cmp(VALUE x, VALUE y) +{ + if (FIXNUM_P(x) && FIXNUM_P(y)) { + long c = FIX2LONG(x) - FIX2LONG(y); + if (c > 0) + c = 1; + else if (c < 0) + c = -1; + return INT2FIX(c); + } + return rb_funcall(x, id_cmp, 1, y); +} + +inline static VALUE +f_div(VALUE x, VALUE y) +{ + if (FIXNUM_P(y) && FIX2LONG(y) == 1) + return x; + return rb_funcall(x, '/', 1, y); +} + +inline static VALUE +f_gt_p(VALUE x, VALUE y) +{ + if (FIXNUM_P(x) && FIXNUM_P(y)) + return f_boolcast(FIX2LONG(x) > FIX2LONG(y)); + return rb_funcall(x, '>', 1, y); +} + +inline static VALUE +f_lt_p(VALUE x, VALUE y) +{ + if (FIXNUM_P(x) && FIXNUM_P(y)) + return f_boolcast(FIX2LONG(x) < FIX2LONG(y)); + return rb_funcall(x, '<', 1, y); +} + +binop(mod, '%') + +inline static VALUE +f_mul(VALUE x, VALUE y) +{ + if (FIXNUM_P(y)) { + long iy = FIX2LONG(y); + if (iy == 0) { + if (FIXNUM_P(x) || TYPE(x) == T_BIGNUM) + return ZERO; + } + else if (iy == 1) + return x; + } + else if (FIXNUM_P(x)) { + long ix = FIX2LONG(x); + if (ix == 0) { + if 
 (FIXNUM_P(y) || TYPE(y) == T_BIGNUM)
+                return ZERO;
+        }
+        else if (ix == 1)
+            return y;
+    }
+    return rb_funcall(x, '*', 1, y);
+}
+
+/* x - y; returns x unchanged when y is Fixnum 0. */
+inline static VALUE
+f_sub(VALUE x, VALUE y)
+{
+    if (FIXNUM_P(y) && FIX2LONG(y) == 0)
+        return x;
+    return rb_funcall(x, '-', 1, y);
+}
+
+binop(xor, '^')
+
+/* Zero-argument dispatch helpers: f_abs(x), f_floor(x), ... */
+fun1(abs)
+fun1(floor)
+fun1(hash)
+fun1(inspect)
+fun1(integer_p)
+fun1(negate)
+fun1(to_f)
+fun1(to_i)
+fun1(to_s)
+fun1(truncate)
+
+/* x == y; Fixnum pairs are compared in C. */
+inline static VALUE
+f_equal_p(VALUE x, VALUE y)
+{
+    if (FIXNUM_P(x) && FIXNUM_P(y))
+        return f_boolcast(FIX2LONG(x) == FIX2LONG(y));
+    return rb_funcall(x, id_equal_p, 1, y);
+}
+
+fun2(expt)
+fun2(idiv)
+
+/* x < 0, returned as Qtrue/Qfalse for Fixnums, otherwise whatever `<` gives. */
+inline static VALUE
+f_negative_p(VALUE x)
+{
+    if (FIXNUM_P(x))
+        return f_boolcast(FIX2LONG(x) < 0);
+    return rb_funcall(x, '<', 1, ZERO);
+}
+
+/* C-level negation of f_negative_p(): yields a C int (0/1), not a VALUE,
+ * and is true for zero as well. */
+#define f_positive_p(x) (!f_negative_p(x))
+
+/* x == 0. */
+inline static VALUE
+f_zero_p(VALUE x)
+{
+    if (FIXNUM_P(x))
+        return f_boolcast(FIX2LONG(x) == 0);
+    return rb_funcall(x, id_equal_p, 1, ZERO);
+}
+
+/* C-level negation of f_zero_p(): a C int (0/1), not a VALUE. */
+#define f_nonzero_p(x) (!f_zero_p(x))
+
+/* x == 1. */
+inline static VALUE
+f_one_p(VALUE x)
+{
+    if (FIXNUM_P(x))
+        return f_boolcast(FIX2LONG(x) == 1);
+    return rb_funcall(x, id_equal_p, 1, ONE);
+}
+
+/* Class-membership predicates built on rb_obj_is_kind_of(). */
+inline static VALUE
+f_kind_of_p(VALUE x, VALUE c)
+{
+    return rb_obj_is_kind_of(x, c);
+}
+
+inline static VALUE
+k_numeric_p(VALUE x)
+{
+    return f_kind_of_p(x, rb_cNumeric);
+}
+
+inline static VALUE
+k_integer_p(VALUE x)
+{
+    return f_kind_of_p(x, rb_cInteger);
+}
+
+inline static VALUE
+k_float_p(VALUE x)
+{
+    return f_kind_of_p(x, rb_cFloat);
+}
+
+inline static VALUE
+k_rational_p(VALUE x)
+{
+    return f_kind_of_p(x, rb_cRational);
+}
+
+/* "Exact" here means "not a Float"; k_exact_p() is a C int. */
+#define k_exact_p(x) (!k_float_p(x))
+#define k_inexact_p(x) k_float_p(x)
+
+/* With assertions enabled, the definition below is compiled under the name
+ * f_gcd_orig so that a checking wrapper can take the name f_gcd. */
+#ifndef NDEBUG
+#define f_gcd f_gcd_orig
+#endif
+
+/* Greatest common divisor of two C longs by the Euclidean algorithm.
+ * Both operands are made non-negative first; gcd(0, y) is |y| and
+ * gcd(x, 0) is |x|. */
+inline static long
+i_gcd(long x, long y)
+{
+    if (x < 0)
+        x = -x;
+    if (y < 0)
+        y = -y;
+
+    if (x == 0)
+        return y;
+    if (y == 0)
+        return x;
+
+    while (x > 0) {
+        long t = x;
+        x = y % x;
+        y = t;
+    }
+    return y;
+}
+
+inline static VALUE
+f_gcd(VALUE x, VALUE y)
+{
+    VALUE z;
+
+    /* Two Fixnums: do the whole computation on C longs. */
+    if (FIXNUM_P(x) && FIXNUM_P(y))
+        return LONG2NUM(i_gcd(FIX2LONG(x), FIX2LONG(y)));
+
+    if (f_negative_p(x))
+        x = f_negate(x);
+    if (f_negative_p(y))
+        y = f_negate(y);
+
+    if (f_zero_p(x))
+        return y;
+    if (f_zero_p(y))
+        return x;
+
+    /* Euclidean loop on Ruby integers; drops to i_gcd() as soon as both
+     * operands have become Fixnums. */
+    for (;;) {
+        if (FIXNUM_P(x)) {
+            if (FIX2LONG(x) == 0)
+                return y;
+            if (FIXNUM_P(y))
+                return LONG2NUM(i_gcd(FIX2LONG(x), FIX2LONG(y)));
+        }
+        z = x;
+        x = f_mod(y, x);
+        y = z;
+    }
+    /* NOTREACHED */
+}
+
+/* Assertion build only: f_gcd() wraps f_gcd_orig() and checks that a
+ * non-zero result divides both inputs. */
+#ifndef NDEBUG
+#undef f_gcd
+
+inline static VALUE
+f_gcd(VALUE x, VALUE y)
+{
+    VALUE r = f_gcd_orig(x, y);
+    if (f_nonzero_p(r)) {
+        assert(f_zero_p(f_mod(x, r)));
+        assert(f_zero_p(f_mod(y, r)));
+    }
+    return r;
+}
+#endif
+
+/* Least common multiple: |x / gcd(x, y) * y|, or ZERO if either is zero. */
+inline static VALUE
+f_lcm(VALUE x, VALUE y)
+{
+    if (f_zero_p(x) || f_zero_p(y))
+        return ZERO;
+    return f_abs(f_mul(f_div(x, f_gcd(x, y)), y));
+}
+
+/* get_dat1(x): declare `dat` and point it at x viewed as struct RRational.
+ * The cast is unchecked; it expands to a declaration, so it must come first
+ * in its block. */
+#define get_dat1(x) \
+    struct RRational *dat;\
+    dat = ((struct RRational *)(x))
+
+/* get_dat2(x, y): same for two values, as `adat` and `bdat`. */
+#define get_dat2(x,y) \
+    struct RRational *adat, *bdat;\
+    adat = ((struct RRational *)(x));\
+    bdat = ((struct RRational *)(y))
+
+/* Allocate a T_RATIONAL object of class klass and store num/den as given:
+ * no sign normalization, no reduction, no zero check. */
+inline static VALUE
+nurat_s_new_internal(VALUE klass, VALUE num, VALUE den)
+{
+    NEWOBJ(obj, struct RRational);
+    OBJSETUP(obj, klass, T_RATIONAL);
+
+    obj->num = num;
+    obj->den = den;
+
+    return (VALUE)obj;
+}
+
+/* Allocator: a fresh object holding 0/1. */
+static VALUE
+nurat_s_alloc(VALUE klass)
+{
+    return nurat_s_new_internal(klass, ZERO, ONE);
+}
+
+#define rb_raise_zerodiv() rb_raise(rb_eZeroDivError, "divided by zero")
+
+/* Disabled: Rational.new! (converts with to_i, fixes the sign, no reduction). */
+#if 0
+static VALUE
+nurat_s_new_bang(int argc, VALUE *argv, VALUE klass)
+{
+    VALUE num, den;
+
+    switch (rb_scan_args(argc, argv, "11", &num, &den)) {
+      case 1:
+        if (!k_integer_p(num))
+            num = f_to_i(num);
+        den = ONE;
+        break;
+      default:
+        if (!k_integer_p(num))
+            num = f_to_i(num);
+        if (!k_integer_p(den))
+            den = f_to_i(den);
+
+        switch (FIX2INT(f_cmp(den, ZERO))) {
+          case -1:
+            num = f_negate(num);
+            den = f_negate(den);
+            break;
+          case 0:
+            rb_raise_zerodiv();
+            break;
+        }
+        break;
+    }
+
+    return
nurat_s_new_internal(klass, num, den);
+}
+#endif
+
+/* Build x/1 without any checking. */
+inline static VALUE
+f_rational_new_bang1(VALUE klass, VALUE x)
+{
+    return nurat_s_new_internal(klass, x, ONE);
+}
+
+/* Build x/y without reduction; the asserts (compiled only when assertions
+ * are enabled) state that y is expected to be positive and non-zero. */
+inline static VALUE
+f_rational_new_bang2(VALUE klass, VALUE x, VALUE y)
+{
+    assert(f_positive_p(y));
+    assert(f_nonzero_p(y));
+    return nurat_s_new_internal(klass, x, y);
+}
+
+#ifdef CANONICALIZATION_FOR_MATHN
+#define CANON
+#endif
+
+/* CANON builds only: a switch that makes the constructors below return the
+ * bare numerator when the denominator is 1. */
+#ifdef CANON
+static int canonicalization = 0;
+
+void
+nurat_canonicalization(int f)
+{
+    canonicalization = f;
+}
+#endif
+
+/* Raise ArgumentError unless num is a Fixnum, a Bignum, or a Numeric whose
+ * integer? is true. */
+inline static void
+nurat_int_check(VALUE num)
+{
+    switch (TYPE(num)) {
+      case T_FIXNUM:
+      case T_BIGNUM:
+        break;
+      default:
+        if (!k_numeric_p(num) || !f_integer_p(num))
+            rb_raise(rb_eArgError, "not an integer");
+    }
+}
+
+/* Validate num with nurat_int_check(), then convert it with to_i if it is
+ * not already an Integer. */
+inline static VALUE
+nurat_int_value(VALUE num)
+{
+    nurat_int_check(num);
+    if (!k_integer_p(num))
+        num = f_to_i(num);
+    return num;
+}
+
+/* Build num/den in canonical form: the denominator is made positive
+ * (ZeroDivisionError if it is zero) and both parts are divided by their gcd. */
+inline static VALUE
+nurat_s_canonicalize_internal(VALUE klass, VALUE num, VALUE den)
+{
+    VALUE gcd;
+
+    switch (FIX2INT(f_cmp(den, ZERO))) {
+      case -1:
+        num = f_negate(num);
+        den = f_negate(den);
+        break;
+      case 0:
+        rb_raise_zerodiv();
+        break;
+    }
+
+    gcd = f_gcd(num, den);
+    num = f_idiv(num, gcd);
+    den = f_idiv(den, gcd);
+
+#ifdef CANON
+    if (f_one_p(den) && canonicalization)
+        return num;
+#endif
+    return nurat_s_new_internal(klass, num, den);
+}
+
+/* Same sign handling and zero check as above, but no gcd reduction; the
+ * caller is trusted to pass an already reduced pair. */
+inline static VALUE
+nurat_s_canonicalize_internal_no_reduce(VALUE klass, VALUE num, VALUE den)
+{
+    switch (FIX2INT(f_cmp(den, ZERO))) {
+      case -1:
+        num = f_negate(num);
+        den = f_negate(den);
+        break;
+      case 0:
+        rb_raise_zerodiv();
+        break;
+    }
+
+#ifdef CANON
+    if (f_one_p(den) && canonicalization)
+        return num;
+#endif
+    return nurat_s_new_internal(klass, num, den);
+}
+
+/* Constructor taking one or two integer arguments (denominator defaults
+ * to 1); the result is canonicalized. */
+static VALUE
+nurat_s_new(int argc, VALUE *argv, VALUE klass)
+{
+    VALUE num, den;
+
+    switch (rb_scan_args(argc, argv, "11", &num, &den)) {
+      case 1:
+        num = nurat_int_value(num);
+        den = ONE;
+        break;
+      default:
+        num = nurat_int_value(num);
+
den = nurat_int_value(den); + break; + } + + return nurat_s_canonicalize_internal(klass, num, den); +} + +inline static VALUE +f_rational_new1(VALUE klass, VALUE x) +{ + assert(!k_rational_p(x)); + return nurat_s_canonicalize_internal(klass, x, ONE); +} + +inline static VALUE +f_rational_new2(VALUE klass, VALUE x, VALUE y) +{ + assert(!k_rational_p(x)); + assert(!k_rational_p(y)); + return nurat_s_canonicalize_internal(klass, x, y); +} + +inline static VALUE +f_rational_new_no_reduce1(VALUE klass, VALUE x) +{ + assert(!k_rational_p(x)); + return nurat_s_canonicalize_internal_no_reduce(klass, x, ONE); +} + +inline static VALUE +f_rational_new_no_reduce2(VALUE klass, VALUE x, VALUE y) +{ + assert(!k_rational_p(x)); + assert(!k_rational_p(y)); + return nurat_s_canonicalize_internal_no_reduce(klass, x, y); +} + +static VALUE +nurat_f_rational(int argc, VALUE *argv, VALUE klass) +{ + return rb_funcall2(rb_cRational, id_convert, argc, argv); +} + +static VALUE +nurat_numerator(VALUE self) +{ + get_dat1(self); + return dat->num; +} + +static VALUE +nurat_denominator(VALUE self) +{ + get_dat1(self); + return dat->den; +} + +#ifndef NDEBUG +#define f_imul f_imul_orig +#endif + +inline static VALUE +f_imul(long a, long b) +{ + VALUE r; + long c; + + if (a == 0 || b == 0) + return ZERO; + else if (a == 1) + return LONG2NUM(b); + else if (b == 1) + return LONG2NUM(a); + + c = a * b; + r = LONG2NUM(c); + if (NUM2LONG(r) != c || (c / a) != b) + r = rb_big_mul(rb_int2big(a), rb_int2big(b)); + return r; +} + +#ifndef NDEBUG +#undef f_imul + +inline static VALUE +f_imul(long x, long y) +{ + VALUE r = f_imul_orig(x, y); + assert(f_equal_p(r, f_mul(LONG2NUM(x), LONG2NUM(y)))); + return r; +} +#endif + +inline static VALUE +f_addsub(VALUE self, VALUE anum, VALUE aden, VALUE bnum, VALUE bden, int k) +{ + VALUE num, den; + + if (FIXNUM_P(anum) && FIXNUM_P(aden) && + FIXNUM_P(bnum) && FIXNUM_P(bden)) { + long an = FIX2LONG(anum); + long ad = FIX2LONG(aden); + long bn = FIX2LONG(bnum); + 
long bd = FIX2LONG(bden);
+        long ig = i_gcd(ad, bd);
+
+        /* a/b +- c/d with g = gcd(b, d): the numerator is
+         * a*(d/g) +- c*(b/g); it is then reduced against g only. */
+        VALUE g = LONG2NUM(ig);
+        VALUE a = f_imul(an, bd / ig);
+        VALUE b = f_imul(bn, ad / ig);
+        VALUE c;
+
+        if (k == '+')
+            c = f_add(a, b);
+        else
+            c = f_sub(a, b);
+
+        b = f_idiv(aden, g);
+        g = f_gcd(c, g);
+        num = f_idiv(c, g);
+        a = f_idiv(bden, g);
+        den = f_mul(a, b);
+    }
+    else {
+        /* Same computation on Ruby integers. */
+        VALUE g = f_gcd(aden, bden);
+        VALUE a = f_mul(anum, f_idiv(bden, g));
+        VALUE b = f_mul(bnum, f_idiv(aden, g));
+        VALUE c;
+
+        if (k == '+')
+            c = f_add(a, b);
+        else
+            c = f_sub(a, b);
+
+        b = f_idiv(aden, g);
+        g = f_gcd(c, g);
+        num = f_idiv(c, g);
+        a = f_idiv(bden, g);
+        den = f_mul(a, b);
+    }
+    /* num/den was reduced above, so the no-reduce constructor is used. */
+    return f_rational_new_no_reduce2(CLASS_OF(self), num, den);
+}
+
+/* Rational#+ : integers are treated as other/1, Floats switch to Float
+ * arithmetic, anything else goes through coercion. */
+static VALUE
+nurat_add(VALUE self, VALUE other)
+{
+    switch (TYPE(other)) {
+      case T_FIXNUM:
+      case T_BIGNUM:
+        {
+            get_dat1(self);
+
+            return f_addsub(self,
+                            dat->num, dat->den,
+                            other, ONE, '+');
+        }
+      case T_FLOAT:
+        return f_add(f_to_f(self), other);
+      case T_RATIONAL:
+        {
+            get_dat2(self, other);
+
+            return f_addsub(self,
+                            adat->num, adat->den,
+                            bdat->num, bdat->den, '+');
+        }
+      default:
+        return rb_num_coerce_bin(self, other, '+');
+    }
+}
+
+/* Rational#- : same dispatch as nurat_add(). */
+static VALUE
+nurat_sub(VALUE self, VALUE other)
+{
+    switch (TYPE(other)) {
+      case T_FIXNUM:
+      case T_BIGNUM:
+        {
+            get_dat1(self);
+
+            return f_addsub(self,
+                            dat->num, dat->den,
+                            other, ONE, '-');
+        }
+      case T_FLOAT:
+        return f_sub(f_to_f(self), other);
+      case T_RATIONAL:
+        {
+            get_dat2(self, other);
+
+            return f_addsub(self,
+                            adat->num, adat->den,
+                            bdat->num, bdat->den, '-');
+        }
+      default:
+        return rb_num_coerce_bin(self, other, '-');
+    }
+}
+
+/* Multiply (k == '*') or divide (k == '/') anum/aden by bnum/bden. */
+inline static VALUE
+f_muldiv(VALUE self, VALUE anum, VALUE aden, VALUE bnum, VALUE bden, int k)
+{
+    VALUE num, den;
+
+    /* Division is multiplication by the reciprocal: move the sign of the
+     * divisor onto anum, then swap bnum and bden. */
+    if (k == '/') {
+        VALUE t;
+
+        if (f_negative_p(bnum)) {
+            anum = f_negate(anum);
+            bnum = f_negate(bnum);
+        }
+        t = bnum;
+        bnum = bden;
+        bden = t;
+    }
+
+    if (FIXNUM_P(anum) && FIXNUM_P(aden) &&
+        FIXNUM_P(bnum) && FIXNUM_P(bden)) {
+        long an =
FIX2LONG(anum);
+        long ad = FIX2LONG(aden);
+        long bn = FIX2LONG(bnum);
+        long bd = FIX2LONG(bden);
+        /* Cancel common factors crosswise before multiplying. */
+        long g1 = i_gcd(an, bd);
+        long g2 = i_gcd(ad, bn);
+
+        num = f_imul(an / g1, bn / g2);
+        den = f_imul(ad / g2, bd / g1);
+    }
+    else {
+        VALUE g1 = f_gcd(anum, bden);
+        VALUE g2 = f_gcd(aden, bnum);
+
+        num = f_mul(f_idiv(anum, g1), f_idiv(bnum, g2));
+        den = f_mul(f_idiv(aden, g2), f_idiv(bden, g1));
+    }
+    return f_rational_new_no_reduce2(CLASS_OF(self), num, den);
+}
+
+/* Rational#* : integers are treated as other/1, Floats switch to Float
+ * arithmetic, anything else goes through coercion. */
+static VALUE
+nurat_mul(VALUE self, VALUE other)
+{
+    switch (TYPE(other)) {
+      case T_FIXNUM:
+      case T_BIGNUM:
+        {
+            get_dat1(self);
+
+            return f_muldiv(self,
+                            dat->num, dat->den,
+                            other, ONE, '*');
+        }
+      case T_FLOAT:
+        return f_mul(f_to_f(self), other);
+      case T_RATIONAL:
+        {
+            get_dat2(self, other);
+
+            return f_muldiv(self,
+                            adat->num, adat->den,
+                            bdat->num, bdat->den, '*');
+        }
+      default:
+        return rb_num_coerce_bin(self, other, '*');
+    }
+}
+
+/* Rational#/ (also registered as quo): raises ZeroDivisionError for an
+ * integer or Rational zero divisor; a Float divisor is handed to Float#/. */
+static VALUE
+nurat_div(VALUE self, VALUE other)
+{
+    switch (TYPE(other)) {
+      case T_FIXNUM:
+      case T_BIGNUM:
+        if (f_zero_p(other))
+            rb_raise_zerodiv();
+        {
+            get_dat1(self);
+
+            return f_muldiv(self,
+                            dat->num, dat->den,
+                            other, ONE, '/');
+        }
+      case T_FLOAT:
+        return rb_funcall(f_to_f(self), '/', 1, other);
+      case T_RATIONAL:
+        if (f_zero_p(other))
+            rb_raise_zerodiv();
+        {
+            get_dat2(self, other);
+
+            return f_muldiv(self,
+                            adat->num, adat->den,
+                            bdat->num, bdat->den, '/');
+        }
+      default:
+        return rb_num_coerce_bin(self, other, '/');
+    }
+}
+
+/* Rational#fdiv: convert self to Float first, then divide. */
+static VALUE
+nurat_fdiv(VALUE self, VALUE other)
+{
+    return f_div(f_to_f(self), other);
+}
+
+/* Rational#** */
+static VALUE
+nurat_expt(VALUE self, VALUE other)
+{
+    /* Any non-Float zero exponent gives 1/1. */
+    if (k_exact_p(other) && f_zero_p(other))
+        return f_rational_new_bang1(CLASS_OF(self), ONE);
+
+    /* A Rational exponent with denominator 1 is treated as its numerator. */
+    if (k_rational_p(other)) {
+        get_dat1(other);
+
+        if (f_one_p(dat->den))
+            other = dat->num; /* good?
*/
+    }
+
+    switch (TYPE(other)) {
+      case T_FIXNUM:
+      case T_BIGNUM:
+        {
+            VALUE num, den;
+
+            get_dat1(self);
+
+            /* Positive exponent: raise both parts.  Negative exponent:
+             * raise the swapped parts to -other. */
+            switch (FIX2INT(f_cmp(other, ZERO))) {
+              case 1:
+                num = f_expt(dat->num, other);
+                den = f_expt(dat->den, other);
+                break;
+              case -1:
+                num = f_expt(dat->den, f_negate(other));
+                den = f_expt(dat->num, f_negate(other));
+                break;
+              default:
+                num = ONE;
+                den = ONE;
+                break;
+            }
+            /* f_rational_new2() canonicalizes, so a zero denominator
+             * raises ZeroDivisionError there. */
+            return f_rational_new2(CLASS_OF(self), num, den);
+        }
+      case T_FLOAT:
+      case T_RATIONAL:
+        /* Non-integral exponent: compute in Float. */
+        return f_expt(f_to_f(self), other);
+      default:
+        return rb_num_coerce_bin(self, other, id_expt);
+    }
+}
+
+/* Rational#<=> */
+static VALUE
+nurat_cmp(VALUE self, VALUE other)
+{
+    switch (TYPE(other)) {
+      case T_FIXNUM:
+      case T_BIGNUM:
+        {
+            get_dat1(self);
+
+            /* Denominator 1: compare the numerator directly; otherwise
+             * wrap other as other/1 and compare two Rationals. */
+            if (FIXNUM_P(dat->den) && FIX2LONG(dat->den) == 1)
+                return f_cmp(dat->num, other);
+            return f_cmp(self, f_rational_new_bang1(CLASS_OF(self), other));
+        }
+      case T_FLOAT:
+        return f_cmp(f_to_f(self), other);
+      case T_RATIONAL:
+        {
+            VALUE num1, num2;
+
+            get_dat2(self, other);
+
+            /* Compare a/b with c/d through the sign of a*d - c*b. */
+            if (FIXNUM_P(adat->num) && FIXNUM_P(adat->den) &&
+                FIXNUM_P(bdat->num) && FIXNUM_P(bdat->den)) {
+                num1 = f_imul(FIX2LONG(adat->num), FIX2LONG(bdat->den));
+                num2 = f_imul(FIX2LONG(bdat->num), FIX2LONG(adat->den));
+            }
+            else {
+                num1 = f_mul(adat->num, bdat->den);
+                num2 = f_mul(bdat->num, adat->den);
+            }
+            return f_cmp(f_sub(num1, num2), ZERO);
+        }
+      default:
+        return rb_num_coerce_bin(self, other, id_cmp);
+    }
+}
+
+/* Rational#== */
+static VALUE
+nurat_equal_p(VALUE self, VALUE other)
+{
+    switch (TYPE(other)) {
+      case T_FIXNUM:
+      case T_BIGNUM:
+        {
+            get_dat1(self);
+
+            if (f_zero_p(dat->num) && f_zero_p(other))
+                return Qtrue;
+
+            /* Equal to an integer only if the denominator is Fixnum 1 and
+             * the numerators match. */
+            if (!FIXNUM_P(dat->den))
+                return Qfalse;
+            if (FIX2LONG(dat->den) != 1)
+                return Qfalse;
+            if (f_equal_p(dat->num, other))
+                return Qtrue;
+            return Qfalse;
+        }
+      case T_FLOAT:
+        return f_equal_p(f_to_f(self), other);
+      case T_RATIONAL:
+        {
+            get_dat2(self, other);
+
+            /* Two zero numerators are equal whatever the denominators. */
+            if (f_zero_p(adat->num) && f_zero_p(bdat->num))
+                return Qtrue;
+
+            return
f_boolcast(f_equal_p(adat->num, bdat->num) &&
+                              f_equal_p(adat->den, bdat->den));
+        }
+      default:
+        /* Unknown type: let the other operand decide. */
+        return f_equal_p(other, self);
+    }
+}
+
+/* Rational#coerce: returns the pair [other', self'].  Integers become
+ * other/1, a Float makes self a Float, and a Complex is accepted only when
+ * its imaginary part is a non-Float zero.  Anything else raises TypeError. */
+static VALUE
+nurat_coerce(VALUE self, VALUE other)
+{
+    switch (TYPE(other)) {
+      case T_FIXNUM:
+      case T_BIGNUM:
+        return rb_assoc_new(f_rational_new_bang1(CLASS_OF(self), other), self);
+      case T_FLOAT:
+        return rb_assoc_new(other, f_to_f(self));
+      case T_RATIONAL:
+        return rb_assoc_new(other, self);
+      case T_COMPLEX:
+        if (k_exact_p(RCOMPLEX(other)->imag) && f_zero_p(RCOMPLEX(other)->imag))
+            return rb_assoc_new(f_rational_new_bang1
+                                (CLASS_OF(self), RCOMPLEX(other)->real), self);
+    }
+
+    rb_raise(rb_eTypeError, "%s can't be coerced into %s",
+             rb_obj_classname(other), rb_obj_classname(self));
+    return Qnil;
+}
+
+/* Rational#div: floor of self / other. */
+static VALUE
+nurat_idiv(VALUE self, VALUE other)
+{
+    return f_floor(f_div(self, other));
+}
+
+/* Rational#% and #modulo: self - other * floor(self / other). */
+static VALUE
+nurat_mod(VALUE self, VALUE other)
+{
+    VALUE val = f_floor(f_div(self, other));
+    return f_sub(self, f_mul(other, val));
+}
+
+/* Rational#divmod: the pair [floor(self / other), self % other]. */
+static VALUE
+nurat_divmod(VALUE self, VALUE other)
+{
+    VALUE val = f_floor(f_div(self, other));
+    return rb_assoc_new(val, f_sub(self, f_mul(other, val)));
+}
+
+#if 0
+static VALUE
+nurat_quot(VALUE self, VALUE other)
+{
+    return f_truncate(f_div(self, other));
+}
+#endif
+
+/* Rational#remainder: like nurat_mod() but with truncation, not floor. */
+static VALUE
+nurat_rem(VALUE self, VALUE other)
+{
+    VALUE val = f_truncate(f_div(self, other));
+    return f_sub(self, f_mul(other, val));
+}
+
+#if 0
+static VALUE
+nurat_quotrem(VALUE self, VALUE other)
+{
+    VALUE val = f_truncate(f_div(self, other));
+    return rb_assoc_new(val, f_sub(self, f_mul(other, val)));
+}
+#endif
+
+/* Rational#abs: self when it is not negative, otherwise -self. */
+static VALUE
+nurat_abs(VALUE self)
+{
+    if (f_positive_p(self))
+        return self;
+    return f_negate(self);
+}
+
+#if 0
+static VALUE
+nurat_true(VALUE self)
+{
+    return Qtrue;
+}
+#endif
+
+/* Rational#floor: numerator.div(denominator). */
+static VALUE
+nurat_floor(VALUE self)
+{
+    get_dat1(self);
+    return f_idiv(dat->num, dat->den);
+}
+
+/* Rational#ceil: computed as -((-numerator).div(denominator)). */
+static VALUE
+nurat_ceil(VALUE self)
+{
+    get_dat1(self);
+    return
f_negate(f_idiv(f_negate(dat->num), dat->den));
+}
+
+/* Rational#truncate (also registered as to_i): divide the magnitude and
+ * put the sign back, so the result is rounded toward zero. */
+static VALUE
+nurat_truncate(VALUE self)
+{
+    get_dat1(self);
+    if (f_negative_p(dat->num))
+        return f_negate(f_idiv(f_negate(dat->num), dat->den));
+    return f_idiv(dat->num, dat->den);
+}
+
+/* Rational#round: computes (2*|num| + den).div(2*den) and restores the
+ * sign, i.e. halves are rounded away from zero. */
+static VALUE
+nurat_round(VALUE self)
+{
+    get_dat1(self);
+
+    if (f_negative_p(dat->num)) {
+        VALUE num, den;
+
+        num = f_negate(dat->num);
+        num = f_add(f_mul(num, TWO), dat->den);
+        den = f_mul(dat->den, TWO);
+        return f_negate(f_idiv(num, den));
+    }
+    else {
+        VALUE num = f_add(f_mul(dat->num, TWO), dat->den);
+        VALUE den = f_mul(dat->den, TWO);
+        return f_idiv(num, den);
+    }
+}
+
+#define f_size(x) rb_funcall(x, rb_intern("size"), 0)
+#define f_rshift(x,y) rb_funcall(x, rb_intern(">>"), 1, y)
+
+/* Approximate base-2 logarithm of the integer x (asserted to be >= 1).
+ * x is first shifted right by q bits, where q is derived from x.size and
+ * sizeof(long) with a hard-coded 8 bits per byte; the highest set bit of
+ * the remaining long is then found by repeated shifting.
+ * NOTE(review): presumably x.size is never smaller than sizeof(long) for
+ * the integers passed here -- confirm. */
+inline static long
+i_ilog2(VALUE x)
+{
+    long q, r, fx;
+
+    assert(!f_lt_p(x, ONE));
+
+    q = (NUM2LONG(f_size(x)) - sizeof(long)) * 8 + 1;
+
+    if (q > 0)
+        x = f_rshift(x, LONG2NUM(q));
+
+    fx = NUM2LONG(x);
+
+    r = -1;
+    while (fx) {
+        fx >>= 1;
+        r += 1;
+    }
+
+    return q + r;
+}
+
+/* Bit-length threshold used by nurat_to_f(); set in Init_Rational(). */
+static long ml;
+
+/* Rational#to_f */
+static VALUE
+nurat_to_f(VALUE self)
+{
+    VALUE num, den;
+    int minus = 0;
+    long nl, dl, ne, de;
+    int e;
+    double f;
+
+    {
+        get_dat1(self);
+
+        if (f_zero_p(dat->num))
+            return rb_float_new(0.0);
+
+        num = dat->num;
+        den = dat->den;
+    }
+
+    /* Work on the magnitude; the sign is re-applied at the end. */
+    if (f_negative_p(num)) {
+        num = f_negate(num);
+        minus = 1;
+    }
+
+    nl = i_ilog2(num);
+    dl = i_ilog2(den);
+
+    /* Shift away the bits beyond ml from each part and remember the shift
+     * counts; their difference e is re-applied later with ldexp(). */
+    ne = 0;
+    if (nl > ml) {
+        ne = nl - ml;
+        num = f_rshift(num, LONG2NUM(ne));
+    }
+
+    de = 0;
+    if (dl > ml) {
+        de = dl - ml;
+        den = f_rshift(den, LONG2NUM(de));
+    }
+
+    e = (int)(ne - de);
+
+    /* Exponent outside the double range: warn, then return HUGE_VAL or 0.0. */
+    if ((e > DBL_MAX_EXP) || (e < DBL_MIN_EXP)) {
+        rb_warning("%s out of Float range", rb_obj_classname(self));
+        return rb_float_new(e > 0 ?
HUGE_VAL : 0.0);
+    }
+
+    f = NUM2DBL(num) / NUM2DBL(den);
+    if (minus)
+        f = -f;
+    f = ldexp(f, e);
+
+    if (isinf(f) || isnan(f))
+        rb_warning("%s out of Float range", rb_obj_classname(self));
+
+    return rb_float_new(f);
+}
+
+/* Rational#to_r: returns self. */
+static VALUE
+nurat_to_r(VALUE self)
+{
+    return self;
+}
+
+/* Rational#hash: XOR of the hashes of numerator and denominator. */
+static VALUE
+nurat_hash(VALUE self)
+{
+    get_dat1(self);
+    return f_xor(f_hash(dat->num), f_hash(dat->den));
+}
+
+/* Build "<num>/<den>", rendering each part with func (f_to_s or f_inspect). */
+static VALUE
+nurat_format(VALUE self, VALUE (*func)(VALUE))
+{
+    VALUE s;
+    get_dat1(self);
+
+    s = (*func)(dat->num);
+    rb_str_cat2(s, "/");
+    rb_str_concat(s, (*func)(dat->den));
+
+    return s;
+}
+
+/* Rational#to_s: "num/den". */
+static VALUE
+nurat_to_s(VALUE self)
+{
+    return nurat_format(self, f_to_s);
+}
+
+/* Rational#inspect: "(num/den)". */
+static VALUE
+nurat_inspect(VALUE self)
+{
+    VALUE s;
+
+    s = rb_usascii_str_new2("(");
+    rb_str_concat(s, nurat_format(self, f_inspect));
+    rb_str_cat2(s, ")");
+
+    return s;
+}
+
+/* Rational#marshal_dump: the pair [num, den], carrying over self's
+ * generic instance variables. */
+static VALUE
+nurat_marshal_dump(VALUE self)
+{
+    VALUE a;
+    get_dat1(self);
+
+    a = rb_assoc_new(dat->num, dat->den);
+    rb_copy_generic_ivar(a, self);
+    return a;
+}
+
+/*
+ * Rational#marshal_load: restore numerator and denominator from the array
+ * produced by marshal_dump.
+ *
+ * Marshal data can come from outside the process, so the payload is
+ * validated before it is touched: it must be an Array of exactly two
+ * elements (otherwise RARRAY_PTR(a)[0..1] would read arbitrary memory), and
+ * the denominator must not be zero.  Nothing is stored into self until all
+ * checks have passed.
+ *
+ * Raises TypeError (from Check_Type) if a is not an Array, ArgumentError if
+ * its length is not 2, ZeroDivisionError if the denominator is zero.
+ */
+static VALUE
+nurat_marshal_load(VALUE self, VALUE a)
+{
+    get_dat1(self);
+
+    Check_Type(a, T_ARRAY);
+    if (RARRAY_LEN(a) != 2)
+        rb_raise(rb_eArgError,
+                 "marshaled rational must have an array whose length is 2 but %ld",
+                 (long)RARRAY_LEN(a));
+    if (f_zero_p(RARRAY_PTR(a)[1]))
+        rb_raise_zerodiv();
+
+    dat->num = RARRAY_PTR(a)[0];
+    dat->den = RARRAY_PTR(a)[1];
+    rb_copy_generic_ivar(self, a);
+
+    return self;
+}
+
+/* --- */
+
+/* Integer#gcd: other must pass nurat_int_value(). */
+VALUE
+rb_gcd(VALUE self, VALUE other)
+{
+    other = nurat_int_value(other);
+    return f_gcd(self, other);
+}
+
+/* Integer#lcm */
+VALUE
+rb_lcm(VALUE self, VALUE other)
+{
+    other = nurat_int_value(other);
+    return f_lcm(self, other);
+}
+
+/* Integer#gcdlcm: the pair [gcd, lcm]. */
+VALUE
+rb_gcdlcm(VALUE self, VALUE other)
+{
+    other = nurat_int_value(other);
+    return rb_assoc_new(f_gcd(self, other), f_lcm(self, other));
+}
+
+/* C API: build x/y exactly as given (no checks, no reduction). */
+VALUE
+rb_rational_raw(VALUE x, VALUE y)
+{
+    return nurat_s_new_internal(rb_cRational, x, y);
+}
+
+/* C API: build x/y in canonical form. */
+VALUE
+rb_rational_new(VALUE x, VALUE y)
+{
+    return nurat_s_canonicalize_internal(rb_cRational, x, y);
+}
+
+static VALUE nurat_s_convert(int argc, VALUE *argv, VALUE klass);
+
+/* C API: equivalent of Rational(x, y), via nurat_s_convert(). */
+VALUE
+rb_Rational(VALUE x, VALUE y)
+{
+    VALUE
a[2];
+    a[0] = x;
+    a[1] = y;
+    return nurat_s_convert(2, a, rb_cRational);
+}
+
+/* Dispatch helpers for numerator, denominator and to_r.  The IDs are macros
+ * that expand to an rb_intern() call at each use. */
+#define id_numerator rb_intern("numerator")
+#define f_numerator(x) rb_funcall(x, id_numerator, 0)
+
+#define id_denominator rb_intern("denominator")
+#define f_denominator(x) rb_funcall(x, id_denominator, 0)
+
+#define id_to_r rb_intern("to_r")
+#define f_to_r(x) rb_funcall(x, id_to_r, 0)
+
+/* Numeric#numerator: to_r.numerator. */
+static VALUE
+numeric_numerator(VALUE self)
+{
+    return f_numerator(f_to_r(self));
+}
+
+/* Numeric#denominator: to_r.denominator. */
+static VALUE
+numeric_denominator(VALUE self)
+{
+    return f_denominator(f_to_r(self));
+}
+
+/* Integer#numerator: self. */
+static VALUE
+integer_numerator(VALUE self)
+{
+    return self;
+}
+
+/* Integer#denominator: always 1. */
+static VALUE
+integer_denominator(VALUE self)
+{
+    return INT2FIX(1);
+}
+
+/* Float#numerator: infinities and NaN return self; other values defer to
+ * the superclass method. */
+static VALUE
+float_numerator(VALUE self)
+{
+    double d = RFLOAT_VALUE(self);
+    if (isinf(d) || isnan(d))
+        return self;
+    return rb_call_super(0, 0);
+}
+
+/* Float#denominator: infinities and NaN return 1; other values defer to
+ * the superclass method. */
+static VALUE
+float_denominator(VALUE self)
+{
+    double d = RFLOAT_VALUE(self);
+    if (isinf(d) || isnan(d))
+        return INT2FIX(1);
+    return rb_call_super(0, 0);
+}
+
+/* NilClass#to_r: built from 0 with rb_rational_new1(). */
+static VALUE
+nilclass_to_r(VALUE self)
+{
+    return rb_rational_new1(INT2FIX(0));
+}
+
+/* Integer#to_r: built from self with rb_rational_new1(). */
+static VALUE
+integer_to_r(VALUE self)
+{
+    return rb_rational_new1(self);
+}
+
+/* Split a Float into an integer mantissa *rf and a Fixnum exponent *rn:
+ * frexp() yields the fraction and exponent, the fraction is scaled up by
+ * DBL_MANT_DIG with ldexp(), and the exponent is lowered by the same amount. */
+static void
+float_decode_internal(VALUE self, VALUE *rf, VALUE *rn)
+{
+    double f;
+    int n;
+
+    f = frexp(RFLOAT_VALUE(self), &n);
+    f = ldexp(f, DBL_MANT_DIG);
+    n -= DBL_MANT_DIG;
+    *rf = rb_dbl2big(f);
+    *rn = INT2FIX(n);
+}
+
+#if 0
+static VALUE
+float_decode(VALUE self)
+{
+    VALUE f, n;
+
+    float_decode_internal(self, &f, &n);
+    return rb_assoc_new(f, n);
+}
+#endif
+
+/* Float#to_r: mantissa * FLT_RADIX ** exponent on Ruby integers. */
+static VALUE
+float_to_r(VALUE self)
+{
+    VALUE f, n;
+
+    float_decode_internal(self, &f, &n);
+    return f_mul(f, f_expt(INT2FIX(FLT_RADIX), n));
+}
+
+/* Regexps and the "_" string used by the String#to_r code; created once in
+ * make_patterns(). */
+static VALUE rat_pat, an_e_pat, a_dot_pat, underscores_pat, an_underscore;
+
+/* Regexp source fragments.  DIGITS allows single underscores between
+ * digits; NUMERATOR is an optional integer part and dot, digits, and an
+ * optional e/E exponent. */
+#define WS "\\s*"
+#define DIGITS "(?:\\d(?:_\\d|\\d)*)"
+#define NUMERATOR "(?:" DIGITS "?\\.)?" DIGITS "(?:[eE][-+]?" DIGITS ")?"
+#define DENOMINATOR DIGITS
+/* PATTERN: optional sign (group 1), numerator (group 2), optional
+ * "/denominator" (group 3); anchored at the start of the string only. */
+#define PATTERN "\\A" WS "([-+])?(" NUMERATOR ")(?:\\/(" DENOMINATOR "))?" WS
+
+/* Create the regexps and the "_" string once and register each with the GC
+ * through rb_gc_register_mark_object().  Later calls return immediately
+ * because rat_pat is already set. */
+static void
+make_patterns(void)
+{
+    static const char rat_pat_source[] = PATTERN;
+    static const char an_e_pat_source[] = "[eE]";
+    static const char a_dot_pat_source[] = "\\.";
+    static const char underscores_pat_source[] = "_+";
+
+    if (rat_pat) return;
+
+    rat_pat = rb_reg_new(rat_pat_source, sizeof rat_pat_source - 1, 0);
+    rb_gc_register_mark_object(rat_pat);
+
+    an_e_pat = rb_reg_new(an_e_pat_source, sizeof an_e_pat_source - 1, 0);
+    rb_gc_register_mark_object(an_e_pat);
+
+    a_dot_pat = rb_reg_new(a_dot_pat_source, sizeof a_dot_pat_source - 1, 0);
+    rb_gc_register_mark_object(a_dot_pat);
+
+    underscores_pat = rb_reg_new(underscores_pat_source,
+                                 sizeof underscores_pat_source - 1, 0);
+    rb_gc_register_mark_object(underscores_pat);
+
+    an_underscore = rb_usascii_str_new2("_");
+    rb_gc_register_mark_object(an_underscore);
+}
+
+#define id_match rb_intern("match")
+#define f_match(x,y) rb_funcall(x, id_match, 1, y)
+
+#define id_aref rb_intern("[]")
+#define f_aref(x,y) rb_funcall(x, id_aref, 1, y)
+
+#define id_post_match rb_intern("post_match")
+#define f_post_match(x) rb_funcall(x, id_post_match, 0)
+
+#define id_split rb_intern("split")
+#define f_split(x,y) rb_funcall(x, id_split, 1, y)
+
+/* NOTE(review): the header name was missing after #include here, and a bare
+ * #include does not compile.  <ctype.h> is assumed because the code below
+ * classifies digits -- confirm against the upstream file. */
+#include <ctype.h>
+
+/* Parse the leading rational literal of self.  Returns the pair
+ * [value, rest-of-string], or [nil, self] when self is empty or does not
+ * match rat_pat. */
+static VALUE
+string_to_r_internal(VALUE self)
+{
+    VALUE s, m;
+
+    s = self;
+
+    if (RSTRING_LEN(s) == 0)
+        return rb_assoc_new(Qnil, self);
+
+    m = f_match(rat_pat, s);
+
+    if (!NIL_P(m)) {
+        VALUE v, ifp, exp, ip, fp;
+        VALUE si = f_aref(m, INT2FIX(1));   /* sign */
+        VALUE nu = f_aref(m, INT2FIX(2));   /* numerator text */
+        VALUE de = f_aref(m, INT2FIX(3));   /* denominator text or nil */
+        VALUE re = f_post_match(m);         /* unparsed remainder */
+
+        {
+            VALUE a;
+
+            /* Split "int.frac" from the exponent at e/E ... */
+            a = f_split(nu, an_e_pat);
+            ifp = RARRAY_PTR(a)[0];
+            if (RARRAY_LEN(a) != 2)
+                exp = Qnil;
+            else
+                exp = RARRAY_PTR(a)[1];
+
+            /* ... then the integer part from the fraction at the dot. */
+            a = f_split(ifp, a_dot_pat);
+            ip = RARRAY_PTR(a)[0];
+            if (RARRAY_LEN(a) != 2)
+                fp = Qnil;
+            else
+                fp = RARRAY_PTR(a)[1];
+        }
+
+        v =
rb_rational_new1(f_to_i(ip));
+
+        /* Fraction digits: v = (v * 10**count + fraction) / 10**count,
+         * where count is the number of digit characters in fp (underscores
+         * are not counted). */
+        if (!NIL_P(fp)) {
+            char *p = StringValuePtr(fp);
+            long count = 0;
+            VALUE l;
+
+            while (*p) {
+                if (rb_isdigit(*p))
+                    count++;
+                p++;
+            }
+
+            l = f_expt(INT2FIX(10), LONG2NUM(count));
+            v = f_mul(v, l);
+            v = f_add(v, f_to_i(fp));
+            v = f_div(v, l);
+        }
+        /* The sign is applied after the fraction has been merged in. */
+        if (!NIL_P(si) && *StringValuePtr(si) == '-')
+            v = f_negate(v);
+        if (!NIL_P(exp))
+            v = f_mul(v, f_expt(INT2FIX(10), f_to_i(exp)));
+#if 0
+        if (!NIL_P(de) && (!NIL_P(fp) || !NIL_P(exp)))
+            return rb_assoc_new(v, rb_usascii_str_new2("dummy"));
+#endif
+        if (!NIL_P(de))
+            v = f_div(v, f_to_i(de));
+
+        return rb_assoc_new(v, re);
+    }
+    return rb_assoc_new(Qnil, self);
+}
+
+/* Strict parse used by Rational(): raises ArgumentError when nothing was
+ * parsed or when any text is left over after the literal. */
+static VALUE
+string_to_r_strict(VALUE self)
+{
+    VALUE a = string_to_r_internal(self);
+    if (NIL_P(RARRAY_PTR(a)[0]) || RSTRING_LEN(RARRAY_PTR(a)[1]) > 0) {
+        VALUE s = f_inspect(self);
+        rb_raise(rb_eArgError, "invalid value for Rational: %s",
+                 StringValuePtr(s));
+    }
+    return RARRAY_PTR(a)[0];
+}
+
+#define id_gsub rb_intern("gsub")
+#define f_gsub(x,y,z) rb_funcall(x, id_gsub, 2, y, z)
+
+/* String#to_r: lenient parse.  Runs of underscores are collapsed to a
+ * single one first; an unparsable string yields the Rational built from 0.
+ * The caller's regexp backref is saved before and restored after the
+ * internal matching. */
+static VALUE
+string_to_r(VALUE self)
+{
+    VALUE s, a, backref;
+
+    backref = rb_backref_get();
+    rb_match_busy(backref);
+
+    s = f_gsub(self, underscores_pat, an_underscore);
+    a = string_to_r_internal(s);
+
+    rb_backref_set(backref);
+
+    if (!NIL_P(RARRAY_PTR(a)[0]))
+        return RARRAY_PTR(a)[0];
+    return rb_rational_new1(INT2FIX(0));
+}
+
+#define id_to_r rb_intern("to_r")
+#define f_to_r(x) rb_funcall(x, id_to_r, 0)
+
+/* Rational.convert (private; reached through Kernel#Rational): turn one or
+ * two arbitrary arguments into a Rational. */
+static VALUE
+nurat_s_convert(int argc, VALUE *argv, VALUE klass)
+{
+    VALUE a1, a2, backref;
+
+    rb_scan_args(argc, argv, "11", &a1, &a2);
+
+    /* A Complex whose imaginary part is a non-Float zero is replaced by
+     * its real part. */
+    switch (TYPE(a1)) {
+      case T_COMPLEX:
+        if (k_exact_p(RCOMPLEX(a1)->imag) && f_zero_p(RCOMPLEX(a1)->imag))
+            a1 = RCOMPLEX(a1)->real;
+    }
+
+    switch (TYPE(a2)) {
+      case T_COMPLEX:
+        if (k_exact_p(RCOMPLEX(a2)->imag) && f_zero_p(RCOMPLEX(a2)->imag))
+            a2 = RCOMPLEX(a2)->real;
+    }
+
+    backref = rb_backref_get();
+    rb_match_busy(backref);
+
+    switch (TYPE(a1)) {
+
case T_FIXNUM:
+      case T_BIGNUM:
+        break;
+      case T_FLOAT:
+        a1 = f_to_r(a1);
+        break;
+      case T_STRING:
+        a1 = string_to_r_strict(a1);
+        break;
+    }
+
+    /* Same conversions for the second argument. */
+    switch (TYPE(a2)) {
+      case T_FIXNUM:
+      case T_BIGNUM:
+        break;
+      case T_FLOAT:
+        a2 = f_to_r(a2);
+        break;
+      case T_STRING:
+        a2 = string_to_r_strict(a2);
+        break;
+    }
+
+    rb_backref_set(backref);
+
+    /* A Rational first argument is returned as is when it is the only
+     * argument or the second one is a non-Float 1. */
+    switch (TYPE(a1)) {
+      case T_RATIONAL:
+        if (argc == 1 || (k_exact_p(a2) && f_one_p(a2)))
+            return a1;
+    }
+
+    /* Non-integral numerics: return the value itself (one argument) or the
+     * quotient a1 / a2 (two arguments). */
+    if (argc == 1) {
+        if (k_numeric_p(a1) && !f_integer_p(a1))
+            return a1;
+    }
+    else {
+        if ((k_numeric_p(a1) && k_numeric_p(a2)) &&
+            (!f_integer_p(a1) || !f_integer_p(a2)))
+            return f_div(a1, a2);
+    }
+
+    /* Everything else goes through the validating constructor. */
+    {
+        VALUE argv2[2];
+        argv2[0] = a1;
+        argv2[1] = a2;
+        return nurat_s_new(argc, argv2, klass);
+    }
+}
+
+/* Set up the Rational class and the related methods on Integer, Numeric,
+ * Float, NilClass and String. */
+void
+Init_Rational(void)
+{
+#undef rb_intern
+#define rb_intern(str) rb_intern_const(str)
+
+    /* Prints a banner only when assert() is compiled in. */
+    assert(fprintf(stderr, "assert() is now active\n"));
+
+    id_abs = rb_intern("abs");
+    id_cmp = rb_intern("<=>");
+    id_convert = rb_intern("convert");
+    id_equal_p = rb_intern("==");
+    id_expt = rb_intern("**");
+    id_floor = rb_intern("floor");
+    id_hash = rb_intern("hash");
+    id_idiv = rb_intern("div");
+    id_inspect = rb_intern("inspect");
+    id_integer_p = rb_intern("integer?");
+    id_negate = rb_intern("-@");
+    id_to_f = rb_intern("to_f");
+    id_to_i = rb_intern("to_i");
+    id_to_s = rb_intern("to_s");
+    id_truncate = rb_intern("truncate");
+
+    /* Base-2 logarithm of DBL_MAX, minus one, truncated to a long. */
+    ml = (long)(log(DBL_MAX) / log(2.0) - 1);
+
+    rb_cRational = rb_define_class("Rational", rb_cNumeric);
+
+    /* Rational.allocate and Rational.new are removed from the class's
+     * singleton; instances are created through Kernel#Rational. */
+    rb_define_alloc_func(rb_cRational, nurat_s_alloc);
+    rb_undef_method(CLASS_OF(rb_cRational), "allocate");
+
+#if 0
+    rb_define_private_method(CLASS_OF(rb_cRational), "new!", nurat_s_new_bang, -1);
+    rb_define_private_method(CLASS_OF(rb_cRational), "new", nurat_s_new, -1);
+#else
+    rb_undef_method(CLASS_OF(rb_cRational), "new");
+#endif
+
+    rb_define_global_function("Rational", nurat_f_rational, -1);
+
+    rb_define_method(rb_cRational, "numerator", nurat_numerator, 0);
+
rb_define_method(rb_cRational, "denominator", nurat_denominator, 0);
+
+    /* Arithmetic; "quo" shares the implementation of "/". */
+    rb_define_method(rb_cRational, "+", nurat_add, 1);
+    rb_define_method(rb_cRational, "-", nurat_sub, 1);
+    rb_define_method(rb_cRational, "*", nurat_mul, 1);
+    rb_define_method(rb_cRational, "/", nurat_div, 1);
+    rb_define_method(rb_cRational, "quo", nurat_div, 1);
+    rb_define_method(rb_cRational, "fdiv", nurat_fdiv, 1);
+    rb_define_method(rb_cRational, "**", nurat_expt, 1);
+
+    /* Comparison and coercion. */
+    rb_define_method(rb_cRational, "<=>", nurat_cmp, 1);
+    rb_define_method(rb_cRational, "==", nurat_equal_p, 1);
+    rb_define_method(rb_cRational, "coerce", nurat_coerce, 1);
+
+    rb_define_method(rb_cRational, "div", nurat_idiv, 1);
+
+#if 0 /* NUBY */
+    rb_define_method(rb_cRational, "//", nurat_idiv, 1);
+#endif
+
+    /* "modulo" and "%" share one implementation. */
+    rb_define_method(rb_cRational, "modulo", nurat_mod, 1);
+    rb_define_method(rb_cRational, "%", nurat_mod, 1);
+    rb_define_method(rb_cRational, "divmod", nurat_divmod, 1);
+
+#if 0
+    rb_define_method(rb_cRational, "quot", nurat_quot, 1);
+#endif
+    rb_define_method(rb_cRational, "remainder", nurat_rem, 1);
+#if 0
+    rb_define_method(rb_cRational, "quotrem", nurat_quotrem, 1);
+#endif
+
+    rb_define_method(rb_cRational, "abs", nurat_abs, 0);
+
+#if 0
+    rb_define_method(rb_cRational, "rational?", nurat_true, 0);
+    rb_define_method(rb_cRational, "exact?", nurat_true, 0);
+#endif
+
+    /* Rounding. */
+    rb_define_method(rb_cRational, "floor", nurat_floor, 0);
+    rb_define_method(rb_cRational, "ceil", nurat_ceil, 0);
+    rb_define_method(rb_cRational, "truncate", nurat_truncate, 0);
+    rb_define_method(rb_cRational, "round", nurat_round, 0);
+
+    /* Conversions; "to_i" shares the implementation of "truncate". */
+    rb_define_method(rb_cRational, "to_i", nurat_truncate, 0);
+    rb_define_method(rb_cRational, "to_f", nurat_to_f, 0);
+    rb_define_method(rb_cRational, "to_r", nurat_to_r, 0);
+
+    rb_define_method(rb_cRational, "hash", nurat_hash, 0);
+
+    rb_define_method(rb_cRational, "to_s", nurat_to_s, 0);
+    rb_define_method(rb_cRational, "inspect", nurat_inspect, 0);
+
+    rb_define_method(rb_cRational, "marshal_dump",
nurat_marshal_dump, 0); + rb_define_method(rb_cRational, "marshal_load", nurat_marshal_load, 1); + + /* --- */ + + rb_define_method(rb_cInteger, "gcd", rb_gcd, 1); + rb_define_method(rb_cInteger, "lcm", rb_lcm, 1); + rb_define_method(rb_cInteger, "gcdlcm", rb_gcdlcm, 1); + + rb_define_method(rb_cNumeric, "numerator", numeric_numerator, 0); + rb_define_method(rb_cNumeric, "denominator", numeric_denominator, 0); + + rb_define_method(rb_cInteger, "numerator", integer_numerator, 0); + rb_define_method(rb_cInteger, "denominator", integer_denominator, 0); + + rb_define_method(rb_cFloat, "numerator", float_numerator, 0); + rb_define_method(rb_cFloat, "denominator", float_denominator, 0); + + rb_define_method(rb_cNilClass, "to_r", nilclass_to_r, 0); + rb_define_method(rb_cInteger, "to_r", integer_to_r, 0); + rb_define_method(rb_cFloat, "to_r", float_to_r, 0); + + make_patterns(); + + rb_define_method(rb_cString, "to_r", string_to_r, 0); + + rb_define_private_method(CLASS_OF(rb_cRational), "convert", nurat_s_convert, -1); +} + +/* +Local variables: +c-file-style: "ruby" +End: +*/ diff --git a/re.c b/re.c new file mode 100644 index 0000000..a53ae37 --- /dev/null +++ b/re.c @@ -0,0 +1,3434 @@ +/********************************************************************** + + re.c - + + $Author: yugui $ + created at: Mon Aug 9 18:24:49 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/re.h" +#include "ruby/encoding.h" +#include "ruby/util.h" +#include "regint.h" +#include + +VALUE rb_eRegexpError; + +typedef char onig_errmsg_buffer[ONIG_MAX_ERROR_MESSAGE_LEN]; +#define errcpy(err, msg) strlcpy((err), (msg), ONIG_MAX_ERROR_MESSAGE_LEN) + +#define BEG(no) regs->beg[no] +#define END(no) regs->end[no] + +#if 'a' == 97 /* it's ascii */ +static const char casetable[] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', 
'\014', '\015', '\016', '\017',
+        '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+        '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+        /* ' '     '!'     '"'     '#'     '$'     '%'     '&'     ''' */
+        '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+        /* '('     ')'     '*'     '+'     ','     '-'     '.'     '/' */
+        '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+        /* '0'     '1'     '2'     '3'     '4'     '5'     '6'     '7' */
+        '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+        /* '8'     '9'     ':'     ';'     '<'     '='     '>'     '?' */
+        '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+        /* The three rows for 'A'..'Z' hold the codes of the matching
+         * lower-case letters ('\141' is 'a'); every other entry in the
+         * table maps a byte to itself. */
+        /* '@'     'A'     'B'     'C'     'D'     'E'     'F'     'G' */
+        '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+        /* 'H'     'I'     'J'     'K'     'L'     'M'     'N'     'O' */
+        '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+        /* 'P'     'Q'     'R'     'S'     'T'     'U'     'V'     'W' */
+        '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+        /* 'X'     'Y'     'Z'     '['     '\'     ']'     '^'     '_' */
+        '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+        /* '`'     'a'     'b'     'c'     'd'     'e'     'f'     'g' */
+        '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+        /* 'h'     'i'     'j'     'k'     'l'     'm'     'n'     'o' */
+        '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+        /* 'p'     'q'     'r'     's'     't'     'u'     'v'     'w' */
+        '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+        /* 'x'     'y'     'z'     '{'     '|'     '}'     '~' */
+        '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+        '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+        '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+        '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+        '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+        '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+        '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+        '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+        '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+        '\300', '\301', '\302', '\303',
'\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', +}; +#else +# error >>> "You lose. You will need a translation table for your character set." <<< +#endif + +int +rb_memcicmp(const void *x, const void *y, long len) +{ + const unsigned char *p1 = x, *p2 = y; + int tmp; + + while (len--) { + if ((tmp = casetable[(unsigned)*p1++] - casetable[(unsigned)*p2++])) + return tmp; + } + return 0; +} + +#undef rb_memcmp + +int +rb_memcmp(const void *p1, const void *p2, long len) +{ + return memcmp(p1, p2, len); +} + +static inline long +rb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n) +{ + const unsigned char *x = xs, *xe = xs + m; + const unsigned char *y = ys, *ye = ys + n; +#ifndef VALUE_MAX +# if SIZEOF_VALUE == 8 +# define VALUE_MAX 0xFFFFFFFFFFFFFFFFULL +# elif SIZEOF_VALUE == 4 +# define VALUE_MAX 0xFFFFFFFFUL +# endif +#endif + VALUE hx, hy, mask = VALUE_MAX >> ((SIZEOF_VALUE - m) * CHAR_BIT); + + if (m > SIZEOF_VALUE) + rb_bug("!!too long pattern string!!"); + + /* Prepare hash value */ + for (hx = *x++, hy = *y++; x < xe; ++x, ++y) { + hx <<= CHAR_BIT; + hy <<= CHAR_BIT; + hx |= *x; + hy |= *y; + } + /* Searching */ + while (hx != hy) { + if (y == ye) + return -1; + hy <<= CHAR_BIT; + hy |= *y; + hy &= mask; + y++; + } + return y - ys - m; +} + +static inline long +rb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n) +{ + const unsigned char *x = xs, *xe = xs + m; + const unsigned char *y = ys; + VALUE i, qstable[256]; + + /* Preprocessing */ + for (i = 0; i < 256; 
++i) + qstable[i] = m + 1; + for (; x < xe; ++x) + qstable[*x] = xe - x; + /* Searching */ + for (; y + m <= ys + n; y += *(qstable + y[m])) { + if (*xs == *y && memcmp(xs, y, m) == 0) + return y - ys; + } + return -1; +} + +static inline unsigned int +rb_memsearch_qs_utf8_hash(const unsigned char *x) +{ + register const unsigned int mix = 8353; + register unsigned int h = *x; + if (h < 0xC0) { + return h + 256; + } + else if (h < 0xE0) { + h *= mix; + h += x[1]; + } + else if (h < 0xF0) { + h *= mix; + h += x[1]; + h *= mix; + h += x[2]; + } + else if (h < 0xF5) { + h *= mix; + h += x[1]; + h *= mix; + h += x[2]; + h *= mix; + h += x[3]; + } + else { + return h + 256; + } + return (unsigned char)h; +} + +static inline long +rb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, long n) +{ + const unsigned char *x = xs, *xe = xs + m; + const unsigned char *y = ys; + VALUE i, qstable[512]; + + /* Preprocessing */ + for (i = 0; i < 512; ++i) { + qstable[i] = m + 1; + } + for (; x < xe; ++x) { + qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x; + } + /* Searching */ + for (; y + m <= ys + n; y += qstable[rb_memsearch_qs_utf8_hash(y+m)]) { + if (*xs == *y && memcmp(xs, y, m) == 0) + return y - ys; + } + return -1; +} + +long +rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc) +{ + const unsigned char *x = x0, *y = y0; + + if (m > n) return -1; + else if (m == n) { + return memcmp(x0, y0, m) == 0 ? 
0 : -1; + } + else if (m < 1) { + return 0; + } + else if (m == 1) { + const unsigned char *ys = y, *ye = ys + n; + for (; y < ye; ++y) { + if (*x == *y) + return y - ys; + } + return -1; + } + else if (m <= SIZEOF_VALUE) { + return rb_memsearch_ss(x0, m, y0, n); + } + else if (enc == rb_utf8_encoding()){ + return rb_memsearch_qs_utf8(x0, m, y0, n); + } + else { + return rb_memsearch_qs(x0, m, y0, n); + } +} + +#define REG_LITERAL FL_USER5 +#define REG_ENCODING_NONE FL_USER6 + +#define KCODE_FIXED FL_USER4 + +#define ARG_REG_OPTION_MASK \ + (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND) +#define ARG_ENCODING_FIXED 16 +#define ARG_ENCODING_NONE 32 + +static int +char_to_option(int c) +{ + int val; + + switch (c) { + case 'i': + val = ONIG_OPTION_IGNORECASE; + break; + case 'x': + val = ONIG_OPTION_EXTEND; + break; + case 'm': + val = ONIG_OPTION_MULTILINE; + break; + default: + val = 0; + break; + } + return val; +} + +static char * +option_to_str(char str[4], int options) +{ + char *p = str; + if (options & ONIG_OPTION_MULTILINE) *p++ = 'm'; + if (options & ONIG_OPTION_IGNORECASE) *p++ = 'i'; + if (options & ONIG_OPTION_EXTEND) *p++ = 'x'; + *p = 0; + return str; +} + +extern int +rb_char_to_option_kcode(int c, int *option, int *kcode) +{ + *option = 0; + + switch (c) { + case 'n': + *kcode = -1; + return (*option = ARG_ENCODING_NONE); + case 'e': + *kcode = rb_enc_find_index("EUC-JP"); + break; + case 's': + *kcode = rb_enc_find_index("Windows-31J"); + break; + case 'u': + *kcode = rb_enc_find_index("UTF-8"); + break; + default: + *kcode = -1; + return (*option = char_to_option(c)); + } + *option = ARG_ENCODING_FIXED; + return 1; +} + +static void +rb_reg_check(VALUE re) +{ + if (!RREGEXP(re)->ptr || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) { + rb_raise(rb_eTypeError, "uninitialized Regexp"); + } +} + +static void +rb_reg_expr_str(VALUE str, const char *s, long len) +{ + rb_encoding *enc = rb_enc_get(str); + const char *p, *pend; + int 
need_escape = 0; + int c, clen; + + p = s; pend = p + len; + while (pptr->options)) + rb_str_buf_cat2(str, opts); + } + OBJ_INFECT(str, re); + return str; +} + + +/* + * call-seq: + * rxp.source => str + * + * Returns the original string of the pattern. + * + * /ab+c/ix.source #=> "ab+c" + * + * Note that escape sequences are retained as is. + * + * /\x20\+/.source #=> "\\x20\\+" + * + */ + +static VALUE +rb_reg_source(VALUE re) +{ + VALUE str; + + rb_reg_check(re); + str = rb_enc_str_new(RREGEXP_SRC_PTR(re),RREGEXP_SRC_LEN(re), rb_enc_get(re)); + if (OBJ_TAINTED(re)) OBJ_TAINT(str); + return str; +} + +/* + * call-seq: + * rxp.inspect => string + * + * Produce a nicely formatted string-version of _rxp_. Perhaps surprisingly, + * #inspect actually produces the more natural version of + * the string than #to_s. + * + * /ab+c/ix.inspect #=> "/ab+c/ix" + * + */ + +static VALUE +rb_reg_inspect(VALUE re) +{ + if (!RREGEXP(re)->ptr || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) { + return rb_any_to_s(re); + } + return rb_reg_desc(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), re); +} + + +/* + * call-seq: + * rxp.to_s => str + * + * Returns a string containing the regular expression and its options (using the + * (?opts:source) notation. This string can be fed back in to + * Regexp::new to a regular expression with the same semantics as + * the original. (However, Regexp#== may not return true when + * comparing the two, as the source of the regular expression itself may + * differ, as the example shows). Regexp#inspect produces a + * generally more readable version of rxp. 
+ * + * r1 = /ab+c/ix #=> /ab+c/ix + * s1 = r1.to_s #=> "(?ix-m:ab+c)" + * r2 = Regexp.new(s1) #=> /(?ix-m:ab+c)/ + * r1 == r2 #=> false + * r1.source #=> "ab+c" + * r2.source #=> "(?ix-m:ab+c)" + */ + +static VALUE +rb_reg_to_s(VALUE re) +{ + int options, opt; + const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND; + long len; + const UChar* ptr; + VALUE str = rb_str_buf_new2("(?"); + char optbuf[5]; + + rb_reg_check(re); + + rb_enc_copy(str, re); + options = RREGEXP(re)->ptr->options; + ptr = (UChar*)RREGEXP_SRC_PTR(re); + len = RREGEXP_SRC_LEN(re); + again: + if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') { + int err = 1; + ptr += 2; + if ((len -= 2) > 0) { + do { + opt = char_to_option((int )*ptr); + if (opt != 0) { + options |= opt; + } + else { + break; + } + ++ptr; + } while (--len > 0); + } + if (len > 1 && *ptr == '-') { + ++ptr; + --len; + do { + opt = char_to_option((int )*ptr); + if (opt != 0) { + options &= ~opt; + } + else { + break; + } + ++ptr; + } while (--len > 0); + } + if (*ptr == ')') { + --len; + ++ptr; + goto again; + } + if (*ptr == ':' && ptr[len-1] == ')') { + int r; + Regexp *rp; + r = onig_alloc_init(&rp, ONIG_OPTION_DEFAULT, + ONIGENC_CASE_FOLD_DEFAULT, + rb_enc_get(re), + OnigDefaultSyntax); + if (r == 0) { + ++ptr; + len -= 2; + err = (onig_compile(rp, ptr, ptr + len, NULL) != 0); + } + onig_free(rp); + } + if (err) { + options = RREGEXP(re)->ptr->options; + ptr = (UChar*)RREGEXP_SRC_PTR(re); + len = RREGEXP_SRC_LEN(re); + } + } + + if (*option_to_str(optbuf, options)) rb_str_buf_cat2(str, optbuf); + + if ((options & embeddable) != embeddable) { + optbuf[0] = '-'; + option_to_str(optbuf + 1, ~options); + rb_str_buf_cat2(str, optbuf); + } + + rb_str_buf_cat2(str, ":"); + rb_reg_expr_str(str, (char*)ptr, len); + rb_str_buf_cat2(str, ")"); + rb_enc_copy(str, re); + + OBJ_INFECT(str, re); + return str; +} + +static void +rb_reg_raise(const char *s, long len, const char *err, VALUE re) +{ + VALUE desc = 
rb_reg_desc(s, len, re); + + rb_raise(rb_eRegexpError, "%s: %s", err, RSTRING_PTR(desc)); +} + +static VALUE +rb_enc_reg_error_desc(const char *s, long len, rb_encoding *enc, int options, const char *err) +{ + char opts[6]; + VALUE desc = rb_str_buf_new2(err); + + rb_enc_associate(desc, enc); + rb_str_buf_cat2(desc, ": /"); + rb_reg_expr_str(desc, s, len); + opts[0] = '/'; + option_to_str(opts + 1, options); + rb_str_buf_cat2(desc, opts); + return rb_exc_new3(rb_eRegexpError, desc); +} + +static void +rb_enc_reg_raise(const char *s, long len, rb_encoding *enc, int options, const char *err) +{ + rb_exc_raise(rb_enc_reg_error_desc(s, len, enc, options, err)); +} + +static VALUE +rb_reg_error_desc(VALUE str, int options, const char *err) +{ + return rb_enc_reg_error_desc(RSTRING_PTR(str), RSTRING_LEN(str), + rb_enc_get(str), options, err); +} + +static void +rb_reg_raise_str(VALUE str, int options, const char *err) +{ + rb_exc_raise(rb_reg_error_desc(str, options, err)); +} + + +/* + * call-seq: + * rxp.casefold? => true or false + * + * Returns the value of the case-insensitive flag. + * + * /a/.casefold? #=> false + * /a/i.casefold? #=> true + * /(?i:a)/.casefold? #=> false + */ + +static VALUE +rb_reg_casefold_p(VALUE re) +{ + rb_reg_check(re); + if (RREGEXP(re)->ptr->options & ONIG_OPTION_IGNORECASE) return Qtrue; + return Qfalse; +} + + +/* + * call-seq: + * rxp.options => fixnum + * + * Returns the set of bits corresponding to the options used when creating this + * Regexp (see Regexp::new for details. Note that additional bits + * may be set in the returned options: these are used internally by the regular + * expression code. These extra bits are ignored if the options are passed to + * Regexp::new. 
+ * + * Regexp::IGNORECASE #=> 1 + * Regexp::EXTENDED #=> 2 + * Regexp::MULTILINE #=> 4 + * + * /cat/.options #=> 0 + * /cat/ix.options #=> 3 + * Regexp.new('cat', true).options #=> 1 + * /\xa1\xa2/e.options #=> 16 + * + * r = /cat/ix + * Regexp.new(r.source, r.options) #=> /cat/ix + */ + +static VALUE +rb_reg_options_m(VALUE re) +{ + int options = rb_reg_options(re); + return INT2NUM(options); +} + +static int +reg_names_iter(const OnigUChar *name, const OnigUChar *name_end, + int back_num, int *back_refs, OnigRegex regex, void *arg) +{ + VALUE ary = (VALUE)arg; /* result array passed in by rb_reg_names */ + rb_ary_push(ary, rb_str_new((const char *)name, name_end-name)); + return 0; +} + +/* + * call-seq: + * rxp.names => [name1, name2, ...] + * + * Returns a list of names of captures as an array of strings. + * + * /(?<foo>.)(?<bar>.)(?<baz>.)/.names + * #=> ["foo", "bar", "baz"] + * + * /(?<foo>.)(?<foo>.)/.names + * #=> ["foo"] + * + * /(.)(.)/.names + * #=> [] + */ + +static VALUE +rb_reg_names(VALUE re) +{ + VALUE ary = rb_ary_new(); + rb_reg_check(re); + onig_foreach_name(RREGEXP(re)->ptr, reg_names_iter, (void*)ary); + return ary; +} + +static int +reg_named_captures_iter(const OnigUChar *name, const OnigUChar *name_end, + int back_num, int *back_refs, OnigRegex regex, void *arg) +{ + VALUE hash = (VALUE)arg; /* result hash passed in by rb_reg_named_captures */ + VALUE ary = rb_ary_new2(back_num); + int i; + + for(i = 0; i < back_num; i++) /* one entry per group index sharing this name */ + rb_ary_store(ary, i, INT2NUM(back_refs[i])); + + rb_hash_aset(hash, rb_str_new((const char*)name, name_end-name),ary); + + return 0; +} + +/* + * call-seq: + * rxp.named_captures => hash + * + * Returns a hash representing information about named captures of rxp. + * + * A key of the hash is a name of the named captures. + * A value of the hash is an array which is list of indexes of corresponding + * named captures. + * + * /(?<foo>.)(?<bar>.)/.named_captures + * #=> {"foo"=>[1], "bar"=>[2]} + * + * /(?<foo>.)(?<foo>.)/.named_captures + * #=> {"foo"=>[1, 2]} + * + * If there are no named captures, an empty hash is returned. 
+ * + * /(.)(.)/.named_captures + * #=> {} + */ + +static VALUE +rb_reg_named_captures(VALUE re) +{ + VALUE hash = rb_hash_new(); + rb_reg_check(re); + onig_foreach_name(RREGEXP(re)->ptr, reg_named_captures_iter, (void*)hash); + return hash; +} + +static Regexp* +make_regexp(const char *s, long len, rb_encoding *enc, int flags, onig_errmsg_buffer err) +{ + Regexp *rp; + int r; + OnigErrorInfo einfo; + + /* Handle escaped characters first. */ + + /* Build a copy of the string (in dest) with the + escaped characters translated, and generate the regex + from that. + */ + + r = onig_alloc_init(&rp, flags, ONIGENC_CASE_FOLD_DEFAULT, + enc, OnigDefaultSyntax); + if (r) { + onig_error_code_to_str((UChar*)err, r); + return 0; + } + + r = onig_compile(rp, (UChar*)s, (UChar*)(s + len), &einfo); + + if (r != 0) { + onig_free(rp); + (void )onig_error_code_to_str((UChar*)err, r, &einfo); + return 0; + } + return rp; +} + + +/* + * Document-class: MatchData + * + * MatchData is the type of the special variable $~, + * and is the type of the object returned by Regexp#match and + * Regexp.last_match. It encapsulates all the results of a pattern + * match, results normally accessed through the special variables + * $&, $', $`, $1, + * $2, and so on. 
+ * + */ + +VALUE rb_cMatch; + +static VALUE +match_alloc(VALUE klass) +{ + NEWOBJ(match, struct RMatch); + OBJSETUP(match, klass, T_MATCH); + + match->str = 0; + match->rmatch = 0; + match->regexp = 0; + match->rmatch = ALLOC(struct rmatch); + MEMZERO(match->rmatch, struct rmatch, 1); + + return (VALUE)match; +} + +typedef struct { + int byte_pos; + int char_pos; +} pair_t; + +static int +pair_byte_cmp(const void *pair1, const void *pair2) +{ + return ((pair_t*)pair1)->byte_pos - ((pair_t*)pair2)->byte_pos; +} + +static void +update_char_offset(VALUE match) +{ + struct rmatch *rm = RMATCH(match)->rmatch; + struct re_registers *regs; + int num_regs; + int i, num_pos, c; + char *s, *p, *q, *e; + rb_encoding *enc; + pair_t *pairs; + + if (rm->char_offset_updated) + return; + + regs = &rm->regs; + num_regs = rm->regs.num_regs; + + if (rm->char_offset_num_allocated < num_regs) { + REALLOC_N(rm->char_offset, struct rmatch_offset, num_regs); + rm->char_offset_num_allocated = num_regs; + } + + enc = rb_enc_get(RMATCH(match)->str); + if (rb_enc_mbmaxlen(enc) == 1) { + for (i = 0; i < num_regs; i++) { + rm->char_offset[i].beg = BEG(i); + rm->char_offset[i].end = END(i); + } + rm->char_offset_updated = 1; + return; + } + + pairs = ALLOCA_N(pair_t, num_regs*2); + num_pos = 0; + for (i = 0; i < num_regs; i++) { + if (BEG(i) < 0) + continue; + pairs[num_pos++].byte_pos = BEG(i); + pairs[num_pos++].byte_pos = END(i); + } + qsort(pairs, num_pos, sizeof(pair_t), pair_byte_cmp); + + s = p = RSTRING_PTR(RMATCH(match)->str); + e = s + RSTRING_LEN(RMATCH(match)->str); + c = 0; + for (i = 0; i < num_pos; i++) { + q = s + pairs[i].byte_pos; + c += rb_enc_strlen(p, q, enc); + pairs[i].char_pos = c; + p = q; + } + + for (i = 0; i < num_regs; i++) { + pair_t key, *found; + if (BEG(i) < 0) { + rm->char_offset[i].beg = -1; + rm->char_offset[i].end = -1; + continue; + } + + key.byte_pos = BEG(i); + found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp); + rm->char_offset[i].beg 
= found->char_pos; + + key.byte_pos = END(i); + found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp); + rm->char_offset[i].end = found->char_pos; + } + + rm->char_offset_updated = 1; +} + +static void +match_check(VALUE match) +{ + if (!RMATCH(match)->regexp) { /* match_alloc leaves regexp as 0 */ + rb_raise(rb_eTypeError, "uninitialized Match"); + } +} + +/* :nodoc: */ +static VALUE +match_init_copy(VALUE obj, VALUE orig) +{ + struct rmatch *rm; + + if (obj == orig) return obj; + + if (!rb_obj_is_instance_of(orig, rb_obj_class(obj))) { + rb_raise(rb_eTypeError, "wrong argument class"); + } + RMATCH(obj)->str = RMATCH(orig)->str; + RMATCH(obj)->regexp = RMATCH(orig)->regexp; + + rm = RMATCH(obj)->rmatch; + onig_region_copy(&rm->regs, RMATCH_REGS(orig)); + + if (!RMATCH(orig)->rmatch->char_offset_updated) { + rm->char_offset_updated = 0; + } + else { /* orig has character offsets cached: copy them as well */ + if (rm->char_offset_num_allocated < rm->regs.num_regs) { + REALLOC_N(rm->char_offset, struct rmatch_offset, rm->regs.num_regs); + rm->char_offset_num_allocated = rm->regs.num_regs; + } + MEMCPY(rm->char_offset, RMATCH(orig)->rmatch->char_offset, + struct rmatch_offset, rm->regs.num_regs); + rm->char_offset_updated = 1; + } + + return obj; +} + + +/* + * call-seq: + * mtch.regexp => regexp + * + * Returns the regexp. + * + * m = /a.*b/.match("abc") + * m.regexp #=> /a.*b/ + */ + +static VALUE +match_regexp(VALUE match) +{ + match_check(match); + return RMATCH(match)->regexp; +} + +/* + * call-seq: + * mtch.names => [name1, name2, ...] + * + * Returns a list of names of captures as an array of strings. + * It is the same as mtch.regexp.names. + * + * /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").names + * #=> ["foo", "bar", "baz"] + * + * m = /(?<x>.)(?<y>.)?/.match("a") #=> #<MatchData "a" x:"a" y:nil> + * m.names #=> ["x", "y"] + */ + +static VALUE +match_names(VALUE match) +{ + match_check(match); + return rb_reg_names(RMATCH(match)->regexp); +} + +/* + * call-seq: + * mtch.length => integer + * mtch.size => integer + * + * Returns the number of elements in the match array. 
+ * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.length #=> 5 + * m.size #=> 5 + */ + +static VALUE +match_size(VALUE match) +{ + match_check(match); + return INT2FIX(RMATCH_REGS(match)->num_regs); +} + +static int +match_backref_number(VALUE match, VALUE backref) +{ + const char *name; + int num; + + struct re_registers *regs = RMATCH_REGS(match); + VALUE regexp = RMATCH(match)->regexp; + + match_check(match); + switch(TYPE(backref)) { + default: /* neither Symbol nor String: convert as a numeric index */ + return NUM2INT(backref); + + case T_SYMBOL: + name = rb_id2name(SYM2ID(backref)); + break; + + case T_STRING: + name = StringValueCStr(backref); + break; + } + + num = onig_name_to_backref_number(RREGEXP(regexp)->ptr, + (const unsigned char*)name, + (const unsigned char*)name + strlen(name), + regs); + + if (num < 1) { /* lookup did not yield a group number for this name */ + rb_raise(rb_eIndexError, "undefined group name reference: %s", name); + } + + return num; +} + + +/* + * call-seq: + * mtch.offset(n) => array + * + * Returns a two-element array containing the beginning and ending offsets of + * the nth match. + * n can be a string or symbol to reference a named capture. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.offset(0) #=> [1, 7] + * m.offset(4) #=> [6, 7] + * + * m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") + * p m.offset(:foo) #=> [0, 1] + * p m.offset(:bar) #=> [2, 3] + * + */ + +static VALUE +match_offset(VALUE match, VALUE n) +{ + int i = match_backref_number(match, n); + struct re_registers *regs = RMATCH_REGS(match); + + match_check(match); + if (i < 0 || regs->num_regs <= i) + rb_raise(rb_eIndexError, "index %d out of matches", i); + + if (BEG(i) < 0) /* negative begin offset: answer [nil, nil] */ + return rb_assoc_new(Qnil, Qnil); + + update_char_offset(match); + return rb_assoc_new(INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg), + INT2FIX(RMATCH(match)->rmatch->char_offset[i].end)); +} + + +/* + * call-seq: + * mtch.begin(n) => integer + * + * Returns the offset of the start of the nth element of the match + * array in the string. + * n can be a string or symbol to reference a named capture. 
+ * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.begin(0) #=> 1 + * m.begin(2) #=> 2 + * + * m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") + * p m.begin(:foo) #=> 0 + * p m.begin(:bar) #=> 2 + */ + +static VALUE +match_begin(VALUE match, VALUE n) +{ + int i = match_backref_number(match, n); + struct re_registers *regs = RMATCH_REGS(match); + + match_check(match); + if (i < 0 || regs->num_regs <= i) + rb_raise(rb_eIndexError, "index %d out of matches", i); + + if (BEG(i) < 0) + return Qnil; + + update_char_offset(match); + return INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg); +} + + +/* + * call-seq: + * mtch.end(n) => integer + * + * Returns the offset of the character immediately following the end of the + * nth element of the match array in the string. + * n can be a string or symbol to reference a named capture. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.end(0) #=> 7 + * m.end(2) #=> 3 + * + * m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") + * p m.end(:foo) #=> 1 + * p m.end(:bar) #=> 3 + */ + +static VALUE +match_end(VALUE match, VALUE n) +{ + int i = match_backref_number(match, n); + struct re_registers *regs = RMATCH_REGS(match); + + match_check(match); + if (i < 0 || regs->num_regs <= i) + rb_raise(rb_eIndexError, "index %d out of matches", i); + + if (BEG(i) < 0) + return Qnil; + + update_char_offset(match); + return INT2FIX(RMATCH(match)->rmatch->char_offset[i].end); +} + +#define MATCH_BUSY FL_USER2 + +void +rb_match_busy(VALUE match) +{ + FL_SET(match, MATCH_BUSY); +} + +/* + * call-seq: + * rxp.fixed_encoding? => true or false + * + * Returns false if rxp is applicable to + * a string with any ASCII compatible encoding. + * Returns true otherwise. + * + * r = /a/ + * r.fixed_encoding? #=> false + * r =~ "\u{6666} a" #=> 2 + * r =~ "\xa1\xa2 a".force_encoding("euc-jp") #=> 2 + * r =~ "abc".force_encoding("euc-jp") #=> 0 + * + * r = /a/u + * r.fixed_encoding? 
#=> true + * r.encoding #=> # + * r =~ "\u{6666} a" #=> 2 + * r =~ "\xa1\xa2".force_encoding("euc-jp") #=> ArgumentError + * r =~ "abc".force_encoding("euc-jp") #=> 0 + * + * r = /\u{6666}/ + * r.fixed_encoding? #=> true + * r.encoding #=> # + * r =~ "\u{6666} a" #=> 0 + * r =~ "\xa1\xa2".force_encoding("euc-jp") #=> ArgumentError + * r =~ "abc".force_encoding("euc-jp") #=> nil + */ + +static VALUE +rb_reg_fixed_encoding_p(VALUE re) +{ + if (FL_TEST(re, KCODE_FIXED)) + return Qtrue; + else + return Qfalse; +} + +static VALUE +rb_reg_preprocess(const char *p, const char *end, rb_encoding *enc, + rb_encoding **fixed_enc, onig_errmsg_buffer err); + + +static void +reg_enc_error(VALUE re, VALUE str) +{ + rb_raise(rb_eEncCompatError, + "incompatible encoding regexp match (%s regexp with %s string)", + rb_enc_name(rb_enc_get(re)), + rb_enc_name(rb_enc_get(str))); +} + +static rb_encoding* +rb_reg_prepare_enc(VALUE re, VALUE str, int warn) +{ + rb_encoding *enc = 0; + + if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) { + rb_raise(rb_eArgError, + "invalid byte sequence in %s", + rb_enc_name(rb_enc_get(str))); + } + + rb_reg_check(re); + enc = rb_enc_get(str); + if (!rb_enc_str_asciicompat_p(str)) { + if (RREGEXP(re)->ptr->enc != enc) { + reg_enc_error(re, str); + } + } + else if (rb_reg_fixed_encoding_p(re)) { + if (RREGEXP(re)->ptr->enc != enc && + (!rb_enc_asciicompat(RREGEXP(re)->ptr->enc) || + rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT)) { + reg_enc_error(re, str); + } + enc = RREGEXP(re)->ptr->enc; + } + if (warn && (RBASIC(re)->flags & REG_ENCODING_NONE) && + enc != rb_ascii8bit_encoding() && + rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { + rb_warn("regexp match /.../n against to %s string", + rb_enc_name(enc)); + } + return enc; +} + +regex_t * +rb_reg_prepare_re(VALUE re, VALUE str) +{ + regex_t *reg = RREGEXP(re)->ptr; + onig_errmsg_buffer err = ""; + int r; + OnigErrorInfo einfo; + const char *pattern; + VALUE unescaped; + rb_encoding *fixed_enc 
= 0; + rb_encoding *enc = rb_reg_prepare_enc(re, str, 1); + + if (reg->enc == enc) return reg; /* already compiled for this encoding */ + + rb_reg_check(re); + reg = RREGEXP(re)->ptr; + pattern = RREGEXP_SRC_PTR(re); + + unescaped = rb_reg_preprocess( + pattern, pattern + RREGEXP_SRC_LEN(re), enc, + &fixed_enc, err); + + if (unescaped == Qnil) { + rb_raise(rb_eArgError, "regexp preprocess failed: %s", err); + } + + r = onig_new(&reg, (UChar* )RSTRING_PTR(unescaped), + (UChar* )(RSTRING_PTR(unescaped) + RSTRING_LEN(unescaped)), + reg->options, enc, + OnigDefaultSyntax, &einfo); + if (r) { + onig_error_code_to_str((UChar*)err, r, &einfo); + rb_reg_raise(pattern, RREGEXP_SRC_LEN(re), err, re); + } + + RB_GC_GUARD(unescaped); + return reg; +} + +int +rb_reg_adjust_startpos(VALUE re, VALUE str, int pos, int reverse) +{ + int range; + rb_encoding *enc; + UChar *p, *string; + + enc = rb_reg_prepare_enc(re, str, 0); + + if (reverse) { + range = -pos; + } + else { + range = RSTRING_LEN(str) - pos; + } + + if (pos > 0 && ONIGENC_MBC_MAXLEN(enc) != 1 && pos < RSTRING_LEN(str)) { + string = (UChar*)RSTRING_PTR(str); + + if (range > 0) { + p = onigenc_get_right_adjust_char_head(enc, string, string + pos, string + RSTRING_LEN(str)); + } + else { + p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, string, string + pos, string + RSTRING_LEN(str)); + } + return p - string; + } + + return pos; +} + +int +rb_reg_search(VALUE re, VALUE str, int pos, int reverse) +{ + int result; + VALUE match; + struct re_registers regi, *regs = &regi; /* local registers; swapped below when the current backref can be reused */ + char *range = RSTRING_PTR(str); + regex_t *reg; + int tmpreg; + + if (pos > RSTRING_LEN(str) || pos < 0) { + rb_backref_set(Qnil); + return -1; + } + + reg = rb_reg_prepare_re(re, str); + tmpreg = reg != RREGEXP(re)->ptr; /* nonzero when rb_reg_prepare_re returned a newly built regex */ + if (!tmpreg) RREGEXP(re)->usecnt++; + + match = rb_backref_get(); + if (!NIL_P(match)) { + if (FL_TEST(match, MATCH_BUSY)) { + match = Qnil; + } + else { + regs = RMATCH_REGS(match); + } + } + if (NIL_P(match)) { + MEMZERO(regs, struct re_registers, 1); + } + if (!reverse) { + range += 
RSTRING_LEN(str); + } + result = onig_search(reg, + (UChar*)(RSTRING_PTR(str)), + ((UChar*)(RSTRING_PTR(str)) + RSTRING_LEN(str)), + ((UChar*)(RSTRING_PTR(str)) + pos), + ((UChar*)range), + regs, ONIG_OPTION_NONE); + if (!tmpreg) RREGEXP(re)->usecnt--; + if (tmpreg) { + if (RREGEXP(re)->usecnt) { + onig_free(reg); + } + else { + onig_free(RREGEXP(re)->ptr); + RREGEXP(re)->ptr = reg; + } + } + if (result < 0) { + if (regs == &regi) /* only the local registers are freed here */ + onig_region_free(regs, 0); + if (result == ONIG_MISMATCH) { + rb_backref_set(Qnil); + return result; + } + else { + onig_errmsg_buffer err = ""; + onig_error_code_to_str((UChar*)err, result); + rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, 0); + } + } + + if (NIL_P(match)) { + match = match_alloc(rb_cMatch); + onig_region_copy(RMATCH_REGS(match), regs); + onig_region_free(regs, 0); + } + else { + if (rb_safe_level() >= 3) + OBJ_TAINT(match); + else + FL_UNSET(match, FL_TAINT); + } + + RMATCH(match)->str = rb_str_new4(str); + RMATCH(match)->regexp = re; + RMATCH(match)->rmatch->char_offset_updated = 0; + rb_backref_set(match); + + OBJ_INFECT(match, re); + OBJ_INFECT(match, str); + + return result; +} + +VALUE +rb_reg_nth_defined(int nth, VALUE match) +{ + struct re_registers *regs; + if (NIL_P(match)) return Qnil; + match_check(match); + regs = RMATCH_REGS(match); + if (nth >= regs->num_regs) { + return Qnil; + } + if (nth < 0) { + nth += regs->num_regs; + if (nth <= 0) return Qnil; + } + if (BEG(nth) == -1) return Qfalse; + return Qtrue; +} + +VALUE +rb_reg_nth_match(int nth, VALUE match) +{ + VALUE str; + long start, end, len; + struct re_registers *regs; + + if (NIL_P(match)) return Qnil; + match_check(match); + regs = RMATCH_REGS(match); + if (nth >= regs->num_regs) { + return Qnil; + } + if (nth < 0) { + nth += regs->num_regs; + if (nth <= 0) return Qnil; + } + start = BEG(nth); + if (start == -1) return Qnil; + end = END(nth); + len = end - start; + str = rb_str_subseq(RMATCH(match)->str, start, len); + OBJ_INFECT(str, 
match); + return str; +} + +VALUE +rb_reg_last_match(VALUE match) +{ + return rb_reg_nth_match(0, match); +} + + +/* + * call-seq: + * mtch.pre_match => str + * + * Returns the portion of the original string before the current match. + * Equivalent to the special variable $`. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.pre_match #=> "T" + */ + +VALUE +rb_reg_match_pre(VALUE match) +{ + VALUE str; + struct re_registers *regs; + + if (NIL_P(match)) return Qnil; + match_check(match); + regs = RMATCH_REGS(match); + if (BEG(0) == -1) return Qnil; + str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0)); + if (OBJ_TAINTED(match)) OBJ_TAINT(str); + return str; +} + + +/* + * call-seq: + * mtch.post_match => str + * + * Returns the portion of the original string after the current match. + * Equivalent to the special variable $'. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") + * m.post_match #=> ": The Movie" + */ + +VALUE +rb_reg_match_post(VALUE match) +{ + VALUE str; + long pos; + struct re_registers *regs; + + if (NIL_P(match)) return Qnil; + match_check(match); + regs = RMATCH_REGS(match); + if (BEG(0) == -1) return Qnil; + str = RMATCH(match)->str; + pos = END(0); + str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos); + if (OBJ_TAINTED(match)) OBJ_TAINT(str); + return str; +} + +VALUE +rb_reg_match_last(VALUE match) +{ + int i; + struct re_registers *regs; + + if (NIL_P(match)) return Qnil; + match_check(match); + regs = RMATCH_REGS(match); + if (BEG(0) == -1) return Qnil; + + for (i=regs->num_regs-1; BEG(i) == -1 && i > 0; i--) + ; + if (i == 0) return Qnil; + return rb_reg_nth_match(i, match); +} + +static VALUE +last_match_getter(void) +{ + return rb_reg_last_match(rb_backref_get()); +} + +static VALUE +prematch_getter(void) +{ + return rb_reg_match_pre(rb_backref_get()); +} + +static VALUE +postmatch_getter(void) +{ + return rb_reg_match_post(rb_backref_get()); +} + +static VALUE +last_paren_match_getter(void) +{ + return 
rb_reg_match_last(rb_backref_get()); +} + +static VALUE +match_array(VALUE match, int start) +{ + struct re_registers *regs = RMATCH_REGS(match); + VALUE ary = rb_ary_new2(regs->num_regs); + VALUE target = RMATCH(match)->str; + int i; + int taint = OBJ_TAINTED(match); + + match_check(match); + for (i=start; i<regs->num_regs; i++) { + if (regs->beg[i] == -1) { /* no offset recorded for this group: push nil */ + rb_ary_push(ary, Qnil); + } + else { + VALUE str = rb_str_subseq(target, regs->beg[i], regs->end[i]-regs->beg[i]); + if (taint) OBJ_TAINT(str); + rb_ary_push(ary, str); + } + } + return ary; +} + + +/* [MG]:FIXME: I put parens around the /.../.match() in the first line of the + second example to prevent the '*' followed by a '/' from ending the + comment. */ + +/* + * call-seq: + * mtch.to_a => anArray + * + * Returns the array of matches. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.to_a #=> ["HX1138", "H", "X", "113", "8"] + * + * Because to_a is called when expanding + * *variable, there's a useful assignment + * shortcut for extracting matched fields. This is slightly slower than + * accessing the fields directly (as an intermediate array is + * generated). + * + * all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138.")) + * all #=> "HX1138" + * f1 #=> "H" + * f2 #=> "X" + * f3 #=> "113" + */ + +static VALUE +match_to_a(VALUE match) +{ + return match_array(match, 0); +} + + +/* + * call-seq: + * mtch.captures => array + * + * Returns the array of captures; equivalent to mtch.to_a[1..-1]. 
+ * + * f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures + * f1 #=> "H" + * f2 #=> "X" + * f3 #=> "113" + * f4 #=> "8" + */ +static VALUE +match_captures(VALUE match) +{ + return match_array(match, 1); +} + +/* + * Resolves the group name held in [name, name_end) to a backreference + * number through onig_name_to_backref_number(). Returns the number when it + * is at least 1; otherwise raises IndexError naming the unknown group. + */ +static int +name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name, const char* name_end) +{ + int num; + + num = onig_name_to_backref_number(RREGEXP(regexp)->ptr, + (const unsigned char* )name, (const unsigned char* )name_end, regs); + if (num >= 1) { + return num; + } + else { + VALUE s = rb_str_new(name, (long )(name_end - name)); + rb_raise(rb_eIndexError, "undefined group name reference: %s", + StringValuePtr(s)); + } +} + +/* + * call-seq: + * mtch[i] => str or nil + * mtch[start, length] => array + * mtch[range] => array + * mtch[name] => str or nil + * + * Match Reference---MatchData acts as an array, and may be + * accessed using the normal array indexing techniques. mtch[0] is + * equivalent to the special variable $&, and returns the entire + * matched string. mtch[1], mtch[2], and so on return the values + * of the matched backreferences (portions of the pattern between parentheses).
+ * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> + * m[0] #=> "HX1138" + * m[1, 2] #=> ["H", "X"] + * m[1..3] #=> ["H", "X", "113"] + * m[-3, 2] #=> ["X", "113"] + * + * m = /(?<foo>a+)b/.match("ccaaab") + * m #=> #<MatchData "aaab" foo:"aaa"> + * m["foo"] #=> "aaa" + * m[:foo] #=> "aaa" + */ + +static VALUE +match_aref(int argc, VALUE *argv, VALUE match) +{ + VALUE idx, rest; + + match_check(match); + rb_scan_args(argc, argv, "11", &idx, &rest); + + if (NIL_P(rest)) { + if (FIXNUM_P(idx)) { + if (FIX2INT(idx) >= 0) { + return rb_reg_nth_match(FIX2INT(idx), match); + } + } + else { + const char *p; + int num; + + switch (TYPE(idx)) { + case T_SYMBOL: + p = rb_id2name(SYM2ID(idx)); + goto name_to_backref; + break; + case T_STRING: + p = StringValuePtr(idx); + + name_to_backref: + num = name_to_backref_number(RMATCH_REGS(match), + RMATCH(match)->regexp, p, p + strlen(p)); + return rb_reg_nth_match(num, match); + break; + + default: + break; + } + } + } + + /* every remaining form (negative Fixnum, range, start/length, ...) is delegated to rb_ary_aref() on the to_a array */ + return rb_ary_aref(argc, argv, match_to_a(match)); +} + +/* + * Accessor handed to rb_get_values_at() by match_values_at: returns + * group n of match via rb_reg_nth_match(). + */ +static VALUE +match_entry(VALUE match, long n) +{ + return rb_reg_nth_match(n, match); +} + + +/* + * call-seq: + * + * mtch.values_at([index]*) => array + * + * Uses each index to access the matching values, returning an array of + * the corresponding matches. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") + * m.to_a #=> ["HX1138", "H", "X", "113", "8"] + * m.values_at(0, 2, -2) #=> ["HX1138", "X", "113"] + */ + +static VALUE +match_values_at(int argc, VALUE *argv, VALUE match) +{ + struct re_registers *regs = RMATCH_REGS(match); + match_check(match); + return rb_get_values_at(match, regs->num_regs, argc, argv, match_entry); +} + + +/* + * call-seq: + * mtch.to_s => str + * + * Returns the entire matched string.
+ * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.to_s #=> "HX1138" + */ + +static VALUE +match_to_s(VALUE match) +{ + VALUE str = rb_reg_last_match(match); + + match_check(match); + if (NIL_P(str)) str = rb_str_new(0,0); + if (OBJ_TAINTED(match)) OBJ_TAINT(str); + if (OBJ_TAINTED(RMATCH(match)->str)) OBJ_TAINT(str); + return str; +} + + +/* + * call-seq: + * mtch.string => str + * + * Returns a frozen copy of the string passed in to match. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.string #=> "THX1138." + */ + +static VALUE +match_string(VALUE match) +{ + match_check(match); + return RMATCH(match)->str; /* str is frozen */ +} + +/* + * Name of one capture group as a (pointer, byte length) pair. + * match_inspect keeps a table with one entry per register. + */ +struct backref_name_tag { + const UChar *name; + long len; +}; + +/* + * Callback for onig_foreach_name(): stores the name [name, name_end) in the + * backref_name_tag table arg0, once for every group number in back_refs. + * Always returns 0. + */ +static int +match_inspect_name_iter(const OnigUChar *name, const OnigUChar *name_end, + int back_num, int *back_refs, OnigRegex regex, void *arg0) +{ + struct backref_name_tag *arg = (struct backref_name_tag *)arg0; + int i; + + for (i = 0; i < back_num; i++) { + arg[back_refs[i]].name = name; + arg[back_refs[i]].len = name_end - name; + } + return 0; +} + +/* + * call-seq: + * mtch.inspect => str + * + * Returns a printable version of mtch.
+ * + * puts /.$/.match("foo").inspect + * #=> #<MatchData "o"> + * + * puts /(.)(.)(.)/.match("foo").inspect + * #=> #<MatchData "foo" 1:"f" 2:"o" 3:"o"> + * + * puts /(.)(.)?(.)/.match("fo").inspect + * #=> #<MatchData "fo" 1:"f" 2:nil 3:"o"> + * + * puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect + * #=> #<MatchData "hog" foo:"h" bar:"o" baz:"g"> + * + */ + +static VALUE +match_inspect(VALUE match) +{ + const char *cname = rb_obj_classname(match); + VALUE str; + int i; + struct re_registers *regs = RMATCH_REGS(match); + int num_regs = regs->num_regs; + struct backref_name_tag *names; + VALUE regexp = RMATCH(match)->regexp; + + /* no regexp attached: print only the class name and the object address */ + if (regexp == 0) { + return rb_sprintf("#<%s:%p>", cname, (void*)match); + } + + names = ALLOCA_N(struct backref_name_tag, num_regs); + MEMZERO(names, struct backref_name_tag, num_regs); + + onig_foreach_name(RREGEXP(regexp)->ptr, + match_inspect_name_iter, names); + + str = rb_str_buf_new2("#<"); + rb_str_buf_cat2(str, cname); + + for (i = 0; i < num_regs; i++) { + VALUE v; + rb_str_buf_cat2(str, " "); + /* register 0 (the whole match) gets no label; the others are labelled with their name or number */ + if (0 < i) { + if (names[i].name) + rb_str_buf_cat(str, (const char *)names[i].name, names[i].len); + else { + rb_str_catf(str, "%d", i); + } + rb_str_buf_cat2(str, ":"); + } + v = rb_reg_nth_match(i, match); + if (v == Qnil) + rb_str_buf_cat2(str, "nil"); + else + rb_str_buf_append(str, rb_str_inspect(v)); + } + rb_str_buf_cat2(str, ">"); + + return str; +} + +VALUE rb_cRegexp; + +/* + * Reads one backslash escape starting at *pp (a simple letter escape, + * octal, \xHH, or a \M- / \C- / \c prefixed form) and returns the byte it + * denotes. On success *pp is advanced past the escape. On failure a + * message is copied into err and -1 is returned. + */ +static int +read_escaped_byte(const char **pp, const char *end, onig_errmsg_buffer err) +{ + const char *p = *pp; + int code; + int meta_prefix = 0, ctrl_prefix = 0; + int len; + int retbyte; + + retbyte = -1; + if (p == end || *p++ != '\\') { + errcpy(err, "too short escaped multibyte character"); + return -1; + } + +again: + if (p == end) { + errcpy(err, "too short escape sequence"); + return -1; + } + switch (*p++) { + case '\\': code = '\\'; break; + case 'n': code = '\n'; break; + case 't': code = '\t'; break; + case 'r': code = '\r'; break; + case 'f': code = '\f'; break; + case 'v': code = '\013'; break; + case 'a': code = '\007'; break; + case 'e': code = '\033'; break; + + /* \OOO
*/ + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + p--; + code = ruby_scan_oct(p, end < p+3 ? end-p : 3, &len); + p += len; + break; + + case 'x': /* \xHH */ + code = ruby_scan_hex(p, end < p+2 ? end-p : 2, &len); + if (len < 1) { + errcpy(err, "invalid hex escape"); + return -1; + } + p += len; + break; + + case 'M': /* \M-X, \M-\C-X, \M-\cX */ + if (meta_prefix) { + errcpy(err, "duplicate meta escape"); + return -1; + } + meta_prefix = 1; + if (p+1 < end && *p++ == '-' && (*p & 0x80) == 0) { + if (*p == '\\') { + p++; + goto again; + } + else { + code = *p++; + break; + } + } + errcpy(err, "too short meta escape"); + return -1; + + case 'C': /* \C-X, \C-\M-X */ + if (p == end || *p++ != '-') { + errcpy(err, "too short control escape"); + return -1; + } + /* fall through: once the '-' is consumed, \C-X is decoded like \cX */ + case 'c': /* \cX, \c\M-X */ + if (ctrl_prefix) { + errcpy(err, "duplicate control escape"); + return -1; + } + ctrl_prefix = 1; + if (p < end && (*p & 0x80) == 0) { + if (*p == '\\') { + p++; + goto again; + } + else { + code = *p++; + break; + } + } + errcpy(err, "too short control escape"); + return -1; + + default: + errcpy(err, "unexpected escape sequence"); + return -1; + } + if (code < 0 || 0xff < code) { + errcpy(err, "invalid escape code"); + return -1; + } + + /* apply the prefixes collected on the way: control masks to 5 bits, meta sets the high bit */ + if (ctrl_prefix) + code &= 0x1f; + if (meta_prefix) + code |= 0x80; + + *pp = p; + return code; +} + +/* + * Decodes consecutive byte escapes at *pp that together form one character + * of enc. A multibyte or high-bit character is appended to buf as raw + * bytes and pins *encp to enc (error when *encp already holds a different + * encoding); a single 7-bit byte is written back as a \xHH escape. + * Returns 0 and advances *pp on success, -1 with err set on failure. + */ +static int +unescape_escaped_nonascii(const char **pp, const char *end, rb_encoding *enc, + VALUE buf, rb_encoding **encp, onig_errmsg_buffer err) +{ + const char *p = *pp; + int chmaxlen = rb_enc_mbmaxlen(enc); + char *chbuf = ALLOCA_N(char, chmaxlen); + int chlen = 0; + int byte; + int l; + + memset(chbuf, 0, chmaxlen); + + byte = read_escaped_byte(&p, end, err); + if (byte == -1) { + return -1; + } + + chbuf[chlen++] = byte; + /* keep reading escaped bytes while the encoding reports the character as incomplete */ + while (chlen < chmaxlen && + MBCLEN_NEEDMORE_P(rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) { + byte = read_escaped_byte(&p, end, err); + if (byte == -1) { + return -1; + } +
chbuf[chlen++] = byte; + } + + l = rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc); + if (MBCLEN_INVALID_P(l)) { + errcpy(err, "invalid multibyte escape"); + return -1; + } + if (1 < chlen || (chbuf[0] & 0x80)) { + rb_str_buf_cat(buf, chbuf, chlen); + + if (*encp == 0) + *encp = enc; + else if (*encp != enc) { + errcpy(err, "escaped non ASCII character in UTF-8 regexp"); + return -1; + } + } + else { + char escbuf[5]; + snprintf(escbuf, sizeof(escbuf), "\\x%02X", chbuf[0]&0xff); + rb_str_buf_cat(buf, escbuf, 4); + } + *pp = p; + return 0; +} + +/* + * Rejects code points that are surrogates (0xd800..0xdfff) or larger than + * 0x10ffff. Returns 0 when code is acceptable, otherwise -1 with err set. + */ +static int +check_unicode_range(unsigned long code, onig_errmsg_buffer err) +{ + if ((0xd800 <= code && code <= 0xdfff) || /* Surrogates */ + 0x10ffff < code) { + errcpy(err, "invalid Unicode range"); + return -1; + } + return 0; +} + +/* + * Appends the code point uv to buf. Values below 0x80 are written as a + * \xHH escape; larger values are written as UTF-8 bytes and pin *encp to + * UTF-8 (error when *encp already names another encoding). + * Returns 0, or -1 with err set. + */ +static int +append_utf8(unsigned long uv, + VALUE buf, rb_encoding **encp, onig_errmsg_buffer err) +{ + if (check_unicode_range(uv, err) != 0) + return -1; + if (uv < 0x80) { + char escbuf[5]; + snprintf(escbuf, sizeof(escbuf), "\\x%02X", (int)uv); + rb_str_buf_cat(buf, escbuf, 4); + } + else { + int len; + char utf8buf[6]; + len = rb_uv_to_utf8(utf8buf, uv); + rb_str_buf_cat(buf, utf8buf, len); + + if (*encp == 0) + *encp = rb_utf8_encoding(); + else if (*encp != rb_utf8_encoding()) { + errcpy(err, "UTF-8 character in non UTF-8 regexp"); + return -1; + } + } + return 0; +} + +/* + * Parses a list of whitespace-separated hexadecimal code points starting at + * *pp, each at most six digits long, and appends every one of them with + * append_utf8(). At least one code point is required. On success *pp is + * left at the first character that does not belong to the list. + * Returns 0, or -1 with err set. + */ +static int +unescape_unicode_list(const char **pp, const char *end, + VALUE buf, rb_encoding **encp, onig_errmsg_buffer err) +{ + const char *p = *pp; + int has_unicode = 0; + unsigned long code; + int len; + + while (p < end && ISSPACE(*p)) p++; + + while (1) { + code = ruby_scan_hex(p, end-p, &len); + if (len == 0) + break; + if (6 < len) { /* max 10FFFF */ + errcpy(err, "invalid Unicode range"); + return -1; + } + p += len; + if (append_utf8(code, buf, encp, err) != 0) + return -1; + has_unicode = 1; + + while (p < end && ISSPACE(*p)) p++; + } + + if (has_unicode == 0) { + errcpy(err, "invalid Unicode list"); + return
-1; + } + + *pp = p; + + return 0; +} + +/* + * Handles the four-digit form of the \u escape: requires exactly four hex + * digits at *pp, appends the code point with append_utf8() and advances + * *pp by 4. Returns 0, or -1 with err set. + */ +static int +unescape_unicode_bmp(const char **pp, const char *end, + VALUE buf, rb_encoding **encp, onig_errmsg_buffer err) +{ + const char *p = *pp; + int len; + unsigned long code; + + if (end < p+4) { + errcpy(err, "invalid Unicode escape"); + return -1; + } + code = ruby_scan_hex(p, 4, &len); + if (len != 4) { + errcpy(err, "invalid Unicode escape"); + return -1; + } + if (append_utf8(code, buf, encp, err) != 0) + return -1; + *pp = p + 4; + return 0; +} + +/* + * Copies the regexp source [p, end) into buf. Numeric, \x, \c, \C-, \M- + * and \u escapes are translated by the helper functions; every other + * escape and every plain 7-bit character is copied unchanged. A raw + * non-ASCII character pins *encp to enc (error when *encp already holds a + * different encoding). *has_property is set when \p is seen while *encp + * is still unset. Returns 0, or -1 with err set. + */ +static int +unescape_nonascii(const char *p, const char *end, rb_encoding *enc, + VALUE buf, rb_encoding **encp, int *has_property, + onig_errmsg_buffer err) +{ + char c; + char smallbuf[2]; + + while (p < end) { + int chlen = rb_enc_precise_mbclen(p, end, enc); + if (!MBCLEN_CHARFOUND_P(chlen)) { + errcpy(err, "invalid multibyte character"); + return -1; + } + chlen = MBCLEN_CHARFOUND_LEN(chlen); + if (1 < chlen || (*p & 0x80)) { + rb_str_buf_cat(buf, p, chlen); + p += chlen; + if (*encp == 0) + *encp = enc; + else if (*encp != enc) { + errcpy(err, "non ASCII character in UTF-8 regexp"); + return -1; + } + continue; + } + + switch (c = *p++) { + case '\\': + if (p == end) { + errcpy(err, "too short escape sequence"); + return -1; + } + switch (c = *p++) { + case '1': case '2': case '3': + case '4': case '5': case '6': case '7': /* \O, \OO, \OOO or backref */ + { + int octlen; + if (ruby_scan_oct(p-1, end-(p-1), &octlen) <= 0177) { + /* backref or 7bit octal. + no need to unescape anyway. + re-escaping may break backref */ + goto escape_asis; + } + } + /* xxx: How about more than 199 subexpressions?
*/ + + case '0': /* \0, \0O, \0OO */ + + case 'x': /* \xHH */ + case 'c': /* \cX, \c\M-X */ + case 'C': /* \C-X, \C-\M-X */ + case 'M': /* \M-X, \M-\C-X, \M-\cX */ + /* step back onto the backslash so the helper re-reads the whole escape */ + p = p-2; + if (unescape_escaped_nonascii(&p, end, enc, buf, encp, err) != 0) + return -1; + break; + + case 'u': + if (p == end) { + errcpy(err, "too short escape sequence"); + return -1; + } + if (*p == '{') { + /* \u{H HH HHH HHHH HHHHH HHHHHH ...} */ + p++; + if (unescape_unicode_list(&p, end, buf, encp, err) != 0) + return -1; + if (p == end || *p++ != '}') { + errcpy(err, "invalid Unicode list"); + return -1; + } + break; + } + else { + /* \uHHHH */ + if (unescape_unicode_bmp(&p, end, buf, encp, err) != 0) + return -1; + break; + } + + case 'p': /* \p{Hiragana} */ + if (!*encp) { + *has_property = 1; + } + goto escape_asis; + + default: /* \n, \\, \d, \9, etc. */ +escape_asis: + smallbuf[0] = '\\'; + smallbuf[1] = c; + rb_str_buf_cat(buf, smallbuf, 2); + break; + } + break; + + default: + rb_str_buf_cat(buf, &c, 1); + break; + } + } + + return 0; +} + +/* + * Rewrites the regexp source [p, end) with unescape_nonascii() into a new + * string buffer. *fixed_enc receives the encoding the pattern is pinned + * to, or 0 when there is none: it starts out as enc when enc is not ASCII + * compatible, and falls back to enc when a \p property was seen and nothing + * else pinned an encoding. Returns the buffer, or Qnil with err set. + */ +static VALUE +rb_reg_preprocess(const char *p, const char *end, rb_encoding *enc, + rb_encoding **fixed_enc, onig_errmsg_buffer err) +{ + VALUE buf; + int has_property = 0; + + buf = rb_str_buf_new(0); + + if (rb_enc_asciicompat(enc)) + *fixed_enc = 0; + else { + *fixed_enc = enc; + rb_enc_associate(buf, enc); + } + + if (unescape_nonascii(p, end, enc, buf, fixed_enc, &has_property, err) != 0) + return Qnil; + + if (has_property && !*fixed_enc) { + *fixed_enc = enc; + } + + if (*fixed_enc) { + rb_enc_associate(buf, *fixed_enc); + } + + return buf; +} + +/* + * Runs rb_reg_preprocess() over str purely as a validity check. Returns + * Qnil when preprocessing succeeds, otherwise the value produced by + * rb_reg_error_desc() for the failure. + */ +VALUE +rb_reg_check_preprocess(VALUE str) +{ + rb_encoding *fixed_enc = 0; + onig_errmsg_buffer err = ""; + VALUE buf; + char *p, *end; + rb_encoding *enc; + + StringValue(str); + p = RSTRING_PTR(str); + end = p + RSTRING_LEN(str); + enc = rb_enc_get(str); + + buf = rb_reg_preprocess(p, end, enc, &fixed_enc, err); + RB_GC_GUARD(str); + + if (buf == Qnil) { + return rb_reg_error_desc(str, 0, err); + }
+ return Qnil; +} + +/* + * Checks every fragment of ary with rb_reg_preprocess() and concatenates + * the original (not the preprocessed) fragments into one string. Raises + * ArgumentError for an empty ary, for a fragment that fails preprocessing, + * and for fragments pinned to different encodings. The result is + * associated with the pinned encoding when there is one. + */ +static VALUE +rb_reg_preprocess_dregexp(VALUE ary) +{ + rb_encoding *fixed_enc = 0; + rb_encoding *regexp_enc = 0; + onig_errmsg_buffer err = ""; + int i; + VALUE result = 0; + + if (RARRAY_LEN(ary) == 0) { + rb_raise(rb_eArgError, "no arguments given"); + } + + for (i = 0; i < RARRAY_LEN(ary); i++) { + VALUE str = RARRAY_PTR(ary)[i]; + VALUE buf; + char *p, *end; + rb_encoding *src_enc; + + StringValue(str); + p = RSTRING_PTR(str); + end = p + RSTRING_LEN(str); + src_enc = rb_enc_get(str); + + buf = rb_reg_preprocess(p, end, src_enc, &fixed_enc, err); + + if (buf == Qnil) + rb_raise(rb_eArgError, "%s", err); + + if (fixed_enc != 0) { + if (regexp_enc != 0 && regexp_enc != fixed_enc) { + rb_raise(rb_eArgError, "encoding mismatch in dynamic regexp : %s and %s", + rb_enc_name(regexp_enc), rb_enc_name(fixed_enc)); + } + regexp_enc = fixed_enc; + } + + if (!result) + result = rb_str_new3(str); + else + rb_str_buf_append(result, str); + } + if (regexp_enc) { + rb_enc_associate(result, regexp_enc); + } + + return result; +} + +/* + * Compiles the len bytes at s into the not yet initialized Regexp obj. + * Raises SecurityError or TypeError when obj must not be (re)initialized. + * Problems with the pattern itself are reported by copying a message into + * err and returning -1. Returns 0 on success, leaving a frozen copy of the + * source in re->src. + */ +static int +rb_reg_initialize(VALUE obj, const char *s, int len, rb_encoding *enc, + int options, onig_errmsg_buffer err) +{ + struct RRegexp *re = RREGEXP(obj); + VALUE unescaped; + rb_encoding *fixed_enc = 0; + rb_encoding *a_enc = rb_ascii8bit_encoding(); + + if (!OBJ_UNTRUSTED(obj) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't modify regexp"); + rb_check_frozen(obj); + if (FL_TEST(obj, REG_LITERAL)) + rb_raise(rb_eSecurityError, "can't modify literal regexp"); + if (re->ptr) + rb_raise(rb_eTypeError, "already initialized regexp"); + re->ptr = 0; + + if (rb_enc_dummy_p(enc)) { + errcpy(err, "can't make regexp with dummy encoding"); + return -1; + } + + unescaped = rb_reg_preprocess(s, s+len, enc, &fixed_enc, err); + if (unescaped == Qnil) + return -1; + + if (fixed_enc) { + /* the encoding pinned by the pattern text must agree with the one requested through the options */ + if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) || + (fixed_enc != a_enc && (options & ARG_ENCODING_NONE))) { +
errcpy(err, "incompatible character encoding"); + return -1; + } + if (fixed_enc != a_enc) { + options |= ARG_ENCODING_FIXED; + enc = fixed_enc; + } + } + else if (!(options & ARG_ENCODING_FIXED)) { + /* nothing pinned an encoding and none was requested: use US-ASCII */ + enc = rb_usascii_encoding(); + } + + rb_enc_associate((VALUE)re, enc); + if ((options & ARG_ENCODING_FIXED) || fixed_enc) { + re->basic.flags |= KCODE_FIXED; + } + if (options & ARG_ENCODING_NONE) { + re->basic.flags |= REG_ENCODING_NONE; + } + + re->ptr = make_regexp(RSTRING_PTR(unescaped), RSTRING_LEN(unescaped), enc, + options & ARG_REG_OPTION_MASK, err); + if (!re->ptr) return -1; + re->src = rb_enc_str_new(s, len, enc); + OBJ_FREEZE(re->src); + RB_GC_GUARD(unescaped); + return 0; +} + +/* + * String front end for rb_reg_initialize(): the encoding is taken from + * str. With ARG_ENCODING_NONE, a str in an encoding other than ASCII-8BIT + * is accepted only when its code range is 7bit, and is then compiled as + * ASCII-8BIT; otherwise err is set and -1 is returned. + */ +static int +rb_reg_initialize_str(VALUE obj, VALUE str, int options, onig_errmsg_buffer err) +{ + int ret; + rb_encoding *enc = rb_enc_get(str); + if (options & ARG_ENCODING_NONE) { + rb_encoding *ascii8bit = rb_ascii8bit_encoding(); + if (enc != ascii8bit) { + if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { + errcpy(err, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script"); + return -1; + } + enc = ascii8bit; + } + } + ret = rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), enc, + options, err); + RB_GC_GUARD(str); + return ret; +} + +/* + * Allocates a T_REGEXP object of class klass with ptr, src and usecnt + * cleared. + */ +static VALUE +rb_reg_s_alloc(VALUE klass) +{ + NEWOBJ(re, struct RRegexp); + OBJSETUP(re, klass, T_REGEXP); + + re->ptr = 0; + re->src = 0; + re->usecnt = 0; + + return (VALUE)re; +} + +/* + * Creates a Regexp from the String s. A compile failure is raised through + * rb_reg_raise_str(). + */ +VALUE +rb_reg_new_str(VALUE s, int options) +{ + VALUE re = rb_reg_s_alloc(rb_cRegexp); + onig_errmsg_buffer err = ""; + + if (rb_reg_initialize_str(re, s, options, err) != 0) { + rb_reg_raise_str(s, options, err); + } + + return re; +} + +/* + * Creates a Regexp from the fragments in ary after joining them with + * rb_reg_preprocess_dregexp(). + */ +VALUE +rb_reg_new_ary(VALUE ary, int opt) +{ + return rb_reg_new_str(rb_reg_preprocess_dregexp(ary), opt); +} + +/* + * Creates a Regexp from the len bytes at s interpreted in enc. A compile + * failure is raised through rb_enc_reg_raise(). + */ +VALUE +rb_enc_reg_new(const char *s, long len, rb_encoding *enc, int options) +{ + VALUE re = rb_reg_s_alloc(rb_cRegexp); + onig_errmsg_buffer err = ""; + + if (rb_reg_initialize(re, s,
len, enc, options, err) != 0) { + rb_enc_reg_raise(s, len, enc, options, err); + } + + return re; +} + +/* + * Same as rb_enc_reg_new() with the ASCII-8BIT encoding. + */ +VALUE +rb_reg_new(const char *s, long len, int options) +{ + return rb_enc_reg_new(s, len, rb_ascii8bit_encoding(), options); +} + +/* + * Compiles str (an empty string when str is 0) and flags the result with + * REG_LITERAL. On failure the error description is stored with + * rb_set_errinfo() and Qnil is returned instead of raising. + */ +VALUE +rb_reg_compile(VALUE str, int options) +{ + VALUE re = rb_reg_s_alloc(rb_cRegexp); + onig_errmsg_buffer err = ""; + + if (!str) str = rb_str_new(0,0); + if (rb_reg_initialize_str(re, str, options, err) != 0) { + rb_set_errinfo(rb_reg_error_desc(str, options, err)); + return Qnil; + } + FL_SET(re, REG_LITERAL); + return re; +} + +/* the Regexp most recently built by rb_reg_regcomp() */ +static VALUE reg_cache; + +/* + * Returns a Regexp for str. The cached Regexp is reused when its source + * has the same length, ENCODING_GET() value and bytes as str; otherwise a + * new Regexp is compiled with options 0 and becomes the cached one. + */ +VALUE +rb_reg_regcomp(VALUE str) +{ + volatile VALUE save_str = str; + if (reg_cache && RREGEXP_SRC_LEN(reg_cache) == RSTRING_LEN(str) + && ENCODING_GET(reg_cache) == ENCODING_GET(str) + && memcmp(RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0) + return reg_cache; + + return reg_cache = rb_reg_new_str(save_str, 0); +} + +/* + * call-seq: + * rxp.hash => fixnum + * + * Produce a hash based on the text and options of this regular expression. + */ + +static VALUE +rb_reg_hash(VALUE re) +{ + int hashval, len; + char *p; + + rb_reg_check(re); + hashval = RREGEXP(re)->ptr->options; + len = RREGEXP_SRC_LEN(re); + p = RREGEXP_SRC_PTR(re); + /* NOTE(review): hashval is a signed int, so this accumulation can overflow on long sources - confirm that is acceptable */ + while (len--) { + hashval = hashval * 33 + *p++; + } + hashval = hashval + (hashval>>5); + + return INT2FIX(hashval); +} + + +/* + * call-seq: + * rxp == other_rxp => true or false + * rxp.eql?(other_rxp) => true or false + * + * Equality---Two regexps are equal if their patterns are identical, they have + * the same character set code, and their casefold? values are the + * same.
+ * + * /abc/ == /abc/x #=> false + * /abc/ == /abc/i #=> false + * /abc/ == /abc/n #=> false + * /abc/u == /abc/n #=> false + */ + +static VALUE +rb_reg_equal(VALUE re1, VALUE re2) +{ + if (re1 == re2) return Qtrue; + if (TYPE(re2) != T_REGEXP) return Qfalse; + rb_reg_check(re1); rb_reg_check(re2); + if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)) return Qfalse; + if (RREGEXP(re1)->ptr->options != RREGEXP(re2)->ptr->options) return Qfalse; + if (RREGEXP_SRC_LEN(re1) != RREGEXP_SRC_LEN(re2)) return Qfalse; + if (ENCODING_GET(re1) != ENCODING_GET(re2)) return Qfalse; + if (memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), RREGEXP_SRC_LEN(re1)) == 0) { + return Qtrue; + } + return Qfalse; +} + +/* + * Converts a match operand to a String: a Symbol becomes its name string, + * anything else goes through rb_check_string_type(). When that conversion + * yields nil, TypeError is raised if check is true and nil is returned + * otherwise. + */ +static VALUE +reg_operand(VALUE s, int check) +{ + if (SYMBOL_P(s)) { + return rb_sym_to_s(s); + } + else { + VALUE tmp = rb_check_string_type(s); + if (check && NIL_P(tmp)) { + rb_raise(rb_eTypeError, "can't convert %s to String", + rb_obj_classname(s)); + } + return tmp; + } +} + +/* + * Searches *strp with re starting at pos and returns the result of + * rb_reg_search(). A nil *strp clears the backref and yields -1. *strp is + * replaced by its String conversion. A negative pos has the string length + * added to it; if it is still negative it is returned as is without + * searching. A non-zero pos is translated with rb_str_offset() before the + * search (presumably character index to byte offset - confirm). + */ +static long +reg_match_pos(VALUE re, VALUE *strp, long pos) +{ + VALUE str = *strp; + + if (NIL_P(str)) { + rb_backref_set(Qnil); + return -1; + } + *strp = str = reg_operand(str, Qtrue); + if (pos != 0) { + if (pos < 0) { + VALUE l = rb_str_length(str); + pos += NUM2INT(l); + if (pos < 0) { + return pos; + } + } + pos = rb_str_offset(str, pos); + } + return rb_reg_search(re, str, pos, 0); +} + +/* + * call-seq: + * rxp =~ str => integer or nil + * + * Match---Matches rxp against str. + * + * /at/ =~ "input data" #=> 7 + * /ax/ =~ "input data" #=> nil + * + * If =~ is used with a regexp literal with named captures, + * captured strings (or nil) are assigned to local variables named by + * the capture names. + * + * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ " x = y " + * p lhs #=> "x" + * p rhs #=> "y" + * + * If it is not matched, nil is assigned for the variables. + * + * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ " x = " + * p lhs #=> nil + * p rhs #=> nil + * + * This assignment is implemented in the Ruby parser.
+ * The parser detects 'regexp-literal =~ expression' for the assignment. + * The regexp must be a literal without interpolation and placed at left hand side. + * + * The assignment does not occur if the regexp is not a literal. + * + * re = /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ + * re =~ " x = y " + * p lhs # undefined local variable + * p rhs # undefined local variable + * + * A regexp interpolation, #{}, also disables + * the assignment. + * + * rhs_pat = /(?<rhs>\w+)/ + * /(?<lhs>\w+)\s*=\s*#{rhs_pat}/ =~ "x = y" + * p lhs # undefined local variable + * + * The assignment does not occur if the regexp is placed at right hand side. + * + * " x = y " =~ /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ + * p lhs, rhs # undefined local variable + * + */ + +VALUE +rb_reg_match(VALUE re, VALUE str) +{ + long pos = reg_match_pos(re, &str, 0); + if (pos < 0) return Qnil; + /* NOTE(review): presumably turns the byte offset into a character index - confirm rb_str_sublen */ + pos = rb_str_sublen(str, pos); + return LONG2FIX(pos); +} + +/* + * call-seq: + * rxp === str => true or false + * + * Case Equality---Synonym for Regexp#=~ used in case statements. + * + * a = "HELLO" + * case a + * when /^[a-z]*$/; print "Lower case\n" + * when /^[A-Z]*$/; print "Upper case\n" + * else; print "Mixed case\n" + * end + * + * produces: + * + * Upper case + */ + +VALUE +rb_reg_eqq(VALUE re, VALUE str) +{ + long start; + + /* unlike =~, an operand that is not string-like gives false instead of raising */ + str = reg_operand(str, Qfalse); + if (NIL_P(str)) { + rb_backref_set(Qnil); + return Qfalse; + } + start = rb_reg_search(re, str, 0, 0); + if (start < 0) { + return Qfalse; + } + return Qtrue; +} + + +/* + * call-seq: + * ~ rxp => integer or nil + * + * Match---Matches rxp against the contents of $_. + * Equivalent to rxp =~ $_.
+ * + * $_ = "input data" + * ~ /at/ #=> 7 + */ + +VALUE +rb_reg_match2(VALUE re) +{ + long start; + VALUE line = rb_lastline_get(); + + /* the last read line is not a String: clear the backref and report no match */ + if (TYPE(line) != T_STRING) { + rb_backref_set(Qnil); + return Qnil; + } + + start = rb_reg_search(re, line, 0, 0); + if (start < 0) { + return Qnil; + } + /* NOTE(review): presumably turns the byte offset into a character index - confirm rb_str_sublen */ + start = rb_str_sublen(line, start); + return LONG2FIX(start); +} + + +/* + * call-seq: + * rxp.match(str) => matchdata or nil + * rxp.match(str,pos) => matchdata or nil + * + * Returns a MatchData object describing the match, or + * nil if there was no match. This is equivalent to retrieving the + * value of the special variable $~ following a normal match. + * If the second parameter is present, it specifies the position in the string + * to begin the search. + * + * /(.)(.)(.)/.match("abc")[2] #=> "b" + * /(.)(.)/.match("abc", 1)[2] #=> "c" + * + * If a block is given, invoke the block with MatchData if the match succeeds, so + * that you can write + * + * pat.match(str) {|m| ...} + * + * instead of + * + * if m = pat.match(str) + * ... + * end + * + * The return value is a value from block execution in this case.
+ */ + +static VALUE +rb_reg_match_m(int argc, VALUE *argv, VALUE re) +{ + VALUE result, str, initpos; + long pos; + + /* the start position defaults to 0 when only the string is given */ + if (rb_scan_args(argc, argv, "11", &str, &initpos) == 2) { + pos = NUM2LONG(initpos); + } + else { + pos = 0; + } + + pos = reg_match_pos(re, &str, pos); + if (pos < 0) { + rb_backref_set(Qnil); + return Qnil; + } + /* hand out the MatchData held in the backref slot and mark it busy */ + result = rb_backref_get(); + rb_match_busy(result); + if (!NIL_P(result) && rb_block_given_p()) { + return rb_yield(result); + } + return result; +} + +/* + * Document-method: compile + * + * Synonym for Regexp.new + */ + + +/* + * call-seq: + * Regexp.new(string [, options]) => regexp + * Regexp.new(regexp) => regexp + * Regexp.compile(string [, options]) => regexp + * Regexp.compile(regexp) => regexp + * + * Constructs a new regular expression from pattern, which can be either + * a String or a Regexp (in which case that regexp's + * options are propagated, and new options may not be specified (a change as of + * Ruby 1.8)). If options is a Fixnum, it should be one or + * more of the constants Regexp::EXTENDED, + * Regexp::IGNORECASE, and Regexp::MULTILINE, + * or-ed together. Otherwise, if options is neither + * nil nor false, the regexp will be case insensitive.
+ * + * r1 = Regexp.new('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/ + * r2 = Regexp.new('cat', true) #=> /cat/i + * r3 = Regexp.new('dog', Regexp::EXTENDED) #=> /dog/x + * r4 = Regexp.new(r2) #=> /cat/i + */ + +static VALUE +rb_reg_initialize_m(int argc, VALUE *argv, VALUE self) +{ + onig_errmsg_buffer err = ""; + int flags = 0; + VALUE str; + rb_encoding *enc; + const char *ptr; + long len; + + if (argc == 0 || argc > 3) { + rb_raise(rb_eArgError, "wrong number of arguments"); + } + if (TYPE(argv[0]) == T_REGEXP) { + /* Regexp argument: reuse its source, encoding and options; any further argument is ignored with a warning */ + VALUE re = argv[0]; + + if (argc > 1) { + rb_warn("flags ignored"); + } + rb_reg_check(re); + flags = rb_reg_options(re); + ptr = RREGEXP_SRC_PTR(re); + len = RREGEXP_SRC_LEN(re); + enc = rb_enc_get(re); + if (rb_reg_initialize(self, ptr, len, enc, flags, err)) { + str = rb_enc_str_new(ptr, len, enc); + rb_reg_raise_str(str, flags, err); + } + } + else { + if (argc >= 2) { + if (FIXNUM_P(argv[1])) flags = FIX2INT(argv[1]); + else if (RTEST(argv[1])) flags = ONIG_OPTION_IGNORECASE; + } + enc = 0; + if (argc == 3 && !NIL_P(argv[2])) { + char *kcode = StringValuePtr(argv[2]); + /* only the first character is inspected: 'n' or 'N' selects "no encoding" (ASCII-8BIT) */ + if (kcode[0] == 'n' || kcode[0] == 'N') { + enc = rb_ascii8bit_encoding(); + flags |= ARG_ENCODING_NONE; + } + else { + rb_warn("encoding option is ignored - %s", kcode); + } + } + str = argv[0]; + ptr = StringValuePtr(str); + if (enc + ?
rb_reg_initialize(self, ptr, RSTRING_LEN(str), enc, flags, err) + : rb_reg_initialize_str(self, str, flags, err)) { + rb_reg_raise_str(str, flags, err); + } + } + return self; +} + +/* + * Returns a copy of str in which regexp metacharacters are preceded by a + * backslash and space, tab, newline, carriage return, form feed and + * vertical tab are written as backslash escapes. When str contains none + * of these characters the result is a plain rb_str_new3() copy. A result + * built from an ASCII-only str is associated with US-ASCII; otherwise, + * when escaping took place, the encoding of str is copied. + */ +VALUE +rb_reg_quote(VALUE str) +{ + rb_encoding *enc = rb_enc_get(str); + char *s, *send, *t; + VALUE tmp; + int c, clen; + int ascii_only = rb_enc_str_asciionly_p(str); + + /* first pass: look for the first character that needs escaping */ + s = RSTRING_PTR(str); + send = s + RSTRING_LEN(str); + while (s < send) { + c = rb_enc_ascget(s, send, &clen, enc); + if (c == -1) { + s += mbclen(s, send, enc); + continue; + } + switch (c) { + case '[': case ']': case '{': case '}': + case '(': case ')': case '|': case '-': + case '*': case '.': case '\\': + case '?': case '+': case '^': case '$': + case ' ': case '#': + case '\t': case '\f': case '\v': case '\n': case '\r': + goto meta_found; + } + s += clen; + } + tmp = rb_str_new3(str); + if (ascii_only) { + rb_enc_associate(tmp, rb_usascii_encoding()); + } + return tmp; + + meta_found: + /* second pass: the output buffer is sized at twice the input length */ + tmp = rb_str_new(0, RSTRING_LEN(str)*2); + if (ascii_only) { + rb_enc_associate(tmp, rb_usascii_encoding()); + } + else { + rb_enc_copy(tmp, str); + } + t = RSTRING_PTR(tmp); + /* copy upto metacharacter */ + memcpy(t, RSTRING_PTR(str), s - RSTRING_PTR(str)); + t += s - RSTRING_PTR(str); + + while (s < send) { + c = rb_enc_ascget(s, send, &clen, enc); + if (c == -1) { + int n = mbclen(s, send, enc); + + while (n--) + *t++ = *s++; + continue; + } + s += clen; + switch (c) { + case '[': case ']': case '{': case '}': + case '(': case ')': case '|': case '-': + case '*': case '.': case '\\': + case '?': case '+': case '^': case '$': + case '#': + t += rb_enc_mbcput('\\', t, enc); + break; + case ' ': + t += rb_enc_mbcput('\\', t, enc); + t += rb_enc_mbcput(' ', t, enc); + continue; + case '\t': + t += rb_enc_mbcput('\\', t, enc); + t += rb_enc_mbcput('t', t, enc); + continue; + case '\n': + t += rb_enc_mbcput('\\', t, enc); + t += rb_enc_mbcput('n', t, enc); + continue; + case '\r': + t += rb_enc_mbcput('\\', t, enc); + t +=
rb_enc_mbcput('r', t, enc); + continue; + case '\f': + t += rb_enc_mbcput('\\', t, enc); + t += rb_enc_mbcput('f', t, enc); + continue; + case '\v': + t += rb_enc_mbcput('\\', t, enc); + t += rb_enc_mbcput('v', t, enc); + continue; + } + /* reached after a break above: emit the metacharacter itself behind its backslash, or any ordinary character */ + t += rb_enc_mbcput(c, t, enc); + } + rb_str_resize(tmp, t - RSTRING_PTR(tmp)); + OBJ_INFECT(tmp, str); + return tmp; +} + + +/* + * call-seq: + * Regexp.escape(str) => string + * Regexp.quote(str) => string + * + * Escapes any characters that would have special meaning in a regular + * expression. Returns a new escaped string, or self if no characters are + * escaped. For any string, + * Regexp.new(Regexp.escape(str))=~str will be true. + * + * Regexp.escape('\*?{}.') #=> \\\*\?\{\}\. + * + */ + +static VALUE +rb_reg_s_quote(VALUE c, VALUE str) +{ + return rb_reg_quote(reg_operand(str, Qtrue)); +} + +/* + * Returns the option bits of re: the compiled options masked with + * ARG_REG_OPTION_MASK, plus ARG_ENCODING_FIXED and ARG_ENCODING_NONE when + * the KCODE_FIXED and REG_ENCODING_NONE object flags are set. + */ +int +rb_reg_options(VALUE re) +{ + int options; + + rb_reg_check(re); + options = RREGEXP(re)->ptr->options & ARG_REG_OPTION_MASK; + if (RBASIC(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED; + if (RBASIC(re)->flags & REG_ENCODING_NONE) options |= ARG_ENCODING_NONE; + return options; +} + +/* + * Attempts to convert re to a Regexp by calling to_regexp through + * rb_check_convert_type(). + */ +VALUE +rb_check_regexp_type(VALUE re) +{ + return rb_check_convert_type(re, T_REGEXP, "Regexp", "to_regexp"); +} + +/* + * call-seq: + * Regexp.try_convert(obj) -> re or nil + * + * Try to convert obj into a Regexp, using to_regexp method. + * Returns converted regexp or nil if obj cannot be converted + * for any reason.
+ * + * Regexp.try_convert(/re/) #=> /re/ + * Regexp.try_convert("re") #=> nil + * + * o = Object.new + * Regexp.try_convert(o) #=> nil + * def o.to_regexp() /foo/ end + * Regexp.try_convert(o) #=> /foo/ + * + */ +static VALUE +rb_reg_s_try_convert(VALUE dummy, VALUE re) +{ + return rb_check_regexp_type(re); +} + +/* + * Builds the union Regexp for the pattern list args0. An empty list gives + * a Regexp made from "(?!)". A single pattern is returned as is when it + * converts to a Regexp, otherwise it is quoted and compiled. Several + * patterns are joined with "|", Regexps through rb_reg_to_s() and + * everything else through rb_reg_s_quote(), after checking that their + * encodings are compatible (ArgumentError otherwise). + */ +static VALUE +rb_reg_s_union(VALUE self, VALUE args0) +{ + long argc = RARRAY_LEN(args0); + + if (argc == 0) { + VALUE args[1]; + args[0] = rb_str_new2("(?!)"); + return rb_class_new_instance(1, args, rb_cRegexp); + } + else if (argc == 1) { + VALUE arg = rb_ary_entry(args0, 0); + VALUE re = rb_check_regexp_type(arg); + if (!NIL_P(re)) + return re; + else { + VALUE quoted; + quoted = rb_reg_s_quote(Qnil, arg); + return rb_reg_new_str(quoted, 0); + } + } + else { + int i; + VALUE source = rb_str_buf_new(0); + rb_encoding *result_enc; + + /* encoding bookkeeping: an ASCII-only part was seen / the fixed ASCII-compatible encoding seen / the ASCII-incompatible encoding seen */ + int has_asciionly = 0; + rb_encoding *has_ascii_compat_fixed = 0; + rb_encoding *has_ascii_incompat = 0; + + for (i = 0; i < argc; i++) { + volatile VALUE v; + VALUE e = rb_ary_entry(args0, i); + + if (0 < i) + rb_str_buf_cat_ascii(source, "|"); + + v = rb_check_regexp_type(e); + if (!NIL_P(v)) { + rb_encoding *enc = rb_enc_get(v); + if (!rb_enc_asciicompat(enc)) { + if (!has_ascii_incompat) + has_ascii_incompat = enc; + else if (has_ascii_incompat != enc) + rb_raise(rb_eArgError, "incompatible encodings: %s and %s", + rb_enc_name(has_ascii_incompat), rb_enc_name(enc)); + } + else if (rb_reg_fixed_encoding_p(v)) { + if (!has_ascii_compat_fixed) + has_ascii_compat_fixed = enc; + else if (has_ascii_compat_fixed != enc) + rb_raise(rb_eArgError, "incompatible encodings: %s and %s", + rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc)); + } + else { + has_asciionly = 1; + } + v = rb_reg_to_s(v); + } + else { + rb_encoding *enc; + StringValue(e); + enc = rb_enc_get(e); + if (!rb_enc_str_asciicompat_p(e)) { + if (!has_ascii_incompat) + has_ascii_incompat = enc; + else if (has_ascii_incompat != enc) + rb_raise(rb_eArgError,
"incompatible encodings: %s and %s", + rb_enc_name(has_ascii_incompat), rb_enc_name(enc)); + } + else if (rb_enc_str_asciionly_p(e)) { + has_asciionly = 1; + } + else { + if (!has_ascii_compat_fixed) + has_ascii_compat_fixed = enc; + else if (has_ascii_compat_fixed != enc) + rb_raise(rb_eArgError, "incompatible encodings: %s and %s", + rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc)); + } + v = rb_reg_s_quote(Qnil, e); + } + /* an ASCII-incompatible encoding cannot be mixed with ASCII-only parts or with a fixed ASCII-compatible encoding */ + if (has_ascii_incompat) { + if (has_asciionly) { + rb_raise(rb_eArgError, "ASCII incompatible encoding: %s", + rb_enc_name(has_ascii_incompat)); + } + if (has_ascii_compat_fixed) { + rb_raise(rb_eArgError, "incompatible encodings: %s and %s", + rb_enc_name(has_ascii_incompat), rb_enc_name(has_ascii_compat_fixed)); + } + } + + if (i == 0) { + rb_enc_copy(source, v); + } + rb_str_append(source, v); + } + + /* result encoding: the ASCII-incompatible one first, then the fixed ASCII-compatible one, else ASCII-8BIT */ + if (has_ascii_incompat) { + result_enc = has_ascii_incompat; + } + else if (has_ascii_compat_fixed) { + result_enc = has_ascii_compat_fixed; + } + else { + result_enc = rb_ascii8bit_encoding(); + } + + rb_enc_associate(source, result_enc); + return rb_class_new_instance(1, &source, rb_cRegexp); + } +} + +/* + * call-seq: + * Regexp.union(pat1, pat2, ...) => new_regexp + * Regexp.union(pats_ary) => new_regexp + * + * Return a Regexp object that is the union of the given + * patterns, i.e., will match any of its parts. The patterns + * can be Regexp objects, in which case their options will be preserved, or + * Strings. If no patterns are given, returns /(?!)/.
+ *
+ *     Regexp.union                         #=> /(?!)/
+ *     Regexp.union("penzance")             #=> /penzance/
+ *     Regexp.union("a+b*c")                #=> /a\+b\*c/
+ *     Regexp.union("skiing", "sledding")   #=> /skiing|sledding/
+ *     Regexp.union(["skiing", "sledding"]) #=> /skiing|sledding/
+ *     Regexp.union(/dogs/, /cats/i)        #=> /(?-mix:dogs)|(?i-mx:cats)/
+ */
+static VALUE
+rb_reg_s_union_m(VALUE self, VALUE args)
+{
+    VALUE v;
+    /* a single argument that converts to an Array is used as the pattern list */
+    if (RARRAY_LEN(args) == 1 &&
+        !NIL_P(v = rb_check_array_type(rb_ary_entry(args, 0)))) {
+        return rb_reg_s_union(self, v);
+    }
+    return rb_reg_s_union(self, args);
+}
+
+/* :nodoc: */
+static VALUE
+rb_reg_init_copy(VALUE copy, VALUE re)
+{
+    onig_errmsg_buffer err = "";
+    const char *s;
+    long len;
+
+    /* copying an object onto itself is a no-op */
+    if (copy == re) return copy;
+    rb_check_frozen(copy);
+    /* need better argument type check */
+    if (!rb_obj_is_instance_of(re, rb_obj_class(copy))) {
+        rb_raise(rb_eTypeError, "wrong argument type");
+    }
+    rb_reg_check(re);
+    /* re-initialize copy from re's source bytes, encoding and options */
+    s = RREGEXP_SRC_PTR(re);
+    len = RREGEXP_SRC_LEN(re);
+    if (rb_reg_initialize(copy, s, len, rb_enc_get(re), rb_reg_options(re), err) != 0) {
+        rb_reg_raise(s, len, err, re);
+    }
+    return copy;
+}
+
+/*
+ * Expand the backslash escapes found in the replacement string str.
+ *
+ *   str     replacement text that is scanned for escapes
+ *   src     string the match was made against; matched text is copied from it
+ *   regs    match registers, read through BEG()/END()
+ *   regexp  pattern used for the match (consulted for \1..\9 and \k<name>)
+ *
+ * Escapes handled by the switch below: \1..\9, \k<name>, \0 and \&, \`,
+ * \', \+ and \\.  Any other escape is copied through unchanged.
+ *
+ * Returns str itself when no escape was processed (val is never allocated),
+ * otherwise a new string buffer holding the expanded text.
+ */
+VALUE
+rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
+{
+    VALUE val = 0;
+    char *p, *s, *e;
+    int no, clen;
+    rb_encoding *str_enc = rb_enc_get(str);
+    rb_encoding *src_enc = rb_enc_get(src);
+    int acompat = rb_enc_asciicompat(str_enc);
+#define ASCGET(s,e,cl) (acompat ? (*cl=1,ISASCII(s[0])?s[0]:-1) : rb_enc_ascget(s, e, cl, str_enc))
+
+    /* p: start of the text not yet copied to val; s: scan position; e: end of str */
+    p = s = RSTRING_PTR(str);
+    e = s + RSTRING_LEN(str);
+
+    while (s < e) {
+        int c = ASCGET(s, e, &clen);
+        char *ss;
+
+        if (c == -1) {
+            s += mbclen(s, e, str_enc);
+            continue;
+        }
+        ss = s;
+        s += clen;
+
+        /* only a backslash that is followed by something starts an escape */
+        if (c != '\\' || s == e) continue;
+
+        if (!val) {
+            val = rb_str_buf_new(ss-p);
+        }
+        /* flush the literal text that precedes the backslash */
+        rb_enc_str_buf_cat(val, p, ss-p, str_enc);
+
+        c = ASCGET(s, e, &clen);
+        if (c == -1) {
+            s += mbclen(s, e, str_enc);
+            rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
+            p = s;
+            continue;
+        }
+        s += clen;
+
+        p = s;
+        switch (c) {
+          case '1': case '2': case '3': case '4':
+          case '5': case '6': case '7': case '8': case '9':
+            if (onig_noname_group_capture_is_active(RREGEXP(regexp)->ptr)) {
+                no = c - '0';
+            }
+            else {
+                /* nothing is emitted for the escape in this case */
+                continue;
+            }
+            break;
+
+          case 'k':
+            if (s < e && ASCGET(s, e, &clen) == '<') {
+                char *name, *name_end;
+
+                /* scan for the closing '>' of \k<name> */
+                name_end = name = s + clen;
+                while (name_end < e) {
+                    c = ASCGET(name_end, e, &clen);
+                    if (c == '>') break;
+                    name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
+                }
+                if (name_end < e) {
+                    no = name_to_backref_number(regs, regexp, name, name_end);
+                    p = s = name_end + clen;
+                    break;
+                }
+                else {
+                    rb_raise(rb_eRuntimeError, "invalid group name reference format");
+                }
+            }
+
+            /* "\k" without '<' is copied through as-is */
+            rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
+            continue;
+
+          case '0':
+          case '&':
+            no = 0;
+            break;
+
+          case '`':
+            /* text of src before the match */
+            rb_enc_str_buf_cat(val, RSTRING_PTR(src), BEG(0), src_enc);
+            continue;
+
+          case '\'':
+            /* text of src after the match */
+            rb_enc_str_buf_cat(val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0), src_enc);
+            continue;
+
+          case '+':
+            /* highest-numbered register that is set; nothing is emitted if only register 0 remains */
+            no = regs->num_regs-1;
+            while (BEG(no) == -1 && no > 0) no--;
+            if (no == 0) continue;
+            break;
+
+          case '\\':
+            rb_enc_str_buf_cat(val, s-clen, clen, str_enc);
+            continue;
+
+          default:
+            rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
+            continue;
+        }
+
+        /* copy the selected register, skipping out-of-range or unset ones */
+        if (no >= 0) {
+            if (no >= regs->num_regs) continue;
+            if (BEG(no) == -1) continue;
+            rb_enc_str_buf_cat(val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no), src_enc);
+        }
+    }
+
+    if (!val) return str;
+    if (p < e) {
+        rb_enc_str_buf_cat(val, p, e-p, str_enc);
+    }
+
+    return val;
+}
+
+/* Getter for the obsolete $KCODE / $-K variables: warns and yields nil. */
+static VALUE
+kcode_getter(void)
+{
+    rb_warn("variable $KCODE is no longer effective");
+    return Qnil;
+}
+
+/* Setter for the obsolete $KCODE / $-K variables: warns and discards val. */
+static void
+kcode_setter(VALUE val, ID id)
+{
+    rb_warn("variable $KCODE is no longer effective; ignored");
+}
+
+/* Getter for the obsolete $= variable: warns and yields false. */
+static VALUE
+ignorecase_getter(void)
+{
+    rb_warn("variable $= is no longer effective");
+    return Qfalse;
+}
+
+/* Setter for the obsolete $= variable: warns and discards val. */
+static void
+ignorecase_setter(VALUE val, ID id)
+{
+    rb_warn("variable $= is no longer effective; ignored");
+}
+
+/* Getter for $~: the current backref, flagged via rb_match_busy() before it is handed out. */
+static VALUE
+match_getter(void)
+{
+    VALUE match = rb_backref_get();
+
+    if (NIL_P(match)) return Qnil;
+    rb_match_busy(match);
+    return match;
+}
+
+/* Setter for $~: accepts nil or a T_MATCH object (Check_Type rejects anything else). */
+static void
+match_setter(VALUE val)
+{
+    if (!NIL_P(val)) {
+        Check_Type(val, T_MATCH);
+    }
+    rb_backref_set(val);
+}
+
+/*
+ *  call-seq:
+ *     Regexp.last_match           => matchdata
+ *     Regexp.last_match(n)        => str
+ *
+ *  The first form returns the MatchData object generated by the
+ *  last
successful pattern match. Equivalent to reading the global variable
+ *  $~. The second form returns the nth field in this
+ *  MatchData object.
+ *  n can be a string or symbol to reference a named capture.
+ *
+ *     /c(.)t/ =~ 'cat'        #=> 0
+ *     Regexp.last_match       #=> #<MatchData "cat" 1:"a">
+ *     Regexp.last_match(0)    #=> "cat"
+ *     Regexp.last_match(1)    #=> "a"
+ *     Regexp.last_match(2)    #=> nil
+ *
+ *     /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "var = val"
+ *     Regexp.last_match       #=> #<MatchData "var = val" lhs:"var" rhs:"val">
+ *     Regexp.last_match(:lhs) #=> "var"
+ *     Regexp.last_match(:rhs) #=> "val"
+ */
+
+static VALUE
+rb_reg_s_last_match(int argc, VALUE *argv)
+{
+    VALUE nth;
+
+    /* with an argument: resolve it to a group number and return that group */
+    if (argc > 0 && rb_scan_args(argc, argv, "01", &nth) == 1) {
+        VALUE match = rb_backref_get();
+        int n;
+        if (NIL_P(match)) return Qnil;
+        n = match_backref_number(match, nth);
+        return rb_reg_nth_match(n, match);
+    }
+    /* without an argument: same value the $~ getter hands out */
+    return match_getter();
+}
+
+/* Warning callback: passes s to rb_warn as data, never as the format string. */
+static void
+re_warn(const char *s)
+{
+    rb_warn("%s", s);
+}
+
+/*
+ *  Document-class: Regexp
+ *
+ *  A Regexp holds a regular expression, used to match a pattern
+ *  against strings. Regexps are created using the /.../ and
+ *  %r{...} literals, and by the Regexp::new
+ *  constructor.
+ *
+ */
+
+/*
+ * Set up the regexp subsystem: defines RegexpError, configures the
+ * Oniguruma defaults and warning callbacks, registers the match-related
+ * virtual global variables, and defines the Regexp and MatchData classes
+ * together with their methods and constants.
+ */
+void
+Init_Regexp(void)
+{
+    rb_eRegexpError = rb_define_class("RegexpError", rb_eStandardError);
+
+    onigenc_set_default_caseconv_table((UChar*)casetable);
+    onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
+    onig_set_warn_func(re_warn);
+    onig_set_verb_warn_func(re_warn);
+
+    /* $~ is the only match variable with a setter; the rest are read-only */
+    rb_define_virtual_variable("$~", match_getter, match_setter);
+    rb_define_virtual_variable("$&", last_match_getter, 0);
+    rb_define_virtual_variable("$`", prematch_getter, 0);
+    rb_define_virtual_variable("$'", postmatch_getter, 0);
+    rb_define_virtual_variable("$+", last_paren_match_getter, 0);
+
+    /* obsolete variables: their accessors only emit a warning */
+    rb_define_virtual_variable("$=", ignorecase_getter, ignorecase_setter);
+    rb_define_virtual_variable("$KCODE", kcode_getter, kcode_setter);
+    rb_define_virtual_variable("$-K", kcode_getter, kcode_setter);
+
+    rb_cRegexp = rb_define_class("Regexp", rb_cObject);
+    rb_define_alloc_func(rb_cRegexp, rb_reg_s_alloc);
+    rb_define_singleton_method(rb_cRegexp, "compile", rb_class_new_instance, -1);
+    rb_define_singleton_method(rb_cRegexp, "quote", rb_reg_s_quote, 1);
+    rb_define_singleton_method(rb_cRegexp, "escape", rb_reg_s_quote, 1);
+    rb_define_singleton_method(rb_cRegexp, "union", rb_reg_s_union_m, -2);
+    rb_define_singleton_method(rb_cRegexp, "last_match", rb_reg_s_last_match, -1);
+    rb_define_singleton_method(rb_cRegexp, "try_convert", rb_reg_s_try_convert, 1);
+
+    rb_define_method(rb_cRegexp, "initialize", rb_reg_initialize_m, -1);
+    rb_define_method(rb_cRegexp, "initialize_copy", rb_reg_init_copy, 1);
+    rb_define_method(rb_cRegexp, "hash", rb_reg_hash, 0);
+    rb_define_method(rb_cRegexp, "eql?", rb_reg_equal, 1);
+    rb_define_method(rb_cRegexp, "==", rb_reg_equal, 1);
+    rb_define_method(rb_cRegexp, "=~", rb_reg_match, 1);
+    rb_define_method(rb_cRegexp, "===", rb_reg_eqq, 1);
+    rb_define_method(rb_cRegexp, "~", rb_reg_match2, 0);
+    rb_define_method(rb_cRegexp, "match", rb_reg_match_m, -1);
+    rb_define_method(rb_cRegexp, "to_s", rb_reg_to_s, 0);
+    rb_define_method(rb_cRegexp, "inspect", rb_reg_inspect, 0);
+    rb_define_method(rb_cRegexp, "source", rb_reg_source, 0);
+    rb_define_method(rb_cRegexp, "casefold?", rb_reg_casefold_p, 0);
+    rb_define_method(rb_cRegexp, "options", rb_reg_options_m, 0);
+    rb_define_method(rb_cRegexp, "encoding", rb_obj_encoding, 0); /* in encoding.c */
+    rb_define_method(rb_cRegexp, "fixed_encoding?", rb_reg_fixed_encoding_p, 0);
+    rb_define_method(rb_cRegexp, "names", rb_reg_names, 0);
+    rb_define_method(rb_cRegexp, "named_captures", rb_reg_named_captures, 0);
+
+    rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE));
+    rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(ONIG_OPTION_EXTEND));
+    rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(ONIG_OPTION_MULTILINE));
+
+    /* pass the address of reg_cache (the text had been mangled into a (R) sign) */
+    rb_global_variable(&reg_cache);
+
+    rb_cMatch = rb_define_class("MatchData", rb_cObject);
+    rb_define_alloc_func(rb_cMatch, match_alloc);
+    /* MatchData.new is removed from the class */
+    rb_undef_method(CLASS_OF(rb_cMatch), "new");
+
+    rb_define_method(rb_cMatch, "initialize_copy", match_init_copy, 1);
+    rb_define_method(rb_cMatch, "regexp", match_regexp, 0);
+    rb_define_method(rb_cMatch, "names", match_names, 0);
+    rb_define_method(rb_cMatch, "size", match_size, 0);
+    rb_define_method(rb_cMatch, "length", match_size, 0);
+    rb_define_method(rb_cMatch, "offset", match_offset, 1);
+    rb_define_method(rb_cMatch, "begin", match_begin, 1);
+    rb_define_method(rb_cMatch, "end", match_end, 1);
+    rb_define_method(rb_cMatch, "to_a", match_to_a, 0);
+    rb_define_method(rb_cMatch, "[]", match_aref, -1);
+    rb_define_method(rb_cMatch, "captures", match_captures, 0);
+    rb_define_method(rb_cMatch, "values_at", match_values_at, -1);
+    rb_define_method(rb_cMatch, "pre_match", rb_reg_match_pre, 0);
+    rb_define_method(rb_cMatch, "post_match", rb_reg_match_post, 0);
+    rb_define_method(rb_cMatch, "to_s", match_to_s, 0);
+    rb_define_method(rb_cMatch, "inspect", match_inspect, 0);
+    rb_define_method(rb_cMatch, "string", match_string, 0);
+}
diff --git a/regcomp.c b/regcomp.c
new file mode 100644
index 
0000000..cb54c44 --- /dev/null +++ b/regcomp.c @@ -0,0 +1,6311 @@ +/********************************************************************** + regcomp.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#include "regparse.h"
+
+OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
+
+/* Return the current value of OnigDefaultCaseFoldFlag. */
+extern OnigCaseFoldType
+onig_get_default_case_fold_flag(void)
+{
+  return OnigDefaultCaseFoldFlag;
+}
+
+/* Replace OnigDefaultCaseFoldFlag with case_fold_flag; always returns 0. */
+extern int
+onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
+{
+  OnigDefaultCaseFoldFlag = case_fold_flag;
+  return 0;
+}
+
+
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
+#endif
+
+/*
+ * Copy the bytes in [s, end) into a fresh buffer with a trailing 0 byte.
+ * Returns NULL when the range is empty.
+ */
+static UChar*
+str_dup(UChar* s, UChar* end)
+{
+  int len = end - s;
+
+  if (len > 0) {
+    UChar* r = (UChar* )xmalloc(len + 1);
+    CHECK_NULL_RETURN(r);
+    xmemcpy(r, s, len);
+    r[len] = (UChar )0;
+    return r;
+  }
+  else return NULL;
+}
+
+/*
+ * Exchange the contents of *a and *b.  For a string node with capa == 0
+ * the s/end pointers are re-based onto that node's own buf afterwards,
+ * keeping the same length.
+ */
+static void
+swap_node(Node* a, Node* b)
+{
+  Node c;
+  c = *a; *a = *b; *b = c;
+
+  if (NTYPE(a) == NT_STR) {
+    StrNode* sn = NSTR(a);
+    if (sn->capa == 0) {
+      int len = sn->end - sn->s;
+      sn->s   = sn->buf;
+      sn->end = sn->s + len;
+    }
+  }
+
+  if (NTYPE(b) == NT_STR) {
+    StrNode* sn = NSTR(b);
+    if (sn->capa == 0) {
+      int len = sn->end - sn->s;
+      sn->s   = sn->buf;
+      sn->end = sn->s + len;
+    }
+  }
+}
+
+/* Add two distances, saturating at ONIG_INFINITE_DISTANCE. */
+static OnigDistance
+distance_add(OnigDistance d1, OnigDistance d2)
+{
+  if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
+    return ONIG_INFINITE_DISTANCE;
+  else {
+    if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
+    else return ONIG_INFINITE_DISTANCE;
+  }
+}
+
+/* Multiply d by m; yields ONIG_INFINITE_DISTANCE unless d is below ONIG_INFINITE_DISTANCE / m. */
+static OnigDistance
+distance_multiply(OnigDistance d, int m)
+{
+  if (m == 0) return 0;
+
+  if (d < ONIG_INFINITE_DISTANCE / m)
+    return d * m;
+  else
+    return ONIG_INFINITE_DISTANCE;
+}
+
+/* Return 1 when every one of the BITSET_SIZE elements of bs is 0, else 0. */
+static int
+bitset_is_empty(BitSetRef bs)
+{
+  int i;
+  for (i = 0; i < (int )BITSET_SIZE; i++) {
+    if (bs[i] != 0) return 0;
+  }
+  return 1;
+}
+
+#ifdef ONIG_DEBUG
+/* Count the positions below SINGLE_BYTE_SIZE for which BITSET_AT() is set. */
+static int
+bitset_on_num(BitSetRef bs)
+{
+  int i, n;
+
+  n = 0;
+  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+    if (BITSET_AT(bs, i)) n++;
+  }
+  return n;
+}
+#endif
+
+/*
+ * Initialise buf with size bytes of storage (none when size <= 0).
+ * Returns 0, or ONIGERR_MEMORY when the allocation yields NULL.
+ */
+extern int
+onig_bbuf_init(BBuf* buf, int size)
+{
+  if (size <= 0) {
+    size   = 0;
+    buf->p = NULL;
+  }
+  else {
+    buf->p = (UChar* )xmalloc(size);
+    if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
+  }
+
+  buf->alloc = size;
+  buf->used  = 0;
+  return 0;
+}
+
+
+#ifdef USE_SUBEXP_CALL
+
+/* Allocate room for size UnsetAddr entries and reset the list to empty. */
+static int
+unset_addr_list_init(UnsetAddrList* uslist, int size)
+{
+  UnsetAddr* p;
+
+  p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
+  CHECK_NULL_RETURN_MEMERR(p);
+  uslist->num   = 0;
+  uslist->alloc = size;
+  uslist->us    = p;
+  return 0;
+}
+
+/* Free the entry array of uslist if there is one. */
+static void
+unset_addr_list_end(UnsetAddrList* uslist)
+{
+  if (IS_NOT_NULL(uslist->us))
+    xfree(uslist->us);
+}
+
+/* Append an (offset, node) entry, doubling the allocation when the list is full. */
+static int
+unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
+{
+  UnsetAddr* p;
+  int size;
+
+  if (uslist->num >= uslist->alloc) {
+    size = uslist->alloc * 2;
+    p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
+    CHECK_NULL_RETURN_MEMERR(p);
+    uslist->alloc = size;
+    uslist->us    = p;
+  }
+
+  uslist->us[uslist->num].offset = offset;
+  uslist->us[uslist->num].target = node;
+  uslist->num++;
+  return 0;
+}
+#endif /* USE_SUBEXP_CALL */
+
+
+/*
+ * The add_* helpers below each append one item to reg through the
+ * BBUF_ADD / BBUF_ADD1 macros and return 0 when they run to the end.
+ */
+static int
+add_opcode(regex_t* reg, int opcode)
+{
+  BBUF_ADD1(reg, opcode);
+  return 0;
+}
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+static int
+add_state_check_num(regex_t* reg, int num)
+{
+  StateCheckNumType n = (StateCheckNumType )num;
+
+  BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
+  return 0;
+}
+#endif
+
+static int
+add_rel_addr(regex_t* reg, int addr)
+{
+  RelAddrType ra = (RelAddrType )addr;
+
+  BBUF_ADD(reg, &ra, SIZE_RELADDR);
+  return 0;
+}
+
+static int
+add_abs_addr(regex_t* reg, int addr)
+{
+  AbsAddrType ra = (AbsAddrType )addr;
+
+  BBUF_ADD(reg, &ra, SIZE_ABSADDR);
+  return 0;
+}
+
+static int
+add_length(regex_t* reg, int len)
+{
+  LengthType l = (LengthType )len;
+
+  BBUF_ADD(reg, &l, SIZE_LENGTH);
+  return 0;
+}
+
+static int
+add_mem_num(regex_t* reg, int num)
+{
+  MemNumType n = (MemNumType )num;
+
+  BBUF_ADD(reg, &n, SIZE_MEMNUM);
+  return 0;
+}
+
+static int
+add_pointer(regex_t* reg, void* addr)
+{
+  PointerType ptr = (PointerType )addr;
+
+  BBUF_ADD(reg, &ptr, SIZE_POINTER);
+  return 0;
+}
+
+static int
+add_option(regex_t* reg, OnigOptionType option)
+{
+  BBUF_ADD(reg, &option, SIZE_OPTION);
+  return 0;
+}
+
+/* Append opcode followed by the relative address addr; stops at the first non-zero result. */
+static int
+add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
+{
+  int r;
+
+  r = add_opcode(reg, opcode);
+  if (r) return r;
+  r = add_rel_addr(reg, addr);
+  return r;
+}
+
+static int
+add_bytes(regex_t* reg, UChar* bytes, int len)
+{
+  BBUF_ADD(reg, bytes, len);
+  return 0;
+}
+
+static int
+add_bitset(regex_t* reg, BitSetRef bs)
+{
+  BBUF_ADD(reg, bs, SIZE_BITSET);
+  return 0;
+}
+
+/* Append opcode followed by option; stops at the first non-zero result. */
+static int
+add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
+{
+  int r;
+
+  r = add_opcode(reg, opcode);
+  if (r) return r;
+  r = add_option(reg, option);
+  return r;
+}
+
+static int compile_length_tree(Node* node, regex_t* reg);
+static int compile_tree(Node* node, regex_t* reg);
+
+
+#define IS_NEED_STR_LEN_OP_EXACT(op) \
+   ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
+    (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)
+
+/*
+ * Pick the OP_EXACT* opcode for a run of str_len characters that are each
+ * mb_len bytes long.  When ignore_case is set only str_len is consulted.
+ */
+static int
+select_str_opcode(int mb_len, int str_len, int ignore_case)
+{
+  int op;
+
+  if (ignore_case) {
+    switch (str_len) {
+    case 1:  op = OP_EXACT1_IC; break;
+    default: op = OP_EXACTN_IC; break;
+    }
+  }
+  else {
+    switch (mb_len) {
+    case 1:
+      switch (str_len) {
+      case 1:  op = OP_EXACT1; break;
+      case 2:  op = OP_EXACT2; break;
+      case 3:  op = OP_EXACT3; break;
+      case 4:  op = OP_EXACT4; break;
+      case 5:  op = OP_EXACT5; break;
+      default: op = OP_EXACTN; break;
+      }
+      break;
+
+    case 2:
+      switch (str_len) {
+      case 1:  op = OP_EXACTMB2N1; break;
+      case 2:  op = OP_EXACTMB2N2; break;
+      case 3:  op = OP_EXACTMB2N3; break;
+      default: op = OP_EXACTMB2N;  break;
+      }
+      break;
+
+    case 3:
+      op = OP_EXACTMB3N;
+      break;
+
+    default:
+      op = OP_EXACTMBN;
+      break;
+    }
+  }
+  return op;
+}
+
+/*
+ * Compile node.  When empty_info is non-zero the code is bracketed by an
+ * OP_NULL_CHECK_START and the OP_NULL_CHECK_END* variant selected by
+ * empty_info, both tagged with the same null-check id.
+ */
+static int
+compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
+{
+  int r;
+  /* remembered before the increment below so START and END share one id */
+  int saved_num_null_check = reg->num_null_check;
+
+  if (empty_info != 0) {
+    r = add_opcode(reg, OP_NULL_CHECK_START);
+    if (r) return r;
+    r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
+    if (r) return r;
+    reg->num_null_check++;
+  }
+
+  r = compile_tree(node, reg);
+  if (r) return r;
+
+  if (empty_info != 0) {
+    if (empty_info == NQ_TARGET_IS_EMPTY)
+      r = add_opcode(reg, OP_NULL_CHECK_END);
+    else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
+      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
+    else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
+      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
+
+    if (r) return r;
+    r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
+  }
+  return r;
+}
+
+#ifdef USE_SUBEXP_CALL
+/*
+ * Emit OP_CALL with a placeholder absolute address, recording the
+ * position of that address and the call target in the node's
+ * unset_addr_list.
+ */
+static int
+compile_call(CallNode* node, regex_t* reg)
+{
+  int r;
+
+  r = add_opcode(reg, OP_CALL);
+  if (r) return r;
+  r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
+                          node->target);
+  if (r) return r;
+  r = add_abs_addr(reg, 0 /*dummy addr.*/);
+  return r;
+}
+#endif
+
+/* Compile node n times in a row; stops at the first non-zero result. */
+static int
+compile_tree_n_times(Node* node, int n, regex_t* reg)
+{
+  int i, r;
+
+  for (i = 0; i < n; i++) {
+    r = compile_tree(node, reg);
+    if (r) return r;
+  }
+  return 0;
+}
+
+/*
+ * Size in bytes of what add_compile_string() emits for the same
+ * mb_len / str_len / ignore_case arguments.
+ */
+static int
+add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
+                          regex_t* reg ARG_UNUSED, int ignore_case)
+{
+  int len;
+  int op = select_str_opcode(mb_len, str_len, ignore_case);
+
+  len = SIZE_OPCODE;
+
+  if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
+  if (IS_NEED_STR_LEN_OP_EXACT(op))
+    len += SIZE_LENGTH;
+
+  len += mb_len * str_len;
+  return len;
+}
+
+/*
+ * Emit the opcode chosen by select_str_opcode(), its length operand(s)
+ * where the opcode needs them, and then the mb_len * str_len bytes at s.
+ *
+ * NOTE(review): the results of add_opcode/add_length/add_bytes are
+ * discarded here and 0 is always returned -- confirm that is intended.
+ */
+static int
+add_compile_string(UChar* s, int mb_len, int str_len,
+                   regex_t* reg, int ignore_case)
+{
+  int op = select_str_opcode(mb_len, str_len, ignore_case);
+  add_opcode(reg, op);
+
+  if (op == OP_EXACTMBN)
+    add_length(reg, mb_len);
+
+  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
+    /* OP_EXACTN_IC stores the byte length, the other opcodes the character count */
+    if (op == OP_EXACTN_IC)
+      add_length(reg, mb_len * str_len);
+    else
+      add_length(reg, str_len);
+  }
+
+  add_bytes(reg, s, mb_len * str_len);
+  return 0;
+}
+
+
+static int +compile_length_string_node(Node* node, regex_t* reg) +{ + int rlen, r, len, prev_len, slen, ambig; + OnigEncoding enc = reg->enc; + UChar *p, *prev; + StrNode* sn; + + sn = NSTR(node); + if (sn->end <= sn->s) + return 0; + + ambig = NSTRING_IS_AMBIG(node); + + p = prev = sn->s; + prev_len = enclen(enc, p, sn->end); + p += prev_len; + slen = 1; + rlen = 0; + + for (; p < sn->end; ) { + len = enclen(enc, p, sn->end); + if (len == prev_len) { + slen++; + } + else { + r = add_compile_string_length(prev, prev_len, slen, reg, ambig); + rlen += r; + prev = p; + slen = 1; + prev_len = len; + } + p += len; + } + r = add_compile_string_length(prev, prev_len, slen, reg, ambig); + rlen += r; + return rlen; +} + +static int +compile_length_string_raw_node(StrNode* sn, regex_t* reg) +{ + if (sn->end <= sn->s) + return 0; + + return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); +} + +static int +compile_string_node(Node* node, regex_t* reg) +{ + int r, len, prev_len, slen, ambig; + OnigEncoding enc = reg->enc; + UChar *p, *prev, *end; + StrNode* sn; + + sn = NSTR(node); + if (sn->end <= sn->s) + return 0; + + end = sn->end; + ambig = NSTRING_IS_AMBIG(node); + + p = prev = sn->s; + prev_len = enclen(enc, p, end); + p += prev_len; + slen = 1; + + for (; p < end; ) { + len = enclen(enc, p, end); + if (len == prev_len) { + slen++; + } + else { + r = add_compile_string(prev, prev_len, slen, reg, ambig); + if (r) return r; + + prev = p; + slen = 1; + prev_len = len; + } + + p += len; + } + return add_compile_string(prev, prev_len, slen, reg, ambig); +} + +static int +compile_string_raw_node(StrNode* sn, regex_t* reg) +{ + if (sn->end <= sn->s) + return 0; + + return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); +} + +static int +add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) +{ +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + add_length(reg, mbuf->used); + return add_bytes(reg, mbuf->p, mbuf->used); +#else + int r, pad_size; + UChar* p = 
BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; + + GET_ALIGNMENT_PAD_SIZE(p, pad_size); + add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1)); + if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); + + r = add_bytes(reg, mbuf->p, mbuf->used); + + /* padding for return value from compile_length_cclass_node() to be fix. */ + pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size; + if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); + return r; +#endif +} + +static int +compile_length_cclass_node(CClassNode* cc, regex_t* reg) +{ + int len; + + if (IS_NCCLASS_SHARE(cc)) { + len = SIZE_OPCODE + SIZE_POINTER; + return len; + } + + if (IS_NULL(cc->mbuf)) { + len = SIZE_OPCODE + SIZE_BITSET; + } + else { + if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { + len = SIZE_OPCODE; + } + else { + len = SIZE_OPCODE + SIZE_BITSET; + } +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + len += SIZE_LENGTH + cc->mbuf->used; +#else + len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1); +#endif + } + + return len; +} + +static int +compile_cclass_node(CClassNode* cc, regex_t* reg) +{ + int r; + + if (IS_NCCLASS_SHARE(cc)) { + add_opcode(reg, OP_CCLASS_NODE); + r = add_pointer(reg, cc); + return r; + } + + if (IS_NULL(cc->mbuf)) { + if (IS_NCCLASS_NOT(cc)) + add_opcode(reg, OP_CCLASS_NOT); + else + add_opcode(reg, OP_CCLASS); + + r = add_bitset(reg, cc->bs); + } + else { + if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { + if (IS_NCCLASS_NOT(cc)) + add_opcode(reg, OP_CCLASS_MB_NOT); + else + add_opcode(reg, OP_CCLASS_MB); + + r = add_multi_byte_cclass(cc->mbuf, reg); + } + else { + if (IS_NCCLASS_NOT(cc)) + add_opcode(reg, OP_CCLASS_MIX_NOT); + else + add_opcode(reg, OP_CCLASS_MIX); + + r = add_bitset(reg, cc->bs); + if (r) return r; + r = add_multi_byte_cclass(cc->mbuf, reg); + } + } + + return r; +} + +static int +entry_repeat_range(regex_t* reg, int id, int lower, int upper) +{ +#define REPEAT_RANGE_ALLOC 4 + + OnigRepeatRange* p; + + if 
(reg->repeat_range_alloc == 0) { + p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); + CHECK_NULL_RETURN_MEMERR(p); + reg->repeat_range = p; + reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; + } + else if (reg->repeat_range_alloc <= id) { + int n; + n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; + p = (OnigRepeatRange* )xrealloc(reg->repeat_range, + sizeof(OnigRepeatRange) * n); + CHECK_NULL_RETURN_MEMERR(p); + reg->repeat_range = p; + reg->repeat_range_alloc = n; + } + else { + p = reg->repeat_range; + } + + p[id].lower = lower; + p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper); + return 0; +} + +static int +compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info, + regex_t* reg) +{ + int r; + int num_repeat = reg->num_repeat; + + r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG); + if (r) return r; + r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ + reg->num_repeat++; + if (r) return r; + r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC); + if (r) return r; + + r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); + if (r) return r; + + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + + if ( +#ifdef USE_SUBEXP_CALL + reg->num_call > 0 || +#endif + IS_QUANTIFIER_IN_REPEAT(qn)) { + r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG); + } + else { + r = add_opcode(reg, qn->greedy ? 
OP_REPEAT_INC : OP_REPEAT_INC_NG); + } + if (r) return r; + r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ + return r; +} + +static int +is_anychar_star_quantifier(QtfrNode* qn) +{ + if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && + NTYPE(qn->target) == NT_CANY) + return 1; + else + return 0; +} + +#define QUANTIFIER_EXPAND_LIMIT_SIZE 50 +#define CKN_ON (ckn > 0) + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + +static int +compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) +{ + int len, mod_tlen, cklen; + int ckn; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); + + cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0); + + /* anychar repeat */ + if (NTYPE(qn->target) == NT_CANY) { + if (qn->greedy && infinite) { + if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) + return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; + else + return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && qn->lower <= 1) { + if (qn->greedy) { + if (qn->lower == 1) + len = SIZE_OP_JUMP; + else + len = 0; + + len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; + } + else { + if (qn->lower == 0) + len = SIZE_OP_JUMP; + else + len = 0; + + len += mod_tlen + SIZE_OP_PUSH + cklen; + } + } + else if (qn->upper == 0) { + if (qn->is_refered != 0) /* /(?..){0}/ */ + len = SIZE_OP_JUMP + tlen; + else + len = 0; + } + else if (qn->upper == 1 && qn->greedy) { + if (qn->lower == 0) { + if (CKN_ON) { + len = SIZE_OP_STATE_CHECK_PUSH + tlen; + } + else { + len = SIZE_OP_PUSH + tlen; + } + } + else { + len = tlen; + } + } + else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ + len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; + } + else { + len = SIZE_OP_REPEAT_INC + + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; + if (CKN_ON) + len += SIZE_OP_STATE_CHECK; + } + + return len; +} + +static int +compile_quantifier_node(QtfrNode* qn, regex_t* reg) +{ + int r, mod_tlen; + int ckn; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); + + if (is_anychar_star_quantifier(qn)) { + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { + if (IS_MULTILINE(reg->options)) + r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); + else + r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); + if (r) return r; + if (CKN_ON) { + r = add_state_check_num(reg, ckn); + if (r) return r; + } + + return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); + } + else { + if (IS_MULTILINE(reg->options)) { + r = add_opcode(reg, (CKN_ON ? + OP_STATE_CHECK_ANYCHAR_ML_STAR + : OP_ANYCHAR_ML_STAR)); + } + else { + r = add_opcode(reg, (CKN_ON ? + OP_STATE_CHECK_ANYCHAR_STAR + : OP_ANYCHAR_STAR)); + } + if (r) return r; + if (CKN_ON) + r = add_state_check_num(reg, ckn); + + return r; + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && qn->lower <= 1) { + if (qn->greedy) { + if (qn->lower == 1) { + r = add_opcode_rel_addr(reg, OP_JUMP, + (CKN_ON ? 
SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); + if (r) return r; + } + + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); + } + if (r) return r; + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); + } + else { + if (qn->lower == 0) { + r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); + if (r) return r; + } + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, + -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); + } + else + r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); + } + } + else if (qn->upper == 0) { + if (qn->is_refered != 0) { /* /(?..){0}/ */ + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else + r = 0; + } + else if (qn->upper == 1 && qn->greedy) { + if (qn->lower == 0) { + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, tlen); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, tlen); + } + if (r) return r; + } + + r = compile_tree(qn->target, reg); + } + else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, SIZE_OP_JUMP); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); + } + + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else { + r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); + if (CKN_ON) { + if (r) return r; + r = add_opcode(reg, OP_STATE_CHECK); + if (r) return r; + r = add_state_check_num(reg, ckn); + } + } + return r; +} + +#else /* USE_COMBINATION_EXPLOSION_CHECK */ + +static int +compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) +{ + int len, mod_tlen; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + /* anychar repeat */ + if (NTYPE(qn->target) == NT_CANY) { + if (qn->greedy && infinite) { + if (IS_NOT_NULL(qn->next_head_exact)) + return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; + else + return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower; + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && + (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { + len = SIZE_OP_JUMP; + } + else { + len = tlen * qn->lower; + } + + if (qn->greedy) { + if (IS_NOT_NULL(qn->head_exact)) + len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP; + else if (IS_NOT_NULL(qn->next_head_exact)) + len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP; + else + len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP; + } + else + len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH; + } + else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?..){0}/ */ + len = SIZE_OP_JUMP + tlen; + } + else if (!infinite && 
qn->greedy && + (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper + <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + len = tlen * qn->lower; + len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); + } + else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ + len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen; + } + else { + len = SIZE_OP_REPEAT_INC + + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; + } + + return len; +} + +static int +compile_quantifier_node(QtfrNode* qn, regex_t* reg) +{ + int i, r, mod_tlen; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + if (is_anychar_star_quantifier(qn)) { + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + if (IS_NOT_NULL(qn->next_head_exact)) { + if (IS_MULTILINE(reg->options)) + r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); + else + r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); + if (r) return r; + return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); + } + else { + if (IS_MULTILINE(reg->options)) + return add_opcode(reg, OP_ANYCHAR_ML_STAR); + else + return add_opcode(reg, OP_ANYCHAR_STAR); + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && + (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { + if (qn->greedy) { + if (IS_NOT_NULL(qn->head_exact)) + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); + else if (IS_NOT_NULL(qn->next_head_exact)) + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT); + else + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH); + } + else { + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP); + } + if (r) return r; + } + else { + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + } + + if 
(qn->greedy) { + if (IS_NOT_NULL(qn->head_exact)) { + r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, + mod_tlen + SIZE_OP_JUMP); + if (r) return r; + add_bytes(reg, NSTR(qn->head_exact)->s, 1); + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); + } + else if (IS_NOT_NULL(qn->next_head_exact)) { + r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, + mod_tlen + SIZE_OP_JUMP); + if (r) return r; + add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); + if (r) return r; + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH)); + } + } + else { + r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); + if (r) return r; + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); + } + } + else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?..){0}/ */ + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else if (!infinite && qn->greedy && + (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper + <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + int n = qn->upper - qn->lower; + + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + + for (i = 0; i < n; i++) { + r = add_opcode_rel_addr(reg, OP_PUSH, + (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); + if (r) return r; + r = compile_tree(qn->target, reg); + if (r) return r; + } + } + else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ + r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else { + r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); + } + return r; +} +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + +static int +compile_length_option_node(EncloseNode* node, regex_t* reg) +{ + int tlen; + OnigOptionType prev = reg->options; + + reg->options = node->option; + tlen = compile_length_tree(node->target, reg); + reg->options = prev; + + if (tlen < 0) return tlen; + + if (IS_DYNAMIC_OPTION(prev ^ node->option)) { + return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL + + tlen + SIZE_OP_SET_OPTION; + } + else + return tlen; +} + +static int +compile_option_node(EncloseNode* node, regex_t* reg) +{ + int r; + OnigOptionType prev = reg->options; + + if (IS_DYNAMIC_OPTION(prev ^ node->option)) { + r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option); + if (r) return r; + r = add_opcode_option(reg, OP_SET_OPTION, prev); + if (r) return r; + r = add_opcode(reg, OP_FAIL); + if (r) return r; + } + + reg->options = node->option; + r = compile_tree(node->target, reg); + reg->options = prev; + + if (IS_DYNAMIC_OPTION(prev ^ node->option)) { + if (r) return r; + r = add_opcode_option(reg, OP_SET_OPTION, prev); + } + return r; +} + +static int +compile_length_enclose_node(EncloseNode* node, regex_t* reg) +{ + int len; + int tlen; + + if (node->type == ENCLOSE_OPTION) + return compile_length_option_node(node, reg); + + if (node->target) { + tlen = compile_length_tree(node->target, reg); + if (tlen < 0) return tlen; + } + else + tlen = 0; + + switch (node->type) { + case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_ENCLOSE_CALLED(node)) { + len = SIZE_OP_MEMORY_START_PUSH + tlen + + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + len += (IS_ENCLOSE_RECURSION(node) + ? 
SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
+      else
+        len += (IS_ENCLOSE_RECURSION(node)
+                ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+    }
+    else
+#endif
+    {
+      if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
+        len = SIZE_OP_MEMORY_START_PUSH;
+      else
+        len = SIZE_OP_MEMORY_START;
+
+      len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
+                     ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
+    }
+    break;
+
+  case ENCLOSE_STOP_BACKTRACK:
+    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
+      QtfrNode* qn = NQTFR(node->target);
+      tlen = compile_length_tree(qn->target, reg);
+      if (tlen < 0) return tlen;
+
+      len = tlen * qn->lower
+          + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
+    }
+    else {
+      len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
+    }
+    break;
+
+  default:
+    return ONIGERR_TYPE_BUG;
+    break;
+  }
+
+  return len;
+}
+
+static int get_char_length_tree(Node* node, regex_t* reg, int* len);
+
+/*
+ * Emits the code for an enclose node.
+ *
+ *   ENCLOSE_OPTION          delegated to compile_option_node().
+ *   ENCLOSE_MEMORY          OP_MEMORY_START[_PUSH], the target, then
+ *                           OP_MEMORY_END[_PUSH][_REC].  Under
+ *                           USE_SUBEXP_CALL a called group is preceded by
+ *                           OP_CALL plus an OP_JUMP over its body and is
+ *                           terminated by OP_RETURN.
+ *   ENCLOSE_STOP_BACKTRACK  either the simple-repeat PUSH/POP/JUMP loop or
+ *                           OP_PUSH_STOP_BT ... OP_POP_STOP_BT.
+ *
+ * Returns 0 on success, ONIGERR_TYPE_BUG for any other enclose type, or the
+ * first non-zero / negative status reported by a helper.
+ */
+static int
+compile_enclose_node(EncloseNode* node, regex_t* reg)
+{
+  int r, len;
+
+  if (node->type == ENCLOSE_OPTION)
+    return compile_option_node(node, reg);
+
+  switch (node->type) {
+  case ENCLOSE_MEMORY:
+#ifdef USE_SUBEXP_CALL
+    if (IS_ENCLOSE_CALLED(node)) {
+      r = add_opcode(reg, OP_CALL);
+      if (r) return r;
+      node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
+      node->state |= NST_ADDR_FIXED;
+      r = add_abs_addr(reg, (int )node->call_addr);
+      if (r) return r;
+      len = compile_length_tree(node->target, reg);
+      /* a negative length is an error code: do not fold it into the jump
+         distance (same check as the stop-backtrack branch below) */
+      if (len < 0) return len;
+      len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
+      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+        len += (IS_ENCLOSE_RECURSION(node)
+                ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
+      else
+        len += (IS_ENCLOSE_RECURSION(node)
+                ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+
+      r = add_opcode_rel_addr(reg, OP_JUMP, len);
+      if (r) return r;
+    }
+#endif
+    if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
+      r = add_opcode(reg, OP_MEMORY_START_PUSH);
+    else
+      r = add_opcode(reg, OP_MEMORY_START);
+    if (r) return r;
+    r = add_mem_num(reg, node->regnum);
+    if (r) return r;
+    r = compile_tree(node->target, reg);
+    if (r) return r;
+#ifdef USE_SUBEXP_CALL
+    if (IS_ENCLOSE_CALLED(node)) {
+      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+        r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
+                             ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
+      else
+        r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
+                             ? OP_MEMORY_END_REC : OP_MEMORY_END));
+
+      if (r) return r;
+      r = add_mem_num(reg, node->regnum);
+      if (r) return r;
+      r = add_opcode(reg, OP_RETURN);
+    }
+    else
+#endif
+    {
+      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+        r = add_opcode(reg, OP_MEMORY_END_PUSH);
+      else
+        r = add_opcode(reg, OP_MEMORY_END);
+      if (r) return r;
+      r = add_mem_num(reg, node->regnum);
+    }
+    break;
+
+  case ENCLOSE_STOP_BACKTRACK:
+    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
+      QtfrNode* qn = NQTFR(node->target);
+      r = compile_tree_n_times(qn->target, qn->lower, reg);
+      if (r) return r;
+
+      len = compile_length_tree(qn->target, reg);
+      if (len < 0) return len;
+
+      r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
+      if (r) return r;
+      r = compile_tree(qn->target, reg);
+      if (r) return r;
+      r = add_opcode(reg, OP_POP);
+      if (r) return r;
+      /* jump back to the OP_PUSH that opens the loop */
+      r = add_opcode_rel_addr(reg, OP_JUMP,
+         -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
+    }
+    else {
+      r = add_opcode(reg, OP_PUSH_STOP_BT);
+      if (r) return r;
+      r = compile_tree(node->target, reg);
+      if (r) return r;
+      r = add_opcode(reg, OP_POP_STOP_BT);
+    }
+    break;
+
+  default:
+    return ONIGERR_TYPE_BUG;
+    break;
+  }
+
+  return r;
+}
+
+static int
+compile_length_anchor_node(AnchorNode* node, regex_t* reg)
+{
+  int len;
+  int tlen = 0;
+
+ if (node->target) { + tlen = compile_length_tree(node->target, reg); + if (tlen < 0) return tlen; + } + + switch (node->type) { + case ANCHOR_PREC_READ: + len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS; + break; + case ANCHOR_PREC_READ_NOT: + len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS; + break; + case ANCHOR_LOOK_BEHIND: + len = SIZE_OP_LOOK_BEHIND + tlen; + break; + case ANCHOR_LOOK_BEHIND_NOT: + len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT; + break; + + default: + len = SIZE_OPCODE; + break; + } + + return len; +} + +static int +compile_anchor_node(AnchorNode* node, regex_t* reg) +{ + int r, len; + + switch (node->type) { + case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break; + case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break; + case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break; + case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break; + case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break; + case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; + + case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break; + case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break; +#ifdef USE_WORD_BEGIN_END + case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break; + case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break; +#endif + + case ANCHOR_PREC_READ: + r = add_opcode(reg, OP_PUSH_POS); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_POP_POS); + break; + + case ANCHOR_PREC_READ_NOT: + len = compile_length_tree(node->target, reg); + if (len < 0) return len; + r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_FAIL_POS); + break; + + case ANCHOR_LOOK_BEHIND: + { + int n; + r = add_opcode(reg, OP_LOOK_BEHIND); + if (r) return r; + if 
(node->char_len < 0) { + r = get_char_length_tree(node->target, reg, &n); + if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + else + n = node->char_len; + r = add_length(reg, n); + if (r) return r; + r = compile_tree(node->target, reg); + } + break; + + case ANCHOR_LOOK_BEHIND_NOT: + { + int n; + len = compile_length_tree(node->target, reg); + r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT, + len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); + if (r) return r; + if (node->char_len < 0) { + r = get_char_length_tree(node->target, reg, &n); + if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + else + n = node->char_len; + r = add_length(reg, n); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT); + } + break; + + default: + return ONIGERR_TYPE_BUG; + break; + } + + return r; +} + +static int +compile_length_tree(Node* node, regex_t* reg) +{ + int len, type, r; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + len = 0; + do { + r = compile_length_tree(NCAR(node), reg); + if (r < 0) return r; + len += r; + } while (IS_NOT_NULL(node = NCDR(node))); + r = len; + break; + + case NT_ALT: + { + int n; + + n = r = 0; + do { + r += compile_length_tree(NCAR(node), reg); + n++; + } while (IS_NOT_NULL(node = NCDR(node))); + r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); + } + break; + + case NT_STR: + if (NSTRING_IS_RAW(node)) + r = compile_length_string_raw_node(NSTR(node), reg); + else + r = compile_length_string_node(node, reg); + break; + + case NT_CCLASS: + r = compile_length_cclass_node(NCCLASS(node), reg); + break; + + case NT_CTYPE: + case NT_CANY: + r = SIZE_OPCODE; + break; + + case NT_BREF: + { + BRefNode* br = NBREF(node); + +#ifdef USE_BACKREF_WITH_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + } + else +#endif + if (br->back_num == 1) { + r = ((!IS_IGNORECASE(reg->options) && 
br->back_static[0] <= 2) + ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); + } + else { + r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + } + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + r = SIZE_OP_CALL; + break; +#endif + + case NT_QTFR: + r = compile_length_quantifier_node(NQTFR(node), reg); + break; + + case NT_ENCLOSE: + r = compile_length_enclose_node(NENCLOSE(node), reg); + break; + + case NT_ANCHOR: + r = compile_length_anchor_node(NANCHOR(node), reg); + break; + + default: + return ONIGERR_TYPE_BUG; + break; + } + + return r; +} + +static int +compile_tree(Node* node, regex_t* reg) +{ + int n, type, len, pos, r = 0; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + do { + r = compile_tree(NCAR(node), reg); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_ALT: + { + Node* x = node; + len = 0; + do { + len += compile_length_tree(NCAR(x), reg); + if (NCDR(x) != NULL) { + len += SIZE_OP_PUSH + SIZE_OP_JUMP; + } + } while (IS_NOT_NULL(x = NCDR(x))); + pos = reg->used + len; /* goal position */ + + do { + len = compile_length_tree(NCAR(node), reg); + if (IS_NOT_NULL(NCDR(node))) { + r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); + if (r) break; + } + r = compile_tree(NCAR(node), reg); + if (r) break; + if (IS_NOT_NULL(NCDR(node))) { + len = pos - (reg->used + SIZE_OP_JUMP); + r = add_opcode_rel_addr(reg, OP_JUMP, len); + if (r) break; + } + } while (IS_NOT_NULL(node = NCDR(node))); + } + break; + + case NT_STR: + if (NSTRING_IS_RAW(node)) + r = compile_string_raw_node(NSTR(node), reg); + else + r = compile_string_node(node, reg); + break; + + case NT_CCLASS: + r = compile_cclass_node(NCCLASS(node), reg); + break; + + case NT_CTYPE: + { + int op; + + switch (NCTYPE(node)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(node)->not != 0) op = OP_NOT_WORD; + else op = OP_WORD; + break; + default: + return ONIGERR_TYPE_BUG; + break; + } + r = add_opcode(reg, op); + } + break; + + case NT_CANY: + 
if (IS_MULTILINE(reg->options)) + r = add_opcode(reg, OP_ANYCHAR_ML); + else + r = add_opcode(reg, OP_ANYCHAR); + break; + + case NT_BREF: + { + BRefNode* br = NBREF(node); + +#ifdef USE_BACKREF_WITH_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); + if (r) return r; + r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); + if (r) return r; + r = add_length(reg, br->nest_level); + if (r) return r; + + goto add_bacref_mems; + } + else +#endif + if (br->back_num == 1) { + n = br->back_static[0]; + if (IS_IGNORECASE(reg->options)) { + r = add_opcode(reg, OP_BACKREFN_IC); + if (r) return r; + r = add_mem_num(reg, n); + } + else { + switch (n) { + case 1: r = add_opcode(reg, OP_BACKREF1); break; + case 2: r = add_opcode(reg, OP_BACKREF2); break; + default: + r = add_opcode(reg, OP_BACKREFN); + if (r) return r; + r = add_mem_num(reg, n); + break; + } + } + } + else { + int i; + int* p; + + if (IS_IGNORECASE(reg->options)) { + r = add_opcode(reg, OP_BACKREF_MULTI_IC); + } + else { + r = add_opcode(reg, OP_BACKREF_MULTI); + } + if (r) return r; + +#ifdef USE_BACKREF_WITH_LEVEL + add_bacref_mems: +#endif + r = add_length(reg, br->back_num); + if (r) return r; + p = BACKREFS_P(br); + for (i = br->back_num - 1; i >= 0; i--) { + r = add_mem_num(reg, p[i]); + if (r) return r; + } + } + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + r = compile_call(NCALL(node), reg); + break; +#endif + + case NT_QTFR: + r = compile_quantifier_node(NQTFR(node), reg); + break; + + case NT_ENCLOSE: + r = compile_enclose_node(NENCLOSE(node), reg); + break; + + case NT_ANCHOR: + r = compile_anchor_node(NANCHOR(node), reg); + break; + + default: +#ifdef ONIG_DEBUG + fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node)); +#endif + break; + } + + return r; +} + +#ifdef USE_NAMED_GROUP + +static int +noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) +{ + int r = 0; + Node* node = *plink; + + switch (NTYPE(node)) { + 
case NT_LIST: + case NT_ALT: + do { + r = noname_disable_map(&(NCAR(node)), map, counter); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_QTFR: + { + Node** ptarget = &(NQTFR(node)->target); + Node* old = *ptarget; + r = noname_disable_map(ptarget, map, counter); + if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) { + onig_reduce_nested_quantifier(node, *ptarget); + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + if (en->type == ENCLOSE_MEMORY) { + if (IS_ENCLOSE_NAMED_GROUP(en)) { + (*counter)++; + map[en->regnum].new_val = *counter; + en->regnum = *counter; + r = noname_disable_map(&(en->target), map, counter); + } + else { + *plink = en->target; + en->target = NULL_NODE; + onig_node_free(node); + r = noname_disable_map(plink, map, counter); + } + } + else + r = noname_disable_map(&(en->target), map, counter); + } + break; + + default: + break; + } + + return r; +} + +static int +renumber_node_backref(Node* node, GroupNumRemap* map) +{ + int i, pos, n, old_num; + int *backs; + BRefNode* bn = NBREF(node); + + if (! 
IS_BACKREF_NAME_REF(bn)) + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + + old_num = bn->back_num; + if (IS_NULL(bn->back_dynamic)) + backs = bn->back_static; + else + backs = bn->back_dynamic; + + for (i = 0, pos = 0; i < old_num; i++) { + n = map[backs[i]].new_val; + if (n > 0) { + backs[pos] = n; + pos++; + } + } + + bn->back_num = pos; + return 0; +} + +static int +renumber_by_map(Node* node, GroupNumRemap* map) +{ + int r = 0; + + switch (NTYPE(node)) { + case NT_LIST: + case NT_ALT: + do { + r = renumber_by_map(NCAR(node), map); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + case NT_QTFR: + r = renumber_by_map(NQTFR(node)->target, map); + break; + case NT_ENCLOSE: + r = renumber_by_map(NENCLOSE(node)->target, map); + break; + + case NT_BREF: + r = renumber_node_backref(node, map); + break; + + default: + break; + } + + return r; +} + +static int +numbered_ref_check(Node* node) +{ + int r = 0; + + switch (NTYPE(node)) { + case NT_LIST: + case NT_ALT: + do { + r = numbered_ref_check(NCAR(node)); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + case NT_QTFR: + r = numbered_ref_check(NQTFR(node)->target); + break; + case NT_ENCLOSE: + r = numbered_ref_check(NENCLOSE(node)->target); + break; + + case NT_BREF: + if (! 
IS_BACKREF_NAME_REF(NBREF(node))) + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + break; + + default: + break; + } + + return r; +} + +static int +disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) +{ + int r, i, pos, counter; + BitStatusType loc; + GroupNumRemap* map; + + map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1)); + CHECK_NULL_RETURN_MEMERR(map); + for (i = 1; i <= env->num_mem; i++) { + map[i].new_val = 0; + } + counter = 0; + r = noname_disable_map(root, map, &counter); + if (r != 0) return r; + + r = renumber_by_map(*root, map); + if (r != 0) return r; + + for (i = 1, pos = 1; i <= env->num_mem; i++) { + if (map[i].new_val > 0) { + SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i]; + pos++; + } + } + + loc = env->capture_history; + BIT_STATUS_CLEAR(env->capture_history); + for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { + if (BIT_STATUS_AT(loc, i)) { + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val); + } + } + + env->num_mem = env->num_named; + reg->num_mem = env->num_named; + + return onig_renumber_name_table(reg, map); +} +#endif /* USE_NAMED_GROUP */ + +#ifdef USE_SUBEXP_CALL +static int +unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) +{ + int i, offset; + EncloseNode* en; + AbsAddrType addr; + + for (i = 0; i < uslist->num; i++) { + en = NENCLOSE(uslist->us[i].target); + if (! 
IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; + addr = en->call_addr; + offset = uslist->us[i].offset; + + BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); + } + return 0; +} +#endif + +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT +static int +quantifiers_memory_node_info(Node* node) +{ + int r = 0; + + switch (NTYPE(node)) { + case NT_LIST: + case NT_ALT: + { + int v; + do { + v = quantifiers_memory_node_info(NCAR(node)); + if (v > r) r = v; + } while (v >= 0 && IS_NOT_NULL(node = NCDR(node))); + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) { + return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ + } + else + r = quantifiers_memory_node_info(NCALL(node)->target); + break; +#endif + + case NT_QTFR: + { + QtfrNode* qn = NQTFR(node); + if (qn->upper != 0) { + r = quantifiers_memory_node_info(qn->target); + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + switch (en->type) { + case ENCLOSE_MEMORY: + return NQ_TARGET_IS_EMPTY_MEM; + break; + + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: + r = quantifiers_memory_node_info(en->target); + break; + default: + break; + } + } + break; + + case NT_BREF: + case NT_STR: + case NT_CTYPE: + case NT_CCLASS: + case NT_CANY: + case NT_ANCHOR: + default: + break; + } + + return r; +} +#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */ + +static int +get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) +{ + OnigDistance tmin; + int r = 0; + + *min = 0; + switch (NTYPE(node)) { + case NT_BREF: + { + int i; + int* backs; + Node** nodes = SCANENV_MEM_NODES(env); + BRefNode* br = NBREF(node); + if (br->state & NST_RECURSION) break; + + backs = BACKREFS_P(br); + if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF; + r = get_min_match_length(nodes[backs[0]], min, env); + if (r != 0) break; + for (i = 1; i < br->back_num; i++) { + if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; + r = 
get_min_match_length(nodes[backs[i]], &tmin, env); + if (r != 0) break; + if (*min > tmin) *min = tmin; + } + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) { + EncloseNode* en = NENCLOSE(NCALL(node)->target); + if (IS_ENCLOSE_MIN_FIXED(en)) + *min = en->min_len; + } + else + r = get_min_match_length(NCALL(node)->target, min, env); + break; +#endif + + case NT_LIST: + do { + r = get_min_match_length(NCAR(node), &tmin, env); + if (r == 0) *min += tmin; + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_ALT: + { + Node *x, *y; + y = node; + do { + x = NCAR(y); + r = get_min_match_length(x, &tmin, env); + if (r != 0) break; + if (y == node) *min = tmin; + else if (*min > tmin) *min = tmin; + } while (r == 0 && IS_NOT_NULL(y = NCDR(y))); + } + break; + + case NT_STR: + { + StrNode* sn = NSTR(node); + *min = sn->end - sn->s; + } + break; + + case NT_CTYPE: + *min = 1; + break; + + case NT_CCLASS: + case NT_CANY: + *min = 1; + break; + + case NT_QTFR: + { + QtfrNode* qn = NQTFR(node); + + if (qn->lower > 0) { + r = get_min_match_length(qn->target, min, env); + if (r == 0) + *min = distance_multiply(*min, qn->lower); + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + switch (en->type) { + case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_ENCLOSE_MIN_FIXED(en)) + *min = en->min_len; + else { + r = get_min_match_length(en->target, min, env); + if (r == 0) { + en->min_len = *min; + SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); + } + } + break; +#endif + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: + r = get_min_match_length(en->target, min, env); + break; + } + } + break; + + case NT_ANCHOR: + default: + break; + } + + return r; +} + +static int +get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) +{ + OnigDistance tmax; + int r = 0; + + *max = 0; + switch (NTYPE(node)) { + case NT_LIST: + do { + r = get_max_match_length(NCAR(node), &tmax, env); + if (r == 0) 
+ *max = distance_add(*max, tmax); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_ALT: + do { + r = get_max_match_length(NCAR(node), &tmax, env); + if (r == 0 && *max < tmax) *max = tmax; + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_STR: + { + StrNode* sn = NSTR(node); + *max = sn->end - sn->s; + } + break; + + case NT_CTYPE: + *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + break; + + case NT_CCLASS: + case NT_CANY: + *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + break; + + case NT_BREF: + { + int i; + int* backs; + Node** nodes = SCANENV_MEM_NODES(env); + BRefNode* br = NBREF(node); + if (br->state & NST_RECURSION) { + *max = ONIG_INFINITE_DISTANCE; + break; + } + backs = BACKREFS_P(br); + for (i = 0; i < br->back_num; i++) { + if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; + r = get_max_match_length(nodes[backs[i]], &tmax, env); + if (r != 0) break; + if (*max < tmax) *max = tmax; + } + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + if (! IS_CALL_RECURSION(NCALL(node))) + r = get_max_match_length(NCALL(node)->target, max, env); + else + *max = ONIG_INFINITE_DISTANCE; + break; +#endif + + case NT_QTFR: + { + QtfrNode* qn = NQTFR(node); + + if (qn->upper != 0) { + r = get_max_match_length(qn->target, max, env); + if (r == 0 && *max != 0) { + if (! 
IS_REPEAT_INFINITE(qn->upper)) + *max = distance_multiply(*max, qn->upper); + else + *max = ONIG_INFINITE_DISTANCE; + } + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + switch (en->type) { + case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_ENCLOSE_MAX_FIXED(en)) + *max = en->max_len; + else { + r = get_max_match_length(en->target, max, env); + if (r == 0) { + en->max_len = *max; + SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); + } + } + break; +#endif + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: + r = get_max_match_length(en->target, max, env); + break; + } + } + break; + + case NT_ANCHOR: + default: + break; + } + + return r; +} + +#define GET_CHAR_LEN_VARLEN -1 +#define GET_CHAR_LEN_TOP_ALT_VARLEN -2 + +/* fixed size pattern node only */ +static int +get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) +{ + int tlen; + int r = 0; + + level++; + *len = 0; + switch (NTYPE(node)) { + case NT_LIST: + do { + r = get_char_length_tree1(NCAR(node), reg, &tlen, level); + if (r == 0) + *len = distance_add(*len, tlen); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_ALT: + { + int tlen2; + int varlen = 0; + + r = get_char_length_tree1(NCAR(node), reg, &tlen, level); + while (r == 0 && IS_NOT_NULL(node = NCDR(node))) { + r = get_char_length_tree1(NCAR(node), reg, &tlen2, level); + if (r == 0) { + if (tlen != tlen2) + varlen = 1; + } + } + if (r == 0) { + if (varlen != 0) { + if (level == 1) + r = GET_CHAR_LEN_TOP_ALT_VARLEN; + else + r = GET_CHAR_LEN_VARLEN; + } + else + *len = tlen; + } + } + break; + + case NT_STR: + { + StrNode* sn = NSTR(node); + UChar *s = sn->s; + while (s < sn->end) { + s += enclen(reg->enc, s, sn->end); + (*len)++; + } + } + break; + + case NT_QTFR: + { + QtfrNode* qn = NQTFR(node); + if (qn->lower == qn->upper) { + r = get_char_length_tree1(qn->target, reg, &tlen, level); + if (r == 0) + *len = distance_multiply(tlen, qn->lower); + } + else + r = GET_CHAR_LEN_VARLEN; + 
} + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + if (! IS_CALL_RECURSION(NCALL(node))) + r = get_char_length_tree1(NCALL(node)->target, reg, len, level); + else + r = GET_CHAR_LEN_VARLEN; + break; +#endif + + case NT_CTYPE: + *len = 1; + break; + + case NT_CCLASS: + case NT_CANY: + *len = 1; + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + switch (en->type) { + case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_ENCLOSE_CLEN_FIXED(en)) + *len = en->char_len; + else { + r = get_char_length_tree1(en->target, reg, len, level); + if (r == 0) { + en->char_len = *len; + SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); + } + } + break; +#endif + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: + r = get_char_length_tree1(en->target, reg, len, level); + break; + default: + break; + } + } + break; + + case NT_ANCHOR: + break; + + default: + r = GET_CHAR_LEN_VARLEN; + break; + } + + return r; +} + +static int +get_char_length_tree(Node* node, regex_t* reg, int* len) +{ + return get_char_length_tree1(node, reg, len, 0); +} + +/* x is not included y ==> 1 : 0 */ +static int +is_not_included(Node* x, Node* y, regex_t* reg) +{ + int i, len; + OnigCodePoint code; + UChar *p, c; + int ytype; + + retry: + ytype = NTYPE(y); + switch (NTYPE(x)) { + case NT_CTYPE: + { + switch (ytype) { + case NT_CTYPE: + if (NCTYPE(y)->ctype == NCTYPE(x)->ctype && + NCTYPE(y)->not != NCTYPE(x)->not) + return 1; + else + return 0; + break; + + case NT_CCLASS: + swap: + { + Node* tmp; + tmp = x; x = y; y = tmp; + goto retry; + } + break; + + case NT_STR: + goto swap; + break; + + default: + break; + } + } + break; + + case NT_CCLASS: + { + CClassNode* xc = NCCLASS(x); + switch (ytype) { + case NT_CTYPE: + switch (NCTYPE(y)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(y)->not == 0) { + if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (BITSET_AT(xc->bs, i)) { + if (IS_CODE_SB_WORD(reg->enc, i)) return 0; + } + } + return 1; + 
} + return 0; + } + else { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (! IS_CODE_SB_WORD(reg->enc, i)) { + if (!IS_NCCLASS_NOT(xc)) { + if (BITSET_AT(xc->bs, i)) + return 0; + } + else { + if (! BITSET_AT(xc->bs, i)) + return 0; + } + } + } + return 1; + } + break; + + default: + break; + } + break; + + case NT_CCLASS: + { + int v; + CClassNode* yc = NCCLASS(y); + + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + v = BITSET_AT(xc->bs, i); + if ((v != 0 && !IS_NCCLASS_NOT(xc)) || + (v == 0 && IS_NCCLASS_NOT(xc))) { + v = BITSET_AT(yc->bs, i); + if ((v != 0 && !IS_NCCLASS_NOT(yc)) || + (v == 0 && IS_NCCLASS_NOT(yc))) + return 0; + } + } + if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) || + (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc))) + return 1; + return 0; + } + break; + + case NT_STR: + goto swap; + break; + + default: + break; + } + } + break; + + case NT_STR: + { + StrNode* xs = NSTR(x); + if (NSTRING_LEN(x) == 0) + break; + + c = *(xs->s); + switch (ytype) { + case NT_CTYPE: + switch (NCTYPE(y)->ctype) { + case ONIGENC_CTYPE_WORD: + if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) + return NCTYPE(y)->not; + else + return !(NCTYPE(y)->not); + break; + default: + break; + } + break; + + case NT_CCLASS: + { + CClassNode* cc = NCCLASS(y); + + code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, + xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); + return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 
0 : 1); + } + break; + + case NT_STR: + { + UChar *q; + StrNode* ys = NSTR(y); + len = NSTRING_LEN(x); + if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); + if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { + /* tiny version */ + return 0; + } + else { + for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) { + if (*p != *q) return 1; + } + } + } + break; + + default: + break; + } + } + break; + + default: + break; + } + + return 0; +} + +static Node* +get_head_value_node(Node* node, int exact, regex_t* reg) +{ + Node* n = NULL_NODE; + + switch (NTYPE(node)) { + case NT_BREF: + case NT_ALT: + case NT_CANY: +#ifdef USE_SUBEXP_CALL + case NT_CALL: +#endif + break; + + case NT_CTYPE: + case NT_CCLASS: + if (exact == 0) { + n = node; + } + break; + + case NT_LIST: + n = get_head_value_node(NCAR(node), exact, reg); + break; + + case NT_STR: + { + StrNode* sn = NSTR(node); + + if (sn->end <= sn->s) + break; + + if (exact != 0 && + !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { + } + else { + n = node; + } + } + break; + + case NT_QTFR: + { + QtfrNode* qn = NQTFR(node); + if (qn->lower > 0) { + if (IS_NOT_NULL(qn->head_exact)) + n = qn->head_exact; + else + n = get_head_value_node(qn->target, exact, reg); + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + switch (en->type) { + case ENCLOSE_OPTION: + { + OnigOptionType options = reg->options; + + reg->options = NENCLOSE(node)->option; + n = get_head_value_node(NENCLOSE(node)->target, exact, reg); + reg->options = options; + } + break; + + case ENCLOSE_MEMORY: + case ENCLOSE_STOP_BACKTRACK: + n = get_head_value_node(en->target, exact, reg); + break; + } + } + break; + + case NT_ANCHOR: + if (NANCHOR(node)->type == ANCHOR_PREC_READ) + n = get_head_value_node(NANCHOR(node)->target, exact, reg); + break; + + default: + break; + } + + return n; +} + +static int +check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) +{ + int type, r = 0; + + type = NTYPE(node); + if 
((NTYPE2BIT(type) & type_mask) == 0) + return 1; + + switch (type) { + case NT_LIST: + case NT_ALT: + do { + r = check_type_tree(NCAR(node), type_mask, enclose_mask, + anchor_mask); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_QTFR: + r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask, + anchor_mask); + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + if ((en->type & enclose_mask) == 0) + return 1; + + r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask); + } + break; + + case NT_ANCHOR: + type = NANCHOR(node)->type; + if ((type & anchor_mask) == 0) + return 1; + + if (NANCHOR(node)->target) + r = check_type_tree(NANCHOR(node)->target, + type_mask, enclose_mask, anchor_mask); + break; + + default: + break; + } + return r; +} + +#ifdef USE_SUBEXP_CALL + +#define RECURSION_EXIST 1 +#define RECURSION_INFINITE 2 + +static int +subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + { + Node *x; + OnigDistance min; + int ret; + + x = node; + do { + ret = subexp_inf_recursive_check(NCAR(x), env, head); + if (ret < 0 || ret == RECURSION_INFINITE) return ret; + r |= ret; + if (head) { + ret = get_min_match_length(NCAR(x), &min, env); + if (ret != 0) return ret; + if (min != 0) head = 0; + } + } while (IS_NOT_NULL(x = NCDR(x))); + } + break; + + case NT_ALT: + { + int ret; + r = RECURSION_EXIST; + do { + ret = subexp_inf_recursive_check(NCAR(node), env, head); + if (ret < 0 || ret == RECURSION_INFINITE) return ret; + r &= ret; + } while (IS_NOT_NULL(node = NCDR(node))); + } + break; + + case NT_QTFR: + r = subexp_inf_recursive_check(NQTFR(node)->target, env, head); + if (r == RECURSION_EXIST) { + if (NQTFR(node)->lower == 0) r = 0; + } + break; + + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case 
ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_inf_recursive_check(an->target, env, head); + break; + } + } + break; + + case NT_CALL: + r = subexp_inf_recursive_check(NCALL(node)->target, env, head); + break; + + case NT_ENCLOSE: + if (IS_ENCLOSE_MARK2(NENCLOSE(node))) + return 0; + else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) + return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE); + else { + SET_ENCLOSE_STATUS(node, NST_MARK2); + r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head); + CLEAR_ENCLOSE_STATUS(node, NST_MARK2); + } + break; + + default: + break; + } + + return r; +} + +static int +subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + case NT_ALT: + do { + r = subexp_inf_recursive_check_trav(NCAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_QTFR: + r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env); + break; + + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_inf_recursive_check_trav(an->target, env); + break; + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + + if (IS_ENCLOSE_RECURSION(en)) { + SET_ENCLOSE_STATUS(node, NST_MARK1); + r = subexp_inf_recursive_check(en->target, env, 1); + if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); + } + r = subexp_inf_recursive_check_trav(en->target, env); + } + + break; + + default: + break; + } + + return r; +} + +static int +subexp_recursive_check(Node* node) +{ + int r = 0; + + switch (NTYPE(node)) { + case NT_LIST: + case NT_ALT: + do { + r |= subexp_recursive_check(NCAR(node)); + } while (IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_QTFR: + r = subexp_recursive_check(NQTFR(node)->target); + break; + + 
case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_recursive_check(an->target); + break; + } + } + break; + + case NT_CALL: + r = subexp_recursive_check(NCALL(node)->target); + if (r != 0) SET_CALL_RECURSION(node); + break; + + case NT_ENCLOSE: + if (IS_ENCLOSE_MARK2(NENCLOSE(node))) + return 0; + else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) + return 1; /* recursion */ + else { + SET_ENCLOSE_STATUS(node, NST_MARK2); + r = subexp_recursive_check(NENCLOSE(node)->target); + CLEAR_ENCLOSE_STATUS(node, NST_MARK2); + } + break; + + default: + break; + } + + return r; +} + + +static int +subexp_recursive_check_trav(Node* node, ScanEnv* env) +{ +#define FOUND_CALLED_NODE 1 + + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + case NT_ALT: + { + int ret; + do { + ret = subexp_recursive_check_trav(NCAR(node), env); + if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; + else if (ret < 0) return ret; + } while (IS_NOT_NULL(node = NCDR(node))); + } + break; + + case NT_QTFR: + r = subexp_recursive_check_trav(NQTFR(node)->target, env); + if (NQTFR(node)->upper == 0) { + if (r == FOUND_CALLED_NODE) + NQTFR(node)->is_refered = 1; + } + break; + + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_recursive_check_trav(an->target, env); + break; + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + + if (! 
IS_ENCLOSE_RECURSION(en)) { + if (IS_ENCLOSE_CALLED(en)) { + SET_ENCLOSE_STATUS(node, NST_MARK1); + r = subexp_recursive_check(en->target); + if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION); + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); + } + } + r = subexp_recursive_check_trav(en->target, env); + if (IS_ENCLOSE_CALLED(en)) + r |= FOUND_CALLED_NODE; + } + break; + + default: + break; + } + + return r; +} + +static int +setup_subexp_call(Node* node, ScanEnv* env) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + do { + r = setup_subexp_call(NCAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_ALT: + do { + r = setup_subexp_call(NCAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_QTFR: + r = setup_subexp_call(NQTFR(node)->target, env); + break; + case NT_ENCLOSE: + r = setup_subexp_call(NENCLOSE(node)->target, env); + break; + + case NT_CALL: + { + CallNode* cn = NCALL(node); + Node** nodes = SCANENV_MEM_NODES(env); + + if (cn->group_num != 0) { + int gnum = cn->group_num; + +#ifdef USE_NAMED_GROUP + if (env->num_named > 0 && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + } +#endif + if (gnum > env->num_mem) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_GROUP_REFERENCE; + } + +#ifdef USE_NAMED_GROUP + set_call_attr: +#endif + cn->target = nodes[cn->group_num]; + if (IS_NULL(cn->target)) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + SET_ENCLOSE_STATUS(cn->target, NST_CALLED); + BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num); + cn->unset_addr_list = env->unset_addr_list; + } +#ifdef USE_NAMED_GROUP + else { + int *refs; + + int 
n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, + &refs); + if (n <= 0) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + else if (n > 1) { + onig_scan_env_set_error_string(env, + ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); + return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; + } + else { + cn->group_num = refs[0]; + goto set_call_attr; + } + } +#endif + } + break; + + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = setup_subexp_call(an->target, env); + break; + } + } + break; + + default: + break; + } + + return r; +} +#endif + +/* divide different length alternatives in look-behind. + (?<=A|B) ==> (?<=A)|(?<=B) + (?<!A|B) ==> (?<!A)(?<!B) +*/ +static int +divide_look_behind_alternatives(Node* node) +{ + Node *head, *np, *insert_node; + AnchorNode* an = NANCHOR(node); + int anc_type = an->type; + + head = an->target; + np = NCAR(head); + swap_node(node, head); + NCAR(node) = head; + NANCHOR(head)->target = np; + + np = node; + while ((np = NCDR(np)) != NULL_NODE) { + insert_node = onig_node_new_anchor(anc_type); + CHECK_NULL_RETURN_MEMERR(insert_node); + NANCHOR(insert_node)->target = NCAR(np); + NCAR(np) = insert_node; + } + + if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { + np = node; + do { + SET_NTYPE(np, NT_LIST); /* alt -> list */ + } while ((np = NCDR(np)) != NULL_NODE); + } + return 0; +} + +static int +setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) +{ + int r, len; + AnchorNode* an = NANCHOR(node); + + r = get_char_length_tree(an->target, reg, &len); + if (r == 0) + an->char_len = len; + else if (r == GET_CHAR_LEN_VARLEN) + r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) + r = divide_look_behind_alternatives(node); + else + r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + + return r; +} + +static int +next_setup(Node* node, Node*
next_node, regex_t* reg) +{ + int type; + + retry: + type = NTYPE(node); + if (type == NT_QTFR) { + QtfrNode* qn = NQTFR(node); + if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { +#ifdef USE_QTFR_PEEK_NEXT + Node* n = get_head_value_node(next_node, 1, reg); + /* '\0': for UTF-16BE etc... */ + if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') { + qn->next_head_exact = n; + } +#endif + /* automatic posseivation a*b ==> (?>a*)b */ + if (qn->lower <= 1) { + int ttype = NTYPE(qn->target); + if (IS_NODE_TYPE_SIMPLE(ttype)) { + Node *x, *y; + x = get_head_value_node(qn->target, 0, reg); + if (IS_NOT_NULL(x)) { + y = get_head_value_node(next_node, 0, reg); + if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { + Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); + CHECK_NULL_RETURN_MEMERR(en); + SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); + swap_node(node, en); + NENCLOSE(node)->target = en; + } + } + } + } + } + } + else if (type == NT_ENCLOSE) { + EncloseNode* en = NENCLOSE(node); + if (en->type == ENCLOSE_MEMORY) { + node = en->target; + goto retry; + } + } + return 0; +} + + +static int +update_string_node_case_fold(regex_t* reg, Node *node) +{ + UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar *sbuf, *ebuf, *sp; + int r, i, len, sbuf_size; + StrNode* sn = NSTR(node); + + end = sn->end; + sbuf_size = (end - sn->s) * 2; + sbuf = (UChar* )xmalloc(sbuf_size); + CHECK_NULL_RETURN_MEMERR(sbuf); + ebuf = sbuf + sbuf_size; + + sp = sbuf; + p = sn->s; + while (p < end) { + len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf); + q = buf; + for (i = 0; i < len; i++) { + if (sp >= ebuf) { + sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2); + CHECK_NULL_RETURN_MEMERR(sbuf); + sp = sbuf + sbuf_size; + sbuf_size *= 2; + ebuf = sbuf + sbuf_size; + } + + *sp++ = buf[i]; + } + } + + r = onig_node_str_set(node, sbuf, sp); + if (r != 0) { + xfree(sbuf); + return r; + } + + xfree(sbuf); + return 0; +} + +static int 
+expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, + regex_t* reg) +{ + int r; + Node *node; + + node = onig_node_new_str(s, end); + if (IS_NULL(node)) return ONIGERR_MEMORY; + + r = update_string_node_case_fold(reg, node); + if (r != 0) { + onig_node_free(node); + return r; + } + + NSTRING_SET_AMBIG(node); + NSTRING_SET_DONT_GET_OPT_INFO(node); + *rnode = node; + return 0; +} + +static int +expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], + UChar *p, int slen, UChar *end, + regex_t* reg, Node **rnode) +{ + int r, i, j, len, varlen; + Node *anode, *var_anode, *snode, *xnode, *an; + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + + *rnode = var_anode = NULL_NODE; + + varlen = 0; + for (i = 0; i < item_num; i++) { + if (items[i].byte_len != slen) { + varlen = 1; + break; + } + } + + if (varlen != 0) { + *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(var_anode)) return ONIGERR_MEMORY; + + xnode = onig_node_new_list(NULL, NULL); + if (IS_NULL(xnode)) goto mem_err; + NCAR(var_anode) = xnode; + + anode = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(anode)) goto mem_err; + NCAR(xnode) = anode; + } + else { + *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(anode)) return ONIGERR_MEMORY; + } + + snode = onig_node_new_str(p, p + slen); + if (IS_NULL(snode)) goto mem_err; + + NCAR(anode) = snode; + + for (i = 0; i < item_num; i++) { + snode = onig_node_new_str(NULL, NULL); + if (IS_NULL(snode)) goto mem_err; + + for (j = 0; j < items[i].code_len; j++) { + len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf); + if (len < 0) { + r = len; + goto mem_err2; + } + + r = onig_node_str_cat(snode, buf, buf + len); + if (r != 0) goto mem_err2; + } + + an = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(an)) { + goto mem_err2; + } + + if (items[i].byte_len != slen) { + Node *rem; + UChar *q = p + items[i].byte_len; + + if (q < end) { + r = 
expand_case_fold_make_rem_string(&rem, q, end, reg); + if (r != 0) { + onig_node_free(an); + goto mem_err2; + } + + xnode = onig_node_list_add(NULL_NODE, snode); + if (IS_NULL(xnode)) { + onig_node_free(an); + onig_node_free(rem); + goto mem_err2; + } + if (IS_NULL(onig_node_list_add(xnode, rem))) { + onig_node_free(an); + onig_node_free(xnode); + onig_node_free(rem); + goto mem_err; + } + + NCAR(an) = xnode; + } + else { + NCAR(an) = snode; + } + + NCDR(var_anode) = an; + var_anode = an; + } + else { + NCAR(an) = snode; + NCDR(anode) = an; + anode = an; + } + } + + return varlen; + + mem_err2: + onig_node_free(snode); + + mem_err: + onig_node_free(*rnode); + + return ONIGERR_MEMORY; +} + +static int +expand_case_fold_string(Node* node, regex_t* reg) +{ +#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8 + + int r, n, len, alt_num; + UChar *start, *end, *p; + Node *top_root, *root, *snode, *prev_node; + OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; + StrNode* sn = NSTR(node); + + if (NSTRING_IS_AMBIG(node)) return 0; + + start = sn->s; + end = sn->end; + if (start >= end) return 0; + + r = 0; + top_root = root = prev_node = snode = NULL_NODE; + alt_num = 1; + p = start; + while (p < end) { + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, + p, end, items); + if (n < 0) { + r = n; + goto err; + } + + len = enclen(reg->enc, p, end); + + if (n == 0) { + if (IS_NULL(snode)) { + if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { + top_root = root = onig_node_list_add(NULL_NODE, prev_node); + if (IS_NULL(root)) { + onig_node_free(prev_node); + goto mem_err; + } + } + + prev_node = snode = onig_node_new_str(NULL, NULL); + if (IS_NULL(snode)) goto mem_err; + if (IS_NOT_NULL(root)) { + if (IS_NULL(onig_node_list_add(root, snode))) { + onig_node_free(snode); + goto mem_err; + } + } + } + + r = onig_node_str_cat(snode, p, p + len); + if (r != 0) goto err; + } + else { + alt_num *= (n + 1); + if (alt_num > 
THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; + + if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { + top_root = root = onig_node_list_add(NULL_NODE, prev_node); + if (IS_NULL(root)) { + onig_node_free(prev_node); + goto mem_err; + } + } + + r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node); + if (r < 0) goto mem_err; + if (r == 1) { + if (IS_NULL(root)) { + top_root = prev_node; + } + else { + if (IS_NULL(onig_node_list_add(root, prev_node))) { + onig_node_free(prev_node); + goto mem_err; + } + } + + root = NCAR(prev_node); + } + else { /* r == 0 */ + if (IS_NOT_NULL(root)) { + if (IS_NULL(onig_node_list_add(root, prev_node))) { + onig_node_free(prev_node); + goto mem_err; + } + } + } + + snode = NULL_NODE; + } + + p += len; + } + + if (p < end) { + Node *srem; + + r = expand_case_fold_make_rem_string(&srem, p, end, reg); + if (r != 0) goto mem_err; + + if (IS_NOT_NULL(prev_node) && IS_NULL(root)) { + top_root = root = onig_node_list_add(NULL_NODE, prev_node); + if (IS_NULL(root)) { + onig_node_free(srem); + onig_node_free(prev_node); + goto mem_err; + } + } + + if (IS_NULL(root)) { + prev_node = srem; + } + else { + if (IS_NULL(onig_node_list_add(root, srem))) { + onig_node_free(srem); + goto mem_err; + } + } + } + + /* ending */ + top_root = (IS_NOT_NULL(top_root) ? 
top_root : prev_node); + swap_node(node, top_root); + onig_node_free(top_root); + return 0; + + mem_err: + r = ONIGERR_MEMORY; + + err: + onig_node_free(top_root); + return r; +} + + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + +#define CEC_THRES_NUM_BIG_REPEAT 512 +#define CEC_INFINITE_NUM 0x7fffffff + +#define CEC_IN_INFINITE_REPEAT (1<<0) +#define CEC_IN_FINITE_REPEAT (1<<1) +#define CEC_CONT_BIG_REPEAT (1<<2) + +static int +setup_comb_exp_check(Node* node, int state, ScanEnv* env) +{ + int type; + int r = state; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + { + Node* prev = NULL_NODE; + do { + r = setup_comb_exp_check(NCAR(node), r, env); + prev = NCAR(node); + } while (r >= 0 && IS_NOT_NULL(node = NCDR(node))); + } + break; + + case NT_ALT: + { + int ret; + do { + ret = setup_comb_exp_check(NCAR(node), state, env); + r |= ret; + } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node))); + } + break; + + case NT_QTFR: + { + int child_state = state; + int add_state = 0; + QtfrNode* qn = NQTFR(node); + Node* target = qn->target; + int var_num; + + if (! IS_REPEAT_INFINITE(qn->upper)) { + if (qn->upper > 1) { + /* {0,1}, {1,1} are allowed */ + child_state |= CEC_IN_FINITE_REPEAT; + + /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ + if (env->backrefed_mem == 0) { + if (NTYPE(qn->target) == NT_ENCLOSE) { + EncloseNode* en = NENCLOSE(qn->target); + if (en->type == ENCLOSE_MEMORY) { + if (NTYPE(en->target) == NT_QTFR) { + QtfrNode* q = NQTFR(en->target); + if (IS_REPEAT_INFINITE(q->upper) + && q->greedy == qn->greedy) { + qn->upper = (qn->lower == 0 ? 
1 : qn->lower); + if (qn->upper == 1) + child_state = state; + } + } + } + } + } + } + } + + if (state & CEC_IN_FINITE_REPEAT) { + qn->comb_exp_check_num = -1; + } + else { + if (IS_REPEAT_INFINITE(qn->upper)) { + var_num = CEC_INFINITE_NUM; + child_state |= CEC_IN_INFINITE_REPEAT; + } + else { + var_num = qn->upper - qn->lower; + } + + if (var_num >= CEC_THRES_NUM_BIG_REPEAT) + add_state |= CEC_CONT_BIG_REPEAT; + + if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) || + ((state & CEC_CONT_BIG_REPEAT) != 0 && + var_num >= CEC_THRES_NUM_BIG_REPEAT)) { + if (qn->comb_exp_check_num == 0) { + env->num_comb_exp_check++; + qn->comb_exp_check_num = env->num_comb_exp_check; + if (env->curr_max_regnum > env->comb_exp_max_regnum) + env->comb_exp_max_regnum = env->curr_max_regnum; + } + } + } + + r = setup_comb_exp_check(target, child_state, env); + r |= add_state; + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + + switch (en->type) { + case ENCLOSE_MEMORY: + { + if (env->curr_max_regnum < en->regnum) + env->curr_max_regnum = en->regnum; + + r = setup_comb_exp_check(en->target, state, env); + } + break; + + default: + r = setup_comb_exp_check(en->target, state, env); + break; + } + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) + env->has_recursion = 1; + else + r = setup_comb_exp_check(NCALL(node)->target, state, env); + break; +#endif + + default: + break; + } + + return r; +} +#endif + +#define IN_ALT (1<<0) +#define IN_NOT (1<<1) +#define IN_REPEAT (1<<2) +#define IN_VAR_REPEAT (1<<3) + +/* setup_tree does the following work. + 1. check empty loop. (set qn->target_empty_info) + 2. expand ignore-case in char class. + 3. set memory status bit flags. (reg->mem_stats) + 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. + 5. find invalid patterns in look-behind. + 6. expand repeated string. 
+ */ +static int +setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + { + Node* prev = NULL_NODE; + do { + r = setup_tree(NCAR(node), reg, state, env); + if (IS_NOT_NULL(prev) && r == 0) { + r = next_setup(prev, NCAR(node), reg); + } + prev = NCAR(node); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + } + break; + + case NT_ALT: + do { + r = setup_tree(NCAR(node), reg, (state | IN_ALT), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_CCLASS: + break; + + case NT_STR: + if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { + r = expand_case_fold_string(node, reg); + } + break; + + case NT_CTYPE: + case NT_CANY: + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + break; +#endif + + case NT_BREF: + { + int i; + int* p; + Node** nodes = SCANENV_MEM_NODES(env); + BRefNode* br = NBREF(node); + p = BACKREFS_P(br); + for (i = 0; i < br->back_num; i++) { + if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; + BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); + BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); +#ifdef USE_BACKREF_WITH_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); + } +#endif + SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED); + } + } + break; + + case NT_QTFR: + { + OnigDistance d; + QtfrNode* qn = NQTFR(node); + Node* target = qn->target; + + if ((state & IN_REPEAT) != 0) { + qn->state |= NST_IN_REPEAT; + } + + if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { + r = get_min_match_length(target, &d, env); + if (r) break; + if (d == 0) { + qn->target_empty_info = NQ_TARGET_IS_EMPTY; +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT + r = quantifiers_memory_node_info(target); + if (r < 0) break; + if (r > 0) { + qn->target_empty_info = r; + } +#endif +#if 0 + r = get_max_match_length(target, &d, env); + if (r == 0 && d == 0) { + /* ()* ==> ()?, ()+ ==> () */ + qn->upper = 1; 
+ if (qn->lower > 1) qn->lower = 1; + if (NTYPE(target) == NT_STR) { + qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */ + } + } +#endif + } + } + + state |= IN_REPEAT; + if (qn->lower != qn->upper) + state |= IN_VAR_REPEAT; + r = setup_tree(target, reg, state, env); + if (r) break; + + /* expand string */ +#define EXPAND_STRING_MAX_LENGTH 100 + if (NTYPE(target) == NT_STR) { + if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && + qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { + int len = NSTRING_LEN(target); + StrNode* sn = NSTR(target); + + if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { + int i, n = qn->lower; + onig_node_conv_to_str_node(node, NSTR(target)->flag); + for (i = 0; i < n; i++) { + r = onig_node_str_cat(node, sn->s, sn->end); + if (r) break; + } + onig_node_free(target); + break; /* break case NT_QTFR: */ + } + } + } + +#ifdef USE_OP_PUSH_OR_JUMP_EXACT + if (qn->greedy && (qn->target_empty_info != 0)) { + if (NTYPE(target) == NT_QTFR) { + QtfrNode* tqn = NQTFR(target); + if (IS_NOT_NULL(tqn->head_exact)) { + qn->head_exact = tqn->head_exact; + tqn->head_exact = NULL; + } + } + else { + qn->head_exact = get_head_value_node(qn->target, 1, reg); + } + } +#endif + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + + switch (en->type) { + case ENCLOSE_OPTION: + { + OnigOptionType options = reg->options; + reg->options = NENCLOSE(node)->option; + r = setup_tree(NENCLOSE(node)->target, reg, state, env); + reg->options = options; + } + break; + + case ENCLOSE_MEMORY: + if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { + BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); + /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ + } + r = setup_tree(en->target, reg, state, env); + break; + + case ENCLOSE_STOP_BACKTRACK: + { + Node* target = en->target; + r = setup_tree(target, reg, state, env); + if (NTYPE(target) == NT_QTFR) { + QtfrNode* tqn = NQTFR(target); + if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 
1 && + tqn->greedy != 0) { /* (?>a*), a*+ etc... */ + int qtype = NTYPE(tqn->target); + if (IS_NODE_TYPE_SIMPLE(qtype)) + SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); + } + } + } + break; + } + } + break; + + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + + switch (an->type) { + case ANCHOR_PREC_READ: + r = setup_tree(an->target, reg, state, env); + break; + case ANCHOR_PREC_READ_NOT: + r = setup_tree(an->target, reg, (state | IN_NOT), env); + break; + +/* allowed node types in look-behind */ +#define ALLOWED_TYPE_IN_LB \ + ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \ + BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL ) + +#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY ) +#define ALLOWED_ENCLOSE_IN_LB_NOT 0 + +#define ALLOWED_ANCHOR_IN_LB \ +( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) +#define ALLOWED_ANCHOR_IN_LB_NOT \ +( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) + + case ANCHOR_LOOK_BEHIND: + { + r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, + ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB); + if (r < 0) return r; + if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + r = setup_look_behind(node, reg, env); + if (r != 0) return r; + r = setup_tree(an->target, reg, state, env); + } + break; + + case ANCHOR_LOOK_BEHIND_NOT: + { + r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, + ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); + if (r < 0) return r; + if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + r = setup_look_behind(node, reg, env); + if (r != 0) return r; + r = setup_tree(an->target, reg, (state | IN_NOT), env); + } + break; + } + } + break; + + default: + break; + } + + return r; +} + +/* set skip map for Boyer-Moor search */ +static int +set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, + UChar skip[], int** 
int_skip) +{ + int i, len; + + len = end - s; + if (len < ONIG_CHAR_TABLE_SIZE) { + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len; + + for (i = 0; i < len - 1; i++) + skip[s[i]] = len - 1 - i; + } + else { + if (IS_NULL(*int_skip)) { + *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); + if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; + } + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len; + + for (i = 0; i < len - 1; i++) + (*int_skip)[s[i]] = len - 1 - i; + } + return 0; +} + +#define OPT_EXACT_MAXLEN 24 + +typedef struct { + OnigDistance min; /* min byte length */ + OnigDistance max; /* max byte length */ +} MinMaxLen; + +typedef struct { + MinMaxLen mmd; + OnigEncoding enc; + OnigOptionType options; + OnigCaseFoldType case_fold_flag; + ScanEnv* scan_env; +} OptEnv; + +typedef struct { + int left_anchor; + int right_anchor; +} OptAncInfo; + +typedef struct { + MinMaxLen mmd; /* info position */ + OptAncInfo anc; + + int reach_end; + int ignore_case; + int len; + UChar s[OPT_EXACT_MAXLEN]; +} OptExactInfo; + +typedef struct { + MinMaxLen mmd; /* info position */ + OptAncInfo anc; + + int value; /* weighted value */ + UChar map[ONIG_CHAR_TABLE_SIZE]; +} OptMapInfo; + +typedef struct { + MinMaxLen len; + + OptAncInfo anc; + OptExactInfo exb; /* boundary */ + OptExactInfo exm; /* middle */ + OptExactInfo expr; /* prec read (?=...) 
*/ + + OptMapInfo map; /* boundary */ +} NodeOptInfo; + + +static int +map_position_value(OnigEncoding enc, int i) +{ + static const short int ByteValTable[] = { + 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, + 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5, + 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 + }; + + if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { + if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) + return 20; + else + return (int )ByteValTable[i]; + } + else + return 4; /* Take it easy. */ +} + +static int +distance_value(MinMaxLen* mm) +{ + /* 1000 / (min-max-dist + 1) */ + static const short int dist_vals[] = { + 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, + 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, + 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, + 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, + 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, + 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, + 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, + 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, + 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 10, 10, 10, 10, 10 + }; + + int d; + + if (mm->max == ONIG_INFINITE_DISTANCE) return 0; + + d = mm->max - mm->min; + if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0]))) + /* return dist_vals[d] * 16 / (mm->min + 12); */ + return (int )dist_vals[d]; + else + return 1; +} + +static int +comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) +{ + if (v2 <= 0) return -1; + if (v1 <= 0) return 1; + + v1 *= distance_value(d1); + v2 *= distance_value(d2); + + if (v2 > v1) return 1; + if (v2 < v1) return -1; + + if (d2->min < d1->min) return 1; + if (d2->min > d1->min) return -1; + return 0; +} + +static int +is_equal_mml(MinMaxLen* a, MinMaxLen* b) +{ + return (a->min 
== b->min && a->max == b->max) ? 1 : 0; +} + + +static void +set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max) +{ + mml->min = min; + mml->max = max; +} + +static void +clear_mml(MinMaxLen* mml) +{ + mml->min = mml->max = 0; +} + +static void +copy_mml(MinMaxLen* to, MinMaxLen* from) +{ + to->min = from->min; + to->max = from->max; +} + +static void +add_mml(MinMaxLen* to, MinMaxLen* from) +{ + to->min = distance_add(to->min, from->min); + to->max = distance_add(to->max, from->max); +} + +#if 0 +static void +add_len_mml(MinMaxLen* to, OnigDistance len) +{ + to->min = distance_add(to->min, len); + to->max = distance_add(to->max, len); +} +#endif + +static void +alt_merge_mml(MinMaxLen* to, MinMaxLen* from) +{ + if (to->min > from->min) to->min = from->min; + if (to->max < from->max) to->max = from->max; +} + +static void +copy_opt_env(OptEnv* to, OptEnv* from) +{ + *to = *from; +} + +static void +clear_opt_anc_info(OptAncInfo* anc) +{ + anc->left_anchor = 0; + anc->right_anchor = 0; +} + +static void +copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from) +{ + *to = *from; +} + +static void +concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, + OnigDistance left_len, OnigDistance right_len) +{ + clear_opt_anc_info(to); + + to->left_anchor = left->left_anchor; + if (left_len == 0) { + to->left_anchor |= right->left_anchor; + } + + to->right_anchor = right->right_anchor; + if (right_len == 0) { + to->right_anchor |= left->right_anchor; + } +} + +static int +is_left_anchor(int anc) +{ + if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || + anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ || + anc == ANCHOR_PREC_READ_NOT) + return 0; + + return 1; +} + +static int +is_set_opt_anc_info(OptAncInfo* to, int anc) +{ + if ((to->left_anchor & anc) != 0) return 1; + + return ((to->right_anchor & anc) != 0 ? 
1 : 0); +} + +static void +add_opt_anc_info(OptAncInfo* to, int anc) +{ + if (is_left_anchor(anc)) + to->left_anchor |= anc; + else + to->right_anchor |= anc; +} + +static void +remove_opt_anc_info(OptAncInfo* to, int anc) +{ + if (is_left_anchor(anc)) + to->left_anchor &= ~anc; + else + to->right_anchor &= ~anc; +} + +static void +alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add) +{ + to->left_anchor &= add->left_anchor; + to->right_anchor &= add->right_anchor; +} + +static int +is_full_opt_exact_info(OptExactInfo* ex) +{ + return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); +} + +static void +clear_opt_exact_info(OptExactInfo* ex) +{ + clear_mml(&ex->mmd); + clear_opt_anc_info(&ex->anc); + ex->reach_end = 0; + ex->ignore_case = 0; + ex->len = 0; + ex->s[0] = '\0'; +} + +static void +copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from) +{ + *to = *from; +} + +static void +concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) +{ + int i, j, len; + UChar *p, *end; + OptAncInfo tanc; + + if (! to->ignore_case && add->ignore_case) { + if (to->len >= add->len) return ; /* avoid */ + + to->ignore_case = 1; + } + + p = add->s; + end = p + add->len; + for (i = to->len; p < end; ) { + len = enclen(enc, p, end); + if (i + len > OPT_EXACT_MAXLEN) break; + for (j = 0; j < len && p < end; j++) + to->s[i++] = *p++; + } + + to->len = i; + to->reach_end = (p == end ? add->reach_end : 0); + + concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); + if (! 
to->reach_end) tanc.right_anchor = 0; + copy_opt_anc_info(&to->anc, &tanc); +} + +static void +concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, + int raw ARG_UNUSED, OnigEncoding enc) +{ + int i, j, len; + UChar *p; + + for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { + len = enclen(enc, p, end); + if (i + len > OPT_EXACT_MAXLEN) break; + for (j = 0; j < len && p < end; j++) + to->s[i++] = *p++; + } + + to->len = i; +} + +static void +alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) +{ + int i, j, len; + + if (add->len == 0 || to->len == 0) { + clear_opt_exact_info(to); + return ; + } + + if (! is_equal_mml(&to->mmd, &add->mmd)) { + clear_opt_exact_info(to); + return ; + } + + for (i = 0; i < to->len && i < add->len; ) { + if (to->s[i] != add->s[i]) break; + len = enclen(env->enc, to->s + i, to->s + to->len); + + for (j = 1; j < len; j++) { + if (to->s[i+j] != add->s[i+j]) break; + } + if (j < len) break; + i += len; + } + + if (! add->reach_end || i < add->len || i < to->len) { + to->reach_end = 0; + } + to->len = i; + to->ignore_case |= add->ignore_case; + + alt_merge_opt_anc_info(&to->anc, &add->anc); + if (! 
to->reach_end) to->anc.right_anchor = 0; +} + +static void +select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) +{ + int v1, v2; + + v1 = now->len; + v2 = alt->len; + + if (v2 == 0) { + return ; + } + else if (v1 == 0) { + copy_opt_exact_info(now, alt); + return ; + } + else if (v1 <= 2 && v2 <= 2) { + /* ByteValTable[x] is big value --> low price */ + v2 = map_position_value(enc, now->s[0]); + v1 = map_position_value(enc, alt->s[0]); + + if (now->len > 1) v1 += 5; + if (alt->len > 1) v2 += 5; + } + + if (now->ignore_case == 0) v1 *= 2; + if (alt->ignore_case == 0) v2 *= 2; + + if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) + copy_opt_exact_info(now, alt); +} + +static void +clear_opt_map_info(OptMapInfo* map) +{ + static const OptMapInfo clean_info = { + {0, 0}, {0, 0}, 0, + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + } + }; + + xmemcpy(map, &clean_info, sizeof(OptMapInfo)); +} + +static void +copy_opt_map_info(OptMapInfo* to, OptMapInfo* from) +{ + *to = *from; +} + +static void +add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) +{ + if (map->map[c] == 0) { + map->map[c] = 1; + map->value += map_position_value(enc, c); + } +} + +static int 
+add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, + OnigEncoding enc, OnigCaseFoldType case_fold_flag) +{ + OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + int i, n; + + add_char_opt_map_info(map, p[0], enc); + + case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); + if (n < 0) return n; + + for (i = 0; i < n; i++) { + ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf); + add_char_opt_map_info(map, buf[0], enc); + } + + return 0; +} + +static void +select_opt_map_info(OptMapInfo* now, OptMapInfo* alt) +{ + const int z = 1<<15; /* 32768: something big value */ + + int v1, v2; + + if (alt->value == 0) return ; + if (now->value == 0) { + copy_opt_map_info(now, alt); + return ; + } + + v1 = z / now->value; + v2 = z / alt->value; + if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) + copy_opt_map_info(now, alt); +} + +static int +comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) +{ +#define COMP_EM_BASE 20 + int ve, vm; + + if (m->value <= 0) return -1; + + ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); + vm = COMP_EM_BASE * 5 * 2 / m->value; + return comp_distance_value(&e->mmd, &m->mmd, ve, vm); +} + +static void +alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) +{ + int i, val; + + /* if (! 
is_equal_mml(&to->mmd, &add->mmd)) return ; */ + if (to->value == 0) return ; + if (add->value == 0 || to->mmd.max < add->mmd.min) { + clear_opt_map_info(to); + return ; + } + + alt_merge_mml(&to->mmd, &add->mmd); + + val = 0; + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { + if (add->map[i]) + to->map[i] = 1; + + if (to->map[i]) + val += map_position_value(enc, i); + } + to->value = val; + + alt_merge_opt_anc_info(&to->anc, &add->anc); +} + +static void +set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd) +{ + copy_mml(&(opt->exb.mmd), mmd); + copy_mml(&(opt->expr.mmd), mmd); + copy_mml(&(opt->map.mmd), mmd); +} + +static void +clear_node_opt_info(NodeOptInfo* opt) +{ + clear_mml(&opt->len); + clear_opt_anc_info(&opt->anc); + clear_opt_exact_info(&opt->exb); + clear_opt_exact_info(&opt->exm); + clear_opt_exact_info(&opt->expr); + clear_opt_map_info(&opt->map); +} + +static void +copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from) +{ + *to = *from; +} + +static void +concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) +{ + int exb_reach, exm_reach; + OptAncInfo tanc; + + concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max); + copy_opt_anc_info(&to->anc, &tanc); + + if (add->exb.len > 0 && to->len.max == 0) { + concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, + to->len.max, add->len.max); + copy_opt_anc_info(&add->exb.anc, &tanc); + } + + if (add->map.value > 0 && to->len.max == 0) { + if (add->map.mmd.max == 0) + add->map.anc.left_anchor |= to->anc.left_anchor; + } + + exb_reach = to->exb.reach_end; + exm_reach = to->exm.reach_end; + + if (add->len.max != 0) + to->exb.reach_end = to->exm.reach_end = 0; + + if (add->exb.len > 0) { + if (exb_reach) { + concat_opt_exact_info(&to->exb, &add->exb, enc); + clear_opt_exact_info(&add->exb); + } + else if (exm_reach) { + concat_opt_exact_info(&to->exm, &add->exb, enc); + clear_opt_exact_info(&add->exb); + } + } + select_opt_exact_info(enc, &to->exm, &add->exb); + 
select_opt_exact_info(enc, &to->exm, &add->exm); + + if (to->expr.len > 0) { + if (add->len.max > 0) { + if (to->expr.len > (int )add->len.max) + to->expr.len = add->len.max; + + if (to->expr.mmd.max == 0) + select_opt_exact_info(enc, &to->exb, &to->expr); + else + select_opt_exact_info(enc, &to->exm, &to->expr); + } + } + else if (add->expr.len > 0) { + copy_opt_exact_info(&to->expr, &add->expr); + } + + select_opt_map_info(&to->map, &add->map); + + add_mml(&to->len, &add->len); +} + +static void +alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) +{ + alt_merge_opt_anc_info (&to->anc, &add->anc); + alt_merge_opt_exact_info(&to->exb, &add->exb, env); + alt_merge_opt_exact_info(&to->exm, &add->exm, env); + alt_merge_opt_exact_info(&to->expr, &add->expr, env); + alt_merge_opt_map_info(env->enc, &to->map, &add->map); + + alt_merge_mml(&to->len, &add->len); +} + + +#define MAX_NODE_OPT_INFO_REF_COUNT 5 + +static int +optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) +{ + int type; + int r = 0; + + clear_node_opt_info(opt); + set_bound_node_opt_info(opt, &env->mmd); + + type = NTYPE(node); + switch (type) { + case NT_LIST: + { + OptEnv nenv; + NodeOptInfo nopt; + Node* nd = node; + + copy_opt_env(&nenv, env); + do { + r = optimize_node_left(NCAR(nd), &nopt, &nenv); + if (r == 0) { + add_mml(&nenv.mmd, &nopt.len); + concat_left_node_opt_info(env->enc, opt, &nopt); + } + } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd))); + } + break; + + case NT_ALT: + { + NodeOptInfo nopt; + Node* nd = node; + + do { + r = optimize_node_left(NCAR(nd), &nopt, env); + if (r == 0) { + if (nd == node) copy_node_opt_info(opt, &nopt); + else alt_merge_node_opt_info(opt, &nopt, env); + } + } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd))); + } + break; + + case NT_STR: + { + StrNode* sn = NSTR(node); + int slen = sn->end - sn->s; + int is_raw = NSTRING_IS_RAW(node); + + if (! 
NSTRING_IS_AMBIG(node)) { + concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, + NSTRING_IS_RAW(node), env->enc); + if (slen > 0) { + add_char_opt_map_info(&opt->map, *(sn->s), env->enc); + } + set_mml(&opt->len, slen, slen); + } + else { + int max; + + if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { + int n = onigenc_strlen(env->enc, sn->s, sn->end); + max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; + } + else { + concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, + is_raw, env->enc); + opt->exb.ignore_case = 1; + + if (slen > 0) { + r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, + env->enc, env->case_fold_flag); + if (r != 0) break; + } + + max = slen; + } + + set_mml(&opt->len, slen, max); + } + + if (opt->exb.len == slen) + opt->exb.reach_end = 1; + } + break; + + case NT_CCLASS: + { + int i, z; + CClassNode* cc = NCCLASS(node); + + /* no need to check ignore case. (setted in setup_tree()) */ + + if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { + OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); + OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + + set_mml(&opt->len, min, max); + } + else { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + z = BITSET_AT(cc->bs, i); + if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + set_mml(&opt->len, 1, 1); + } + } + break; + + case NT_CTYPE: + { + int i, min, max; + + max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + + if (max == 1) { + min = 1; + + switch (NCTYPE(node)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(node)->not != 0) { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (! 
ONIGENC_IS_CODE_WORD(env->enc, i)) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + } + else { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (ONIGENC_IS_CODE_WORD(env->enc, i)) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + } + break; + } + } + else { + min = ONIGENC_MBC_MINLEN(env->enc); + } + set_mml(&opt->len, min, max); + } + break; + + case NT_CANY: + { + OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); + OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + set_mml(&opt->len, min, max); + } + break; + + case NT_ANCHOR: + switch (NANCHOR(node)->type) { + case ANCHOR_BEGIN_BUF: + case ANCHOR_BEGIN_POSITION: + case ANCHOR_BEGIN_LINE: + case ANCHOR_END_BUF: + case ANCHOR_SEMI_END_BUF: + case ANCHOR_END_LINE: + add_opt_anc_info(&opt->anc, NANCHOR(node)->type); + break; + + case ANCHOR_PREC_READ: + { + NodeOptInfo nopt; + + r = optimize_node_left(NANCHOR(node)->target, &nopt, env); + if (r == 0) { + if (nopt.exb.len > 0) + copy_opt_exact_info(&opt->expr, &nopt.exb); + else if (nopt.exm.len > 0) + copy_opt_exact_info(&opt->expr, &nopt.exm); + + opt->expr.reach_end = 0; + + if (nopt.map.value > 0) + copy_opt_map_info(&opt->map, &nopt.map); + } + } + break; + + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. 
*/ + case ANCHOR_LOOK_BEHIND_NOT: + break; + } + break; + + case NT_BREF: + { + int i; + int* backs; + OnigDistance min, max, tmin, tmax; + Node** nodes = SCANENV_MEM_NODES(env->scan_env); + BRefNode* br = NBREF(node); + + if (br->state & NST_RECURSION) { + set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); + break; + } + backs = BACKREFS_P(br); + r = get_min_match_length(nodes[backs[0]], &min, env->scan_env); + if (r != 0) break; + r = get_max_match_length(nodes[backs[0]], &max, env->scan_env); + if (r != 0) break; + for (i = 1; i < br->back_num; i++) { + r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env); + if (r != 0) break; + r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env); + if (r != 0) break; + if (min > tmin) min = tmin; + if (max < tmax) max = tmax; + } + if (r == 0) set_mml(&opt->len, min, max); + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) + set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); + else { + OnigOptionType save = env->options; + env->options = NENCLOSE(NCALL(node)->target)->option; + r = optimize_node_left(NCALL(node)->target, opt, env); + env->options = save; + } + break; +#endif + + case NT_QTFR: + { + int i; + OnigDistance min, max; + NodeOptInfo nopt; + QtfrNode* qn = NQTFR(node); + + r = optimize_node_left(qn->target, &nopt, env); + if (r) break; + + if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { + if (env->mmd.max == 0 && + NTYPE(qn->target) == NT_CANY && qn->greedy) { + if (IS_MULTILINE(env->options)) + add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); + else + add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); + } + } + else { + if (qn->lower > 0) { + copy_node_opt_info(opt, &nopt); + if (nopt.exb.len > 0) { + if (nopt.exb.reach_end) { + for (i = 2; i < qn->lower && + ! 
is_full_opt_exact_info(&opt->exb); i++) { + concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); + } + if (i < qn->lower) { + opt->exb.reach_end = 0; + } + } + } + + if (qn->lower != qn->upper) { + opt->exb.reach_end = 0; + opt->exm.reach_end = 0; + } + if (qn->lower > 1) + opt->exm.reach_end = 0; + } + } + + min = distance_multiply(nopt.len.min, qn->lower); + if (IS_REPEAT_INFINITE(qn->upper)) + max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0); + else + max = distance_multiply(nopt.len.max, qn->upper); + + set_mml(&opt->len, min, max); + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + + switch (en->type) { + case ENCLOSE_OPTION: + { + OnigOptionType save = env->options; + + env->options = en->option; + r = optimize_node_left(en->target, opt, env); + env->options = save; + } + break; + + case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL + en->opt_count++; + if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { + OnigDistance min, max; + + min = 0; + max = ONIG_INFINITE_DISTANCE; + if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len; + if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len; + set_mml(&opt->len, min, max); + } + else +#endif + { + r = optimize_node_left(en->target, opt, env); + + if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { + if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum)) + remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); + } + } + break; + + case ENCLOSE_STOP_BACKTRACK: + r = optimize_node_left(en->target, opt, env); + break; + } + } + break; + + default: +#ifdef ONIG_DEBUG + fprintf(stderr, "optimize_node_left: undefined node type %d\n", + NTYPE(node)); +#endif + r = ONIGERR_TYPE_BUG; + break; + } + + return r; +} + +static int +set_optimize_exact_info(regex_t* reg, OptExactInfo* e) +{ + int r; + + if (e->len == 0) return 0; + + if (e->ignore_case) { + reg->exact = (UChar* )xmalloc(e->len); + CHECK_NULL_RETURN_MEMERR(reg->exact); + xmemcpy(reg->exact, e->s, e->len); + reg->exact_end = 
reg->exact + e->len; + reg->optimize = ONIG_OPTIMIZE_EXACT_IC; + } + else { + int allow_reverse; + + reg->exact = str_dup(e->s, e->s + e->len); + CHECK_NULL_RETURN_MEMERR(reg->exact); + reg->exact_end = reg->exact + e->len; + + allow_reverse = + ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); + + if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { + r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, + reg->map, &(reg->int_map)); + if (r) return r; + + reg->optimize = (allow_reverse != 0 + ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); + } + else { + reg->optimize = ONIG_OPTIMIZE_EXACT; + } + } + + reg->dmin = e->mmd.min; + reg->dmax = e->mmd.max; + + if (reg->dmin != ONIG_INFINITE_DISTANCE) { + reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact); + } + + return 0; +} + +static void +set_optimize_map_info(regex_t* reg, OptMapInfo* m) +{ + int i; + + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + reg->map[i] = m->map[i]; + + reg->optimize = ONIG_OPTIMIZE_MAP; + reg->dmin = m->mmd.min; + reg->dmax = m->mmd.max; + + if (reg->dmin != ONIG_INFINITE_DISTANCE) { + reg->threshold_len = reg->dmin + 1; + } +} + +static void +set_sub_anchor(regex_t* reg, OptAncInfo* anc) +{ + reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE; + reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE; +} + +#ifdef ONIG_DEBUG +static void print_optimize_info(FILE* f, regex_t* reg); +#endif + +static int +set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) +{ + + int r; + NodeOptInfo opt; + OptEnv env; + + env.enc = reg->enc; + env.options = reg->options; + env.case_fold_flag = reg->case_fold_flag; + env.scan_env = scan_env; + clear_mml(&env.mmd); + + r = optimize_node_left(node, &opt, &env); + if (r) return r; + + reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | + ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML); + + reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); 
+ + if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { + reg->anchor_dmin = opt.len.min; + reg->anchor_dmax = opt.len.max; + } + + if (opt.exb.len > 0 || opt.exm.len > 0) { + select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); + if (opt.map.value > 0 && + comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { + goto set_map; + } + else { + r = set_optimize_exact_info(reg, &opt.exb); + set_sub_anchor(reg, &opt.exb.anc); + } + } + else if (opt.map.value > 0) { + set_map: + set_optimize_map_info(reg, &opt.map); + set_sub_anchor(reg, &opt.map.anc); + } + else { + reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE; + if (opt.len.max == 0) + reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE; + } + +#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) + print_optimize_info(stderr, reg); +#endif + return r; +} + +static void +clear_optimize_info(regex_t* reg) +{ + reg->optimize = ONIG_OPTIMIZE_NONE; + reg->anchor = 0; + reg->anchor_dmin = 0; + reg->anchor_dmax = 0; + reg->sub_anchor = 0; + reg->exact_end = (UChar* )NULL; + reg->threshold_len = 0; + if (IS_NOT_NULL(reg->exact)) { + xfree(reg->exact); + reg->exact = (UChar* )NULL; + } +} + +#ifdef ONIG_DEBUG + +/* Debug helper: writes the pattern bytes [s, end) to fp between "\nPATTERN: /" and "/\n". When the encoding's minimum character length is above one byte, each character is decoded and code points >= 0x80 are printed in hex. */ +static void print_enc_string(FILE* fp, OnigEncoding enc, + const UChar *s, const UChar *end) +{ + fprintf(fp, "\nPATTERN: /"); + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + const UChar *p; + OnigCodePoint code; + + p = s; + while (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code >= 0x80) { + fprintf(fp, " 0x%04x ", (int )code); + } + else { + fputc((int )code, fp); + } + + /* enclen() also takes the end pointer, as at its other call sites in this file */ + p += enclen(enc, p, end); + } + } + else { + while (s < end) { + fputc((int )*s, fp); + s++; + } + } + + fprintf(fp, "/\n"); +} + +static void +print_distance_range(FILE* f, OnigDistance a, OnigDistance b) +{ + if (a == ONIG_INFINITE_DISTANCE) + fputs("inf", f); + else + fprintf(f, "(%u)", a); + + fputs("-", f); + + if (b == ONIG_INFINITE_DISTANCE) + fputs("inf", f); + else + fprintf(f, "(%u)", b); +} + +static
void +print_anchor(FILE* f, int anchor) +{ + int q = 0; + + fprintf(f, "["); + + if (anchor & ANCHOR_BEGIN_BUF) { + fprintf(f, "begin-buf"); + q = 1; + } + if (anchor & ANCHOR_BEGIN_LINE) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "begin-line"); + } + if (anchor & ANCHOR_BEGIN_POSITION) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "begin-pos"); + } + if (anchor & ANCHOR_END_BUF) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "end-buf"); + } + if (anchor & ANCHOR_SEMI_END_BUF) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "semi-end-buf"); + } + if (anchor & ANCHOR_END_LINE) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "end-line"); + } + if (anchor & ANCHOR_ANYCHAR_STAR) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "anychar-star"); + } + if (anchor & ANCHOR_ANYCHAR_STAR_ML) { + if (q) fprintf(f, ", "); + fprintf(f, "anychar-star-pl"); + } + + fprintf(f, "]"); +} + +/* Debug helper: prints reg's optimize kind and anchors to f, then either the exact search string or the first-character map. */ +static void +print_optimize_info(FILE* f, regex_t* reg) +{ + static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", + "EXACT_IC", "MAP" }; + + fprintf(f, "optimize: %s\n", on[reg->optimize]); + fprintf(f, " anchor: "); print_anchor(f, reg->anchor); + if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0) + print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax); + fprintf(f, "\n"); + + if (reg->optimize) { + fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor); + fprintf(f, "\n"); + } + fprintf(f, "\n"); + + if (reg->exact) { + UChar *p; + fprintf(f, "exact: ["); + for (p = reg->exact; p < reg->exact_end; p++) { + fputc(*p, f); + } + /* a pointer difference is not an int; cast so the argument matches %d */ + fprintf(f, "]: length: %d\n", (int )(reg->exact_end - reg->exact)); + } + else if (reg->optimize & ONIG_OPTIMIZE_MAP) { + int c, i, n = 0; + + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + if (reg->map[i]) n++; + + fprintf(f, "map: n=%d\n", n); + if (n > 0) { + c = 0; + fputc('[', f); + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { + if (reg->map[i] != 0) { + if (c > 0) fputs(", ", f); + c++; + if
(ONIGENC_MBC_MAXLEN(reg->enc) == 1 && + ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i)) + fputc(i, f); + else + fprintf(f, "%d", i); + } + } + fprintf(f, "]\n"); + } + } +} +#endif /* ONIG_DEBUG */ + + +static void +onig_free_body(regex_t* reg) +{ + if (IS_NOT_NULL(reg->p)) xfree(reg->p); + if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); + if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); + if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); + if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); + if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); + +#ifdef USE_NAMED_GROUP + onig_names_free(reg); +#endif +} + +extern void +onig_free(regex_t* reg) +{ + if (IS_NOT_NULL(reg)) { + onig_free_body(reg); + xfree(reg); + } +} + +#define REGEX_TRANSFER(to,from) do {\ + (to)->state = ONIG_STATE_MODIFY;\ + onig_free_body(to);\ + xmemcpy(to, from, sizeof(regex_t));\ + xfree(from);\ +} while (0) + +extern void +onig_transfer(regex_t* to, regex_t* from) +{ + THREAD_ATOMIC_START; + REGEX_TRANSFER(to, from); + THREAD_ATOMIC_END; +} + +#define REGEX_CHAIN_HEAD(reg) do {\ + while (IS_NOT_NULL((reg)->chain)) {\ + (reg) = (reg)->chain;\ + }\ +} while (0) + +extern void +onig_chain_link_add(regex_t* to, regex_t* add) +{ + THREAD_ATOMIC_START; + REGEX_CHAIN_HEAD(to); + to->chain = add; + THREAD_ATOMIC_END; +} + +extern void +onig_chain_reduce(regex_t* reg) +{ + regex_t *head, *prev; + + prev = reg; + head = prev->chain; + if (IS_NOT_NULL(head)) { + reg->state = ONIG_STATE_MODIFY; + while (IS_NOT_NULL(head->chain)) { + prev = head; + head = head->chain; + } + prev->chain = (regex_t* )NULL; + REGEX_TRANSFER(reg, head); + } +} + +#if 0 +extern int +onig_clone(regex_t** to, regex_t* from) +{ + int r, size; + regex_t* reg; + +#ifdef USE_MULTI_THREAD_SYSTEM + if (ONIG_STATE(from) >= ONIG_STATE_NORMAL) { + ONIG_STATE_INC(from); + if (IS_NOT_NULL(from->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_chain_reduce(from); + ONIG_STATE_INC(from); + } + } + 
else { + int n = 0; + while (ONIG_STATE(from) < ONIG_STATE_NORMAL) { + if (++n > THREAD_PASS_LIMIT_COUNT) + return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; + THREAD_PASS; + } + ONIG_STATE_INC(from); + } +#endif /* USE_MULTI_THREAD_SYSTEM */ + + /* allocate the destination regex; its address is passed so onig_alloc_init can store the new pointer */ + r = onig_alloc_init(&reg, ONIG_OPTION_NONE, ONIGENC_CASE_FOLD_DEFAULT, + from->enc, ONIG_SYNTAX_DEFAULT); + if (r != 0) { + ONIG_STATE_DEC(from); + return r; + } + + xmemcpy(reg, from, sizeof(onig_t)); + reg->chain = (regex_t* )NULL; + reg->state = ONIG_STATE_NORMAL; + + if (from->p) { + reg->p = (UChar* )xmalloc(reg->alloc); + if (IS_NULL(reg->p)) goto mem_error; + xmemcpy(reg->p, from->p, reg->alloc); + } + + if (from->exact) { + reg->exact = (UChar* )xmalloc(from->exact_end - from->exact); + if (IS_NULL(reg->exact)) goto mem_error; + reg->exact_end = reg->exact + (from->exact_end - from->exact); + xmemcpy(reg->exact, from->exact, reg->exact_end - reg->exact); + } + + if (from->int_map) { + size = sizeof(int) * ONIG_CHAR_TABLE_SIZE; + reg->int_map = (int* )xmalloc(size); + if (IS_NULL(reg->int_map)) goto mem_error; + xmemcpy(reg->int_map, from->int_map, size); + } + + if (from->int_map_backward) { + size = sizeof(int) * ONIG_CHAR_TABLE_SIZE; + reg->int_map_backward = (int* )xmalloc(size); + if (IS_NULL(reg->int_map_backward)) goto mem_error; + xmemcpy(reg->int_map_backward, from->int_map_backward, size); + } + +#ifdef USE_NAMED_GROUP + reg->name_table = names_clone(from); /* names_clone is not implemented */ +#endif + + ONIG_STATE_DEC(from); + *to = reg; + return 0; + + mem_error: + ONIG_STATE_DEC(from); + return ONIGERR_MEMORY; +} +#endif + +#ifdef ONIG_DEBUG +static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); +#endif +#ifdef ONIG_DEBUG_PARSE_TREE +static void print_tree P_((FILE* f, Node* node)); +#endif + +extern int +onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, + OnigErrorInfo* einfo) +{ +#define COMPILE_INIT_SIZE 20 + + int r, init_size; + Node* root; + ScanEnv scan_env;
+#ifdef USE_SUBEXP_CALL + UnsetAddrList uslist; +#endif + + reg->state = ONIG_STATE_COMPILING; + +#ifdef ONIG_DEBUG + print_enc_string(stderr, reg->enc, pattern, pattern_end); +#endif + + if (reg->alloc == 0) { + init_size = (pattern_end - pattern) * 2; + if (init_size <= 0) init_size = COMPILE_INIT_SIZE; + r = BBUF_INIT(reg, init_size); + if (r != 0) goto end; + } + else + reg->used = 0; + + reg->num_mem = 0; + reg->num_repeat = 0; + reg->num_null_check = 0; + reg->repeat_range_alloc = 0; + reg->repeat_range = (OnigRepeatRange* )NULL; +#ifdef USE_COMBINATION_EXPLOSION_CHECK + reg->num_comb_exp_check = 0; +#endif + + r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); + if (r != 0) goto err; + +#ifdef USE_NAMED_GROUP + /* mixed use named group and no-named group */ + if (scan_env.num_named > 0 && + IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { + if (scan_env.num_named != scan_env.num_mem) + r = disable_noname_group_capture(&root, reg, &scan_env); + else + r = numbered_ref_check(root); + + if (r != 0) goto err; + } +#endif + +#ifdef USE_SUBEXP_CALL + if (scan_env.num_call > 0) { + r = unset_addr_list_init(&uslist, scan_env.num_call); + if (r != 0) goto err; + scan_env.unset_addr_list = &uslist; + r = setup_subexp_call(root, &scan_env); + if (r != 0) goto err_unset; + r = subexp_recursive_check_trav(root, &scan_env); + if (r < 0) goto err_unset; + r = subexp_inf_recursive_check_trav(root, &scan_env); + if (r != 0) goto err_unset; + + reg->num_call = scan_env.num_call; + } + else + reg->num_call = 0; +#endif + + r = setup_tree(root, reg, 0, &scan_env); + if (r != 0) goto err_unset; + +#ifdef ONIG_DEBUG_PARSE_TREE + print_tree(stderr, root); +#endif + + reg->capture_history = scan_env.capture_history; + reg->bt_mem_start = scan_env.bt_mem_start; + reg->bt_mem_start |= reg->capture_history; + if (IS_FIND_CONDITION(reg->options)) + BIT_STATUS_ON_ALL(reg->bt_mem_end); 
+ else { + reg->bt_mem_end = scan_env.bt_mem_end; + reg->bt_mem_end |= reg->capture_history; + } + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + if (scan_env.backrefed_mem == 0 +#ifdef USE_SUBEXP_CALL + || scan_env.num_call == 0 +#endif + ) { + setup_comb_exp_check(root, 0, &scan_env); +#ifdef USE_SUBEXP_CALL + if (scan_env.has_recursion != 0) { + scan_env.num_comb_exp_check = 0; + } + else +#endif + if (scan_env.comb_exp_max_regnum > 0) { + int i; + for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { + if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) { + scan_env.num_comb_exp_check = 0; + break; + } + } + } + } + + reg->num_comb_exp_check = scan_env.num_comb_exp_check; +#endif + + clear_optimize_info(reg); +#ifndef ONIG_DONT_OPTIMIZE + r = set_optimize_info_from_tree(root, reg, &scan_env); + if (r != 0) goto err_unset; +#endif + + if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) { + xfree(scan_env.mem_nodes_dynamic); + scan_env.mem_nodes_dynamic = (Node** )NULL; + } + + r = compile_tree(root, reg); + if (r == 0) { + r = add_opcode(reg, OP_END); +#ifdef USE_SUBEXP_CALL + if (scan_env.num_call > 0) { + r = unset_addr_list_fix(&uslist, reg); + unset_addr_list_end(&uslist); + if (r) goto err; + } +#endif + + if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)) + reg->stack_pop_level = STACK_POP_LEVEL_ALL; + else { + if (reg->bt_mem_start != 0) + reg->stack_pop_level = STACK_POP_LEVEL_MEM_START; + else + reg->stack_pop_level = STACK_POP_LEVEL_FREE; + } + } +#ifdef USE_SUBEXP_CALL + else if (scan_env.num_call > 0) { + unset_addr_list_end(&uslist); + } +#endif + onig_node_free(root); + +#ifdef ONIG_DEBUG_COMPILE +#ifdef USE_NAMED_GROUP + onig_print_names(stderr, reg); +#endif + print_compiled_byte_code_list(stderr, reg); +#endif + + end: + reg->state = ONIG_STATE_NORMAL; + return r; + + err_unset: +#ifdef USE_SUBEXP_CALL + if (scan_env.num_call > 0) { + unset_addr_list_end(&uslist); + } +#endif + err: + if (IS_NOT_NULL(scan_env.error)) { + if (IS_NOT_NULL(einfo)) { + 
einfo->enc = scan_env.enc; + einfo->par = scan_env.error; + einfo->par_end = scan_env.error_end; + } + } + + onig_node_free(root); + if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) + xfree(scan_env.mem_nodes_dynamic); + return r; +} + +#ifdef USE_RECOMPILE_API +extern int +onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* einfo) +{ + int r; + regex_t *new_reg; + + r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo); + if (r) return r; + if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_transfer(reg, new_reg); + } + else { + onig_chain_link_add(reg, new_reg); + } + return 0; +} +#endif + +static int onig_inited = 0; + +extern int +onig_alloc_init(regex_t** reg, OnigOptionType option, + OnigCaseFoldType case_fold_flag, + OnigEncoding enc, const OnigSyntaxType* syntax) +{ + if (! onig_inited) + onig_init(); + + if (ONIGENC_IS_UNDEF(enc)) + return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED; + + if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) + == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) { + return ONIGERR_INVALID_COMBINATION_OF_OPTIONS; + } + + *reg = (regex_t* )xmalloc(sizeof(regex_t)); + if (IS_NULL(*reg)) return ONIGERR_MEMORY; + (*reg)->state = ONIG_STATE_MODIFY; + + if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) { + option |= syntax->options; + option &= ~ONIG_OPTION_SINGLELINE; + } + else + option |= syntax->options; + + (*reg)->enc = enc; + (*reg)->options = option; + (*reg)->syntax = syntax; + (*reg)->optimize = 0; + (*reg)->exact = (UChar* )NULL; + (*reg)->int_map = (int* )NULL; + (*reg)->int_map_backward = (int* )NULL; + (*reg)->chain = (regex_t* )NULL; + + (*reg)->p = (UChar* )NULL; + (*reg)->alloc = 0; + (*reg)->used = 0; + (*reg)->name_table = (void* )NULL; + + (*reg)->case_fold_flag = case_fold_flag; + return 0; +} + +extern int +onig_new(regex_t** reg, const UChar* pattern, const 
UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, + OnigErrorInfo* einfo) +{ + int r; + + if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; + + r = onig_alloc_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, + enc, syntax); + if (r) return r; + + r = onig_compile(*reg, pattern, pattern_end, einfo); + if (r) { + onig_free(*reg); + *reg = NULL; + } + return r; +} + +extern int +onig_init(void) +{ + if (onig_inited != 0) + return 0; + + THREAD_SYSTEM_INIT; + THREAD_ATOMIC_START; + + onig_inited = 1; + + onigenc_init(); + /* onigenc_set_default_caseconv_table((UChar* )0); */ + +#ifdef ONIG_DEBUG_STATISTICS + onig_statistics_init(); +#endif + + THREAD_ATOMIC_END; + return 0; +} + + +extern int +onig_end(void) +{ + THREAD_ATOMIC_START; + +#ifdef ONIG_DEBUG_STATISTICS + onig_print_statistics(stderr); +#endif + +#ifdef USE_SHARED_CCLASS_TABLE + onig_free_shared_cclass_table(); +#endif + +#ifdef USE_PARSE_TREE_NODE_RECYCLE + onig_free_node_list(); +#endif + + onig_inited = 0; + + THREAD_ATOMIC_END; + THREAD_SYSTEM_END; + return 0; +} + +extern int +onig_is_in_code_range(const UChar* p, OnigCodePoint code) +{ + OnigCodePoint n, *data; + OnigCodePoint low, high, x; + + GET_CODE_POINT(n, p); + data = (OnigCodePoint* )p; + data++; + + for (low = 0, high = n; low < high; ) { + x = (low + high) >> 1; + if (code > data[x * 2 + 1]) + low = x + 1; + else + high = x; + } + + return ((low < n && code >= data[low * 2]) ? 1 : 0); +} + +extern int +onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc) +{ + int found; + + if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) { + if (IS_NULL(cc->mbuf)) { + found = 0; + } + else { + found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); + } + } + else { + found = (BITSET_AT(cc->bs, code) == 0 ? 
0 : 1); + } + + if (IS_NCCLASS_NOT(cc)) + return !found; + else + return found; +} + +extern int +onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) +{ + int len; + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + len = 2; + } + else { + len = ONIGENC_CODE_TO_MBCLEN(enc, code); + } + return onig_is_code_in_cc_len(len, code, cc); +} + + +#ifdef ONIG_DEBUG + +/* arguments type */ +#define ARG_SPECIAL -1 +#define ARG_NON 0 +#define ARG_RELADDR 1 +#define ARG_ABSADDR 2 +#define ARG_LENGTH 3 +#define ARG_MEMNUM 4 +#define ARG_OPTION 5 +#define ARG_STATE_CHECK 6 + +OnigOpInfoType OnigOpInfo[] = { + { OP_FINISH, "finish", ARG_NON }, + { OP_END, "end", ARG_NON }, + { OP_EXACT1, "exact1", ARG_SPECIAL }, + { OP_EXACT2, "exact2", ARG_SPECIAL }, + { OP_EXACT3, "exact3", ARG_SPECIAL }, + { OP_EXACT4, "exact4", ARG_SPECIAL }, + { OP_EXACT5, "exact5", ARG_SPECIAL }, + { OP_EXACTN, "exactn", ARG_SPECIAL }, + { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, + { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, + { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, + { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, + { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, + { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, + { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, + { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, + { OP_CCLASS, "cclass", ARG_SPECIAL }, + { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, + { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, + { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, + { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, + { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, + { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, + { OP_ANYCHAR, "anychar", ARG_NON }, + { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, + { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, + { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, + { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, + { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, + { OP_WORD, "word", ARG_NON }, + { 
OP_NOT_WORD, "not-word", ARG_NON }, + { OP_WORD_BOUND, "word-bound", ARG_NON }, + { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON }, + { OP_WORD_BEGIN, "word-begin", ARG_NON }, + { OP_WORD_END, "word-end", ARG_NON }, + { OP_BEGIN_BUF, "begin-buf", ARG_NON }, + { OP_END_BUF, "end-buf", ARG_NON }, + { OP_BEGIN_LINE, "begin-line", ARG_NON }, + { OP_END_LINE, "end-line", ARG_NON }, + { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, + { OP_BEGIN_POSITION, "begin-position", ARG_NON }, + { OP_BACKREF1, "backref1", ARG_NON }, + { OP_BACKREF2, "backref2", ARG_NON }, + { OP_BACKREFN, "backrefn", ARG_MEMNUM }, + { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, + { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, + { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, + { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL }, + { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, + { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, + { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, + { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, + { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, + { OP_SET_OPTION, "set-option", ARG_OPTION }, + { OP_FAIL, "fail", ARG_NON }, + { OP_JUMP, "jump", ARG_RELADDR }, + { OP_PUSH, "push", ARG_RELADDR }, + { OP_POP, "pop", ARG_NON }, + { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, + { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, + { OP_REPEAT, "repeat", ARG_SPECIAL }, + { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, + { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, + { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, + { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, + { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, + { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM }, + { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, + { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, + { 
OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, + { OP_PUSH_POS, "push-pos", ARG_NON }, + { OP_POP_POS, "pop-pos", ARG_NON }, + { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, + { OP_FAIL_POS, "fail-pos", ARG_NON }, + { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, + { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, + { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, + { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL }, + { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, + { OP_CALL, "call", ARG_ABSADDR }, + { OP_RETURN, "return", ARG_NON }, + { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL }, + { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, + { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK }, + { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK }, + { OP_STATE_CHECK_ANYCHAR_ML_STAR, + "state-check-anychar-ml*", ARG_STATE_CHECK }, + { -1, "", ARG_NON } +}; + +static char* +op2name(int opcode) +{ + int i; + + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + if (opcode == OnigOpInfo[i].opcode) + return OnigOpInfo[i].name; + } + return ""; +} + +static int +op2arg_type(int opcode) +{ + int i; + + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + if (opcode == OnigOpInfo[i].opcode) + return OnigOpInfo[i].arg_type; + } + return ARG_SPECIAL; +} + +static void +Indent(FILE* f, int indent) +{ + int i; + for (i = 0; i < indent; i++) putc(' ', f); +} + +static void +p_string(FILE* f, int len, UChar* s) +{ + fputs(":", f); + while (len-- > 0) { fputc(*s++, f); } +} + +static void +p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) +{ + int x = len * mb_len; + + fprintf(f, ":%d:", len); + while (x-- > 0) { fputc(*s++, f); } +} + +extern void +onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, + OnigEncoding enc) +{ + int i, n, arg_type; + RelAddrType addr; + LengthType len; + MemNumType mem; + StateCheckNumType scn; + OnigCodePoint code; + UChar *q; + + 
fprintf(f, "[%s", op2name(*bp)); + arg_type = op2arg_type(*bp); + if (arg_type != ARG_SPECIAL) { + bp++; + switch (arg_type) { + case ARG_NON: + break; + case ARG_RELADDR: + GET_RELADDR_INC(addr, bp); + fprintf(f, ":(%d)", addr); + break; + case ARG_ABSADDR: + GET_ABSADDR_INC(addr, bp); + fprintf(f, ":(%d)", addr); + break; + case ARG_LENGTH: + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d", len); + break; + case ARG_MEMNUM: + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + fprintf(f, ":%d", mem); + break; + case ARG_OPTION: + { + OnigOptionType option = *((OnigOptionType* )bp); + bp += SIZE_OPTION; + fprintf(f, ":%d", option); + } + break; + + case ARG_STATE_CHECK: + scn = *((StateCheckNumType* )bp); + bp += SIZE_STATE_CHECK_NUM; + fprintf(f, ":%d", scn); + break; + } + } + else { + switch (*bp++) { + case OP_EXACT1: + case OP_ANYCHAR_STAR_PEEK_NEXT: + case OP_ANYCHAR_ML_STAR_PEEK_NEXT: + p_string(f, 1, bp++); break; + case OP_EXACT2: + p_string(f, 2, bp); bp += 2; break; + case OP_EXACT3: + p_string(f, 3, bp); bp += 3; break; + case OP_EXACT4: + p_string(f, 4, bp); bp += 4; break; + case OP_EXACT5: + p_string(f, 5, bp); bp += 5; break; + case OP_EXACTN: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 1, bp); + bp += len; + break; + + case OP_EXACTMB2N1: + p_string(f, 2, bp); bp += 2; break; + case OP_EXACTMB2N2: + p_string(f, 4, bp); bp += 4; break; + case OP_EXACTMB2N3: + p_string(f, 6, bp); bp += 6; break; + case OP_EXACTMB2N: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 2, bp); + bp += len * 2; + break; + case OP_EXACTMB3N: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 3, bp); + bp += len * 3; + break; + case OP_EXACTMBN: + { + int mb_len; + + GET_LENGTH_INC(mb_len, bp); + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d:%d:", mb_len, len); + n = len * mb_len; + while (n-- > 0) { fputc(*bp++, f); } + } + break; + + case OP_EXACT1_IC: + len = enclen(enc, bp); + p_string(f, len, bp); + bp += len; + break; + case OP_EXACTN_IC: + GET_LENGTH_INC(len, bp); + 
p_len_string(f, len, 1, bp); + bp += len; + break; + + case OP_CCLASS: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + fprintf(f, ":%d", n); + break; + + case OP_CCLASS_NOT: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + fprintf(f, ":%d", n); + break; + + case OP_CCLASS_MB: + case OP_CCLASS_MB_NOT: + GET_LENGTH_INC(len, bp); + q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS + ALIGNMENT_RIGHT(q); +#endif + GET_CODE_POINT(code, q); + bp += len; + fprintf(f, ":%d:%d", (int )code, len); + break; + + case OP_CCLASS_MIX: + case OP_CCLASS_MIX_NOT: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + GET_LENGTH_INC(len, bp); + q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS + ALIGNMENT_RIGHT(q); +#endif + GET_CODE_POINT(code, q); + bp += len; + fprintf(f, ":%d:%d:%d", n, (int )code, len); + break; + + case OP_CCLASS_NODE: + { + CClassNode *cc; + + GET_POINTER_INC(cc, bp); + n = bitset_on_num(cc->bs); + fprintf(f, ":%u:%d", (unsigned int )cc, n); + } + break; + + case OP_BACKREFN_IC: + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + fprintf(f, ":%d", mem); + break; + + case OP_BACKREF_MULTI_IC: + case OP_BACKREF_MULTI: + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + break; + + case OP_BACKREF_WITH_LEVEL: + { + OnigOptionType option; + LengthType level; + + GET_OPTION_INC(option, bp); + fprintf(f, ":%d", option); + GET_LENGTH_INC(level, bp); + fprintf(f, ":%d", level); + + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + } + break; + + case OP_REPEAT: + case OP_REPEAT_NG: + { + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fprintf(f, ":%d:%d", mem, addr); + } + break; + + case OP_PUSH_OR_JUMP_EXACT1: + case OP_PUSH_IF_PEEK_NEXT: + addr = *((RelAddrType* )bp); + bp += 
SIZE_RELADDR; + fprintf(f, ":(%d)", addr); + p_string(f, 1, bp); + bp += 1; + break; + + case OP_LOOK_BEHIND: + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d", len); + break; + + case OP_PUSH_LOOK_BEHIND_NOT: + GET_RELADDR_INC(addr, bp); + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d:(%d)", len, addr); + break; + + case OP_STATE_CHECK_PUSH: + case OP_STATE_CHECK_PUSH_OR_JUMP: + scn = *((StateCheckNumType* )bp); + bp += SIZE_STATE_CHECK_NUM; + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fprintf(f, ":%d:(%d)", scn, addr); + break; + + default: + fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", + *--bp); + } + } + fputs("]", f); + if (nextp) *nextp = bp; +} + +static void +print_compiled_byte_code_list(FILE* f, regex_t* reg) +{ + int ncode; + UChar* bp = reg->p; + UChar* end = reg->p + reg->used; + + fprintf(f, "code length: %d\n", reg->used); + + ncode = 0; + while (bp < end) { + ncode++; + if (bp > reg->p) { + if (ncode % 5 == 0) + fprintf(f, "\n"); + else + fputs(" ", f); + } + onig_print_compiled_byte_code(f, bp, &bp, reg->enc); + } + + fprintf(f, "\n"); +} + +static void +print_indent_tree(FILE* f, Node* node, int indent) +{ + int i, type; + int add = 3; + UChar* p; + + Indent(f, indent); + if (IS_NULL(node)) { + fprintf(f, "ERROR: null node!!!\n"); + exit (0); + } + + type = NTYPE(node); + switch (type) { + case NT_LIST: + case NT_ALT: + if (NTYPE(node) == NT_LIST) + fprintf(f, "\n", (int )node); + else + fprintf(f, "\n", (int )node); + + print_indent_tree(f, NCAR(node), indent + add); + while (IS_NOT_NULL(node = NCDR(node))) { + if (NTYPE(node) != type) { + fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); + exit(0); + } + print_indent_tree(f, NCAR(node), indent + add); + } + break; + + case NT_STR: + fprintf(f, "", + (NSTRING_IS_RAW(node) ? 
"-raw" : ""), (int )node); + for (p = NSTR(node)->s; p < NSTR(node)->end; p++) { + if (*p >= 0x20 && *p < 0x7f) + fputc(*p, f); + else { + fprintf(f, " 0x%02x", *p); + } + } + break; + + case NT_CCLASS: + fprintf(f, "", (int )node); + if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f); + if (NCCLASS(node)->mbuf) { + BBuf* bbuf = NCCLASS(node)->mbuf; + for (i = 0; i < bbuf->used; i++) { + if (i > 0) fprintf(f, ","); + fprintf(f, "%0x", bbuf->p[i]); + } + } + break; + + case NT_CTYPE: + fprintf(f, " ", (int )node); + switch (NCTYPE(node)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(node)->not != 0) + fputs("not word", f); + else + fputs("word", f); + break; + + default: + fprintf(f, "ERROR: undefined ctype.\n"); + exit(0); + } + break; + + case NT_CANY: + fprintf(f, "", (int )node); + break; + + case NT_ANCHOR: + fprintf(f, " ", (int )node); + switch (NANCHOR(node)->type) { + case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break; + case ANCHOR_END_BUF: fputs("end buf", f); break; + case ANCHOR_BEGIN_LINE: fputs("begin line", f); break; + case ANCHOR_END_LINE: fputs("end line", f); break; + case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break; + case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break; + + case ANCHOR_WORD_BOUND: fputs("word bound", f); break; + case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break; +#ifdef USE_WORD_BEGIN_END + case ANCHOR_WORD_BEGIN: fputs("word begin", f); break; + case ANCHOR_WORD_END: fputs("word end", f); break; +#endif + case ANCHOR_PREC_READ: fputs("prec read", f); break; + case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); break; + case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); break; + case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break; + + default: + fprintf(f, "ERROR: undefined anchor type.\n"); + break; + } + break; + + case NT_BREF: + { + int* p; + BRefNode* br = NBREF(node); + p = BACKREFS_P(br); + fprintf(f, "", (int )node); + for (i = 0; i < br->back_num; i++) { + if (i > 0) 
fputs(", ", f); + fprintf(f, "%d", p[i]); + } + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + { + CallNode* cn = NCALL(node); + fprintf(f, "", (int )node); + p_string(f, cn->name_end - cn->name, cn->name); + } + break; +#endif + + case NT_QTFR: + fprintf(f, "{%d,%d}%s\n", (int )node, + NQTFR(node)->lower, NQTFR(node)->upper, + (NQTFR(node)->greedy ? "" : "?")); + print_indent_tree(f, NQTFR(node)->target, indent + add); + break; + + case NT_ENCLOSE: + fprintf(f, " ", (int )node); + switch (NENCLOSE(node)->type) { + case ENCLOSE_OPTION: + fprintf(f, "option:%d\n", NENCLOSE(node)->option); + print_indent_tree(f, NENCLOSE(node)->target, indent + add); + break; + case ENCLOSE_MEMORY: + fprintf(f, "memory:%d", NENCLOSE(node)->regnum); + break; + case ENCLOSE_STOP_BACKTRACK: + fprintf(f, "stop-bt"); + break; + + default: + break; + } + fprintf(f, "\n"); + print_indent_tree(f, NENCLOSE(node)->target, indent + add); + break; + + default: + fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node)); + break; + } + + if (type != NT_LIST && type != NT_ALT && type != NT_QTFR && + type != NT_ENCLOSE) + fprintf(f, "\n"); + fflush(f); +} +#endif /* ONIG_DEBUG */ + +#ifdef ONIG_DEBUG_PARSE_TREE +static void +print_tree(FILE* f, Node* node) +{ + print_indent_tree(f, node, 0); +} +#endif diff --git a/regenc.c b/regenc.c new file mode 100644 index 0000000..634afd8 --- /dev/null +++ b/regenc.c @@ -0,0 +1,931 @@ +/********************************************************************** + regenc.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" + +OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; + +extern int +onigenc_init(void) +{ + return 0; +} + +extern OnigEncoding +onigenc_get_default_encoding(void) +{ + return OnigEncDefaultCharEncoding; +} + +extern int +onigenc_set_default_encoding(OnigEncoding enc) +{ + OnigEncDefaultCharEncoding = enc; + return 0; +} + +extern int +onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc) +{ + int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e); + if (ONIGENC_MBCLEN_CHARFOUND_P(ret)) + return ONIGENC_MBCLEN_CHARFOUND_LEN(ret); + else if (ONIGENC_MBCLEN_NEEDMORE_P(ret)) + return e-p+ONIGENC_MBCLEN_NEEDMORE_LEN(ret); + return 1; +} + +extern UChar* +onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) +{ + UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); + if (p < s) { + p += enclen(enc, p, end); + } + return p; +} + +extern UChar* +onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, + const UChar* start, const UChar* s, const UChar* end, const UChar** prev) +{ + UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); + + if (p < s) { + if (prev) *prev = (const UChar* )p; + p += enclen(enc, p, end); + } + else { + if (prev) *prev = (const UChar* )NULL; /* Sorry */ + } + return p; +} + +extern UChar* +onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) +{ + if (s <= start) + return (UChar* )NULL; + + return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end); +} + +extern UChar* +onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end, int n) +{ + while (ONIG_IS_NOT_NULL(s) && n-- > 0) { + if (s <= start) + return (UChar* )NULL; + + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end); + } + return (UChar* )s; +} + +extern UChar* +onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n) +{ + UChar* q 
= (UChar* )p; + while (n-- > 0) { + q += ONIGENC_MBC_ENC_LEN(enc, q, end); + } + return (q <= end ? q : NULL); +} + +extern int +onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end) +{ + int n = 0; + UChar* q = (UChar* )p; + + while (q < end) { + q += ONIGENC_MBC_ENC_LEN(enc, q, end); + n++; + } + return n; +} + +extern int +onigenc_strlen_null(OnigEncoding enc, const UChar* s) +{ + int n = 0; + UChar* p = (UChar* )s; + UChar* e = p + strlen((const char *)s); + + while (1) { + if (*p == '\0') { + UChar* q; + int len = ONIGENC_MBC_MINLEN(enc); + + if (len == 1) return n; + q = p + 1; + while (len > 1) { + if (*q != '\0') break; + q++; + len--; + } + if (len == 1) return n; + } + p += ONIGENC_MBC_ENC_LEN(enc, p, e); + n++; + } +} + +extern int +onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) +{ + UChar* start = (UChar* )s; + UChar* p = (UChar* )s; + UChar* e = p + strlen((const char *)s); + + while (1) { + if (*p == '\0') { + UChar* q; + int len = ONIGENC_MBC_MINLEN(enc); + + if (len == 1) return (int )(p - start); + q = p + 1; + while (len > 1) { + if (*q != '\0') break; + q++; + len--; + } + if (len == 1) return (int )(p - start); + } + p += ONIGENC_MBC_ENC_LEN(enc, p, e); + } +} + +const UChar OnigEncAsciiToLowerCaseTable[] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', +}; + +#ifdef USE_UPPER_CASE_TABLE +const UChar OnigEncAsciiToUpperCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', 
+ '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', + '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', +}; +#endif + +const unsigned short OnigEncAsciiCtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 +}; + +const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', 
'\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +#ifdef USE_UPPER_CASE_TABLE +const UChar 
OnigEncISO_8859_1_ToUpperCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', + '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', 
'\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377', +}; +#endif + +extern void +onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED) +{ + /* nothing */ + /* obsoleted. */ +} + +extern UChar* +onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) +{ + return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); +} + +const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = { + { 0x41, 0x61 }, + { 0x42, 0x62 }, + { 0x43, 0x63 }, + { 0x44, 0x64 }, + { 0x45, 0x65 }, + { 0x46, 0x66 }, + { 0x47, 0x67 }, + { 0x48, 0x68 }, + { 0x49, 0x69 }, + { 0x4a, 0x6a }, + { 0x4b, 0x6b }, + { 0x4c, 0x6c }, + { 0x4d, 0x6d }, + { 0x4e, 0x6e }, + { 0x4f, 0x6f }, + { 0x50, 0x70 }, + { 0x51, 0x71 }, + { 0x52, 0x72 }, + { 0x53, 0x73 }, + { 0x54, 0x74 }, + { 0x55, 0x75 }, + { 0x56, 0x76 }, + { 0x57, 0x77 }, + { 0x58, 0x78 }, + { 0x59, 0x79 }, + { 0x5a, 0x7a } +}; + +extern int +onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, + OnigApplyAllCaseFoldFunc f, void* arg, + OnigEncoding enc ARG_UNUSED) +{ + OnigCodePoint code; + int i, r; + + for (i = 0; + i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes)); + i++) { + code = OnigAsciiLowerMap[i].to; + r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg); + if (r != 0) return r; + + code = OnigAsciiLowerMap[i].from; + r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg); + if (r != 0) return r; + } + + return 0; +} + +extern int +onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, + const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[], + OnigEncoding enc ARG_UNUSED) +{ + if (0x41 <= *p && *p <= 0x5a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + return 1; + } + else if (0x61 <= *p && *p <= 0x7a) { + items[0].byte_len = 1; + items[0].code_len = 1; + 
items[0].code[0] = (OnigCodePoint )(*p - 0x20); + return 1; + } + else + return 0; +} + +static int +ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + OnigCodePoint ss[] = { 0x73, 0x73 }; + + return (*f)((OnigCodePoint )0xdf, ss, 2, arg); +} + +extern int +onigenc_apply_all_case_fold_with_map(int map_size, + const OnigPairCaseFoldCodes map[], + int ess_tsett_flag, OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + OnigCodePoint code; + int i, r; + + r = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0); + if (r != 0) return r; + + for (i = 0; i < map_size; i++) { + code = map[i].to; + r = (*f)(map[i].from, &code, 1, arg); + if (r != 0) return r; + + code = map[i].from; + r = (*f)(map[i].to, &code, 1, arg); + if (r != 0) return r; + } + + if (ess_tsett_flag != 0) + return ss_apply_all_case_fold(flag, f, arg); + + return 0; +} + +extern int +onigenc_get_case_fold_codes_by_str_with_map(int map_size, + const OnigPairCaseFoldCodes map[], + int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + if (0x41 <= *p && *p <= 0x5a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1 + && (*(p+1) == 0x53 || *(p+1) == 0x73)) { + /* SS */ + items[1].byte_len = 2; + items[1].code_len = 1; + items[1].code[0] = (OnigCodePoint )0xdf; + return 2; + } + else + return 1; + } + else if (0x61 <= *p && *p <= 0x7a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1 + && (*(p+1) == 0x73 || *(p+1) == 0x53)) { + /* ss */ + items[1].byte_len = 2; + items[1].code_len = 1; + items[1].code[0] = (OnigCodePoint )0xdf; + return 2; + } + else + return 1; + } + else if (*p == 0xdf && ess_tsett_flag != 0) { + items[0].byte_len = 1; + items[0].code_len 
= 2; + items[0].code[0] = (OnigCodePoint )'s'; + items[0].code[1] = (OnigCodePoint )'s'; + + items[1].byte_len = 1; + items[1].code_len = 2; + items[1].code[0] = (OnigCodePoint )'S'; + items[1].code[1] = (OnigCodePoint )'S'; + + items[2].byte_len = 1; + items[2].code_len = 2; + items[2].code[0] = (OnigCodePoint )'s'; + items[2].code[1] = (OnigCodePoint )'S'; + + items[3].byte_len = 1; + items[3].code_len = 2; + items[3].code[0] = (OnigCodePoint )'S'; + items[3].code[1] = (OnigCodePoint )'s'; + + return 4; + } + else { + int i; + + for (i = 0; i < map_size; i++) { + if (*p == map[i].from) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = map[i].to; + return 1; + } + else if (*p == map[i].to) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = map[i].from; + return 1; + } + } + } + + return 0; +} + + +extern int +onigenc_not_support_get_ctype_code_range(OnigCtype ctype, + OnigCodePoint* sb_out, const OnigCodePoint* ranges[], + OnigEncoding enc) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int +onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED) +{ + if (p < end) { + if (*p == 0x0a) return 1; + } + return 0; +} + +/* for single byte encodings */ +extern int +onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p, + const UChar*end, UChar* lower, OnigEncoding enc ARG_UNUSED) +{ + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p); + + (*p)++; + return 1; /* return byte length of converted char to lower */ +} + +#if 0 +extern int +onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED) +{ + const UChar* p = *pp; + + (*pp)++; + return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); +} +#endif + +extern int +onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) +{ + return 1; +} + +extern OnigCodePoint +onigenc_single_byte_mbc_to_code(const UChar* p, const 
UChar* end ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) +{ + return (OnigCodePoint )(*p); +} + +extern int +onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED) +{ + return 1; +} + +extern int +onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED) +{ + if (code > 0xff) + rb_raise(rb_eRangeError, "%u out of char range", code); + *buf = (UChar )(code & 0xff); + return 1; +} + +extern UChar* +onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s, + const UChar* end, + OnigEncoding enc ARG_UNUSED) +{ + return (UChar* )s; +} + +extern int +onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) +{ + return TRUE; +} + +extern int +onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED, + OnigEncoding enc ARG_UNUSED) +{ + return FALSE; +} + +extern int +onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype, + OnigEncoding enc ARG_UNUSED) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else + return FALSE; +} + +extern OnigCodePoint +onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) +{ + int c, i, len; + OnigCodePoint n; + + len = enclen(enc, p, end); + n = (OnigCodePoint )(*p++); + if (len == 1) return n; + + for (i = 1; i < len; i++) { + if (p >= end) break; + c = *p++; + n <<= 8; n += c; + } + return n; +} + +extern int +onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, + UChar* lower) +{ + int len; + const UChar *p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + (*pp)++; + return 1; + } + else { + int i; + + len = enclen(enc, p, end); + for (i = 0; i < len; i++) { + *lower++ = *p++; + } + (*pp) += len; + return len; /* return byte length of converted to 
lower char */ + } +} + +#if 0 +extern int +onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag, + const UChar** pp ARG_UNUSED, const UChar* end ARG_UNUSED) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + (*pp)++; + return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); + } + + (*pp) += enclen(enc, p); + return FALSE; +} +#endif + +extern int +onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) +{ + if ((code & 0xff00) != 0) return 2; + else return 1; +} + +extern int +onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) +{ + if ((code & 0xff000000) != 0) return 4; + else if ((code & 0xff0000) != 0) return 3; + else if ((code & 0xff00) != 0) return 2; + else return 1; +} + +extern int +onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff00) != 0) { + *p++ = (UChar )((code >> 8) & 0xff); + } + *p++ = (UChar )(code & 0xff); + +#if 1 + if (enclen(enc, buf, p) != (p - buf)) + return ONIGERR_INVALID_CODE_POINT_VALUE; +#endif + return p - buf; +} + +extern int +onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff000000) != 0) { + *p++ = (UChar )((code >> 24) & 0xff); + } + if ((code & 0xff0000) != 0 || p != buf) { + *p++ = (UChar )((code >> 16) & 0xff); + } + if ((code & 0xff00) != 0 || p != buf) { + *p++ = (UChar )((code >> 8) & 0xff); + } + *p++ = (UChar )(code & 0xff); + +#if 1 + if (enclen(enc, buf, p) != (p - buf)) + return ONIGERR_INVALID_CODE_POINT_VALUE; +#endif + return p - buf; +} + +extern int +onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) +{ + static const PosixBracketEntryType PBS[] = { + { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 }, + { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 }, + { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 }, + { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 }, + { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 }, + { (UChar* 
)"Graph", ONIGENC_CTYPE_GRAPH, 5 }, + { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 }, + { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 }, + { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 }, + { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 }, + { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 }, + { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 }, + { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 }, + { (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 }, + { (UChar* )NULL, -1, 0 } + }; + + const PosixBracketEntryType *pb; + int len; + + len = onigenc_strlen(enc, p, end); + for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { + if (len == pb->len && + onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) + return pb->ctype; + } + + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; +} + +extern int +onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, + unsigned int ctype) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { + return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); + } + } + + return FALSE; +} + +extern int +onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, + unsigned int ctype) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { + return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? 
TRUE : FALSE); + } + } + + return FALSE; +} + +extern int +onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, + const UChar* sascii /* ascii */, int n) +{ + int x, c; + + while (n-- > 0) { + if (p >= end) return (int )(*sascii); + + c = (int )ONIGENC_MBC_TO_CODE(enc, p, end); + x = *sascii - c; + if (x) return x; + + sascii++; + p += enclen(enc, p, end); + } + return 0; +} + +/* Property management */ +static int +resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize) +{ + int size; + const OnigCodePoint **list = *plist; + + size = sizeof(OnigCodePoint*) * new_size; + if (IS_NULL(list)) { + list = (const OnigCodePoint** )xmalloc(size); + } + else { + list = (const OnigCodePoint** )xrealloc((void* )list, size); + } + + if (IS_NULL(list)) return ONIGERR_MEMORY; + + *plist = list; + *psize = new_size; + + return 0; +} + +extern int +onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, + hash_table_type **table, const OnigCodePoint*** plist, int *pnum, + int *psize) +{ +#define PROP_INIT_SIZE 16 + + int r; + + if (*psize <= *pnum) { + int new_size = (*psize == 0 ? 
PROP_INIT_SIZE : *psize * 2); + r = resize_property_list(new_size, plist, psize); + if (r != 0) return r; + } + + (*plist)[*pnum] = prop; + + if (ONIG_IS_NULL(*table)) { + *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE); + if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY; + } + + *pnum = *pnum + 1; + onig_st_insert_strend(*table, name, name + strlen((char* )name), + (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE)); + return 0; +} + +extern int +onigenc_property_list_init(int (*f)(void)) +{ + int r; + + THREAD_ATOMIC_START; + + r = f(); + + THREAD_ATOMIC_END; + return r; +} diff --git a/regenc.h b/regenc.h new file mode 100644 index 0000000..3844e41 --- /dev/null +++ b/regenc.h @@ -0,0 +1,207 @@ +#ifndef ONIGURUMA_REGENC_H +#define ONIGURUMA_REGENC_H +/********************************************************************** + regenc.h - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#ifndef REGINT_H +#ifndef RUBY_EXTERN +#include "ruby/config.h" +#include "ruby/defines.h" +#endif +#ifdef ONIG_ESCAPE_UCHAR_COLLISION +#undef ONIG_ESCAPE_UCHAR_COLLISION +#endif +#endif +#include "ruby/oniguruma.h" + +typedef struct { + OnigCodePoint from; + OnigCodePoint to; +} OnigPairCaseFoldCodes; + + +#ifndef NULL +#define NULL ((void* )0) +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef ARG_UNUSED +#if defined(__GNUC__) +# define ARG_UNUSED __attribute__ ((unused)) +#else +# define ARG_UNUSED +#endif +#endif + +#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0) +#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0) +#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL +#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val) + +#define enclen(enc,p,e) ((enc->max_enc_len == enc->min_enc_len) ? 
enc->min_enc_len : ONIGENC_MBC_ENC_LEN(enc,p,e)) + +/* character types bit flag */ +#define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE) +#define BIT_CTYPE_ALPHA (1<< ONIGENC_CTYPE_ALPHA) +#define BIT_CTYPE_BLANK (1<< ONIGENC_CTYPE_BLANK) +#define BIT_CTYPE_CNTRL (1<< ONIGENC_CTYPE_CNTRL) +#define BIT_CTYPE_DIGIT (1<< ONIGENC_CTYPE_DIGIT) +#define BIT_CTYPE_GRAPH (1<< ONIGENC_CTYPE_GRAPH) +#define BIT_CTYPE_LOWER (1<< ONIGENC_CTYPE_LOWER) +#define BIT_CTYPE_PRINT (1<< ONIGENC_CTYPE_PRINT) +#define BIT_CTYPE_PUNCT (1<< ONIGENC_CTYPE_PUNCT) +#define BIT_CTYPE_SPACE (1<< ONIGENC_CTYPE_SPACE) +#define BIT_CTYPE_UPPER (1<< ONIGENC_CTYPE_UPPER) +#define BIT_CTYPE_XDIGIT (1<< ONIGENC_CTYPE_XDIGIT) +#define BIT_CTYPE_WORD (1<< ONIGENC_CTYPE_WORD) +#define BIT_CTYPE_ALNUM (1<< ONIGENC_CTYPE_ALNUM) +#define BIT_CTYPE_ASCII (1<< ONIGENC_CTYPE_ASCII) + +#define CTYPE_TO_BIT(ctype) (1<<(ctype)) +#define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \ + ((ctype) == ONIGENC_CTYPE_WORD || (ctype) == ONIGENC_CTYPE_GRAPH ||\ + (ctype) == ONIGENC_CTYPE_PRINT) + + +typedef struct { + UChar *name; + int ctype; + short int len; +} PosixBracketEntryType; + + +/* #define USE_CRNL_AS_LINE_TERMINATOR */ +#define USE_UNICODE_PROPERTIES +/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */ +/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */ + + +#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII + +/* for encoding system implementation (internal) */ +ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc)); +ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc)); +ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); +ONIG_EXTERN int 
onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); +ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], OnigEncoding enc)); +ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end, OnigEncoding enc)); + + +/* methods for single byte encoding */ +ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower, OnigEncoding enc)); +ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p, const UChar* e, OnigEncoding enc)); +ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end, OnigEncoding enc)); +ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code, OnigEncoding enc)); +ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf, OnigEncoding enc)); +ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s, const OnigUChar* end, OnigEncoding enc)); +ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end, OnigEncoding enc)); +ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end, OnigEncoding enc)); +ONIG_EXTERN int onigenc_ascii_is_code_ctype P_((OnigCodePoint code, unsigned int ctype, OnigEncoding enc)); + +/* methods for multi byte encoding */ +ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end)); +ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); +ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code, OnigEncoding enc)); +ONIG_EXTERN int onigenc_mb2_code_to_mbc 
P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); +ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); +ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); +ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); +ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code, OnigEncoding enc)); +ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); +ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); + + +/* in enc/unicode.c */ +ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype, OnigEncoding enc)); +ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[], OnigEncoding enc)); +ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[])); +ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); +ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold)); +ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc)); + + +#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \ + OnigEncISO_8859_1_ToLowerCaseTable[c] +#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \ + OnigEncISO_8859_1_ToUpperCaseTable[c] + +ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[]; +ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[]; + +ONIG_EXTERN int +onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n)); +ONIG_EXTERN UChar* +onigenc_step 
P_((OnigEncoding enc, const UChar* p, const UChar* end, int n)); + +/* defined in regexec.c, but used in enc/xxx.c */ +extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code)); + +ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; +ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[]; +ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[]; +ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]; + +#define ONIGENC_IS_ASCII_CODE(code) ((code) < 0x80) +#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c] +#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c] +#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \ + ((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) +#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \ + (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\ + ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER)) + +#ifdef ONIG_ENC_REGISTER +extern int ONIG_ENC_REGISTER(const char *, OnigEncodingType*); +#define OnigEncodingName(n) encoding_##n +#define OnigEncodingDeclare(n) static OnigEncodingType OnigEncodingName(n) +#define OnigEncodingDefine(f,n) \ + OnigEncodingDeclare(n); \ + void Init_##f(void) { \ + ONIG_ENC_REGISTER(OnigEncodingName(n).name, \ + &OnigEncodingName(n)); \ + } \ + OnigEncodingDeclare(n) +#else +#define OnigEncodingName(n) OnigEncoding##n +#define OnigEncodingDeclare(n) OnigEncodingType OnigEncodingName(n) +#define OnigEncodingDefine(f,n) OnigEncodingDeclare(n) +#endif + +/* macros for define replica encoding and encoding alias */ +#define ENC_REPLICATE(name, orig) +#define ENC_ALIAS(name, orig) +#define ENC_DUMMY(name) + +#endif /* ONIGURUMA_REGENC_H */ diff --git a/regerror.c b/regerror.c new file mode 100644 index 0000000..780ba94 --- /dev/null +++ b/regerror.c @@ -0,0 +1,387 @@ +/********************************************************************** + regerror.c - Oniguruma (regular expression library) 
+**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" +#include <stdio.h> /* for vsnprintf() */ + +#ifdef HAVE_STDARG_PROTOTYPES +#include <stdarg.h> +#define va_init_list(a,b) va_start(a,b) +#else +#include <varargs.h> +#define va_init_list(a,b) va_start(a) +#endif + +extern UChar* +onig_error_code_to_format(int code) +{ + const char *p; + + if (code >= 0) return (UChar* )0; + + switch (code) { + case ONIG_MISMATCH: + p = "mismatch"; break; + case ONIG_NO_SUPPORT_CONFIG: + p = "no support in this configuration"; break; + case ONIGERR_MEMORY: + p = "failed to allocate memory"; break; + case ONIGERR_MATCH_STACK_LIMIT_OVER: + p = "match-stack limit over"; break; + case ONIGERR_TYPE_BUG: + p = "undefined type (bug)"; break; + case ONIGERR_PARSER_BUG: + p = "internal parser error (bug)"; break; + case ONIGERR_STACK_BUG: + p = "stack error (bug)"; break; + case ONIGERR_UNDEFINED_BYTECODE: + p = "undefined bytecode (bug)"; break; + case ONIGERR_UNEXPECTED_BYTECODE: + p = "unexpected bytecode (bug)"; break; + case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED: + p = "default multibyte-encoding is not setted"; break; + case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR: + p = "can't convert to wide-char on specified multibyte-encoding"; break; + case ONIGERR_INVALID_ARGUMENT: + p = "invalid argument"; break; + case ONIGERR_END_PATTERN_AT_LEFT_BRACE: + p = "end pattern at left brace"; break; + case ONIGERR_END_PATTERN_AT_LEFT_BRACKET: + p = "end pattern at left bracket"; break; + case ONIGERR_EMPTY_CHAR_CLASS: + p = "empty char-class"; break; + case ONIGERR_PREMATURE_END_OF_CHAR_CLASS: + p = "premature end of char-class"; break; + case ONIGERR_END_PATTERN_AT_ESCAPE: + p = "end pattern at escape"; break; + case ONIGERR_END_PATTERN_AT_META: + p = "end pattern at meta"; break; + case ONIGERR_END_PATTERN_AT_CONTROL: + p = "end pattern at control"; break; + case ONIGERR_META_CODE_SYNTAX: + p = "invalid meta-code syntax"; break; + case ONIGERR_CONTROL_CODE_SYNTAX: + p = "invalid control-code syntax"; break; + case
ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE: + p = "char-class value at end of range"; break; + case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE: + p = "char-class value at start of range"; break; + case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS: + p = "unmatched range specifier in char-class"; break; + case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED: + p = "target of repeat operator is not specified"; break; + case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID: + p = "target of repeat operator is invalid"; break; + case ONIGERR_NESTED_REPEAT_OPERATOR: + p = "nested repeat operator"; break; + case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS: + p = "unmatched close parenthesis"; break; + case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS: + p = "end pattern with unmatched parenthesis"; break; + case ONIGERR_END_PATTERN_IN_GROUP: + p = "end pattern in group"; break; + case ONIGERR_UNDEFINED_GROUP_OPTION: + p = "undefined group option"; break; + case ONIGERR_INVALID_POSIX_BRACKET_TYPE: + p = "invalid POSIX bracket type"; break; + case ONIGERR_INVALID_LOOK_BEHIND_PATTERN: + p = "invalid pattern in look-behind"; break; + case ONIGERR_INVALID_REPEAT_RANGE_PATTERN: + p = "invalid repeat range {lower,upper}"; break; + case ONIGERR_TOO_BIG_NUMBER: + p = "too big number"; break; + case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE: + p = "too big number for repeat range"; break; + case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE: + p = "upper is smaller than lower in repeat range"; break; + case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS: + p = "empty range in char class"; break; + case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE: + p = "mismatch multibyte code length in char-class range"; break; + case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES: + p = "too many multibyte code ranges are specified"; break; + case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING: + p = "too short multibyte code string"; break; + case ONIGERR_TOO_BIG_BACKREF_NUMBER: + p = "too big backref number"; break; + case 
ONIGERR_INVALID_BACKREF: +#ifdef USE_NAMED_GROUP + p = "invalid backref number/name"; break; +#else + p = "invalid backref number"; break; +#endif + case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED: + p = "numbered backref/call is not allowed. (use name)"; break; + case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE: + p = "too big wide-char value"; break; + case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE: + p = "too long wide-char value"; break; + case ONIGERR_INVALID_CODE_POINT_VALUE: + p = "invalid code point value"; break; + case ONIGERR_EMPTY_GROUP_NAME: + p = "group name is empty"; break; + case ONIGERR_INVALID_GROUP_NAME: + p = "invalid group name <%n>"; break; + case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: +#ifdef USE_NAMED_GROUP + p = "invalid char in group name <%n>"; break; +#else + p = "invalid char in group number <%n>"; break; +#endif + case ONIGERR_UNDEFINED_NAME_REFERENCE: + p = "undefined name <%n> reference"; break; + case ONIGERR_UNDEFINED_GROUP_REFERENCE: + p = "undefined group <%n> reference"; break; + case ONIGERR_MULTIPLEX_DEFINED_NAME: + p = "multiplex defined name <%n>"; break; + case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: + p = "multiplex definition name <%n> call"; break; + case ONIGERR_NEVER_ENDING_RECURSION: + p = "never ending recursion"; break; + case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY: + p = "group number is too big for capture history"; break; + case ONIGERR_INVALID_CHAR_PROPERTY_NAME: + p = "invalid character property name {%n}"; break; + case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION: + p = "not supported encoding combination"; break; + case ONIGERR_INVALID_COMBINATION_OF_OPTIONS: + p = "invalid combination of options"; break; + case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT: + p = "over thread pass limit count"; break; + + default: + p = "undefined error code"; break; + } + + return (UChar* )p; +} + +static void sprint_byte(char* s, unsigned int v) +{ + sprintf(s, "%02x", (v & 0377)); +} + +static void sprint_byte_with_x(char* s, unsigned int v) +{ 
+ sprintf(s, "\\x%02x", (v & 0377)); +} + +static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, + UChar buf[], int buf_size, int *is_over) +{ + int len; + UChar *p; + OnigCodePoint code; + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + p = s; + len = 0; + while (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code >= 0x80) { + if (code > 0xffff && len + 10 <= buf_size) { + sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24)); + sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16)); + sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8)); + sprint_byte((char*)(&(buf[len+8])), (unsigned int)code); + len += 10; + } + else if (len + 6 <= buf_size) { + sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8)); + sprint_byte((char*)(&(buf[len+4])), (unsigned int)code); + len += 6; + } + else { + break; + } + } + else { + buf[len++] = (UChar )code; + } + + p += enclen(enc, p, end); + if (len >= buf_size) break; + } + + *is_over = ((p < end) ? 1 : 0); + } + else { + len = MIN((end - s), buf_size); + xmemcpy(buf, s, (size_t )len); + *is_over = ((buf_size < (end - s)) ? 1 : 0); + } + + return len; +} + + +/* for ONIG_MAX_ERROR_MESSAGE_LEN */ +#define MAX_ERROR_PAR_LEN 30 + +extern int +#ifdef HAVE_STDARG_PROTOTYPES +onig_error_code_to_str(UChar* s, int code, ...) 
+#else +onig_error_code_to_str(s, code, va_alist) + UChar* s; + int code; + va_dcl +#endif +{ + UChar *p, *q; + OnigErrorInfo* einfo; + int len, is_over; + UChar parbuf[MAX_ERROR_PAR_LEN]; + va_list vargs; + + va_init_list(vargs, code); + + switch (code) { + case ONIGERR_UNDEFINED_NAME_REFERENCE: + case ONIGERR_UNDEFINED_GROUP_REFERENCE: + case ONIGERR_MULTIPLEX_DEFINED_NAME: + case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: + case ONIGERR_INVALID_GROUP_NAME: + case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: + case ONIGERR_INVALID_CHAR_PROPERTY_NAME: + einfo = va_arg(vargs, OnigErrorInfo*); + len = to_ascii(einfo->enc, einfo->par, einfo->par_end, + parbuf, MAX_ERROR_PAR_LEN - 3, &is_over); + q = onig_error_code_to_format(code); + p = s; + while (*q != '\0') { + if (*q == '%') { + q++; + if (*q == 'n') { /* '%n': name */ + xmemcpy(p, parbuf, len); + p += len; + if (is_over != 0) { + xmemcpy(p, "...", 3); + p += 3; + } + q++; + } + else + goto normal_char; + } + else { + normal_char: + *p++ = *q++; + } + } + *p = '\0'; + len = p - s; + break; + + default: + q = onig_error_code_to_format(code); + len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q); + xmemcpy(s, q, len); + s[len] = '\0'; + break; + } + + va_end(vargs); + return len; +} + + +void +#ifdef HAVE_STDARG_PROTOTYPES +onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, + UChar* pat, UChar* pat_end, const UChar *fmt, ...) 
+#else +onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) + UChar buf[]; + int bufsize; + OnigEncoding enc; + UChar* pat; + UChar* pat_end; + const UChar *fmt; + va_dcl +#endif +{ + int n, need, len; + UChar *p, *s, *bp; + UChar bs[6]; + va_list args; + + va_init_list(args, fmt); + n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args); + va_end(args); + + need = (pat_end - pat) * 4 + 4; + + if (n + need < bufsize) { + strcat((char* )buf, ": /"); + s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf); + + p = pat; + while (p < pat_end) { + if (*p == '\\') { + *s++ = *p++; + len = enclen(enc, p, pat_end); + while (len-- > 0) *s++ = *p++; + } + else if (*p == '/') { + *s++ = (unsigned char )'\\'; + *s++ = *p++; + } + else if (ONIGENC_IS_MBC_HEAD(enc, p, pat_end)) { + len = enclen(enc, p, pat_end); + if (ONIGENC_MBC_MINLEN(enc) == 1) { + while (len-- > 0) *s++ = *p++; + } + else { /* for UTF16 */ + int blen; + + while (len-- > 0) { + sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); + blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); + bp = bs; + while (blen-- > 0) *s++ = *bp++; + } + } + } + else if (!ONIGENC_IS_CODE_PRINT(enc, *p) && + !ONIGENC_IS_CODE_SPACE(enc, *p)) { + sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); + len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); + bp = bs; + while (len-- > 0) *s++ = *bp++; + } + else { + *s++ = *p++; + } + } + + *s++ = '/'; + *s = '\0'; + } +} diff --git a/regexec.c b/regexec.c new file mode 100644 index 0000000..2605059 --- /dev/null +++ b/regexec.c @@ -0,0 +1,3822 @@ +/********************************************************************** + regexec.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#include "regint.h"
+
+/* #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
+
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+/* True when the character at p has code 13 (CR) and the character right
+   after it is a newline.  enclen() is given the end pointer as its third
+   argument, matching the three-argument calls used by the live code. */
+#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
+  (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
+   ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p,end)),end))
+#endif
+
+#ifdef USE_CAPTURE_HISTORY
+static void history_tree_free(OnigCaptureTreeNode* node);
+
+/* Free every child subtree of node and reset node to the empty state
+   (no children, beg/end = ONIG_REGION_NOTPOS, group = -1).  The childs
+   array itself is kept so the node can be refilled.  NULL is ignored. */
+static void
+history_tree_clear(OnigCaptureTreeNode* node)
+{
+  int i;
+
+  if (IS_NOT_NULL(node)) {
+    for (i = 0; i < node->num_childs; i++) {
+      if (IS_NOT_NULL(node->childs[i])) {
+        history_tree_free(node->childs[i]);
+      }
+    }
+    for (i = 0; i < node->allocated; i++) {
+      node->childs[i] = (OnigCaptureTreeNode* )0;
+    }
+    node->num_childs = 0;
+    node->beg = ONIG_REGION_NOTPOS;
+    node->end = ONIG_REGION_NOTPOS;
+    node->group = -1;
+  }
+}
+
+/* Free node together with all of its descendants.  NULL is ignored. */
+static void
+history_tree_free(OnigCaptureTreeNode* node)
+{
+  if (IS_NULL(node)) return;
+
+  history_tree_clear(node);
+  /* history_tree_clear() keeps the childs array for reuse, so it has to be
+     released here; otherwise it is lost when the node is freed. */
+  if (IS_NOT_NULL(node->childs)) xfree(node->childs);
+  xfree(node);
+}
+
+/* Free the capture history tree attached to region r, if any, and detach it. */
+static void
+history_root_free(OnigRegion* r)
+{
+  if (IS_NOT_NULL(r->history_root)) {
+    history_tree_free(r->history_root);
+    r->history_root = (OnigCaptureTreeNode* )0;
+  }
+}
+
+/* Allocate an empty capture tree node: no childs array, group = -1,
+   beg/end = ONIG_REGION_NOTPOS. */
+static OnigCaptureTreeNode*
+history_node_new(void)
+{
+  OnigCaptureTreeNode* node;
+
+  node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
+  CHECK_NULL_RETURN(node);
+  node->childs     = (OnigCaptureTreeNode** )0;
+  node->allocated  = 0;
+  node->num_childs = 0;
+  node->group      = -1;
+  node->beg        = ONIG_REGION_NOTPOS;
+  node->end        = ONIG_REGION_NOTPOS;
+
+  return node;
+}
+
+/* Append child to parent's child list, growing the list when it is full
+   (8 slots at first, doubling afterwards).  Returns 0 on success.  When the
+   list cannot be grown the error is returned, parent is left exactly as it
+   was, and child is NOT attached (the caller still owns it). */
+static int
+history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
+{
+#define HISTORY_TREE_INIT_ALLOC_SIZE  8
+
+  if (parent->num_childs >= parent->allocated) {
+    int n, i;
+    OnigCaptureTreeNode** grown;
+
+    if (IS_NULL(parent->childs)) {
+      n = HISTORY_TREE_INIT_ALLOC_SIZE;
+      grown =
+        (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
+    }
+    else {
+      n = parent->allocated * 2;
+      grown =
+        (OnigCaptureTreeNode** )xrealloc(parent->childs,
+                                         sizeof(OnigCaptureTreeNode*) * n);
+    }
+    /* Only replace parent->childs once the allocation succeeded: assigning
+       a failed xrealloc() result directly would drop the old array while
+       num_childs/allocated still describe it. */
+    CHECK_NULL_RETURN_MEMERR(grown);
+    parent->childs = grown;
+    for (i = parent->allocated; i < n; i++) {
+      parent->childs[i] = (OnigCaptureTreeNode* )0;
+    }
+    parent->allocated = n;
+  }
+
+  parent->childs[parent->num_childs] = child;
+  parent->num_childs++;
+  return 0;
+}
+
+/* Deep-copy the subtree rooted at node (group, beg, end and all children).
+   Returns a null pointer, with nothing left allocated, if any node or child
+   list cannot be allocated. */
+static OnigCaptureTreeNode*
+history_tree_clone(OnigCaptureTreeNode* node)
+{
+  int i;
+  OnigCaptureTreeNode *clone, *child;
+
+  clone = history_node_new();
+  CHECK_NULL_RETURN(clone);
+
+  clone->group = node->group;
+  clone->beg   = node->beg;
+  clone->end   = node->end;
+  for (i = 0; i < node->num_childs; i++) {
+    child = history_tree_clone(node->childs[i]);
+    if (IS_NULL(child)) {
+      history_tree_free(clone);
+      return (OnigCaptureTreeNode* )0;
+    }
+    if (history_tree_add_child(clone, child) != 0) {
+      /* child was not attached, so it must be released separately */
+      history_tree_free(child);
+      history_tree_free(clone);
+      return (OnigCaptureTreeNode* )0;
+    }
+  }
+
+  return clone;
+}
+
+/* Return the root of the capture history tree stored in region. */
+extern OnigCaptureTreeNode*
+onig_get_capture_tree(OnigRegion* region)
+{
+  return region->history_root;
+}
+#endif /* USE_CAPTURE_HISTORY */
+
+/* Mark all num_regs registers as unset (ONIG_REGION_NOTPOS) and, when
+   capture history is compiled in, drop the history tree. */
+extern void
+onig_region_clear(OnigRegion* region)
+{
+  int i;
+
+  for (i = 0; i < region->num_regs; i++) {
+    region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
+  }
+#ifdef USE_CAPTURE_HISTORY
+  history_root_free(region);
+#endif
+}
+
+/* Make region able to hold n registers and set num_regs to n.  At least
+   ONIG_NREGION slots are allocated; existing storage that is already large
+   enough is reused.  Returns 0 on success.  On allocation failure returns
+   ONIGERR_MEMORY and leaves region empty (no arrays, allocated = 0,
+   num_regs = 0) so that it can still be cleared, freed or resized safely. */
+extern int
+onig_region_resize(OnigRegion* region, int n)
+{
+  int num_regs = n;
+  int *tmp;
+
+  if (n < ONIG_NREGION)
+    n = ONIG_NREGION;
+
+  if (region->allocated == 0) {
+    /* allocated == 0 means the region owns no arrays; forget whatever
+       beg/end currently hold so the error path never frees them. */
+    region->beg = (int* )0;
+    region->end = (int* )0;
+
+    region->beg = (int* )xmalloc(n * sizeof(int));
+    if (region->beg == 0) goto memerr;
+
+    region->end = (int* )xmalloc(n * sizeof(int));
+    if (region->end == 0) goto memerr;
+
+    region->allocated = n;
+  }
+  else if (region->allocated < n) {
+    /* Keep the old pointer until xrealloc() has succeeded, so the error
+       path can still release it. */
+    tmp = (int* )xrealloc(region->beg, n * sizeof(int));
+    if (tmp == 0) goto memerr;
+    region->beg = tmp;
+
+    tmp = (int* )xrealloc(region->end, n * sizeof(int));
+    if (tmp == 0) goto memerr;
+    region->end = tmp;
+
+    region->allocated = n;
+  }
+
+  region->num_regs = num_regs;
+  return 0;
+
+ memerr:
+  if (region->beg) xfree(region->beg);
+  if (region->end) xfree(region->end);
+  region->beg = (int* )0;
+  region->end = (int* )0;
+  region->allocated = 0;
+  region->num_regs  = 0;
+  return ONIGERR_MEMORY;
+}
+
+/* Resize region to n registers and reset every register to unset.
+   Returns 0 on success or the error from onig_region_resize(). */
+static int
+onig_region_resize_clear(OnigRegion* region, int n)
+{
+  int r;
+
+  r = onig_region_resize(region, n);
+  if (r != 0) return r;
+  onig_region_clear(region);
+  return 0;
+}
+
+/* Store [beg, end) in register `at`, growing the region when `at` is beyond
+   the allocated slots.  Returns 0 on success, ONIGERR_INVALID_ARGUMENT for a
+   negative index, or the error from onig_region_resize(). */
+extern int
+onig_region_set(OnigRegion* region, int at, int beg, int end)
+{
+  if (at < 0) return ONIGERR_INVALID_ARGUMENT;
+
+  if (at >= region->allocated) {
+    int r = onig_region_resize(region, at + 1);
+    if (r < 0) return r;
+  }
+
+  region->beg[at] = beg;
+  region->end[at] = end;
+  return 0;
+}
+
+/* Put region into the empty state: no registers, no arrays, no history. */
+extern void
+onig_region_init(OnigRegion* region)
+{
+  region->num_regs     = 0;
+  region->allocated    = 0;
+  region->beg          = (int* )0;
+  region->end          = (int* )0;
+  region->history_root = (OnigCaptureTreeNode* )0;
+}
+
+/* Allocate and initialize an empty region.  Returns the xmalloc() result
+   unchanged, so a null pointer is passed through to the caller. */
+extern OnigRegion*
+onig_region_new(void)
+{
+  OnigRegion* r;
+
+  r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
+  if (r)
+    onig_region_init(r);
+  return r;
+}
+
+/* Release the register arrays (and history tree) owned by r; when free_self
+   is non-zero the OnigRegion structure itself is freed too.  NULL is
+   ignored. */
+extern void
+onig_region_free(OnigRegion* r, int free_self)
+{
+  if (r) {
+    if (r->allocated > 0) {
+      if (r->beg) xfree(r->beg);
+      if (r->end) xfree(r->end);
+      /* do not leave pointers to freed memory behind when r is kept */
+      r->beg = (int* )0;
+      r->end = (int* )0;
+      r->allocated = 0;
+    }
+#ifdef USE_CAPTURE_HISTORY
+    history_root_free(r);
+#endif
+    if (free_self) xfree(r);
+  }
+}
+
+/* Copy all registers (and the capture history tree, when compiled in) from
+   `from` into `to`.  The function cannot report errors; if `to` cannot be
+   resized nothing is copied and `to` keeps no history tree. */
+extern void
+onig_region_copy(OnigRegion* to, OnigRegion* from)
+{
+#define RREGC_SIZE   (sizeof(int) * from->num_regs)
+  int i;
+
+  if (to == from) return;
+
+  if (onig_region_resize(to, from->num_regs) != 0) {
+    /* writing through the arrays after a failed resize would be invalid */
+#ifdef USE_CAPTURE_HISTORY
+    history_root_free(to);
+#endif
+    return;
+  }
+  for (i = 0; i < from->num_regs; i++) {
+    to->beg[i] = from->beg[i];
+    to->end[i] = from->end[i];
+  }
+  to->num_regs = from->num_regs;
+
+#ifdef USE_CAPTURE_HISTORY
+  history_root_free(to);
+
+  if (IS_NOT_NULL(from->history_root)) {
+    to->history_root = history_tree_clone(from->history_root);
+  }
+#endif
+}
+
+
+/** stack **/
+#define INVALID_STACK_INDEX   -1
+
+/* stack type */
+/* used by normal-POP */
+#define STK_ALT                    0x0001
+#define STK_LOOK_BEHIND_NOT        0x0002
+#define STK_POS_NOT                0x0003
+/* handled by normal-POP */
+#define STK_MEM_START              0x0100
+#define STK_MEM_END                0x8200
+#define
STK_REPEAT_INC 0x0300 +#define STK_STATE_CHECK_MARK 0x1000 +/* avoided by normal-POP */ +#define STK_NULL_CHECK_START 0x3000 +#define STK_NULL_CHECK_END 0x5000 /* for recursive call */ +#define STK_MEM_END_MARK 0x8400 +#define STK_POS 0x0500 /* used when POP-POS */ +#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ +#define STK_REPEAT 0x0700 +#define STK_CALL_FRAME 0x0800 +#define STK_RETURN 0x0900 +#define STK_VOID 0x0a00 /* for fill a blank */ + +/* stack type check mask */ +#define STK_MASK_POP_USED 0x00ff +#define STK_MASK_TO_VOID_TARGET 0x10ff +#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ + +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option);\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ + (msa).best_len = ONIG_MISMATCH;\ +} while(0) +#else +#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option);\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ +} while(0) +#endif + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + +#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 + +#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ + if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ + unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ + offset = ((offset) * (state_num)) >> 3;\ + if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ + if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\ + (msa).state_check_buff = (void* )xmalloc(size);\ + CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\ + }\ + else \ + (msa).state_check_buff = (void* )xalloca(size);\ + xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ + (size_t )(size - (offset))); \ + (msa).state_check_buff_size = size;\ + }\ + else {\ + (msa).state_check_buff = (void* )0;\ + 
(msa).state_check_buff_size = 0;\ + }\ + }\ + else {\ + (msa).state_check_buff = (void* )0;\ + (msa).state_check_buff_size = 0;\ + }\ + } while(0) + +#define MATCH_ARG_FREE(msa) do {\ + if ((msa).stack_p) xfree((msa).stack_p);\ + if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ + if ((msa).state_check_buff) xfree((msa).state_check_buff);\ + }\ +} while(0) +#else +#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) +#endif + + + +#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\ + if (msa->stack_p) {\ + alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\ + stk_alloc = (OnigStackType* )(msa->stack_p);\ + stk_base = stk_alloc;\ + stk = stk_base;\ + stk_end = stk_base + msa->stack_n;\ + }\ + else {\ + alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\ + + sizeof(OnigStackType) * (stack_num));\ + stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\ + stk_base = stk_alloc;\ + stk = stk_base;\ + stk_end = stk_base + (stack_num);\ + }\ +} while(0) + +#define STACK_SAVE do{\ + if (stk_base != stk_alloc) {\ + msa->stack_p = stk_base;\ + msa->stack_n = stk_end - stk_base;\ + };\ +} while(0) + +static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; + +extern unsigned int +onig_get_match_stack_limit_size(void) +{ + return MatchStackLimitSize; +} + +extern int +onig_set_match_stack_limit_size(unsigned int size) +{ + MatchStackLimitSize = size; + return 0; +} + +static int +stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, + OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa) +{ + unsigned int n; + OnigStackType *x, *stk_base, *stk_end, *stk; + + stk_base = *arg_stk_base; + stk_end = *arg_stk_end; + stk = *arg_stk; + + n = stk_end - stk_base; + if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { + x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2); + if (IS_NULL(x)) { + STACK_SAVE; + return ONIGERR_MEMORY; + } + xmemcpy(x, 
stk_base, n * sizeof(OnigStackType)); + n *= 2; + } + else { + unsigned int limit_size = MatchStackLimitSize; + n *= 2; + if (limit_size != 0 && n > limit_size) { + if ((unsigned int )(stk_end - stk_base) == limit_size) + return ONIGERR_MATCH_STACK_LIMIT_OVER; + else + n = limit_size; + } + x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n); + if (IS_NULL(x)) { + STACK_SAVE; + return ONIGERR_MEMORY; + } + } + *arg_stk = x + (stk - stk_base); + *arg_stk_base = x; + *arg_stk_end = x + n; + return 0; +} + +#define STACK_ENSURE(n) do {\ + if (stk_end - stk < (n)) {\ + int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\ + if (r != 0) { STACK_SAVE; return r; } \ + }\ +} while(0) + +#define STACK_AT(index) (stk_base + (index)) +#define GET_STACK_INDEX(stk) ((stk) - stk_base) + +#define STACK_PUSH_TYPE(stack_type) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + STACK_INC;\ +} while(0) + +#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) + +#ifdef USE_COMBINATION_EXPLOSION_CHECK +#define STATE_CHECK_POS(s,snum) \ + (((s) - str) * num_comb_exp_check + ((snum) - 1)) +#define STATE_CHECK_VAL(v,snum) do {\ + if (state_check_buff != NULL) {\ + int x = STATE_CHECK_POS(s,snum);\ + (v) = state_check_buff[x/8] & (1<<(x%8));\ + }\ + else (v) = 0;\ +} while(0) + + +#define ELSE_IF_STATE_CHECK_MARK(stk) \ + else if ((stk)->type == STK_STATE_CHECK_MARK) { \ + int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ + state_check_buff[x/8] |= (1<<(x%8)); \ + } + +#define STACK_PUSH(stack_type,pat,s,sprev) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ + stk->u.state.state_check = 0;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_ENSURED(stack_type,pat) do {\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + stk->u.state.state_check = 0;\ + STACK_INC;\ +} while(0) + +#define 
STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\ + STACK_ENSURE(1);\ + stk->type = STK_ALT;\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ + stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_STATE_CHECK(s,snum) do {\ + if (state_check_buff != NULL) {\ + STACK_ENSURE(1);\ + stk->type = STK_STATE_CHECK_MARK;\ + stk->u.state.pstr = (s);\ + stk->u.state.state_check = (snum);\ + STACK_INC;\ + }\ +} while(0) + +#else /* USE_COMBINATION_EXPLOSION_CHECK */ + +#define ELSE_IF_STATE_CHECK_MARK(stk) + +#define STACK_PUSH(stack_type,pat,s,sprev) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_ENSURED(stack_type,pat) do {\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + STACK_INC;\ +} while(0) +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + +#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) +#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev) +#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev) +#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT) +#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \ + STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev) + +#define STACK_PUSH_REPEAT(id, pat) do {\ + STACK_ENSURE(1);\ + stk->type = STK_REPEAT;\ + stk->u.repeat.num = (id);\ + stk->u.repeat.pcode = (pat);\ + stk->u.repeat.count = 0;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_REPEAT_INC(sindex) do {\ + STACK_ENSURE(1);\ + stk->type = STK_REPEAT_INC;\ + stk->u.repeat_inc.si = (sindex);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MEM_START(mnum, s) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MEM_START;\ + stk->u.mem.num = (mnum);\ + stk->u.mem.pstr = (s);\ + stk->u.mem.start = mem_start_stk[mnum];\ + stk->u.mem.end = 
mem_end_stk[mnum];\ + mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ + mem_end_stk[mnum] = INVALID_STACK_INDEX;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MEM_END(mnum, s) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MEM_END;\ + stk->u.mem.num = (mnum);\ + stk->u.mem.pstr = (s);\ + stk->u.mem.start = mem_start_stk[mnum];\ + stk->u.mem.end = mem_end_stk[mnum];\ + mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MEM_END_MARK(mnum) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MEM_END_MARK;\ + stk->u.mem.num = (mnum);\ + STACK_INC;\ +} while(0) + +#define STACK_GET_MEM_START(mnum, k) do {\ + int level = 0;\ + k = stk;\ + while (k > stk_base) {\ + k--;\ + if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ + && k->u.mem.num == (mnum)) {\ + level++;\ + }\ + else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ + if (level == 0) break;\ + level--;\ + }\ + }\ +} while(0) + +#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\ + int level = 0;\ + while (k < stk) {\ + if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ + if (level == 0) (start) = k->u.mem.pstr;\ + level++;\ + }\ + else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\ + level--;\ + if (level == 0) {\ + (end) = k->u.mem.pstr;\ + break;\ + }\ + }\ + k++;\ + }\ +} while(0) + +#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ + STACK_ENSURE(1);\ + stk->type = STK_NULL_CHECK_START;\ + stk->u.null_check.num = (cnum);\ + stk->u.null_check.pstr = (s);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_NULL_CHECK_END(cnum) do {\ + STACK_ENSURE(1);\ + stk->type = STK_NULL_CHECK_END;\ + stk->u.null_check.num = (cnum);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_CALL_FRAME(pat) do {\ + STACK_ENSURE(1);\ + stk->type = STK_CALL_FRAME;\ + stk->u.call_frame.ret_addr = (pat);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_RETURN do {\ + STACK_ENSURE(1);\ + stk->type = STK_RETURN;\ + STACK_INC;\ +} while(0) + + +#ifdef ONIG_DEBUG +#define 
STACK_BASE_CHECK(p, at) \ + if ((p) < stk_base) {\ + fprintf(stderr, "at %s\n", at);\ + goto stack_error;\ + } +#else +#define STACK_BASE_CHECK(p, at) +#endif + +#define STACK_POP_ONE do {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ +} while(0) + +#define STACK_POP do {\ + switch (pop_level) {\ + case STACK_POP_LEVEL_FREE:\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP"); \ + if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ + break;\ + case STACK_POP_LEVEL_MEM_START:\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP 2"); \ + if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ + break;\ + default:\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP 3"); \ + if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ + break;\ + }\ +} while(0) + +#define STACK_POP_TIL_POS_NOT do {\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ + if (stk->type == STK_POS_NOT) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + 
}\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ +} while(0) + +#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \ + if (stk->type == STK_LOOK_BEHIND_NOT) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ +} while(0) + +#define STACK_POS_END(k) do {\ + k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_POS_END"); \ + if (IS_TO_VOID_TARGET(k)) {\ + k->type = STK_VOID;\ + }\ + else if (k->type == STK_POS) {\ + k->type = STK_VOID;\ + break;\ + }\ + }\ +} while(0) + +#define STACK_STOP_BT_END do {\ + OnigStackType *k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ + if (IS_TO_VOID_TARGET(k)) {\ + k->type = STK_VOID;\ + }\ + else if (k->type == STK_STOP_BT) {\ + k->type = STK_VOID;\ + break;\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK(isnull,id,s) do {\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + (isnull) = (k->u.null_check.pstr == (s));\ + break;\ + }\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK_REC(isnull,id,s) do {\ + int level = 0;\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (level == 0) {\ + (isnull) = (k->u.null_check.pstr == (s));\ + break;\ + }\ + else level--;\ + }\ + }\ + else if (k->type == STK_NULL_CHECK_END) {\ + level++;\ + }\ + }\ +} while(0) + +#define 
STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (k->u.null_check.pstr != (s)) {\ + (isnull) = 0;\ + break;\ + }\ + else {\ + UChar* endp;\ + (isnull) = 1;\ + while (k < stk) {\ + if (k->type == STK_MEM_START) {\ + if (k->u.mem.end == INVALID_STACK_INDEX) {\ + (isnull) = 0; break;\ + }\ + if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ + endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ + else\ + endp = (UChar* )k->u.mem.end;\ + if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ + (isnull) = 0; break;\ + }\ + else if (endp != s) {\ + (isnull) = -1; /* empty, but position changed */ \ + }\ + }\ + k++;\ + }\ + break;\ + }\ + }\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\ + int level = 0;\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (level == 0) {\ + if (k->u.null_check.pstr != (s)) {\ + (isnull) = 0;\ + break;\ + }\ + else {\ + UChar* endp;\ + (isnull) = 1;\ + while (k < stk) {\ + if (k->type == STK_MEM_START) {\ + if (k->u.mem.end == INVALID_STACK_INDEX) {\ + (isnull) = 0; break;\ + }\ + if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ + endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ + else\ + endp = (UChar* )k->u.mem.end;\ + if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ + (isnull) = 0; break;\ + }\ + else if (endp != s) {\ + (isnull) = -1; /* empty, but position changed */ \ + }\ + }\ + k++;\ + }\ + break;\ + }\ + }\ + else {\ + level--;\ + }\ + }\ + }\ + else if (k->type == STK_NULL_CHECK_END) {\ + if (k->u.null_check.num == (id)) level++;\ + }\ + }\ +} while(0) + +#define STACK_GET_REPEAT(id, k) do {\ + int level = 0;\ + k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ 
+ if (k->type == STK_REPEAT) {\ + if (level == 0) {\ + if (k->u.repeat.num == (id)) {\ + break;\ + }\ + }\ + }\ + else if (k->type == STK_CALL_FRAME) level--;\ + else if (k->type == STK_RETURN) level++;\ + }\ +} while(0) + +#define STACK_RETURN(addr) do {\ + int level = 0;\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_RETURN"); \ + if (k->type == STK_CALL_FRAME) {\ + if (level == 0) {\ + (addr) = k->u.call_frame.ret_addr;\ + break;\ + }\ + else level--;\ + }\ + else if (k->type == STK_RETURN)\ + level++;\ + }\ +} while(0) + + +#define STRING_CMP(s1,s2,len) do {\ + while (len-- > 0) {\ + if (*s1++ != *s2++) goto fail;\ + }\ +} while(0) + +#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\ + if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \ + goto fail; \ +} while(0) + +static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, + UChar* s1, UChar** ps2, int mblen, const UChar* text_end) +{ + UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar *p1, *p2, *end1, *s2; + int len1, len2; + + s2 = *ps2; + end1 = s1 + mblen; + while (s1 < end1) { + len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1); + len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2); + if (len1 != len2) return 0; + p1 = buf1; + p2 = buf2; + while (len1-- > 0) { + if (*p1 != *p2) return 0; + p1++; + p2++; + } + } + + *ps2 = s2; + return 1; +} + +#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\ + is_fail = 0;\ + while (len-- > 0) {\ + if (*s1++ != *s2++) {\ + is_fail = 1; break;\ + }\ + }\ +} while(0) + +#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\ + if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \ + is_fail = 1; \ + else \ + is_fail = 0; \ +} while(0) + + +#define IS_EMPTY_STR (str == end) +#define ON_STR_BEGIN(s) ((s) == str) +#define ON_STR_END(s) ((s) == end) +#ifdef 
USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#define DATA_ENSURE_CHECK1 (s < right_range) +#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) +#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail +#else +#define DATA_ENSURE_CHECK1 (s < end) +#define DATA_ENSURE_CHECK(n) (s + (n) <= end) +#define DATA_ENSURE(n) if (s + (n) > end) goto fail +#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ + + +#ifdef USE_CAPTURE_HISTORY +static int +make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, + OnigStackType* stk_top, UChar* str, regex_t* reg) +{ + int n, r; + OnigCaptureTreeNode* child; + OnigStackType* k = *kp; + + while (k < stk_top) { + if (k->type == STK_MEM_START) { + n = k->u.mem.num; + if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && + BIT_STATUS_AT(reg->capture_history, n) != 0) { + child = history_node_new(); + CHECK_NULL_RETURN_MEMERR(child); + child->group = n; + child->beg = (int )(k->u.mem.pstr - str); + r = history_tree_add_child(node, child); + if (r != 0) return r; + *kp = (k + 1); + r = make_capture_history_tree(child, kp, stk_top, str, reg); + if (r != 0) return r; + + k = *kp; + child->end = (int )(k->u.mem.pstr - str); + } + } + else if (k->type == STK_MEM_END) { + if (k->u.mem.num == node->group) { + node->end = (int )(k->u.mem.pstr - str); + *kp = k; + return 0; + } + } + k++; + } + + return 1; /* 1: root node ending. 
*/ +} +#endif + +#ifdef USE_BACKREF_WITH_LEVEL +static int mem_is_in_memp(int mem, int num, UChar* memp) +{ + int i; + MemNumType m; + + for (i = 0; i < num; i++) { + GET_MEMNUM_INC(m, memp); + if (mem == (int )m) return 1; + } + return 0; +} + +static int backref_match_at_nested_level(regex_t* reg + , OnigStackType* top, OnigStackType* stk_base + , int ignore_case, int case_fold_flag + , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) +{ + UChar *ss, *p, *pstart, *pend = NULL_UCHARP; + int level; + OnigStackType* k; + + level = 0; + k = top; + k--; + while (k >= stk_base) { + if (k->type == STK_CALL_FRAME) { + level--; + } + else if (k->type == STK_RETURN) { + level++; + } + else if (level == nest) { + if (k->type == STK_MEM_START) { + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pstart = k->u.mem.pstr; + if (pend != NULL_UCHARP) { + if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ + p = pstart; + ss = *s; + + if (ignore_case != 0) { + if (string_cmp_ic(reg->enc, case_fold_flag, + pstart, &ss, (int )(pend - pstart), send) == 0) + return 0; /* or goto next_mem; */ + } + else { + while (p < pend) { + if (*p++ != *ss++) return 0; /* or goto next_mem; */ + } + } + + *s = ss; + return 1; + } + } + } + else if (k->type == STK_MEM_END) { + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pend = k->u.mem.pstr; + } + } + } + k--; + } + + return 0; +} +#endif /* USE_BACKREF_WITH_LEVEL */ + + +#ifdef ONIG_DEBUG_STATISTICS + +#define USE_TIMEOFDAY + +#ifdef USE_TIMEOFDAY +#ifdef HAVE_SYS_TIME_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif +static struct timeval ts, te; +#define GETTIME(t) gettimeofday(&(t), (struct timezone* )0) +#define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \ + (((te).tv_sec - (ts).tv_sec)*1000000)) +#else +#ifdef HAVE_SYS_TIMES_H +#include +#endif +static struct tms ts, te; +#define GETTIME(t) times(&(t)) +#define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime) +#endif + +static int 
OpCounter[256]; +static int OpPrevCounter[256]; +static unsigned long OpTime[256]; +static int OpCurr = OP_FINISH; +static int OpPrevTarget = OP_FAIL; +static int MaxStackDepth = 0; + +#define MOP_IN(opcode) do {\ + if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ + OpCurr = opcode;\ + OpCounter[opcode]++;\ + GETTIME(ts);\ +} while(0) + +#define MOP_OUT do {\ + GETTIME(te);\ + OpTime[OpCurr] += TIMEDIFF(te, ts);\ +} while(0) + +extern void +onig_statistics_init(void) +{ + int i; + for (i = 0; i < 256; i++) { + OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; + } + MaxStackDepth = 0; +} + +extern void +onig_print_statistics(FILE* f) +{ + int i; + fprintf(f, " count prev time\n"); + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + fprintf(f, "%8d: %8d: %10ld: %s\n", + OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); + } + fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); +} + +#define STACK_INC do {\ + stk++;\ + if (stk - stk_base > MaxStackDepth) \ + MaxStackDepth = stk - stk_base;\ +} while(0) + +#else +#define STACK_INC stk++ + +#define MOP_IN(opcode) +#define MOP_OUT +#endif + + +/* matching region of POSIX API */ +typedef int regoff_t; + +typedef struct { + regoff_t rm_so; + regoff_t rm_eo; +} posix_regmatch_t; + +/* match data(str - end) from position (sstart). */ +/* if sstart == str then set sprev to NULL. 
*/ +static int +match_at(regex_t* reg, const UChar* str, const UChar* end, +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + const UChar* right_range, +#endif + const UChar* sstart, UChar* sprev, OnigMatchArg* msa) +{ + static const UChar FinishCode[] = { OP_FINISH }; + + int i, n, num_mem, best_len, pop_level; + LengthType tlen, tlen2; + MemNumType mem; + RelAddrType addr; + OnigOptionType option = reg->options; + OnigEncoding encode = reg->enc; + OnigCaseFoldType case_fold_flag = reg->case_fold_flag; + UChar *s, *q, *sbegin; + UChar *p = reg->p; + char *alloca_base; + OnigStackType *stk_alloc, *stk_base, *stk, *stk_end; + OnigStackType *stkp; /* used as any purpose. */ + OnigStackIndex si; + OnigStackIndex *repeat_stk; + OnigStackIndex *mem_start_stk, *mem_end_stk; +#ifdef USE_COMBINATION_EXPLOSION_CHECK + int scv; + unsigned char* state_check_buff = msa->state_check_buff; + int num_comb_exp_check = reg->num_comb_exp_check; +#endif + n = reg->num_repeat + reg->num_mem * 2; + + STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); + pop_level = reg->stack_pop_level; + num_mem = reg->num_mem; + repeat_stk = (OnigStackIndex* )alloca_base; + + mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); + mem_end_stk = mem_start_stk + num_mem; + mem_start_stk--; /* for index start from 1, + mem_start_stk[1]..mem_start_stk[num_mem] */ + mem_end_stk--; /* for index start from 1, + mem_end_stk[1]..mem_end_stk[num_mem] */ + for (i = 1; i <= num_mem; i++) { + mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; + } + +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n", + (int )str, (int )end, (int )sstart, (int )sprev); + fprintf(stderr, "size: %d, start offset: %d\n", + (int )(end - str), (int )(sstart - str)); +#endif + + STACK_PUSH_ENSURED(STK_ALT, (UChar *)FinishCode); /* bottom stack */ + best_len = ONIG_MISMATCH; + s = (UChar* )sstart; + while (1) { +#ifdef ONIG_DEBUG_MATCH + { + UChar *q, *bp, buf[50]; + 
int len; + fprintf(stderr, "%4d> \"", (int )(s - str)); + bp = buf; + for (i = 0, q = s; i < 7 && q < end; i++) { + len = enclen(encode, q); + while (len-- > 0) *bp++ = *q++; + } + if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } + else { xmemcpy(bp, "\"", 1); bp += 1; } + *bp = 0; + fputs((char* )buf, stderr); + for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); + onig_print_compiled_byte_code(stderr, p, NULL, encode); + fprintf(stderr, "\n"); + } +#endif + + sbegin = s; + switch (*p++) { + case OP_END: MOP_IN(OP_END); + n = s - sstart; + if (n > best_len) { + OnigRegion* region; +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + if (IS_FIND_LONGEST(option)) { + if (n > msa->best_len) { + msa->best_len = n; + msa->best_s = (UChar* )sstart; + } + else + goto end_best_len; + } +#endif + best_len = n; + region = msa->region; + if (region) { +#ifdef USE_POSIX_API_REGION_OPTION + if (IS_POSIX_REGION(msa->options)) { + posix_regmatch_t* rmt = (posix_regmatch_t* )region; + + rmt[0].rm_so = sstart - str; + rmt[0].rm_eo = s - str; + for (i = 1; i <= num_mem; i++) { + if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (BIT_STATUS_AT(reg->bt_mem_start, i)) + rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; + else + rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str; + + rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i) + ? STACK_AT(mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[i])) - str; + } + else { + rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; + } + } + } + else { +#endif /* USE_POSIX_API_REGION_OPTION */ + region->beg[0] = sstart - str; + region->end[0] = s - str; + for (i = 1; i <= num_mem; i++) { + if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (BIT_STATUS_AT(reg->bt_mem_start, i)) + region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; + else + region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; + + region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) + ? 
STACK_AT(mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[i])) - str; + } + else { + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; + } + } + +#ifdef USE_CAPTURE_HISTORY + if (reg->capture_history != 0) { + int r; + OnigCaptureTreeNode* node; + + if (IS_NULL(region->history_root)) { + region->history_root = node = history_node_new(); + CHECK_NULL_RETURN_MEMERR(node); + } + else { + node = region->history_root; + history_tree_clear(node); + } + + node->group = 0; + node->beg = sstart - str; + node->end = s - str; + + stkp = stk_base; + r = make_capture_history_tree(region->history_root, &stkp, + stk, (UChar* )str, reg); + if (r < 0) { + best_len = r; /* error code */ + goto finish; + } + } +#endif /* USE_CAPTURE_HISTORY */ +#ifdef USE_POSIX_API_REGION_OPTION + } /* else IS_POSIX_REGION() */ +#endif + } /* if (region) */ + } /* n > best_len */ + +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + end_best_len: +#endif + MOP_OUT; + + if (IS_FIND_CONDITION(option)) { + if (IS_FIND_NOT_EMPTY(option) && s == sstart) { + best_len = ONIG_MISMATCH; + goto fail; /* for retry */ + } + if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { + goto fail; /* for retry */ + } + } + + /* default behavior: return first-matching result. 
*/ + goto finish; + break; + + case OP_EXACT1: MOP_IN(OP_EXACT1); +#if 0 + DATA_ENSURE(1); + if (*p != *s) goto fail; + p++; s++; +#endif + if (*p != *s++) goto fail; + DATA_ENSURE(0); + p++; + MOP_OUT; + break; + + case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC); + { + int len; + UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + DATA_ENSURE(1); + len = ONIGENC_MBC_CASE_FOLD(encode, + /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ + case_fold_flag, + &s, end, lowbuf); + DATA_ENSURE(0); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) { + goto fail; + } + p++; q++; + } + } + MOP_OUT; + break; + + case OP_EXACT2: MOP_IN(OP_EXACT2); + DATA_ENSURE(2); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + MOP_OUT; + continue; + break; + + case OP_EXACT3: MOP_IN(OP_EXACT3); + DATA_ENSURE(3); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + MOP_OUT; + continue; + break; + + case OP_EXACT4: MOP_IN(OP_EXACT4); + DATA_ENSURE(4); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + MOP_OUT; + continue; + break; + + case OP_EXACT5: MOP_IN(OP_EXACT5); + DATA_ENSURE(5); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + MOP_OUT; + continue; + break; + + case OP_EXACTN: MOP_IN(OP_EXACTN); + GET_LENGTH_INC(tlen, p); + DATA_ENSURE(tlen); + while (tlen-- > 0) { + if (*p++ != *s++) goto fail; + } + sprev = s - 1; + MOP_OUT; + continue; + break; + + case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC); + { + int len; + UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + GET_LENGTH_INC(tlen, p); + endp = p + tlen; + + while (p < endp) { + sprev = s; + DATA_ENSURE(1); + len = 
ONIGENC_MBC_CASE_FOLD(encode, + /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ + case_fold_flag, + &s, end, lowbuf); + DATA_ENSURE(0); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) goto fail; + p++; q++; + } + } + } + + MOP_OUT; + continue; + break; + + case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1); + DATA_ENSURE(2); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + MOP_OUT; + break; + + case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2); + DATA_ENSURE(4); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + sprev = s; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + MOP_OUT; + continue; + break; + + case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3); + DATA_ENSURE(6); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + sprev = s; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + MOP_OUT; + continue; + break; + + case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N); + GET_LENGTH_INC(tlen, p); + DATA_ENSURE(tlen * 2); + while (tlen-- > 0) { + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + } + sprev = s - 2; + MOP_OUT; + continue; + break; + + case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N); + GET_LENGTH_INC(tlen, p); + DATA_ENSURE(tlen * 3); + while (tlen-- > 0) { + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + } + sprev = s - 3; + MOP_OUT; + continue; + break; + + case OP_EXACTMBN: MOP_IN(OP_EXACTMBN); + GET_LENGTH_INC(tlen, p); /* mb-len */ + GET_LENGTH_INC(tlen2, p); /* string len */ + tlen2 *= tlen; + DATA_ENSURE(tlen2); + while (tlen2-- > 0) { + if (*p != *s) goto fail; + p++; s++; + } + sprev = s - tlen; + MOP_OUT; + continue; + break; + + case OP_CCLASS: MOP_IN(OP_CCLASS); + DATA_ENSURE(1); + if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; + p += SIZE_BITSET; + s += 
enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */ + MOP_OUT; + break; + + case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB); + if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail; + + cclass_mb: + GET_LENGTH_INC(tlen, p); + { + OnigCodePoint code; + UChar *ss; + int mb_len; + + DATA_ENSURE(1); + mb_len = enclen(encode, s, end); + DATA_ENSURE(mb_len); + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + if (! onig_is_in_code_range(p, code)) goto fail; +#else + q = p; + ALIGNMENT_RIGHT(q); + if (! onig_is_in_code_range(q, code)) goto fail; +#endif + } + p += tlen; + MOP_OUT; + break; + + case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { + p += SIZE_BITSET; + goto cclass_mb; + } + else { + if (BITSET_AT(((BitSetRef )p), *s) == 0) + goto fail; + + p += SIZE_BITSET; + GET_LENGTH_INC(tlen, p); + p += tlen; + s++; + } + MOP_OUT; + break; + + case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT); + DATA_ENSURE(1); + if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; + p += SIZE_BITSET; + s += enclen(encode, s, end); + MOP_OUT; + break; + + case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) { + s++; + GET_LENGTH_INC(tlen, p); + p += tlen; + goto cc_mb_not_success; + } + + cclass_mb_not: + GET_LENGTH_INC(tlen, p); + { + OnigCodePoint code; + UChar *ss; + int mb_len = enclen(encode, s, end); + + if (! 
DATA_ENSURE_CHECK(mb_len)) { + DATA_ENSURE(1); + s = (UChar* )end; + p += tlen; + goto cc_mb_not_success; + } + + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + if (onig_is_in_code_range(p, code)) goto fail; +#else + q = p; + ALIGNMENT_RIGHT(q); + if (onig_is_in_code_range(q, code)) goto fail; +#endif + } + p += tlen; + + cc_mb_not_success: + MOP_OUT; + break; + + case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { + p += SIZE_BITSET; + goto cclass_mb_not; + } + else { + if (BITSET_AT(((BitSetRef )p), *s) != 0) + goto fail; + + p += SIZE_BITSET; + GET_LENGTH_INC(tlen, p); + p += tlen; + s++; + } + MOP_OUT; + break; + + case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE); + { + OnigCodePoint code; + void *node; + int mb_len; + UChar *ss; + + DATA_ENSURE(1); + GET_POINTER_INC(node, p); + mb_len = enclen(encode, s, end); + ss = s; + s += mb_len; + DATA_ENSURE(0); + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; + } + MOP_OUT; + break; + + case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); + DATA_ENSURE(1); + n = enclen(encode, s, end); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + s += n; + MOP_OUT; + break; + + case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); + DATA_ENSURE(1); + n = enclen(encode, s, end); + DATA_ENSURE(n); + s += n; + MOP_OUT; + break; + + case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); + while (DATA_ENSURE_CHECK1) { + STACK_PUSH_ALT(p, s, sprev); + n = enclen(encode, s, end); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + sprev = s; + s += n; + } + MOP_OUT; + break; + + case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); + while (DATA_ENSURE_CHECK1) { + STACK_PUSH_ALT(p, s, sprev); + n = enclen(encode, s, end); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + MOP_OUT; + break; + + 
case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); + while (DATA_ENSURE_CHECK1) { + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev); + } + n = enclen(encode, s, end); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + sprev = s; + s += n; + } + p++; + MOP_OUT; + break; + + case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); + while (DATA_ENSURE_CHECK1) { + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev); + } + n = enclen(encode, s, end); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + p++; + MOP_OUT; + break; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); + GET_STATE_CHECK_NUM_INC(mem, p); + while (DATA_ENSURE_CHECK1) { + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); + n = enclen(encode, s); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + sprev = s; + s += n; + } + MOP_OUT; + break; + + case OP_STATE_CHECK_ANYCHAR_ML_STAR: + MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); + + GET_STATE_CHECK_NUM_INC(mem, p); + while (DATA_ENSURE_CHECK1) { + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); + n = enclen(encode, s); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + MOP_OUT; + break; +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + + case OP_WORD: MOP_IN(OP_WORD); + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + + s += enclen(encode, s, end); + MOP_OUT; + break; + + case OP_NOT_WORD: MOP_IN(OP_NOT_WORD); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + + s += enclen(encode, s, end); + MOP_OUT; + break; + + case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND); + if (ON_STR_BEGIN(s)) { + DATA_ENSURE(1); + if (! 
ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + } + else if (ON_STR_END(s)) { + if (! ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + else { + if (ONIGENC_IS_MBC_WORD(encode, s, end) + == ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + MOP_OUT; + continue; + break; + + case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND); + if (ON_STR_BEGIN(s)) { + if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + } + else if (ON_STR_END(s)) { + if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + else { + if (ONIGENC_IS_MBC_WORD(encode, s, end) + != ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + MOP_OUT; + continue; + break; + +#ifdef USE_WORD_BEGIN_END + case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN); + if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { + if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { + MOP_OUT; + continue; + } + } + goto fail; + break; + + case OP_WORD_END: MOP_IN(OP_WORD_END); + if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { + if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { + MOP_OUT; + continue; + } + } + goto fail; + break; +#endif + + case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF); + if (! ON_STR_BEGIN(s)) goto fail; + + MOP_OUT; + continue; + break; + + case OP_END_BUF: MOP_IN(OP_END_BUF); + if (! 
ON_STR_END(s)) goto fail; + + MOP_OUT; + continue; + break; + + case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE); + if (ON_STR_BEGIN(s)) { + if (IS_NOTBOL(msa->options)) goto fail; + MOP_OUT; + continue; + } + else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { + MOP_OUT; + continue; + } + goto fail; + break; + + case OP_END_LINE: MOP_IN(OP_END_LINE); + if (ON_STR_END(s)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { +#endif + if (IS_NOTEOL(msa->options)) goto fail; + MOP_OUT; + continue; +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + } +#endif + } + else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { + MOP_OUT; + continue; + } +#ifdef USE_CRNL_AS_LINE_TERMINATOR + else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { + MOP_OUT; + continue; + } +#endif + goto fail; + break; + + case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF); + if (ON_STR_END(s)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { +#endif + if (IS_NOTEOL(msa->options)) goto fail; + MOP_OUT; + continue; +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + } +#endif + } + else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && + ON_STR_END(s + enclen(encode, s, end))) { + MOP_OUT; + continue; + } +#ifdef USE_CRNL_AS_LINE_TERMINATOR + else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { + UChar* ss = s + enclen(encode, s); + ss += enclen(encode, ss); + if (ON_STR_END(ss)) { + MOP_OUT; + continue; + } + } +#endif + goto fail; + break; + + case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION); + if (s != msa->start) + goto fail; + + MOP_OUT; + continue; + break; + + case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH); + GET_MEMNUM_INC(mem, p); + STACK_PUSH_MEM_START(mem, s); + MOP_OUT; + continue; + break; + + case OP_MEMORY_START: MOP_IN(OP_MEMORY_START); + GET_MEMNUM_INC(mem, p); + mem_start_stk[mem] = (OnigStackIndex )((void* )s); + MOP_OUT; + 
continue; + break; + + case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH); + GET_MEMNUM_INC(mem, p); + STACK_PUSH_MEM_END(mem, s); + MOP_OUT; + continue; + break; + + case OP_MEMORY_END: MOP_IN(OP_MEMORY_END); + GET_MEMNUM_INC(mem, p); + mem_end_stk[mem] = (OnigStackIndex )((void* )s); + MOP_OUT; + continue; + break; + +#ifdef USE_SUBEXP_CALL + case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC); + GET_MEMNUM_INC(mem, p); + STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ + STACK_PUSH_MEM_END(mem, s); + mem_start_stk[mem] = GET_STACK_INDEX(stkp); + MOP_OUT; + continue; + break; + + case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC); + GET_MEMNUM_INC(mem, p); + mem_end_stk[mem] = (OnigStackIndex )((void* )s); + STACK_GET_MEM_START(mem, stkp); + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + mem_start_stk[mem] = GET_STACK_INDEX(stkp); + else + mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); + + STACK_PUSH_MEM_END_MARK(mem); + MOP_OUT; + continue; + break; +#endif + + case OP_BACKREF1: MOP_IN(OP_BACKREF1); + mem = 1; + goto backref; + break; + + case OP_BACKREF2: MOP_IN(OP_BACKREF2); + mem = 2; + goto backref; + break; + + case OP_BACKREFN: MOP_IN(OP_BACKREFN); + GET_MEMNUM_INC(mem, p); + backref: + { + int len; + UChar *pstart, *pend; + + /* if you want to remove following line, + you should check in parse and compile time. */ + if (mem > num_mem) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + STRING_CMP(pstart, s, n); + while (sprev + (len = enclen(encode, sprev, end)) < s) + sprev += len; + + MOP_OUT; + continue; + } + break; + + case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC); + GET_MEMNUM_INC(mem, p); + { + int len; + UChar *pstart, *pend; + + /* if you want to remove following line, + you should check in parse and compile time. */ + if (mem > num_mem) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + STRING_CMP_IC(case_fold_flag, pstart, &s, n, end); + while (sprev + (len = enclen(encode, sprev, end)) < s) + sprev += len; + + MOP_OUT; + continue; + } + break; + + case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI); + { + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE(pstart, swork, n, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enclen(encode, sprev, end)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + MOP_OUT; + continue; + } + break; + + case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC); + { + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enclen(encode, sprev, end)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + MOP_OUT; + continue; + } + break; + +#ifdef USE_BACKREF_WITH_LEVEL + case OP_BACKREF_WITH_LEVEL: + { + int len; + OnigOptionType ic; + LengthType level; + + GET_OPTION_INC(ic, p); + GET_LENGTH_INC(level, p); + GET_LENGTH_INC(tlen, p); + + sprev = s; + if (backref_match_at_nested_level(reg, stk, stk_base, ic + , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { + while (sprev + (len = enclen(encode, sprev, end)) < s) + sprev += len; + + p += (SIZE_MEMNUM * tlen); + } + else + goto fail; + + MOP_OUT; + continue; + } + + break; +#endif + +#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ + case 
OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH); + GET_OPTION_INC(option, p); + STACK_PUSH_ALT(p, s, sprev); + p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; + MOP_OUT; + continue; + break; + + case OP_SET_OPTION: MOP_IN(OP_SET_OPTION); + GET_OPTION_INC(option, p); + MOP_OUT; + continue; + break; +#endif + + case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START); + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_PUSH_NULL_CHECK_START(mem, s); + MOP_OUT; + continue; + break; + + case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_NULL_CHECK(isnull, mem, s); + if (isnull) { +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n", + (int )mem, (int )s); +#endif + null_check_found: + /* empty loop founded, skip next instruction */ + switch (*p++) { + case OP_JUMP: + case OP_PUSH: + p += SIZE_RELADDR; + break; + case OP_REPEAT_INC: + case OP_REPEAT_INC_NG: + case OP_REPEAT_INC_SG: + case OP_REPEAT_INC_NG_SG: + p += SIZE_MEMNUM; + break; + default: + goto unexpected_bytecode_error; + break; + } + } + } + MOP_OUT; + continue; + break; + +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT + case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); + if (isnull) { +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n", + (int )mem, (int )s); +#endif + if (isnull == -1) goto fail; + goto null_check_found; + } + } + MOP_OUT; + continue; + break; +#endif + +#ifdef USE_SUBEXP_CALL + case OP_NULL_CHECK_END_MEMST_PUSH: + MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT + STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); +#else + STACK_NULL_CHECK_REC(isnull, mem, s); +#endif + if (isnull) { +#ifdef 
ONIG_DEBUG_MATCH + fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n", + (int )mem, (int )s); +#endif + if (isnull == -1) goto fail; + goto null_check_found; + } + else { + STACK_PUSH_NULL_CHECK_END(mem); + } + } + MOP_OUT; + continue; + break; +#endif + + case OP_JUMP: MOP_IN(OP_JUMP); + GET_RELADDR_INC(addr, p); + p += addr; + MOP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; + continue; + break; + + case OP_PUSH: MOP_IN(OP_PUSH); + GET_RELADDR_INC(addr, p); + STACK_PUSH_ALT(p + addr, s, sprev); + MOP_OUT; + continue; + break; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH); + GET_STATE_CHECK_NUM_INC(mem, p); + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + GET_RELADDR_INC(addr, p); + STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); + MOP_OUT; + continue; + break; + + case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); + GET_STATE_CHECK_NUM_INC(mem, p); + GET_RELADDR_INC(addr, p); + STATE_CHECK_VAL(scv, mem); + if (scv) { + p += addr; + } + else { + STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); + } + MOP_OUT; + continue; + break; + + case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK); + GET_STATE_CHECK_NUM_INC(mem, p); + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_STATE_CHECK(s, mem); + MOP_OUT; + continue; + break; +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + + case OP_POP: MOP_IN(OP_POP); + STACK_POP_ONE; + MOP_OUT; + continue; + break; + + case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1); + GET_RELADDR_INC(addr, p); + if (*p == *s && DATA_ENSURE_CHECK1) { + p++; + STACK_PUSH_ALT(p + addr, s, sprev); + MOP_OUT; + continue; + } + p += (addr + 1); + MOP_OUT; + continue; + break; + + case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT); + GET_RELADDR_INC(addr, p); + if (*p == *s) { + p++; + STACK_PUSH_ALT(p + addr, s, sprev); + MOP_OUT; + continue; + } + p++; + MOP_OUT; + continue; + break; + + case OP_REPEAT: MOP_IN(OP_REPEAT); + { + 
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + GET_RELADDR_INC(addr, p); + + STACK_ENSURE(1); + repeat_stk[mem] = GET_STACK_INDEX(stk); + STACK_PUSH_REPEAT(mem, p); + + if (reg->repeat_range[mem].lower == 0) { + STACK_PUSH_ALT(p + addr, s, sprev); + } + } + MOP_OUT; + continue; + break; + + case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG); + { + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + GET_RELADDR_INC(addr, p); + + STACK_ENSURE(1); + repeat_stk[mem] = GET_STACK_INDEX(stk); + STACK_PUSH_REPEAT(mem, p); + + if (reg->repeat_range[mem].lower == 0) { + STACK_PUSH_ALT(p, s, sprev); + p += addr; + } + } + MOP_OUT; + continue; + break; + + case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + si = repeat_stk[mem]; + stkp = STACK_AT(si); + + repeat_inc: + stkp->u.repeat.count++; + if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) { + /* end of repeat. Nothing to do. */ + } + else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { + STACK_PUSH_ALT(p, s, sprev); + p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. 
*/ + } + else { + p = stkp->u.repeat.pcode; + } + STACK_PUSH_REPEAT_INC(si); + MOP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; + continue; + break; + + case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + STACK_GET_REPEAT(mem, stkp); + si = GET_STACK_INDEX(stkp); + goto repeat_inc; + break; + + case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + si = repeat_stk[mem]; + stkp = STACK_AT(si); + + repeat_inc_ng: + stkp->u.repeat.count++; + if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { + if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { + UChar* pcode = stkp->u.repeat.pcode; + + STACK_PUSH_REPEAT_INC(si); + STACK_PUSH_ALT(pcode, s, sprev); + } + else { + p = stkp->u.repeat.pcode; + STACK_PUSH_REPEAT_INC(si); + } + } + else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { + STACK_PUSH_REPEAT_INC(si); + } + MOP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; + continue; + break; + + case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + STACK_GET_REPEAT(mem, stkp); + si = GET_STACK_INDEX(stkp); + goto repeat_inc_ng; + break; + + case OP_PUSH_POS: MOP_IN(OP_PUSH_POS); + STACK_PUSH_POS(s, sprev); + MOP_OUT; + continue; + break; + + case OP_POP_POS: MOP_IN(OP_POP_POS); + { + STACK_POS_END(stkp); + s = stkp->u.state.pstr; + sprev = stkp->u.state.pstr_prev; + } + MOP_OUT; + continue; + break; + + case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT); + GET_RELADDR_INC(addr, p); + STACK_PUSH_POS_NOT(p + addr, s, sprev); + MOP_OUT; + continue; + break; + + case OP_FAIL_POS: MOP_IN(OP_FAIL_POS); + STACK_POP_TIL_POS_NOT; + goto fail; + break; + + case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT); + STACK_PUSH_STOP_BT; + MOP_OUT; + continue; + break; + + case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT); + STACK_STOP_BT_END; + MOP_OUT; + continue; + break; + + case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND); + GET_LENGTH_INC(tlen, p); + s = 
(UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen); + if (IS_NULL(s)) goto fail; + sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end); + MOP_OUT; + continue; + break; + + case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT); + GET_RELADDR_INC(addr, p); + GET_LENGTH_INC(tlen, p); + q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen); + if (IS_NULL(q)) { + /* too short case -> success. ex. /(?p + addr; + MOP_OUT; + continue; + break; + + case OP_RETURN: MOP_IN(OP_RETURN); + STACK_RETURN(p); + STACK_PUSH_RETURN; + MOP_OUT; + continue; + break; +#endif + + case OP_FINISH: + goto finish; + break; + + fail: + MOP_OUT; + /* fall */ + case OP_FAIL: MOP_IN(OP_FAIL); + STACK_POP; + p = stk->u.state.pcode; + s = stk->u.state.pstr; + sprev = stk->u.state.pstr_prev; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + if (stk->u.state.state_check != 0) { + stk->type = STK_STATE_CHECK_MARK; + stk++; + } +#endif + + MOP_OUT; + continue; + break; + + default: + goto bytecode_error; + + } /* end of switch */ + sprev = sbegin; + } /* end of while(1) */ + + finish: + STACK_SAVE; + return best_len; + +#ifdef ONIG_DEBUG + stack_error: + STACK_SAVE; + return ONIGERR_STACK_BUG; +#endif + + bytecode_error: + STACK_SAVE; + return ONIGERR_UNDEFINED_BYTECODE; + + unexpected_bytecode_error: + STACK_SAVE; + return ONIGERR_UNEXPECTED_BYTECODE; +} + + +static UChar* +slow_search(OnigEncoding enc, UChar* target, UChar* target_end, + const UChar* text, const UChar* text_end, UChar* text_range) +{ + UChar *t, *p, *s, *end; + + end = (UChar* )text_end; + end -= target_end - target - 1; + if (end > text_range) + end = text_range; + + s = (UChar* )text; + + if (enc->max_enc_len == enc->min_enc_len) { + int n = enc->max_enc_len; + + while (s < end) { + if (*s == *target) { + p = s + 1; + t = target + 1; + if (target_end == t || memcmp(t, p, target_end - t) == 0) + return s; + } + s += n; + } + return (UChar*)NULL; + } + while (s < end) { + if (*s == *target) { + p = s 
+ 1; + t = target + 1; + if (target_end == t || memcmp(t, p, target_end - t) == 0) + return s; + } + s += enclen(enc, s, text_end); + } + + return (UChar* )NULL; +} + +static int +str_lower_case_match(OnigEncoding enc, int case_fold_flag, + const UChar* t, const UChar* tend, + const UChar* p, const UChar* end) +{ + int lowlen; + UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + while (t < tend) { + lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf); + q = lowbuf; + while (lowlen > 0) { + if (*t++ != *q++) return 0; + lowlen--; + } + } + + return 1; +} + +static UChar* +slow_search_ic(OnigEncoding enc, int case_fold_flag, + UChar* target, UChar* target_end, + const UChar* text, const UChar* text_end, UChar* text_range) +{ + UChar *s, *end; + + end = (UChar* )text_end; + end -= target_end - target - 1; + if (end > text_range) + end = text_range; + + s = (UChar* )text; + + while (s < end) { + if (str_lower_case_match(enc, case_fold_flag, target, target_end, + s, text_end)) + return s; + + s += enclen(enc, s, text_end); + } + + return (UChar* )NULL; +} + +static UChar* +slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) +{ + UChar *t, *p, *s; + + s = (UChar* )text_end; + s -= (target_end - target); + if (s > text_start) + s = (UChar* )text_start; + else + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end); + + while (s >= text) { + if (*s == *target) { + p = s + 1; + t = target + 1; + while (t < target_end) { + if (*t != *p++) + break; + t++; + } + if (t == target_end) + return s; + } + s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end); + } + + return (UChar* )NULL; +} + +static UChar* +slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, + UChar* target, UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) +{ + UChar *s; + + s 
= (UChar* )text_end; + s -= (target_end - target); + if (s > text_start) + s = (UChar* )text_start; + else + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end); + + while (s >= text) { + if (str_lower_case_match(enc, case_fold_flag, + target, target_end, s, text_end)) + return s; + + s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end); + } + + return (UChar* )NULL; +} +/* Forward skip-table search that only ever stops on character heads. At each position the target is compared right to left; on a mismatch the skip for the text byte under the target's last byte is read from reg->map (or reg->int_map when that table exists) and s is advanced by enclen() steps until it has moved at least that far. Returns the match start or NULL. */ +static UChar* +bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, + const UChar* text_range) +{ + const UChar *s, *se, *t, *p, *end; + const UChar *tail; + int skip, tlen1; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n", + (int )text, (int )text_end, (int )text_range); +#endif + + tail = target_end - 1; + tlen1 = tail - target; + end = text_range; + if (end + tlen1 > text_end) + end = text_end - tlen1; + + s = text; + + if (IS_NULL(reg->int_map)) { + while (s < end) { + p = se = s + tlen1; + t = tail; + while (t >= target && *p == *t) { + p--; t--; + } + if (t < target) return (UChar* )s; +/* mismatch: look up the skip for the byte aligned with the target's last byte */ + skip = reg->map[*se]; + t = s; + do { + s += enclen(reg->enc, s, end); + } while ((s - t) < skip && s < end); + } + } + else { + while (s < end) { + p = se = s + tlen1; + t = tail; + while (t >= target && *p == *t) { + p--; t--; + } + if (t < target) return (UChar* )s; + + skip = reg->int_map[*se]; + t = s; + do { + s += enclen(reg->enc, s, end); + } while ((s - t) < skip && s < end); + } + } + + return (UChar* )NULL; +} +/* Forward skip-table search on raw bytes: s points at the text byte aligned with the target's last byte, the target is compared right to left, and on a mismatch s jumps by the table entry for *s (reg->map, or reg->int_map when allocated). Returns the match start or NULL. */ +static UChar* +bm_search(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, const UChar* text_range) +{ + const UChar *s, *t, *p, *end; + const UChar *tail; + + end = text_range + (target_end - target) - 1; + if (end > text_end) + end = text_end; + + tail = target_end - 1; + s = text + (target_end - target) - 1; + if (IS_NULL(reg->int_map)) { + while (s < end) { + p = s; + t =
tail; + while (t >= target && *p == *t) { + p--; t--; + } + if (t < target) return (UChar* )(p + 1); + s += reg->map[*s]; + } + } + else { /* see int_map[] */ + while (s < end) { + p = s; + t = tail; + while (t >= target && *p == *t) { + p--; t--; + } + if (t < target) return (UChar* )(p + 1); + s += reg->int_map[*s]; + } + } + return (UChar* )NULL; +} +/* Build the skip table used by bm_search_backward(). *skip is allocated (ONIG_CHAR_TABLE_SIZE ints) if it is still NULL; every entry defaults to the target length, then each byte found at index 1..len-1 of the target gets the smallest such index. Returns 0, or ONIGERR_MEMORY when the allocation fails. */ +static int +set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, + int** skip) + +{ + int i, len; + + if (IS_NULL(*skip)) { + *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); + if (IS_NULL(*skip)) return ONIGERR_MEMORY; + } + + len = end - s; + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + (*skip)[i] = len; +/* iterate downwards so the lowest index of a repeated byte is the one that is kept */ + for (i = len - 1; i > 0; i--) + (*skip)[s[i]] = i; + + return 0; +} +/* Backward search driven by reg->int_map_backward: compare the target left to right at s; on a mismatch move s left by the table entry for *s, re-align it to a character head and try again while s >= text. Returns the match start or NULL. */ +static UChar* +bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) +{ + const UChar *s, *t, *p; + + s = text_end - (target_end - target); + if (text_start < s) + s = text_start; + else + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end); + + while (s >= text) { + p = s; + t = target; + while (t < target_end && *p == *t) { + p++; t++; + } + if (t == target_end) + return (UChar* )s; + + s -= reg->int_map_backward[*s]; + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end); + } + + return (UChar* )NULL; +} +/* Return the first character head in [text, text_range) whose first byte has a non-zero entry in map, or NULL. */ +static UChar* +map_search(OnigEncoding enc, UChar map[], + const UChar* text, const UChar* text_range, const UChar* text_end) +{ + const UChar *s = text; + + while (s < text_range) { + if (map[*s]) return (UChar* )s; + + s += enclen(enc, s, text_end); + } + return (UChar* )NULL; +} +/* Backward variant of map_search(): walk character heads from text_start down to text and return the first one whose first byte has a non-zero entry in map, or NULL. */ +static UChar* +map_search_backward(OnigEncoding enc, UChar map[], + const UChar* text, const UChar* adjust_text, + const UChar* text_start, const UChar* text_end) +{ + const UChar *s = text_start; + + while (s >= text) { + if (map[*s]) return (UChar* )s; + + s =
onigenc_get_prev_char_head(enc, adjust_text, s, text_end); + } + return (UChar* )NULL; +} +/* Match reg against the string [str, end) at exactly the position at; no scanning is done. Unless region is NULL (or the POSIX-region option applies) the region is first resized and cleared to reg->num_mem + 1 entries. Returns the value of match_at(), or the non-zero result of onig_region_resize_clear() when that step fails. */ +extern int +onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, + OnigOptionType option) +{ + int r; + UChar *prev; + OnigMatchArg msa; + +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) + start: + THREAD_ATOMIC_START; + if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { + ONIG_STATE_INC(reg); + if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_chain_reduce(reg); + ONIG_STATE_INC(reg); + } + } + else { + int n; + + THREAD_ATOMIC_END; + n = 0; + while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { + if (++n > THREAD_PASS_LIMIT_COUNT) + return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; + THREAD_PASS; + } + goto start; + } + THREAD_ATOMIC_END; +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ + + MATCH_ARG_INIT(msa, option, region, at); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + { + int offset = at - str; + STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); + } +#endif + + if (region +#ifdef USE_POSIX_API_REGION_OPTION + && !IS_POSIX_REGION(option) +#endif + ) { + r = onig_region_resize_clear(region, reg->num_mem + 1); + } + else + r = 0; +/* match_at() also needs the head of the character preceding at */ + if (r == 0) { + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end); + r = match_at(reg, str, end, +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + end, +#endif + at, prev, &msa); + } + + MATCH_ARG_FREE(msa); + ONIG_STATE_DEC_THREAD(reg); + return r; +} +/* Run the search selected by reg->optimize to find, at or after s and before range, a position where the pattern's exact string or char map occurs. Returns 1 on success with *high set (and *low, plus *low_prev when low_prev is non-NULL, whenever reg->dmax is not ONIG_INFINITE_DISTANCE); returns 0 when nothing is found. */ +static int +forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, + UChar* range, UChar** low, UChar** high, UChar** low_prev) +{ + UChar *p, *pprev = (UChar* )NULL; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n", + (int )str, (int )end, (int )s, (int )range); +#endif + + p = s; + if (reg->dmin > 0) { + if (ONIGENC_IS_SINGLEBYTE(reg->enc)) { + p += reg->dmin; + } + else { +
UChar *q = p + reg->dmin; + while (p < q) p += enclen(reg->enc, p, end); + } + } +/* Run the search routine selected by reg->optimize, starting at p. */ + retry: + switch (reg->optimize) { + case ONIG_OPTIMIZE_EXACT: + p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); + break; + case ONIG_OPTIMIZE_EXACT_IC: + p = slow_search_ic(reg->enc, reg->case_fold_flag, + reg->exact, reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_EXACT_BM: + p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_MAP: + p = map_search(reg->enc, reg->map, p, range, end); + break; + } +/* A hit only counts when it lies before range. */ + if (p && p < range) { + if (p - reg->dmin < s) { + retry_gate: + pprev = p; + p += enclen(reg->enc, p, end); + goto retry; + } +/* When reg->sub_anchor is set, check the line-begin / line-end condition at the hit; on failure go to retry_gate, which steps one character forward and searches again. */ + if (reg->sub_anchor) { + UChar* prev; + + switch (reg->sub_anchor) { + case ANCHOR_BEGIN_LINE: + if (!ON_STR_BEGIN(p)) { + prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p, end); + if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) + goto retry_gate; + } + break; +/* onigenc_get_prev_char_head() takes the string end as its last argument, also in the branch compiled only without USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE. */ + case ANCHOR_END_LINE: + if (ON_STR_END(p)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p, end); + if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) + goto retry_gate; +#endif + } + else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) +#ifdef USE_CRNL_AS_LINE_TERMINATOR + && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) +#endif + ) + goto retry_gate; + break; + } + } +/* dmax == 0: the hit itself is the only candidate start. */ + if (reg->dmax == 0) { + *low = p; + if (low_prev) { + if (*low > s) + *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end); + else + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ?
pprev : str), p, end); + } + } + else { + if (reg->dmax != ONIG_INFINITE_DISTANCE) { + *low = p - reg->dmax; + if (*low > s) { + *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, + *low, end, (const UChar** )low_prev); + if (low_prev && IS_NULL(*low_prev)) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : s), *low, end); + } + else { + if (low_prev) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), *low, end); + } + } + } + /* no need to adjust *high; *high is used as a range check only */ + *high = p - reg->dmin; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, + "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n", + (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax); +#endif + return 1; /* success */ + } + + return 0; /* fail */ +} + +static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc, + int** skip)); +/* While the backward skip table has not been built, a span (s - range) shorter than this many bytes is searched with slow_search_backward() instead of building the table. */ +#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100 +/* Backward counterpart of forward_search_range(): search from s down to range + reg->dmin with the routine selected by reg->optimize. Returns 1 on success (setting *low and *high when reg->dmax is not ONIG_INFINITE_DISTANCE), 0 when nothing is found, or the non-zero result of set_bm_backward_skip() when building the skip table fails. */ +static int +backward_search_range(regex_t* reg, const UChar* str, const UChar* end, + UChar* s, const UChar* range, UChar* adjrange, + UChar** low, UChar** high) +{ + int r; + UChar *p; + + range += reg->dmin; + p = s; + + retry: + switch (reg->optimize) { + case ONIG_OPTIMIZE_EXACT: + exact_method: + p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, + range, adjrange, end, p); + break; + + case ONIG_OPTIMIZE_EXACT_IC: + p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, + reg->exact, reg->exact_end, + range, adjrange, end, p); + break; + + case ONIG_OPTIMIZE_EXACT_BM: + case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + if (IS_NULL(reg->int_map_backward)) { + if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) + goto exact_method; +/* first long backward search: build the skip table and keep it in reg */ + r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, + &(reg->int_map_backward)); + if (r) return r; + } + p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, + end, p); + break; + + case ONIG_OPTIMIZE_MAP: + p =
map_search_backward(reg->enc, reg->map, range, adjrange, p, end); + break; + } + + if (p) { + if (reg->sub_anchor) { + UChar* prev; + + switch (reg->sub_anchor) { + case ANCHOR_BEGIN_LINE: + if (!ON_STR_BEGIN(p)) { + prev = onigenc_get_prev_char_head(reg->enc, str, p, end); + if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { + p = prev; + goto retry; + } + } + break; + + case ANCHOR_END_LINE: + if (ON_STR_END(p)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); + if (IS_NULL(prev)) goto fail; + if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { + p = prev; + goto retry; + } +#endif + } + else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) +#ifdef USE_CRNL_AS_LINE_TERMINATOR + && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) +#endif + ) { + p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end); + if (IS_NULL(p)) goto fail; + goto retry; + } + break; + } + } + + /* no needs to adjust *high, *high is used as range check only */ + if (reg->dmax != ONIG_INFINITE_DISTANCE) { + *low = p - reg->dmax; + *high = p - reg->dmin; + *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end); + } + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "backward_search_range: low: %d, high: %d\n", + (int )(*low - str), (int )(*high - str)); +#endif + return 1; /* success */ + } + + fail: +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "backward_search_range: fail.\n"); +#endif + return 0; /* fail */ +} + + +extern int +onig_search(regex_t* reg, const UChar* str, const UChar* end, + const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) +{ + int r; + UChar *s, *prev; + OnigMatchArg msa; + const UChar *orig_start = start; +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + const UChar *orig_range = range; +#endif + +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) + start: + THREAD_ATOMIC_START; + if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { + 
ONIG_STATE_INC(reg); + if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_chain_reduce(reg); + ONIG_STATE_INC(reg); + } + } + else { + int n; + + THREAD_ATOMIC_END; + n = 0; + while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { + if (++n > THREAD_PASS_LIMIT_COUNT) + return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; + THREAD_PASS; + } + goto start; + } + THREAD_ATOMIC_END; +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, + "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n", + (int )str, (int )(end - str), (int )(start - str), (int )(range - str)); +#endif +/* Resize and clear the caller's region to reg->num_mem + 1 entries before searching. */ + if (region +#ifdef USE_POSIX_API_REGION_OPTION + && !IS_POSIX_REGION(option) +#endif + ) { + r = onig_region_resize_clear(region, reg->num_mem + 1); + if (r) goto finish_no_msa; + } +/* start must lie inside [str, end] */ + if (start > end || start < str) goto mismatch_no_msa; + +/* MATCH_AND_RETURN_CHECK: run match_at() at s. A result >= 0 jumps to the match label (except that with USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE and the FIND_LONGEST option the scan continues), any other result except ONIG_MISMATCH jumps to finish as an error, and ONIG_MISMATCH falls through. The argument is passed to match_at() only when USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is defined. */ +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +#define MATCH_AND_RETURN_CHECK(upper_range) \ + r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + if (! IS_FIND_LONGEST(reg->options)) {\ + goto match;\ + }\ + }\ + else goto finish; /* error */ \ + } +#else +#define MATCH_AND_RETURN_CHECK(upper_range) \ + r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + goto match;\ + }\ + else goto finish; /* error */ \ + } +#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ +#else +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +#define MATCH_AND_RETURN_CHECK(none) \ + r = match_at(reg, str, end, s, prev, &msa);\ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + if (!
IS_FIND_LONGEST(reg->options)) {\ + goto match;\ + }\ + }\ + else goto finish; /* error */ \ + } +#else +#define MATCH_AND_RETURN_CHECK(none) \ + r = match_at(reg, str, end, s, prev, &msa);\ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + goto match;\ + }\ + else goto finish; /* error */ \ + } +#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ +#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ + + + /* anchor optimization: narrow [start, range] using the pattern's anchors */ + if (reg->anchor != 0 && str < end) { + UChar *min_semi_end, *max_semi_end; + + if (reg->anchor & ANCHOR_BEGIN_POSITION) { + /* search start-position only */ + begin_position: + if (range > start) + range = start + 1; + else + range = start; + } + else if (reg->anchor & ANCHOR_BEGIN_BUF) { + /* search str-position only */ + if (range > start) { + if (start != str) goto mismatch_no_msa; + range = str + 1; + } + else { + if (range <= str) { + start = str; + range = str; + } + else + goto mismatch_no_msa; + } + } + else if (reg->anchor & ANCHOR_END_BUF) { + min_semi_end = max_semi_end = (UChar* )end; +/* NOTE(review): anchor_dmin / anchor_dmax look like the minimum and maximum distance from a match start to the end anchor -- confirm. The code below clamps start and range so that they stay within those distances of min_semi_end / max_semi_end. */ + end_buf: + if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin) + goto mismatch_no_msa; + + if (range > start) { + if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) { + start = min_semi_end - reg->anchor_dmax; + if (start < end) + start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end); + else { /* match with empty at end */ + start = onigenc_get_prev_char_head(reg->enc, str, end, end); + } + } + if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) { + range = max_semi_end - reg->anchor_dmin + 1; + } + + if (start >= range) goto mismatch_no_msa; + } + else { + if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) { + range = min_semi_end - reg->anchor_dmax; + } + if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) { + start = max_semi_end - reg->anchor_dmin; + start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end); + } + if (range > start)
goto mismatch_no_msa; + } + } + else if (reg->anchor & ANCHOR_SEMI_END_BUF) { + UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1); +/* With a newline as the last character the anchor may sit either at end or just before that newline, so min_semi_end and max_semi_end differ. */ + max_semi_end = (UChar* )end; + if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { + min_semi_end = pre_end; + +#ifdef USE_CRNL_AS_LINE_TERMINATOR + pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1); + if (IS_NOT_NULL(pre_end) && + ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { + min_semi_end = pre_end; + } +#endif + if (min_semi_end > str && start <= min_semi_end) { + goto end_buf; + } + } + else { + min_semi_end = (UChar* )end; + goto end_buf; + } + } + else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { + goto begin_position; + } + } + else if (str == end) { /* empty string */ + static const UChar address_for_empty_string[] = ""; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "onig_search: empty string.\n"); +#endif +/* Only a pattern with threshold_len == 0 is tried against the empty string; start, end and str are first redirected to a static empty buffer. */ + if (reg->threshold_len == 0) { + start = end = str = address_for_empty_string; + s = (UChar* )start; + prev = (UChar* )NULL; + + MATCH_ARG_INIT(msa, option, region, start); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + msa.state_check_buff = (void* )0; + msa.state_check_buff_size = 0; /* NO NEED, for valgrind */ +#endif + MATCH_AND_RETURN_CHECK(end); + goto mismatch; + } + goto mismatch_no_msa; + } + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n", + (int )(end - str), (int )(start - str), (int )(range - str)); +#endif +/* From here on msa is initialised, so every exit must go through a label that calls MATCH_ARG_FREE. */ + MATCH_ARG_INIT(msa, option, region, orig_start); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + { + int offset = (MIN(start, range) - str); + STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); + } +#endif + + s = (UChar* )start; + if (range > start) { /* forward search */ + if (s > str) + prev = onigenc_get_prev_char_head(reg->enc, str, s, end); + else + prev = (UChar* )NULL; + + if (reg->optimize != ONIG_OPTIMIZE_NONE) { + UChar *sch_range, *low, *high, *low_prev; + + sch_range = (UChar* )range; + if
(reg->dmax != 0) { + if (reg->dmax == ONIG_INFINITE_DISTANCE) + sch_range = (UChar* )end; + else { + sch_range += reg->dmax; + if (sch_range > end) sch_range = (UChar* )end; + } + } +/* fewer than threshold_len bytes remain after start: no match is attempted */ + if ((end - start) < reg->threshold_len) + goto mismatch; +/* Finite dmax: forward_search_range() yields a window [low, high] of candidate starts; try each of them, then search again until range is reached. */ + if (reg->dmax != ONIG_INFINITE_DISTANCE) { + do { + if (! forward_search_range(reg, str, end, s, sch_range, + &low, &high, &low_prev)) goto mismatch; + if (s < low) { + s = low; + prev = low_prev; + } + while (s <= high) { + MATCH_AND_RETURN_CHECK(orig_range); + prev = s; + s += enclen(reg->enc, s, end); + } + } while (s < range); + goto mismatch; + } + else { /* check only. */ + if (! forward_search_range(reg, str, end, s, sch_range, + &low, &high, (UChar** )NULL)) goto mismatch; + + if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { + do { + MATCH_AND_RETURN_CHECK(orig_range); + prev = s; + s += enclen(reg->enc, s, end); +/* after a failed attempt, skip ahead to the character that follows the next newline (or to range) */ + while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { + prev = s; + s += enclen(reg->enc, s, end); + } + } while (s < range); + goto mismatch; + } + } + } +/* Plain scan: try every character position in [s, range). */ + do { + MATCH_AND_RETURN_CHECK(orig_range); + prev = s; + s += enclen(reg->enc, s, end); + } while (s < range); + + if (s == range) { /* because empty match with /$/.
*/ + MATCH_AND_RETURN_CHECK(orig_range); + } + } + else { /* backward search */ +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + if (orig_start < end) + orig_start += enclen(reg->enc, orig_start, end); /* is upper range */ +#endif + + if (reg->optimize != ONIG_OPTIMIZE_NONE) { + UChar *low, *high, *adjrange, *sch_start; + + if (range < end) + adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end); + else + adjrange = (UChar* )end; + + if (reg->dmax != ONIG_INFINITE_DISTANCE && + (end - range) >= reg->threshold_len) { + do { + sch_start = s + reg->dmax; + if (sch_start > end) sch_start = (UChar* )end; + if (backward_search_range(reg, str, end, sch_start, range, adjrange, + &low, &high) <= 0) + goto mismatch; +/* NOTE(review): backward_search_range() can also return the non-zero result of set_bm_backward_skip(); if that code is negative the <= 0 test reports it as a plain mismatch -- confirm this is intended. */ + if (s > high) + s = high; + + while (s >= low) { + prev = onigenc_get_prev_char_head(reg->enc, str, s, end); + MATCH_AND_RETURN_CHECK(orig_start); + s = prev; + } + } while (s >= range); + goto mismatch; + } + else { /* check only. */ + if ((end - range) < reg->threshold_len) goto mismatch; + + sch_start = s; + if (reg->dmax != 0) { + if (reg->dmax == ONIG_INFINITE_DISTANCE) + sch_start = (UChar* )end; + else { + sch_start += reg->dmax; + if (sch_start > end) sch_start = (UChar* )end; + else + sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, + start, sch_start, end); + } + } + if (backward_search_range(reg, str, end, sch_start, range, adjrange, + &low, &high) <= 0) goto mismatch; + } + } +/* Plain scan: try every character position from s back down to range. */ + do { + prev = onigenc_get_prev_char_head(reg->enc, str, s, end); + MATCH_AND_RETURN_CHECK(orig_start); + s = prev; + } while (s >= range); + } + + mismatch: +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + if (IS_FIND_LONGEST(reg->options)) { + if (msa.best_len >= 0) { + s = msa.best_s; + goto match; + } + } +#endif + r = ONIG_MISMATCH; + + finish: + MATCH_ARG_FREE(msa); + ONIG_STATE_DEC_THREAD(reg); + + /* If result is mismatch and no FIND_NOT_EMPTY option, + then the region is not set in match_at().
*/ + if (IS_FIND_NOT_EMPTY(reg->options) && region +#ifdef USE_POSIX_API_REGION_OPTION + && !IS_POSIX_REGION(option) +#endif + ) { + onig_region_clear(region); + } + +#ifdef ONIG_DEBUG + if (r != ONIG_MISMATCH) + fprintf(stderr, "onig_search: error %d\n", r); +#endif + return r; +/* Exits taken before msa was initialised: there is nothing to free. */ + mismatch_no_msa: + r = ONIG_MISMATCH; + finish_no_msa: + ONIG_STATE_DEC_THREAD(reg); +#ifdef ONIG_DEBUG + if (r != ONIG_MISMATCH) + fprintf(stderr, "onig_search: error %d\n", r); +#endif + return r; +/* Success: the result is the match start as a byte offset from str. */ + match: + ONIG_STATE_DEC_THREAD(reg); + MATCH_ARG_FREE(msa); + return s - str; +} +/* Return the encoding stored in reg. */ +extern OnigEncoding +onig_get_encoding(regex_t* reg) +{ + return reg->enc; +} +/* Return the option flags stored in reg. */ +extern OnigOptionType +onig_get_options(regex_t* reg) +{ + return reg->options; +} +/* Return the case-fold flag stored in reg. */ +extern OnigCaseFoldType +onig_get_case_fold_flag(regex_t* reg) +{ + return reg->case_fold_flag; +} +/* Return the syntax definition stored in reg. */ +extern const OnigSyntaxType* +onig_get_syntax(regex_t* reg) +{ + return reg->syntax; +} +/* Return reg->num_mem, the number of capture groups. */ +extern int +onig_number_of_captures(regex_t* reg) +{ + return reg->num_mem; +} +/* Count the bits set in reg->capture_history for groups 0..ONIG_MAX_CAPTURE_HISTORY_GROUP; always 0 when USE_CAPTURE_HISTORY is not defined. */ +extern int +onig_number_of_capture_histories(regex_t* reg) +{ +#ifdef USE_CAPTURE_HISTORY + int i, n; + + n = 0; + for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { + if (BIT_STATUS_AT(reg->capture_history, i) != 0) + n++; + } + return n; +#else + return 0; +#endif +} +/* Copy the encoding object *from over *to (plain struct assignment). */ +extern void +onig_copy_encoding(OnigEncoding to, OnigEncoding from) +{ + *to = *from; +} + diff --git a/regint.h b/regint.h new file mode 100644 index 0000000..fc37f65 --- /dev/null +++ b/regint.h @@ -0,0 +1,842 @@ +#ifndef ONIGURUMA_REGINT_H +#define ONIGURUMA_REGINT_H +/********************************************************************** + regint.h - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* for debug */ +/* #define ONIG_DEBUG_PARSE_TREE */ +/* #define ONIG_DEBUG_COMPILE */ +/* #define ONIG_DEBUG_SEARCH */ +/* #define ONIG_DEBUG_MATCH */ +/* #define ONIG_DONT_OPTIMIZE */ + +/* for byte-code statistical data. */ +/* #define ONIG_DEBUG_STATISTICS */ +/* Any of the specific debug switches turns on the general ONIG_DEBUG flag. */ +#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \ + defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \ + defined(ONIG_DEBUG_STATISTICS) +#ifndef ONIG_DEBUG +#define ONIG_DEBUG +#endif +#endif +/* Targets for which PLATFORM_UNALIGNED_WORD_ACCESS is enabled. _M_IX86 and _M_AMD64 are the macros MSVC predefines for x86 and x86-64. */ +#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ + (defined(__ppc__) && defined(__APPLE__)) || \ + defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \ + defined(__mc68020__) +#define PLATFORM_UNALIGNED_WORD_ACCESS +#endif + +/* config */ +/* spec.
config */ +#define USE_NAMED_GROUP +#define USE_SUBEXP_CALL +#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */ +#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */ +#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ +#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR +/* #define USE_RECOMPILE_API */ +/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */ + +/* internal config */ +#define USE_PARSE_TREE_NODE_RECYCLE +#define USE_OP_PUSH_OR_JUMP_EXACT +#define USE_QTFR_PEEK_NEXT +#define USE_ST_LIBRARY +#define USE_SHARED_CCLASS_TABLE + +#define INIT_MATCH_STACK_SIZE 160 +#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ +/* ARG_UNUSED marks a parameter that is deliberately unused; it expands to nothing when __GNUC__ is not defined. */ +#if defined(__GNUC__) +# define ARG_UNUSED __attribute__ ((unused)) +#else +# define ARG_UNUSED +#endif + +/* */ +/* escape other system UChar definition */ +#ifndef RUBY_DEFINES_H +#include "ruby/ruby.h" +#undef xmalloc +#undef xrealloc +#undef xcalloc +#undef xfree +#endif +#ifdef ONIG_ESCAPE_UCHAR_COLLISION +#undef ONIG_ESCAPE_UCHAR_COLLISION +#endif +#undef USE_MATCH_RANGE_IS_COMPLETE_RANGE +#undef USE_CAPTURE_HISTORY +#define USE_VARIABLE_META_CHARS +#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */ +#define USE_POSIX_REGION_OPTION /* needed for POSIX API support; NOTE(review): the search code tests USE_POSIX_API_REGION_OPTION, not this name -- confirm which is intended */ +#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */ +/* #define USE_MULTI_THREAD_SYSTEM */ +#define THREAD_SYSTEM_INIT /* depend on thread system */ +#define THREAD_SYSTEM_END /* depend on thread system */ +#define THREAD_ATOMIC_START /* depend on thread system */ +#define THREAD_ATOMIC_END /* depend on thread system */ +#define THREAD_PASS /* depend on thread system */ +#ifndef xmalloc +#define xmalloc malloc +#define xrealloc realloc +#define xcalloc calloc +#define xfree free +#endif + +#ifdef RUBY +/* Inside Ruby the onig_st_* names resolve to Ruby's own st_* functions, and match_at() can poll for thread interrupts. */ +#define CHECK_INTERRUPT_IN_MATCH_AT rb_thread_check_ints() +#define onig_st_init_table st_init_table +#define onig_st_init_table_with_size
st_init_table_with_size +#define onig_st_init_numtable st_init_numtable +#define onig_st_init_numtable_with_size st_init_numtable_with_size +#define onig_st_init_strtable st_init_strtable +#define onig_st_init_strtable_with_size st_init_strtable_with_size +#define onig_st_delete st_delete +#define onig_st_delete_safe st_delete_safe +#define onig_st_insert st_insert +#define onig_st_lookup st_lookup +#define onig_st_foreach st_foreach +#define onig_st_add_direct st_add_direct +#define onig_st_free_table st_free_table +#define onig_st_cleanup_safe st_cleanup_safe +#define onig_st_copy st_copy +#define onig_st_nothing_key_clone st_nothing_key_clone +#define onig_st_nothing_key_free st_nothing_key_free +#define onig_st_is_member st_is_member + +#define USE_UPPER_CASE_TABLE +#else +/* Without RUBY the mapping is reversed: the st_* names resolve to the onig_st_* functions. */ +#define st_init_table onig_st_init_table +#define st_init_table_with_size onig_st_init_table_with_size +#define st_init_numtable onig_st_init_numtable +#define st_init_numtable_with_size onig_st_init_numtable_with_size +#define st_init_strtable onig_st_init_strtable +#define st_init_strtable_with_size onig_st_init_strtable_with_size +#define st_delete onig_st_delete +#define st_delete_safe onig_st_delete_safe +#define st_insert onig_st_insert +#define st_lookup onig_st_lookup +#define st_foreach onig_st_foreach +#define st_add_direct onig_st_add_direct +#define st_free_table onig_st_free_table +#define st_cleanup_safe onig_st_cleanup_safe +#define st_copy onig_st_copy +#define st_nothing_key_clone onig_st_nothing_key_clone +#define st_nothing_key_free onig_st_nothing_key_free +/* */ +#define onig_st_is_member st_is_member +/* no interrupt polling in this configuration: the hook expands to nothing */ +#define CHECK_INTERRUPT_IN_MATCH_AT + +#endif + +#define STATE_CHECK_STRING_THRESHOLD_LEN 7 +#define STATE_CHECK_BUFF_MAX_SIZE 0x4000 +/* number of THREAD_PASS retries allowed while waiting for the regex state before giving up with ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT */ +#define THREAD_PASS_LIMIT_COUNT 8 +#define xmemset memset +#define xmemcpy memcpy +#define xmemmove memmove +/* On _WIN32 without GCC, alloca and vsnprintf are reached through their underscore-prefixed names. */ +#if defined(_WIN32) && !defined(__GNUC__) +#define xalloca _alloca +#define xvsnprintf _vsnprintf +#else +#define xalloca
alloca +#define xvsnprintf vsnprintf +#endif + +/* ONIG_STATE_* maintain reg->state as a use counter; they expand to nothing unless both USE_RECOMPILE_API and USE_MULTI_THREAD_SYSTEM are defined. */ +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) +#define ONIG_STATE_INC(reg) (reg)->state++ +#define ONIG_STATE_DEC(reg) (reg)->state-- + +#define ONIG_STATE_INC_THREAD(reg) do {\ + THREAD_ATOMIC_START;\ + (reg)->state++;\ + THREAD_ATOMIC_END;\ +} while(0) +#define ONIG_STATE_DEC_THREAD(reg) do {\ + THREAD_ATOMIC_START;\ + (reg)->state--;\ + THREAD_ATOMIC_END;\ +} while(0) +#else +#define ONIG_STATE_INC(reg) /* Nothing */ +#define ONIG_STATE_DEC(reg) /* Nothing */ +#define ONIG_STATE_INC_THREAD(reg) /* Nothing */ +#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */ +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ +/* System headers; each #include names its header explicitly. */ +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#if defined(HAVE_ALLOCA_H) && (defined(_AIX) || !defined(__GNUC__)) +#include <alloca.h> +#endif + +#ifdef HAVE_STRING_H +# include <string.h> +#else +# include <strings.h> +#endif + +#include <ctype.h> +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif + +#ifdef ONIG_DEBUG +# include <stdio.h> +#endif + +#include "regenc.h" + +#ifdef MIN +#undef MIN +#endif +#ifdef MAX +#undef MAX +#endif +#define MIN(a,b) (((a)>(b))?(b):(a)) +#define MAX(a,b) (((a)<(b))?(b):(a)) + +#define IS_NULL(p) (((void*)(p)) == (void*)0) +#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0) +#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL +#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY +#define NULL_UCHARP ((UChar* )0) + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS +/* PLATFORM_GET_INC(val,p,type): load a value of the given type from p into val and advance p past it. This variant dereferences p directly; the other one copies byte-wise with xmemcpy. */ +#define PLATFORM_GET_INC(val,p,type) do{\ + (val) = *(type* )(p);\ + (p) += sizeof(type);\ +} while(0) + +#else + +#define PLATFORM_GET_INC(val,p,type) do{\ + xmemcpy(&(val), (p), sizeof(type));\ + (p) += sizeof(type);\ +} while(0) + +/* sizeof(OnigCodePoint) */ +#define WORD_ALIGNMENT_SIZE SIZEOF_LONG +/* GET_ALIGNMENT_PAD_SIZE: number of bytes (0..WORD_ALIGNMENT_SIZE-1) needed to round addr up to a multiple of WORD_ALIGNMENT_SIZE. */ +#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\ + (pad_size) = WORD_ALIGNMENT_SIZE \ + - ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\ + if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\ +} while (0) + +#define
ALIGNMENT_RIGHT(addr) do {\ + (addr) += (WORD_ALIGNMENT_SIZE - 1);\ + (addr) -= ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\ +} while (0) + +#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ + +/* stack pop level */ +#define STACK_POP_LEVEL_FREE 0 +#define STACK_POP_LEVEL_MEM_START 1 +#define STACK_POP_LEVEL_ALL 2 + +/* optimize flags */ +#define ONIG_OPTIMIZE_NONE 0 +#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */ +#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */ +#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */ +#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */ +#define ONIG_OPTIMIZE_MAP 5 /* char map */ + +/* bit status */ +typedef unsigned int BitStatusType; +/* Bit n of a BitStatusType is addressed for n below BIT_STATUS_BITS_NUM; in BIT_STATUS_AT and BIT_STATUS_ON_AT every larger n shares bit 0. The shifts start from a BitStatusType one, and the macro argument is parenthesised, so the top bit can be addressed without shifting into the sign bit of int and expression arguments expand correctly. */ +#define BIT_STATUS_BITS_NUM (sizeof(BitStatusType) * 8) +#define BIT_STATUS_CLEAR(stats) (stats) = 0 +#define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0) +#define BIT_STATUS_AT(stats,n) \ + ((n) < (int )BIT_STATUS_BITS_NUM ? ((stats) & ((BitStatusType )1 << (n))) : ((stats) & 1)) + +#define BIT_STATUS_ON_AT(stats,n) do {\ + if ((n) < (int )BIT_STATUS_BITS_NUM) \ + (stats) |= ((BitStatusType )1 << (n));\ + else\ + (stats) |= 1;\ +} while (0) + +#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\ + if ((n) < (int )BIT_STATUS_BITS_NUM)\ + (stats) |= ((BitStatusType )1 << (n));\ +} while (0) + + +#define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1) +/* DIGITVAL / ODIGITVAL / XDIGITVAL: numeric value of a decimal, octal or hexadecimal digit code point. */ +#define DIGITVAL(code) ((code) - '0') +#define ODIGITVAL(code) DIGITVAL(code) +#define XDIGITVAL(enc,code) \ + (ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \ + : (ONIGENC_IS_CODE_UPPER(enc,code) ?
/* bitset */
/*
 * A BitSet is a fixed table of SINGLE_BYTE_SIZE bits, stored as an array
 * of "rooms" of type Bits.  BS_ROOM selects the room that holds bit `pos`
 * and BS_BIT builds the mask for that bit inside the room.
 */
#define BITS_PER_BYTE      8
#define SINGLE_BYTE_SIZE   (1 << BITS_PER_BYTE)
#define BITS_IN_ROOM       (sizeof(Bits) * BITS_PER_BYTE)
#define BITSET_SIZE        (SINGLE_BYTE_SIZE / BITS_IN_ROOM)

#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
typedef unsigned int   Bits;
#else
typedef unsigned char  Bits;
#endif
typedef Bits           BitSet[BITSET_SIZE];
typedef Bits*          BitSetRef;

#define SIZE_BITSET        sizeof(BitSet)

/* zero every room of the set */
#define BITSET_CLEAR(bs) do {\
  int i;\
  for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; }\
} while (0)

/*
 * `pos` is parenthesised so that an expression argument (e.g. `a | b`)
 * is divided / reduced as a whole, and the mask starts from an unsigned 1
 * so the highest bit of an unsigned int room can be addressed without
 * shifting into the sign bit.
 */
#define BS_ROOM(bs,pos)            (bs)[(pos) / BITS_IN_ROOM]
#define BS_BIT(pos)                (1U << ((pos) % BITS_IN_ROOM))

#define BITSET_AT(bs, pos)         (BS_ROOM(bs,pos) & BS_BIT(pos))
#define BITSET_SET_BIT(bs, pos)     BS_ROOM(bs,pos) |= BS_BIT(pos)
/*
 * Growth helpers for BBuf.  Each one returns ONIGERR_MEMORY from the
 * *calling function* when the buffer cannot be enlarged.
 *
 * The reallocation result is taken in a temporary first: when xrealloc
 * fails the old storage is still valid, so (buf)->p and (buf)->alloc are
 * left untouched instead of being overwritten with NULL / a size that was
 * never obtained.
 *
 * The doubling loops start from 1 when the current capacity is 0 and give
 * up with ONIGERR_MEMORY when doubling would wrap around, so they always
 * terminate.
 */

/* enlarge the capacity by exactly `inc` bytes */
#define BBUF_SIZE_INC(buf,inc) do{\
  unsigned int bbuf_new_alloc_ = (buf)->alloc + (inc);\
  UChar* bbuf_new_p_ = (UChar* )xrealloc((buf)->p, bbuf_new_alloc_);\
  if (IS_NULL(bbuf_new_p_)) return(ONIGERR_MEMORY);\
  (buf)->p     = bbuf_new_p_;\
  (buf)->alloc = bbuf_new_alloc_;\
} while (0)

/* double the capacity at least once, until it is >= `low` */
#define BBUF_EXPAND(buf,low) do{\
  unsigned int bbuf_new_alloc_ = (buf)->alloc;\
  UChar* bbuf_new_p_;\
  do {\
    if (bbuf_new_alloc_ > (~0U >> 1)) return(ONIGERR_MEMORY);\
    bbuf_new_alloc_ = (bbuf_new_alloc_ == 0) ? 1 : bbuf_new_alloc_ * 2;\
  } while (bbuf_new_alloc_ < (unsigned int )(low));\
  bbuf_new_p_ = (UChar* )xrealloc((buf)->p, bbuf_new_alloc_);\
  if (IS_NULL(bbuf_new_p_)) return(ONIGERR_MEMORY);\
  (buf)->p     = bbuf_new_p_;\
  (buf)->alloc = bbuf_new_alloc_;\
} while (0)

/* double the capacity only as often as needed to reach `size` */
#define BBUF_ENSURE_SIZE(buf,size) do{\
  unsigned int bbuf_new_alloc_ = (buf)->alloc;\
  while (bbuf_new_alloc_ < (unsigned int )(size)) {\
    if (bbuf_new_alloc_ > (~0U >> 1)) return(ONIGERR_MEMORY);\
    bbuf_new_alloc_ = (bbuf_new_alloc_ == 0) ? 1 : bbuf_new_alloc_ * 2;\
  }\
  if ((buf)->alloc != bbuf_new_alloc_) {\
    UChar* bbuf_new_p_ = (UChar* )xrealloc((buf)->p, bbuf_new_alloc_);\
    if (IS_NULL(bbuf_new_p_)) return(ONIGERR_MEMORY);\
    (buf)->p     = bbuf_new_p_;\
    (buf)->alloc = bbuf_new_alloc_;\
  }\
} while (0)
/* from > to */
/* slide n bytes starting at offset `from` down to offset `to` */
#define BBUF_MOVE_LEFT(buf,from,to,n) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
} while (0)

/* from > to */
/*
 * Slide everything from offset `from` to the end of the used area down to
 * offset `to`, then shrink `used` by the distance moved.  The distance is
 * computed from parenthesised arguments so that expressions such as
 * `a + b` are subtracted as a whole.
 */
#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
  (buf)->used -= ((from) - (to));\
} while (0)

/*
 * Insert n bytes at offset `pos`.  At or past the used area this is a
 * plain write; otherwise the tail is first shifted right by n bytes to
 * open a gap.
 */
#define BBUF_INSERT(buf,pos,bytes,n) do {\
  if ((pos) >= (buf)->used) {\
    BBUF_WRITE((buf),(pos),(bytes),(n));\
  }\
  else {\
    BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
    xmemcpy((buf)->p + (pos), (bytes), (n));\
  }\
} while (0)
/* single byte, N = 1, ignore case */ + OP_EXACTN_IC, /* single byte, ignore case */ + + OP_CCLASS, + OP_CCLASS_MB, + OP_CCLASS_MIX, + OP_CCLASS_NOT, + OP_CCLASS_MB_NOT, + OP_CCLASS_MIX_NOT, + OP_CCLASS_NODE, /* pointer to CClassNode node */ + + OP_ANYCHAR, /* "." */ + OP_ANYCHAR_ML, /* "." multi-line */ + OP_ANYCHAR_STAR, /* ".*" */ + OP_ANYCHAR_ML_STAR, /* ".*" multi-line */ + OP_ANYCHAR_STAR_PEEK_NEXT, + OP_ANYCHAR_ML_STAR_PEEK_NEXT, + + OP_WORD, + OP_NOT_WORD, + OP_WORD_BOUND, + OP_NOT_WORD_BOUND, + OP_WORD_BEGIN, + OP_WORD_END, + + OP_BEGIN_BUF, + OP_END_BUF, + OP_BEGIN_LINE, + OP_END_LINE, + OP_SEMI_END_BUF, + OP_BEGIN_POSITION, + + OP_BACKREF1, + OP_BACKREF2, + OP_BACKREFN, + OP_BACKREFN_IC, + OP_BACKREF_MULTI, + OP_BACKREF_MULTI_IC, + OP_BACKREF_WITH_LEVEL, /* \k, \k */ + + OP_MEMORY_START, + OP_MEMORY_START_PUSH, /* push back-tracker to stack */ + OP_MEMORY_END_PUSH, /* push back-tracker to stack */ + OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */ + OP_MEMORY_END, + OP_MEMORY_END_REC, /* push marker to stack */ + + OP_FAIL, /* pop stack and move */ + OP_JUMP, + OP_PUSH, + OP_POP, + OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */ + OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */ + OP_REPEAT, /* {n,m} */ + OP_REPEAT_NG, /* {n,m}? (non greedy) */ + OP_REPEAT_INC, + OP_REPEAT_INC_NG, /* non greedy */ + OP_REPEAT_INC_SG, /* search and get in stack */ + OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */ + OP_NULL_CHECK_START, /* null loop checker start */ + OP_NULL_CHECK_END, /* null loop checker end */ + OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */ + OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ + + OP_PUSH_POS, /* (?=...) start */ + OP_POP_POS, /* (?=...) end */ + OP_PUSH_POS_NOT, /* (?!...) start */ + OP_FAIL_POS, /* (?!...) end */ + OP_PUSH_STOP_BT, /* (?>...) start */ + OP_POP_STOP_BT, /* (?>...) end */ + OP_LOOK_BEHIND, /* (?<=...) 
start (no needs end opcode) */ + OP_PUSH_LOOK_BEHIND_NOT, /* (? */ + OP_RETURN, + + OP_STATE_CHECK_PUSH, /* combination explosion check and push */ + OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ + OP_STATE_CHECK, /* check only */ + OP_STATE_CHECK_ANYCHAR_STAR, + OP_STATE_CHECK_ANYCHAR_ML_STAR, + + /* no need: IS_DYNAMIC_OPTION() == 0 */ + OP_SET_OPTION_PUSH, /* set option and push recover option */ + OP_SET_OPTION /* set option */ +}; + +typedef int RelAddrType; +typedef int AbsAddrType; +typedef int LengthType; +typedef int RepeatNumType; +typedef short int MemNumType; +typedef short int StateCheckNumType; +typedef void* PointerType; + +#define SIZE_OPCODE 1 +#define SIZE_RELADDR sizeof(RelAddrType) +#define SIZE_ABSADDR sizeof(AbsAddrType) +#define SIZE_LENGTH sizeof(LengthType) +#define SIZE_MEMNUM sizeof(MemNumType) +#define SIZE_STATE_CHECK_NUM sizeof(StateCheckNumType) +#define SIZE_REPEATNUM sizeof(RepeatNumType) +#define SIZE_OPTION sizeof(OnigOptionType) +#define SIZE_CODE_POINT sizeof(OnigCodePoint) +#define SIZE_POINTER sizeof(PointerType) + + +#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType) +#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType) +#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType) +#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType) +#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType) +#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType) +#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType) +#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType) + +/* code point's address must be aligned address. 
*/ +#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p)) +#define GET_BYTE_INC(byte,p) do{\ + byte = *(p);\ + (p)++;\ +} while(0) + + +/* op-code + arg size */ +#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE +#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1) +#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR) +#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR) +#define SIZE_OP_POP SIZE_OPCODE +#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1) +#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1) +#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_PUSH_POS SIZE_OPCODE +#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR) +#define SIZE_OP_POP_POS SIZE_OPCODE +#define SIZE_OP_FAIL_POS SIZE_OPCODE +#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION) +#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION) +#define SIZE_OP_FAIL SIZE_OPCODE +#define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE +#define SIZE_OP_POP_STOP_BT SIZE_OPCODE +#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH) +#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH) +#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE +#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR) +#define SIZE_OP_RETURN SIZE_OPCODE + +#ifdef USE_COMBINATION_EXPLOSION_CHECK +#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) +#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + 
SIZE_STATE_CHECK_NUM + SIZE_RELADDR) +#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) +#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) +#endif + +#define MC_ESC(syn) (syn)->meta_char_table.esc +#define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar +#define MC_ANYTIME(syn) (syn)->meta_char_table.anytime +#define MC_ZERO_OR_ONE_TIME(syn) (syn)->meta_char_table.zero_or_one_time +#define MC_ONE_OR_MORE_TIME(syn) (syn)->meta_char_table.one_or_more_time +#define MC_ANYCHAR_ANYTIME(syn) (syn)->meta_char_table.anychar_anytime + +#define IS_MC_ESC_CODE(code, syn) \ + ((code) == MC_ESC(syn) && \ + !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE)) + + +#define SYN_POSIX_COMMON_OP \ + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \ + ONIG_SYN_OP_DECIMAL_BACKREF | \ + ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \ + ONIG_SYN_OP_LINE_ANCHOR | \ + ONIG_SYN_OP_ESC_CONTROL_CHARS ) + +#define SYN_GNU_REGEX_OP \ + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \ + ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \ + ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \ + ONIG_SYN_OP_VBAR_ALT | \ + ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \ + ONIG_SYN_OP_QMARK_ZERO_ONE | \ + ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \ + ONIG_SYN_OP_ESC_W_WORD | \ + ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \ + ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \ + ONIG_SYN_OP_LINE_ANCHOR ) + +#define SYN_GNU_REGEX_BV \ + ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \ + ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \ + ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) + + +#define NCCLASS_FLAGS(cc) ((cc)->flags) +#define NCCLASS_FLAG_SET(cc,flag) (NCCLASS_FLAGS(cc) |= (flag)) +#define NCCLASS_FLAG_CLEAR(cc,flag) 
(NCCLASS_FLAGS(cc) &= ~(flag)) +#define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0) + +/* cclass node */ +#define FLAG_NCCLASS_NOT (1<<0) +#define FLAG_NCCLASS_SHARE (1<<1) + +#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT) +#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE) +#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT) +#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT) +#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE) + +typedef struct { + int type; + /* struct _Node* next; */ + /* unsigned int flags; */ +} NodeBase; + +typedef struct { + NodeBase base; + unsigned int flags; + BitSet bs; + BBuf* mbuf; /* multi-byte info or NULL */ +} CClassNode; + +typedef long OnigStackIndex; + +typedef struct _OnigStackType { + unsigned int type; + union { + struct { + UChar *pcode; /* byte code position */ + UChar *pstr; /* string position */ + UChar *pstr_prev; /* previous char position of pstr */ +#ifdef USE_COMBINATION_EXPLOSION_CHECK + unsigned int state_check; +#endif + } state; + struct { + int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ + UChar *pcode; /* byte code position (head of repeated target) */ + int num; /* repeat id */ + } repeat; + struct { + OnigStackIndex si; /* index of stack */ + } repeat_inc; + struct { + int num; /* memory num */ + UChar *pstr; /* start/end position */ + /* Following information is setted, if this stack type is MEM-START */ + OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */ + OnigStackIndex end; /* prev. 
info (for backtrack "(...)*" ) */ + } mem; + struct { + int num; /* null check id */ + UChar *pstr; /* start position */ + } null_check; +#ifdef USE_SUBEXP_CALL + struct { + UChar *ret_addr; /* byte code position */ + int num; /* null check id */ + UChar *pstr; /* string position */ + } call_frame; +#endif + } u; +} OnigStackType; + +typedef struct { + void* stack_p; + int stack_n; + OnigOptionType options; + OnigRegion* region; + const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + int best_len; /* for ONIG_OPTION_FIND_LONGEST */ + UChar* best_s; +#endif +#ifdef USE_COMBINATION_EXPLOSION_CHECK + void* state_check_buff; + int state_check_buff_size; +#endif +} OnigMatchArg; + + +#define IS_CODE_SB_WORD(enc,code) \ + (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) + +#ifdef ONIG_DEBUG + +typedef struct { + short int opcode; + char* name; + short int arg_type; +} OnigOpInfoType; + +extern OnigOpInfoType OnigOpInfo[]; + +extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, OnigEncoding enc)); + +#ifdef ONIG_DEBUG_STATISTICS +extern void onig_statistics_init P_((void)); +extern void onig_print_statistics P_((FILE* f)); +#endif +#endif + +extern UChar* onig_error_code_to_format P_((int code)); +extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...)); +extern int onig_bbuf_init P_((BBuf* buf, int size)); +extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType* syntax)); +extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo)); +extern void onig_chain_reduce P_((regex_t* reg)); +extern void onig_chain_link_add P_((regex_t* to, regex_t* add)); +extern void onig_transfer P_((regex_t* to, regex_t* from)); +extern int onig_is_code_in_cc 
P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); +extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc)); + +/* strend hash */ +typedef void hash_table_type; +#ifdef RUBY +#include "ruby/st.h" +typedef st_data_t hash_data_type; +#else +typedef unsigned long hash_data_type; +#endif + +extern hash_table_type* onig_st_init_strend_table_with_size P_((int size)); +extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value)); +extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value)); + +/* encoding property management */ +#define PROPERTY_LIST_ADD_PROP(Name, CR) \ + r = onigenc_property_list_add_property((UChar* )Name, CR,\ + &PropertyNameTable, &PropertyList, &PropertyListNum,\ + &PropertyListSize);\ + if (r != 0) goto end + +#define PROPERTY_LIST_INIT_CHECK \ + if (PropertyInited == 0) {\ + int r = onigenc_property_list_init(init_property_list);\ + if (r != 0) return r;\ + } + +extern int onigenc_property_list_add_property P_((UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize)); + +typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void); + +extern int onigenc_property_list_init P_((ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)); + +#endif /* ONIGURUMA_REGINT_H */ diff --git a/regparse.c b/regparse.c new file mode 100644 index 0000000..75ad24f --- /dev/null +++ b/regparse.c @@ -0,0 +1,5599 @@ +/* -*- mode:c; c-file-style:"gnu" -*- */ +/********************************************************************** + regparse.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regparse.h" + +#define WARN_BUFSIZE 256 + +#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS + + +const OnigSyntaxType OnigSyntaxRuby = { + (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | + ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_C_CONTROL ) + & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) + , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT | + ONIG_SYN_OP2_OPTION_RUBY | + ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | + ONIG_SYN_OP2_ESC_G_SUBEXP_CALL | + ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | + ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | + ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | + ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | + ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB | + ONIG_SYN_OP2_ESC_H_XDIGIT ) + , ( SYN_GNU_REGEX_BV | + ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | + ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | + ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | + ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME | + ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY | + ONIG_SYN_WARN_CC_OP_NOT_ESCAPED | + ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) + , ONIG_OPTION_NONE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; + +const OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY; + +extern void onig_null_warn(const char* s ARG_UNUSED) { } + +#ifdef DEFAULT_WARN_FUNCTION +static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION; +#else +static OnigWarnFunc onig_warn = onig_null_warn; +#endif + +#ifdef DEFAULT_VERB_WARN_FUNCTION +static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION; +#else +static OnigWarnFunc onig_verb_warn = onig_null_warn; +#endif + +extern void onig_set_warn_func(OnigWarnFunc f) +{ + onig_warn = f; +} + +extern void onig_set_verb_warn_func(OnigWarnFunc f) +{ + onig_verb_warn = f; +} + +static void +bbuf_free(BBuf* bbuf) +{ + if (IS_NOT_NULL(bbuf)) { + if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p); + xfree(bbuf); + } +} + +static int +bbuf_clone(BBuf** rto, BBuf* from) +{ + int r; + BBuf *to; + + *rto = to = (BBuf* )xmalloc(sizeof(BBuf)); + CHECK_NULL_RETURN_MEMERR(to); + r = BBUF_INIT(to, from->alloc); + if (r != 0) return r; + to->used = from->used; + xmemcpy(to->p, from->p, from->used); + return 0; +} + +#define BACKREF_REL_TO_ABS(rel_no, env) \ + ((env)->num_mem + 1 + (rel_no)) + +#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f)) + +#define MBCODE_START_POS(enc) \ + (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80) + +#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \ + add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0)) + +#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\ + if (! 
ONIGENC_IS_SINGLEBYTE(enc)) {\ + r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\ + if (r) return r;\ + }\ +} while (0) + + +#define BITSET_IS_EMPTY(bs,empty) do {\ + int i;\ + empty = 1;\ + for (i = 0; i < (int )BITSET_SIZE; i++) {\ + if ((bs)[i] != 0) {\ + empty = 0; break;\ + }\ + }\ +} while (0) + +static void +bitset_set_range(BitSetRef bs, int from, int to) +{ + int i; + for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) { + BITSET_SET_BIT(bs, i); + } +} + +#if 0 +static void +bitset_set_all(BitSetRef bs) +{ + int i; + for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); } +} +#endif + +static void +bitset_invert(BitSetRef bs) +{ + int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); } +} + +static void +bitset_invert_to(BitSetRef from, BitSetRef to) +{ + int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); } +} + +static void +bitset_and(BitSetRef dest, BitSetRef bs) +{ + int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; } +} + +static void +bitset_or(BitSetRef dest, BitSetRef bs) +{ + int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; } +} + +static void +bitset_copy(BitSetRef dest, BitSetRef bs) +{ + int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; } +} + +extern int +onig_strncmp(const UChar* s1, const UChar* s2, int n) +{ + int x; + + while (n-- > 0) { + x = *s2++ - *s1++; + if (x) return x; + } + return 0; +} + +extern void +onig_strcpy(UChar* dest, const UChar* src, const UChar* end) +{ + int len = end - src; + if (len > 0) { + xmemcpy(dest, src, len); + dest[len] = (UChar )0; + } +} + +#ifdef USE_NAMED_GROUP +static UChar* +strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) +{ + int slen, term_len, i; + UChar *r; + + slen = end - s; + term_len = ONIGENC_MBC_MINLEN(enc); + + r = (UChar* )xmalloc(slen + term_len); + CHECK_NULL_RETURN(r); + xmemcpy(r, s, slen); + + for (i = 0; i < term_len; i++) + r[slen + i] = (UChar )0; + + return r; +} +#endif + +/* scan 
pattern methods */ +#define PEND_VALUE 0 + +#define PFETCH_READY UChar* pfetch_prev +#define PEND (p < end ? 0 : 1) +#define PUNFETCH p = pfetch_prev +#define PINC do { \ + pfetch_prev = p; \ + p += enclen(enc, p, end); \ +} while (0) +#define PFETCH(c) do { \ + c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \ + pfetch_prev = p; \ + p += enclen(enc, p, end); \ +} while (0) + +#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE) +#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c) + +static UChar* +strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end, + int capa) +{ + UChar* r; + + if (dest) + r = (UChar* )xrealloc(dest, capa + 1); + else + r = (UChar* )xmalloc(capa + 1); + + CHECK_NULL_RETURN(r); + onig_strcpy(r + (dest_end - dest), src, src_end); + return r; +} + +/* dest on static area */ +static UChar* +strcat_capa_from_static(UChar* dest, UChar* dest_end, + const UChar* src, const UChar* src_end, int capa) +{ + UChar* r; + + r = (UChar* )xmalloc(capa + 1); + CHECK_NULL_RETURN(r); + onig_strcpy(r, dest, dest_end); + onig_strcpy(r + (dest_end - dest), src, src_end); + return r; +} + + +#ifdef USE_ST_LIBRARY + +#include "ruby/st.h" + +typedef struct { + UChar* s; + UChar* end; +} st_str_end_key; + +static int +str_end_cmp(st_str_end_key* x, st_str_end_key* y) +{ + UChar *p, *q; + int c; + + if ((x->end - x->s) != (y->end - y->s)) + return 1; + + p = x->s; + q = y->s; + while (p < x->end) { + c = (int )*p - (int )*q; + if (c != 0) return c; + + p++; q++; + } + + return 0; +} + +static int +str_end_hash(st_str_end_key* x) +{ + UChar *p; + int val = 0; + + p = x->s; + while (p < x->end) { + val = val * 997 + (int )*p++; + } + + return val + (val >> 5); +} + +extern hash_table_type* +onig_st_init_strend_table_with_size(int size) +{ + static const struct st_hash_type hashType = { + str_end_cmp, + str_end_hash, + }; + + return (hash_table_type* ) + onig_st_init_table_with_size(&hashType, size); +} + 
+extern int +onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, + const UChar* end_key, hash_data_type *value) +{ + st_str_end_key key; + + key.s = (UChar* )str_key; + key.end = (UChar* )end_key; + + return onig_st_lookup(table, (st_data_t )(&key), value); +} + +extern int +onig_st_insert_strend(hash_table_type* table, const UChar* str_key, + const UChar* end_key, hash_data_type value) +{ + st_str_end_key* key; + int result; + + key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key)); + key->s = (UChar* )str_key; + key->end = (UChar* )end_key; + result = onig_st_insert(table, (st_data_t )key, value); + if (result) { + xfree(key); + } + return result; +} + +#endif /* USE_ST_LIBRARY */ + + +#ifdef USE_NAMED_GROUP + +#define INIT_NAME_BACKREFS_ALLOC_NUM 8 + +typedef struct { + UChar* name; + int name_len; /* byte length */ + int back_num; /* number of backrefs */ + int back_alloc; + int back_ref1; + int* back_refs; +} NameEntry; + +#ifdef USE_ST_LIBRARY + +typedef st_table NameTable; +typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ + +#define NAMEBUF_SIZE 24 +#define NAMEBUF_SIZE_1 25 + +#ifdef ONIG_DEBUG +static int +i_print_name_entry(UChar* key, NameEntry* e, void* arg) +{ + int i; + FILE* fp = (FILE* )arg; + + fprintf(fp, "%s: ", e->name); + if (e->back_num == 0) + fputs("-", fp); + else if (e->back_num == 1) + fprintf(fp, "%d", e->back_ref1); + else { + for (i = 0; i < e->back_num; i++) { + if (i > 0) fprintf(fp, ", "); + fprintf(fp, "%d", e->back_refs[i]); + } + } + fputs("\n", fp); + return ST_CONTINUE; +} + +extern int +onig_print_names(FILE* fp, regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + fprintf(fp, "name table\n"); + onig_st_foreach(t, i_print_name_entry, (HashDataType )fp); + fputs("\n", fp); + } + return 0; +} +#endif /* ONIG_DEBUG */ + +static int +i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED) +{ + xfree(e->name); + if 
(IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); + xfree(key); + xfree(e); + return ST_DELETE; +} + +static int +names_clear(regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + onig_st_foreach(t, i_free_name_entry, 0); + } + return 0; +} + +extern int +onig_names_free(regex_t* reg) +{ + int r; + NameTable* t; + + r = names_clear(reg); + if (r) return r; + + t = (NameTable* )reg->name_table; + if (IS_NOT_NULL(t)) onig_st_free_table(t); + reg->name_table = (void* )NULL; + return 0; +} + +static NameEntry* +name_find(regex_t* reg, const UChar* name, const UChar* name_end) +{ + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + e = (NameEntry* )NULL; + if (IS_NOT_NULL(t)) { + onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e))); + } + return e; +} + +typedef struct { + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*); + regex_t* reg; + void* arg; + int ret; + OnigEncoding enc; +} INamesArg; + +static int +i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg) +{ + int r = (*(arg->func))(e->name, + e->name + e->name_len, + e->back_num, + (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), + arg->reg, arg->arg); + if (r != 0) { + arg->ret = r; + return ST_STOP; + } + return ST_CONTINUE; +} + +extern int +onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) +{ + INamesArg narg; + NameTable* t = (NameTable* )reg->name_table; + + narg.ret = 0; + if (IS_NOT_NULL(t)) { + narg.func = func; + narg.reg = reg; + narg.arg = arg; + narg.enc = reg->enc; /* should be pattern encoding. 
*/ + onig_st_foreach(t, i_names, (HashDataType )&narg); + } + return narg.ret; +} + +static int +i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map) +{ + int i; + + if (e->back_num > 1) { + for (i = 0; i < e->back_num; i++) { + e->back_refs[i] = map[e->back_refs[i]].new_val; + } + } + else if (e->back_num == 1) { + e->back_ref1 = map[e->back_ref1].new_val; + } + + return ST_CONTINUE; +} + +extern int +onig_renumber_name_table(regex_t* reg, GroupNumRemap* map) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + onig_st_foreach(t, i_renumber_name, (HashDataType )map); + } + return 0; +} + + +extern int +onig_number_of_names(regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) + return t->num_entries; + else + return 0; +} + +#else /* USE_ST_LIBRARY */ + +#define INIT_NAMES_ALLOC_NUM 8 + +typedef struct { + NameEntry* e; + int num; + int alloc; +} NameTable; + +#ifdef ONIG_DEBUG +extern int +onig_print_names(FILE* fp, regex_t* reg) +{ + int i, j; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t) && t->num > 0) { + fprintf(fp, "name table\n"); + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + fprintf(fp, "%s: ", e->name); + if (e->back_num == 0) { + fputs("-", fp); + } + else if (e->back_num == 1) { + fprintf(fp, "%d", e->back_ref1); + } + else { + for (j = 0; j < e->back_num; j++) { + if (j > 0) fprintf(fp, ", "); + fprintf(fp, "%d", e->back_refs[j]); + } + } + fputs("\n", fp); + } + fputs("\n", fp); + } + return 0; +} +#endif + +static int +names_clear(regex_t* reg) +{ + int i; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + if (IS_NOT_NULL(e->name)) { + xfree(e->name); + e->name = NULL; + e->name_len = 0; + e->back_num = 0; + e->back_alloc = 0; + if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); + e->back_refs = (int* )NULL; + } + } + if 
(IS_NOT_NULL(t->e)) { + xfree(t->e); + t->e = NULL; + } + t->num = 0; + } + return 0; +} + +extern int +onig_names_free(regex_t* reg) +{ + int r; + NameTable* t; + + r = names_clear(reg); + if (r) return r; + + t = (NameTable* )reg->name_table; + if (IS_NOT_NULL(t)) xfree(t); + reg->name_table = NULL; + return 0; +} + +static NameEntry* +name_find(regex_t* reg, UChar* name, UChar* name_end) +{ + int i, len; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + len = name_end - name; + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + if (len == e->name_len && onig_strncmp(name, e->name, len) == 0) + return e; + } + } + return (NameEntry* )NULL; +} + +extern int +onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) +{ + int i, r; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + r = (*func)(e->name, e->name + e->name_len, e->back_num, + (e->back_num > 1 ? 
e->back_refs : &(e->back_ref1)), + reg, arg); + if (r != 0) return r; + } + } + return 0; +} + +extern int +onig_number_of_names(regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) + return t->num; + else + return 0; +} + +#endif /* else USE_ST_LIBRARY */ + +static int +name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) +{ + int alloc; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (name_end - name <= 0) + return ONIGERR_EMPTY_GROUP_NAME; + + e = name_find(reg, name, name_end); + if (IS_NULL(e)) { +#ifdef USE_ST_LIBRARY + if (IS_NULL(t)) { + t = onig_st_init_strend_table_with_size(5); + reg->name_table = (void* )t; + } + e = (NameEntry* )xmalloc(sizeof(NameEntry)); + CHECK_NULL_RETURN_MEMERR(e); + + e->name = strdup_with_null(reg->enc, name, name_end); + if (IS_NULL(e->name)) { + xfree(e); + return ONIGERR_MEMORY; + } + onig_st_insert_strend(t, e->name, (e->name + (name_end - name)), + (HashDataType )e); + + e->name_len = name_end - name; + e->back_num = 0; + e->back_alloc = 0; + e->back_refs = (int* )NULL; + +#else + + if (IS_NULL(t)) { + alloc = INIT_NAMES_ALLOC_NUM; + t = (NameTable* )xmalloc(sizeof(NameTable)); + CHECK_NULL_RETURN_MEMERR(t); + t->e = NULL; + t->alloc = 0; + t->num = 0; + + t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc); + if (IS_NULL(t->e)) { + xfree(t); + return ONIGERR_MEMORY; + } + t->alloc = alloc; + reg->name_table = t; + goto clear; + } + else if (t->num == t->alloc) { + int i; + + alloc = t->alloc * 2; + t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc); + CHECK_NULL_RETURN_MEMERR(t->e); + t->alloc = alloc; + + clear: + for (i = t->num; i < t->alloc; i++) { + t->e[i].name = NULL; + t->e[i].name_len = 0; + t->e[i].back_num = 0; + t->e[i].back_alloc = 0; + t->e[i].back_refs = (int* )NULL; + } + } + e = &(t->e[t->num]); + t->num++; + e->name = strdup_with_null(reg->enc, name, name_end); + e->name_len = name_end - name; +#endif + } + + 
if (e->back_num >= 1 && + ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) { + onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME, + name, name_end); + return ONIGERR_MULTIPLEX_DEFINED_NAME; + } + + e->back_num++; + if (e->back_num == 1) { + e->back_ref1 = backref; + } + else { + if (e->back_num == 2) { + alloc = INIT_NAME_BACKREFS_ALLOC_NUM; + e->back_refs = (int* )xmalloc(sizeof(int) * alloc); + CHECK_NULL_RETURN_MEMERR(e->back_refs); + e->back_alloc = alloc; + e->back_refs[0] = e->back_ref1; + e->back_refs[1] = backref; + } + else { + if (e->back_num > e->back_alloc) { + alloc = e->back_alloc * 2; + e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc); + CHECK_NULL_RETURN_MEMERR(e->back_refs); + e->back_alloc = alloc; + } + e->back_refs[e->back_num - 1] = backref; + } + } + + return 0; +} + +extern int +onig_name_to_group_numbers(regex_t* reg, const UChar* name, + const UChar* name_end, int** nums) +{ + NameEntry* e = name_find(reg, name, name_end); + + if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE; + + switch (e->back_num) { + case 0: + *nums = 0; + break; + case 1: + *nums = &(e->back_ref1); + break; + default: + *nums = e->back_refs; + break; + } + return e->back_num; +} + +extern int +onig_name_to_backref_number(regex_t* reg, const UChar* name, + const UChar* name_end, OnigRegion *region) +{ + int i, n, *nums; + + n = onig_name_to_group_numbers(reg, name, name_end, &nums); + if (n < 0) + return n; + else if (n == 0) + return ONIGERR_PARSER_BUG; + else if (n == 1) + return nums[0]; + else { + if (IS_NOT_NULL(region)) { + for (i = n - 1; i >= 0; i--) { + if (region->beg[nums[i]] != ONIG_REGION_NOTPOS) + return nums[i]; + } + } + return nums[n - 1]; + } +} + +#else /* USE_NAMED_GROUP */ + +extern int +onig_name_to_group_numbers(regex_t* reg, const UChar* name, + const UChar* name_end, int** nums) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int +onig_name_to_backref_number(regex_t* reg, const 
UChar* name, + const UChar* name_end, OnigRegion* region) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int +onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int +onig_number_of_names(regex_t* reg) +{ + return 0; +} +#endif /* else USE_NAMED_GROUP */ + +extern int +onig_noname_group_capture_is_active(regex_t* reg) +{ + if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP)) + return 0; + +#ifdef USE_NAMED_GROUP + if (onig_number_of_names(reg) > 0 && + IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { + return 0; + } +#endif + + return 1; +} + + +#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16 + +static void +scan_env_clear(ScanEnv* env) +{ + int i; + + BIT_STATUS_CLEAR(env->capture_history); + BIT_STATUS_CLEAR(env->bt_mem_start); + BIT_STATUS_CLEAR(env->bt_mem_end); + BIT_STATUS_CLEAR(env->backrefed_mem); + env->error = (UChar* )NULL; + env->error_end = (UChar* )NULL; + env->num_call = 0; + env->num_mem = 0; +#ifdef USE_NAMED_GROUP + env->num_named = 0; +#endif + env->mem_alloc = 0; + env->mem_nodes_dynamic = (Node** )NULL; + + for (i = 0; i < SCANENV_MEMNODES_SIZE; i++) + env->mem_nodes_static[i] = NULL_NODE; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + env->num_comb_exp_check = 0; + env->comb_exp_max_regnum = 0; + env->curr_max_regnum = 0; + env->has_recursion = 0; +#endif +} + +static int +scan_env_add_mem_entry(ScanEnv* env) +{ + int i, need, alloc; + Node** p; + + need = env->num_mem + 1; + if (need >= SCANENV_MEMNODES_SIZE) { + if (env->mem_alloc <= need) { + if (IS_NULL(env->mem_nodes_dynamic)) { + alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE; + p = (Node** )xmalloc(sizeof(Node*) * alloc); + xmemcpy(p, env->mem_nodes_static, + sizeof(Node*) * SCANENV_MEMNODES_SIZE); + } + else { + alloc = env->mem_alloc * 2; + p = (Node** )xrealloc(env->mem_nodes_dynamic, 
sizeof(Node*) * alloc); + } + CHECK_NULL_RETURN_MEMERR(p); + + for (i = env->num_mem + 1; i < alloc; i++) + p[i] = NULL_NODE; + + env->mem_nodes_dynamic = p; + env->mem_alloc = alloc; + } + } + + env->num_mem++; + return env->num_mem; +} + +static int +scan_env_set_mem_node(ScanEnv* env, int num, Node* node) +{ + if (env->num_mem >= num) + SCANENV_MEM_NODES(env)[num] = node; + else + return ONIGERR_PARSER_BUG; + return 0; +} + + +#ifdef USE_PARSE_TREE_NODE_RECYCLE +typedef struct _FreeNode { + struct _FreeNode* next; +} FreeNode; + +static FreeNode* FreeNodeList = (FreeNode* )NULL; +#endif + +extern void +onig_node_free(Node* node) +{ + start: + if (IS_NULL(node)) return ; + + switch (NTYPE(node)) { + case NT_STR: + if (NSTR(node)->capa != 0 && + IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { + xfree(NSTR(node)->s); + } + break; + + case NT_LIST: + case NT_ALT: + onig_node_free(NCAR(node)); + { + Node* next_node = NCDR(node); + +#ifdef USE_PARSE_TREE_NODE_RECYCLE + { + FreeNode* n = (FreeNode* )node; + + THREAD_ATOMIC_START; + n->next = FreeNodeList; + FreeNodeList = n; + THREAD_ATOMIC_END; + } +#else + xfree(node); +#endif + node = next_node; + goto start; + } + break; + + case NT_CCLASS: + { + CClassNode* cc = NCCLASS(node); + + if (IS_NCCLASS_SHARE(cc)) return ; + if (cc->mbuf) + bbuf_free(cc->mbuf); + } + break; + + case NT_QTFR: + if (NQTFR(node)->target) + onig_node_free(NQTFR(node)->target); + break; + + case NT_ENCLOSE: + if (NENCLOSE(node)->target) + onig_node_free(NENCLOSE(node)->target); + break; + + case NT_BREF: + if (IS_NOT_NULL(NBREF(node)->back_dynamic)) + xfree(NBREF(node)->back_dynamic); + break; + + case NT_ANCHOR: + if (NANCHOR(node)->target) + onig_node_free(NANCHOR(node)->target); + break; + } + +#ifdef USE_PARSE_TREE_NODE_RECYCLE + { + FreeNode* n = (FreeNode* )node; + + THREAD_ATOMIC_START; + n->next = FreeNodeList; + FreeNodeList = n; + THREAD_ATOMIC_END; + } +#else + xfree(node); +#endif +} + +#ifdef 
USE_PARSE_TREE_NODE_RECYCLE +extern int +onig_free_node_list(void) +{ + FreeNode* n; + + /* THREAD_ATOMIC_START; */ + while (IS_NOT_NULL(FreeNodeList)) { + n = FreeNodeList; + FreeNodeList = FreeNodeList->next; + xfree(n); + } + /* THREAD_ATOMIC_END; */ + return 0; +} +#endif + +static Node* +node_new(void) +{ + Node* node; + +#ifdef USE_PARSE_TREE_NODE_RECYCLE + THREAD_ATOMIC_START; + if (IS_NOT_NULL(FreeNodeList)) { + node = (Node* )FreeNodeList; + FreeNodeList = FreeNodeList->next; + THREAD_ATOMIC_END; + return node; + } + THREAD_ATOMIC_END; +#endif + + node = (Node* )xmalloc(sizeof(Node)); + /* xmemset(node, 0, sizeof(Node)); */ + return node; +} + + +static void +initialize_cclass(CClassNode* cc) +{ + BITSET_CLEAR(cc->bs); + /* cc->base.flags = 0; */ + cc->flags = 0; + cc->mbuf = NULL; +} + +static Node* +node_new_cclass(void) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_CCLASS); + initialize_cclass(NCCLASS(node)); + return node; +} + +static Node* +node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out, + const OnigCodePoint ranges[]) +{ + int n, i; + CClassNode* cc; + OnigCodePoint j; + + Node* node = node_new_cclass(); + CHECK_NULL_RETURN(node); + + cc = NCCLASS(node); + if (not != 0) NCCLASS_SET_NOT(cc); + + BITSET_CLEAR(cc->bs); + if (sb_out > 0 && IS_NOT_NULL(ranges)) { + n = ONIGENC_CODE_RANGE_NUM(ranges); + for (i = 0; i < n; i++) { + for (j = ONIGENC_CODE_RANGE_FROM(ranges, i); + j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) { + if (j >= sb_out) goto sb_end; + + BITSET_SET_BIT(cc->bs, j); + } + } + } + + sb_end: + if (IS_NULL(ranges)) { + is_null: + cc->mbuf = NULL; + } + else { + BBuf* bbuf; + + n = ONIGENC_CODE_RANGE_NUM(ranges); + if (n == 0) goto is_null; + + bbuf = (BBuf* )xmalloc(sizeof(BBuf)); + CHECK_NULL_RETURN(bbuf); + bbuf->alloc = n + 1; + bbuf->used = n + 1; + bbuf->p = (UChar* )((void* )ranges); + + cc->mbuf = bbuf; + } + + return node; +} + +static Node* +node_new_ctype(int 
type, int not) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_CTYPE); + NCTYPE(node)->ctype = type; + NCTYPE(node)->not = not; + return node; +} + +static Node* +node_new_anychar(void) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_CANY); + return node; +} + +static Node* +node_new_list(Node* left, Node* right) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_LIST); + NCAR(node) = left; + NCDR(node) = right; + return node; +} + +extern Node* +onig_node_new_list(Node* left, Node* right) +{ + return node_new_list(left, right); +} + +extern Node* +onig_node_list_add(Node* list, Node* x) +{ + Node *n; + + n = onig_node_new_list(x, NULL); + if (IS_NULL(n)) return NULL_NODE; + + if (IS_NOT_NULL(list)) { + while (IS_NOT_NULL(NCDR(list))) + list = NCDR(list); + + NCDR(list) = n; + } + + return n; +} + +extern Node* +onig_node_new_alt(Node* left, Node* right) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_ALT); + NCAR(node) = left; + NCDR(node) = right; + return node; +} + +extern Node* +onig_node_new_anchor(int type) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_ANCHOR); + NANCHOR(node)->type = type; + NANCHOR(node)->target = NULL; + NANCHOR(node)->char_len = -1; + return node; +} + +static Node* +node_new_backref(int back_num, int* backrefs, int by_name, +#ifdef USE_BACKREF_WITH_LEVEL + int exist_level, int nest_level, +#endif + ScanEnv* env) +{ + int i; + Node* node = node_new(); + + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_BREF); + NBREF(node)->state = 0; + NBREF(node)->back_num = back_num; + NBREF(node)->back_dynamic = (int* )NULL; + if (by_name != 0) + NBREF(node)->state |= NST_NAME_REF; + +#ifdef USE_BACKREF_WITH_LEVEL + if (exist_level != 0) { + NBREF(node)->state |= NST_NEST_LEVEL; + NBREF(node)->nest_level = nest_level; + } +#endif + + for (i = 0; i < back_num; i++) { + if (backrefs[i] <= 
env->num_mem && + IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) { + NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */ + break; + } + } + + if (back_num <= NODE_BACKREFS_SIZE) { + for (i = 0; i < back_num; i++) + NBREF(node)->back_static[i] = backrefs[i]; + } + else { + int* p = (int* )xmalloc(sizeof(int) * back_num); + if (IS_NULL(p)) { + onig_node_free(node); + return NULL; + } + NBREF(node)->back_dynamic = p; + for (i = 0; i < back_num; i++) + p[i] = backrefs[i]; + } + return node; +} + +#ifdef USE_SUBEXP_CALL +static Node* +node_new_call(UChar* name, UChar* name_end, int gnum) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_CALL); + NCALL(node)->state = 0; + NCALL(node)->target = NULL_NODE; + NCALL(node)->name = name; + NCALL(node)->name_end = name_end; + NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */ + return node; +} +#endif + +static Node* +node_new_quantifier(int lower, int upper, int by_number) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_QTFR); + NQTFR(node)->state = 0; + NQTFR(node)->target = NULL; + NQTFR(node)->lower = lower; + NQTFR(node)->upper = upper; + NQTFR(node)->greedy = 1; + NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY; + NQTFR(node)->head_exact = NULL_NODE; + NQTFR(node)->next_head_exact = NULL_NODE; + NQTFR(node)->is_refered = 0; + if (by_number != 0) + NQTFR(node)->state |= NST_BY_NUMBER; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + NQTFR(node)->comb_exp_check_num = 0; +#endif + + return node; +} + +static Node* +node_new_enclose(int type) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_ENCLOSE); + NENCLOSE(node)->type = type; + NENCLOSE(node)->state = 0; + NENCLOSE(node)->regnum = 0; + NENCLOSE(node)->option = 0; + NENCLOSE(node)->target = NULL; + NENCLOSE(node)->call_addr = -1; + NENCLOSE(node)->opt_count = 0; + return node; +} + +extern Node* +onig_node_new_enclose(int type) +{ + return 
node_new_enclose(type); +} + +static Node* +node_new_enclose_memory(OnigOptionType option, int is_named) +{ + Node* node = node_new_enclose(ENCLOSE_MEMORY); + CHECK_NULL_RETURN(node); + if (is_named != 0) + SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP); + +#ifdef USE_SUBEXP_CALL + NENCLOSE(node)->option = option; +#endif + return node; +} + +static Node* +node_new_option(OnigOptionType option) +{ + Node* node = node_new_enclose(ENCLOSE_OPTION); + CHECK_NULL_RETURN(node); + NENCLOSE(node)->option = option; + return node; +} + +extern int +onig_node_str_cat(Node* node, const UChar* s, const UChar* end) +{ + int addlen = end - s; + + if (addlen > 0) { + int len = NSTR(node)->end - NSTR(node)->s; + + if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) { + UChar* p; + int capa = len + addlen + NODE_STR_MARGIN; + + if (capa <= NSTR(node)->capa) { + onig_strcpy(NSTR(node)->s + len, s, end); + } + else { + if (NSTR(node)->s == NSTR(node)->buf) + p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end, + s, end, capa); + else + p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa); + + CHECK_NULL_RETURN_MEMERR(p); + NSTR(node)->s = p; + NSTR(node)->capa = capa; + } + } + else { + onig_strcpy(NSTR(node)->s + len, s, end); + } + NSTR(node)->end = NSTR(node)->s + len + addlen; + } + + return 0; +} + +extern int +onig_node_str_set(Node* node, const UChar* s, const UChar* end) +{ + onig_node_str_clear(node); + return onig_node_str_cat(node, s, end); +} + +static int +node_str_cat_char(Node* node, UChar c) +{ + UChar s[1]; + + s[0] = c; + return onig_node_str_cat(node, s, s + 1); +} + +extern void +onig_node_conv_to_str_node(Node* node, int flag) +{ + SET_NTYPE(node, NT_STR); + NSTR(node)->flag = flag; + NSTR(node)->capa = 0; + NSTR(node)->s = NSTR(node)->buf; + NSTR(node)->end = NSTR(node)->buf; +} + +extern void +onig_node_str_clear(Node* node) +{ + if (NSTR(node)->capa != 0 && + IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { + 
xfree(NSTR(node)->s); + } + + NSTR(node)->capa = 0; + NSTR(node)->flag = 0; + NSTR(node)->s = NSTR(node)->buf; + NSTR(node)->end = NSTR(node)->buf; +} + +static Node* +node_new_str(const UChar* s, const UChar* end) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_STR); + NSTR(node)->capa = 0; + NSTR(node)->flag = 0; + NSTR(node)->s = NSTR(node)->buf; + NSTR(node)->end = NSTR(node)->buf; + if (onig_node_str_cat(node, s, end)) { + onig_node_free(node); + return NULL; + } + return node; +} + +extern Node* +onig_node_new_str(const UChar* s, const UChar* end) +{ + return node_new_str(s, end); +} + +static Node* +node_new_str_raw(UChar* s, UChar* end) +{ + Node* node = node_new_str(s, end); + NSTRING_SET_RAW(node); + return node; +} + +static Node* +node_new_empty(void) +{ + return node_new_str(NULL, NULL); +} + +static Node* +node_new_str_raw_char(UChar c) +{ + UChar p[1]; + + p[0] = c; + return node_new_str_raw(p, p + 1); +} + +static Node* +str_node_split_last_char(StrNode* sn, OnigEncoding enc) +{ + const UChar *p; + Node* n = NULL_NODE; + + if (sn->end > sn->s) { + p = onigenc_get_prev_char_head(enc, sn->s, sn->end, sn->end); + if (p && p > sn->s) { /* can be splitted. */ + n = node_new_str(p, sn->end); + if ((sn->flag & NSTR_RAW) != 0) + NSTRING_SET_RAW(n); + sn->end = (UChar* )p; + } + } + return n; +} + +static int +str_node_can_be_split(StrNode* sn, OnigEncoding enc) +{ + if (sn->end > sn->s) { + return ((enclen(enc, sn->s, sn->end) < sn->end - sn->s) ? 
1 : 0); + } + return 0; +} + +#ifdef USE_PAD_TO_SHORT_BYTE_CHAR +static int +node_str_head_pad(StrNode* sn, int num, UChar val) +{ + UChar buf[NODE_STR_BUF_SIZE]; + int i, len; + + len = sn->end - sn->s; + onig_strcpy(buf, sn->s, sn->end); + onig_strcpy(&(sn->s[num]), buf, buf + len); + sn->end += num; + + for (i = 0; i < num; i++) { + sn->s[i] = val; + } +} +#endif + +extern int +onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) +{ + unsigned int num, val; + OnigCodePoint c; + UChar* p = *src; + PFETCH_READY; + + num = 0; + while (!PEND) { + PFETCH(c); + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + val = (unsigned int )DIGITVAL(c); + if ((INT_MAX_LIMIT - val) / 10UL < num) + return -1; /* overflow */ + + num = num * 10 + val; + } + else { + PUNFETCH; + break; + } + } + *src = p; + return num; +} + +static int +scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen, + OnigEncoding enc) +{ + OnigCodePoint c; + unsigned int num, val; + UChar* p = *src; + PFETCH_READY; + + num = 0; + while (!PEND && maxlen-- != 0) { + PFETCH(c); + if (ONIGENC_IS_CODE_XDIGIT(enc, c)) { + val = (unsigned int )XDIGITVAL(enc,c); + if ((INT_MAX_LIMIT - val) / 16UL < num) + return -1; /* overflow */ + + num = (num << 4) + XDIGITVAL(enc,c); + } + else { + PUNFETCH; + break; + } + } + *src = p; + return num; +} + +static int +scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, + OnigEncoding enc) +{ + OnigCodePoint c; + unsigned int num, val; + UChar* p = *src; + PFETCH_READY; + + num = 0; + while (!PEND && maxlen-- != 0) { + PFETCH(c); + if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') { + val = ODIGITVAL(c); + if ((INT_MAX_LIMIT - val) / 8UL < num) + return -1; /* overflow */ + + num = (num << 3) + val; + } + else { + PUNFETCH; + break; + } + } + *src = p; + return num; +} + + +#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \ + BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT) + +/* data format: + [n][from-1][to-1][from-2][to-2] ... 
[from-n][to-n] + (all data size is OnigCodePoint) + */ +static int +new_code_range(BBuf** pbuf) +{ +#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5) + int r; + OnigCodePoint n; + BBuf* bbuf; + + bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf)); + CHECK_NULL_RETURN_MEMERR(*pbuf); + r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE); + if (r) return r; + + n = 0; + BBUF_WRITE_CODE_POINT(bbuf, 0, n); + return 0; +} + +static int +add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to) +{ + int r, inc_n, pos; + int low, high, bound, x; + OnigCodePoint n, *data; + BBuf* bbuf; + + if (from > to) { + n = from; from = to; to = n; + } + + if (IS_NULL(*pbuf)) { + r = new_code_range(pbuf); + if (r) return r; + bbuf = *pbuf; + n = 0; + } + else { + bbuf = *pbuf; + GET_CODE_POINT(n, bbuf->p); + } + data = (OnigCodePoint* )(bbuf->p); + data++; + + for (low = 0, bound = n; low < bound; ) { + x = (low + bound) >> 1; + if (from > data[x*2 + 1]) + low = x + 1; + else + bound = x; + } + + for (high = low, bound = n; high < bound; ) { + x = (high + bound) >> 1; + if (to >= data[x*2] - 1) + high = x + 1; + else + bound = x; + } + + inc_n = low + 1 - high; + if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM) + return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES; + + if (inc_n != 1) { + if (from > data[low*2]) + from = data[low*2]; + if (to < data[(high - 1)*2 + 1]) + to = data[(high - 1)*2 + 1]; + } + + if (inc_n != 0 && (OnigCodePoint )high < n) { + int from_pos = SIZE_CODE_POINT * (1 + high * 2); + int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2); + int size = (n - high) * 2 * SIZE_CODE_POINT; + + if (inc_n > 0) { + BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size); + } + else { + BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos); + } + } + + pos = SIZE_CODE_POINT * (1 + low * 2); + BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2); + BBUF_WRITE_CODE_POINT(bbuf, pos, from); + BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to); + n += inc_n; + BBUF_WRITE_CODE_POINT(bbuf, 0, n); + + 
return 0; +} + +static int +add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to) +{ + if (from > to) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + return 0; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; + } + + return add_code_range_to_buf(pbuf, from, to); +} + +static int +not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf) +{ + int r, i, n; + OnigCodePoint pre, from, *data, to = 0; + + *pbuf = (BBuf* )NULL; + if (IS_NULL(bbuf)) { + set_all: + return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); + } + + data = (OnigCodePoint* )(bbuf->p); + GET_CODE_POINT(n, data); + data++; + if (n <= 0) goto set_all; + + r = 0; + pre = MBCODE_START_POS(enc); + for (i = 0; i < n; i++) { + from = data[i*2]; + to = data[i*2+1]; + if (pre <= from - 1) { + r = add_code_range_to_buf(pbuf, pre, from - 1); + if (r != 0) return r; + } + if (to == ~((OnigCodePoint )0)) break; + pre = to + 1; + } + if (to < ~((OnigCodePoint )0)) { + r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0)); + } + return r; +} + +#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\ + BBuf *tbuf; \ + int tnot; \ + tnot = not1; not1 = not2; not2 = tnot; \ + tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \ +} while (0) + +static int +or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1, + BBuf* bbuf2, int not2, BBuf** pbuf) +{ + int r; + OnigCodePoint i, n1, *data1; + OnigCodePoint from, to; + + *pbuf = (BBuf* )NULL; + if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) { + if (not1 != 0 || not2 != 0) + return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); + return 0; + } + + r = 0; + if (IS_NULL(bbuf2)) + SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); + + if (IS_NULL(bbuf1)) { + if (not1 != 0) { + return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); + } + else { + if (not2 == 0) { + return bbuf_clone(pbuf, bbuf2); + } + else { + return not_code_range_buf(enc, bbuf2, pbuf); + } + } + } + + if (not1 != 0) + SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); + + data1 = (OnigCodePoint* )(bbuf1->p); 
+ GET_CODE_POINT(n1, data1); + data1++; + + if (not2 == 0 && not1 == 0) { /* 1 OR 2 */ + r = bbuf_clone(pbuf, bbuf2); + } + else if (not1 == 0) { /* 1 OR (not 2) */ + r = not_code_range_buf(enc, bbuf2, pbuf); + } + if (r != 0) return r; + + for (i = 0; i < n1; i++) { + from = data1[i*2]; + to = data1[i*2+1]; + r = add_code_range_to_buf(pbuf, from, to); + if (r != 0) return r; + } + return 0; +} + +static int +and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1, + OnigCodePoint* data, int n) +{ + int i, r; + OnigCodePoint from2, to2; + + for (i = 0; i < n; i++) { + from2 = data[i*2]; + to2 = data[i*2+1]; + if (from2 < from1) { + if (to2 < from1) continue; + else { + from1 = to2 + 1; + } + } + else if (from2 <= to1) { + if (to2 < to1) { + if (from1 <= from2 - 1) { + r = add_code_range_to_buf(pbuf, from1, from2-1); + if (r != 0) return r; + } + from1 = to2 + 1; + } + else { + to1 = from2 - 1; + } + } + else { + from1 = from2; + } + if (from1 > to1) break; + } + if (from1 <= to1) { + r = add_code_range_to_buf(pbuf, from1, to1); + if (r != 0) return r; + } + return 0; +} + +static int +and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) +{ + int r; + OnigCodePoint i, j, n1, n2, *data1, *data2; + OnigCodePoint from, to, from1, to1, from2, to2; + + *pbuf = (BBuf* )NULL; + if (IS_NULL(bbuf1)) { + if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */ + return bbuf_clone(pbuf, bbuf2); + return 0; + } + else if (IS_NULL(bbuf2)) { + if (not2 != 0) + return bbuf_clone(pbuf, bbuf1); + return 0; + } + + if (not1 != 0) + SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); + + data1 = (OnigCodePoint* )(bbuf1->p); + data2 = (OnigCodePoint* )(bbuf2->p); + GET_CODE_POINT(n1, data1); + GET_CODE_POINT(n2, data2); + data1++; + data2++; + + if (not2 == 0 && not1 == 0) { /* 1 AND 2 */ + for (i = 0; i < n1; i++) { + from1 = data1[i*2]; + to1 = data1[i*2+1]; + for (j = 0; j < n2; j++) { + from2 = data2[j*2]; + to2 = data2[j*2+1]; + if (from2 > to1) 
break; + if (to2 < from1) continue; + from = MAX(from1, from2); + to = MIN(to1, to2); + r = add_code_range_to_buf(pbuf, from, to); + if (r != 0) return r; + } + } + } + else if (not1 == 0) { /* 1 AND (not 2) */ + for (i = 0; i < n1; i++) { + from1 = data1[i*2]; + to1 = data1[i*2+1]; + r = and_code_range1(pbuf, from1, to1, data2, n2); + if (r != 0) return r; + } + } + + return 0; +} + +static int +and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) +{ + int r, not1, not2; + BBuf *buf1, *buf2, *pbuf; + BitSetRef bsr1, bsr2; + BitSet bs1, bs2; + + not1 = IS_NCCLASS_NOT(dest); + bsr1 = dest->bs; + buf1 = dest->mbuf; + not2 = IS_NCCLASS_NOT(cc); + bsr2 = cc->bs; + buf2 = cc->mbuf; + + if (not1 != 0) { + bitset_invert_to(bsr1, bs1); + bsr1 = bs1; + } + if (not2 != 0) { + bitset_invert_to(bsr2, bs2); + bsr2 = bs2; + } + bitset_and(bsr1, bsr2); + if (bsr1 != dest->bs) { + bitset_copy(dest->bs, bsr1); + bsr1 = dest->bs; + } + if (not1 != 0) { + bitset_invert(dest->bs); + } + + if (! ONIGENC_IS_SINGLEBYTE(enc)) { + if (not1 != 0 && not2 != 0) { + r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf); + } + else { + r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf); + if (r == 0 && not1 != 0) { + BBuf *tbuf; + r = not_code_range_buf(enc, pbuf, &tbuf); + if (r != 0) { + bbuf_free(pbuf); + return r; + } + bbuf_free(pbuf); + pbuf = tbuf; + } + } + if (r != 0) return r; + + dest->mbuf = pbuf; + bbuf_free(buf1); + return r; + } + return 0; +} + +static int +or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) +{ + int r, not1, not2; + BBuf *buf1, *buf2, *pbuf; + BitSetRef bsr1, bsr2; + BitSet bs1, bs2; + + not1 = IS_NCCLASS_NOT(dest); + bsr1 = dest->bs; + buf1 = dest->mbuf; + not2 = IS_NCCLASS_NOT(cc); + bsr2 = cc->bs; + buf2 = cc->mbuf; + + if (not1 != 0) { + bitset_invert_to(bsr1, bs1); + bsr1 = bs1; + } + if (not2 != 0) { + bitset_invert_to(bsr2, bs2); + bsr2 = bs2; + } + bitset_or(bsr1, bsr2); + if (bsr1 != dest->bs) { + bitset_copy(dest->bs, bsr1); + 
bsr1 = dest->bs; + } + if (not1 != 0) { + bitset_invert(dest->bs); + } + + if (! ONIGENC_IS_SINGLEBYTE(enc)) { + if (not1 != 0 && not2 != 0) { + r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf); + } + else { + r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf); + if (r == 0 && not1 != 0) { + BBuf *tbuf; + r = not_code_range_buf(enc, pbuf, &tbuf); + if (r != 0) { + bbuf_free(pbuf); + return r; + } + bbuf_free(pbuf); + pbuf = tbuf; + } + } + if (r != 0) return r; + + dest->mbuf = pbuf; + bbuf_free(buf1); + return r; + } + else + return 0; +} + +static int +conv_backslash_value(int c, ScanEnv* env) +{ + if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) { + switch (c) { + case 'n': return '\n'; + case 't': return '\t'; + case 'r': return '\r'; + case 'f': return '\f'; + case 'a': return '\007'; + case 'b': return '\010'; + case 'e': return '\033'; + case 'v': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB)) + return '\v'; + break; + + default: + break; + } + } + return c; +} + +#if 0 /* no invalid quantifier */ +static int +is_invalid_quantifier_target(Node* node) +{ + switch (NTYPE(node)) { + case NT_ANCHOR: + return 1; + break; + + case NT_ENCLOSE: + /* allow enclosed elements */ + /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */ + break; + + case NT_LIST: + do { + if (! 
is_invalid_quantifier_target(NCAR(node))) return 0; + } while (IS_NOT_NULL(node = NCDR(node))); + return 0; + break; + + case NT_ALT: + do { + if (is_invalid_quantifier_target(NCAR(node))) return 1; + } while (IS_NOT_NULL(node = NCDR(node))); + break; + + default: + break; + } + return 0; +} +#else +#define is_invalid_quantifier_target(node) 0 +#endif + +/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */ +static int +popular_quantifier_num(QtfrNode* q) +{ + if (q->greedy) { + if (q->lower == 0) { + if (q->upper == 1) return 0; + else if (IS_REPEAT_INFINITE(q->upper)) return 1; + } + else if (q->lower == 1) { + if (IS_REPEAT_INFINITE(q->upper)) return 2; + } + } + else { + if (q->lower == 0) { + if (q->upper == 1) return 3; + else if (IS_REPEAT_INFINITE(q->upper)) return 4; + } + else if (q->lower == 1) { + if (IS_REPEAT_INFINITE(q->upper)) return 5; + } + } + return -1; +} + + +enum ReduceType { + RQ_ASIS = 0, /* as is */ + RQ_DEL = 1, /* delete parent */ + RQ_A, /* to '*' */ + RQ_AQ, /* to '*?' */ + RQ_QQ, /* to '??' */ + RQ_P_QQ, /* to '+)??' */ + RQ_PQ_Q /* to '+?)?' */ +}; + +static enum ReduceType const ReduceTypeTable[6][6] = { + {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */ + {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */ + {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */ + {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */ + {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */ + {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' 
*/ +}; + +extern void +onig_reduce_nested_quantifier(Node* pnode, Node* cnode) +{ + int pnum, cnum; + QtfrNode *p, *c; + + p = NQTFR(pnode); + c = NQTFR(cnode); + pnum = popular_quantifier_num(p); + cnum = popular_quantifier_num(c); + if (pnum < 0 || cnum < 0) return ; + + switch(ReduceTypeTable[cnum][pnum]) { + case RQ_DEL: + *pnode = *cnode; + break; + case RQ_A: + p->target = c->target; + p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1; + break; + case RQ_AQ: + p->target = c->target; + p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0; + break; + case RQ_QQ: + p->target = c->target; + p->lower = 0; p->upper = 1; p->greedy = 0; + break; + case RQ_P_QQ: + p->target = cnode; + p->lower = 0; p->upper = 1; p->greedy = 0; + c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1; + return ; + break; + case RQ_PQ_Q: + p->target = cnode; + p->lower = 0; p->upper = 1; p->greedy = 1; + c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0; + return ; + break; + case RQ_ASIS: + p->target = cnode; + return ; + break; + } + + c->target = NULL_NODE; + onig_node_free(cnode); +} + + +enum TokenSyms { + TK_EOT = 0, /* end of token */ + TK_RAW_BYTE = 1, + TK_CHAR, + TK_STRING, + TK_CODE_POINT, + TK_ANYCHAR, + TK_CHAR_TYPE, + TK_BACKREF, + TK_CALL, + TK_ANCHOR, + TK_OP_REPEAT, + TK_INTERVAL, + TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */ + TK_ALT, + TK_SUBEXP_OPEN, + TK_SUBEXP_CLOSE, + TK_CC_OPEN, + TK_QUOTE_OPEN, + TK_CHAR_PROPERTY, /* \p{...}, \P{...} */ + /* in cc */ + TK_CC_CLOSE, + TK_CC_RANGE, + TK_POSIX_BRACKET_OPEN, + TK_CC_AND, /* && */ + TK_CC_CC_OPEN /* [ */ +}; + +typedef struct { + enum TokenSyms type; + int escaped; + int base; /* is number: 8, 16 (used in [....]) */ + UChar* backp; + union { + UChar* s; + int c; + OnigCodePoint code; + int anchor; + int subtype; + struct { + int lower; + int upper; + int greedy; + int possessive; + } repeat; + struct { + int num; + int ref1; + int* refs; + int by_name; +#ifdef USE_BACKREF_WITH_LEVEL + int exist_level; + 
int level; /* \k */ +#endif + } backref; + struct { + UChar* name; + UChar* name_end; + int gnum; + } call; + struct { + int ctype; + int not; + } prop; + } u; +} OnigToken; + + +static int +fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env) +{ + int low, up, syn_allow, non_low = 0; + int r = 0; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar* p = *src; + PFETCH_READY; + + syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL); + + if (PEND) { + if (syn_allow) + return 1; /* "....{" : OK! */ + else + return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */ + } + + if (! syn_allow) { + c = PPEEK; + if (c == ')' || c == '(' || c == '|') { + return ONIGERR_END_PATTERN_AT_LEFT_BRACE; + } + } + + low = onig_scan_unsigned_number(&p, end, env->enc); + if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + if (low > ONIG_MAX_REPEAT_NUM) + return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + + if (p == *src) { /* can't read low */ + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) { + /* allow {,n} as {0,n} */ + low = 0; + non_low = 1; + } + else + goto invalid; + } + + if (PEND) goto invalid; + PFETCH(c); + if (c == ',') { + UChar* prev = p; + up = onig_scan_unsigned_number(&p, end, env->enc); + if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + if (up > ONIG_MAX_REPEAT_NUM) + return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + + if (p == prev) { + if (non_low != 0) + goto invalid; + up = REPEAT_INFINITE; /* {n,} : {n,infinite} */ + } + } + else { + if (non_low != 0) + goto invalid; + + PUNFETCH; + up = low; /* {n} : exact n times */ + r = 2; /* fixed */ + } + + if (PEND) goto invalid; + PFETCH(c); + if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) { + if (c != MC_ESC(env->syntax)) goto invalid; + PFETCH(c); + } + if (c != '}') goto invalid; + + if (!IS_REPEAT_INFINITE(up) && low > up) { + return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE; + } + + tok->type 
= TK_INTERVAL; + tok->u.repeat.lower = low; + tok->u.repeat.upper = up; + *src = p; + return r; /* 0: normal {n,m}, 2: fixed {n} */ + + invalid: + if (syn_allow) + return 1; /* OK */ + else + return ONIGERR_INVALID_REPEAT_RANGE_PATTERN; +} + +/* \M-, \C-, \c, or \... */ +static int +fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) +{ + int v; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar* p = *src; + PFETCH_READY; + + if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; + + PFETCH(c); + switch (c) { + case 'M': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) { + if (PEND) return ONIGERR_END_PATTERN_AT_META; + PFETCH(c); + if (c != '-') return ONIGERR_META_CODE_SYNTAX; + if (PEND) return ONIGERR_END_PATTERN_AT_META; + PFETCH(c); + if (c == MC_ESC(env->syntax)) { + v = fetch_escaped_value(&p, end, env); + if (v < 0) return v; + c = (OnigCodePoint )v; + } + c = ((c & 0xff) | 0x80); + } + else + goto backslash; + break; + + case 'C': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) { + if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; + PFETCH(c); + if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX; + goto control; + } + else + goto backslash; + + case 'c': + if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) { + control: + if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; + PFETCH(c); + if (c == '?') { + c = 0177; + } + else { + if (c == MC_ESC(env->syntax)) { + v = fetch_escaped_value(&p, end, env); + if (v < 0) return v; + c = (OnigCodePoint )v; + } + c &= 0x9f; + } + break; + } + /* fall through */ + + default: + { + backslash: + c = conv_backslash_value(c, env); + } + break; + } + + *src = p; + return c; +} + +static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env); + +static OnigCodePoint +get_name_end_code_point(OnigCodePoint start) +{ + switch (start) { + case '<': return (OnigCodePoint )'>'; break; + case '\'': return (OnigCodePoint )'\''; break; + default: + break; + } 
+ + return (OnigCodePoint )0; +} + +#ifdef USE_NAMED_GROUP +#ifdef USE_BACKREF_WITH_LEVEL +/* + \k, \k + \k, \k + \k<-num+n>, \k<-num-n> +*/ +static int +fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, + UChar** rname_end, ScanEnv* env, + int* rback_num, int* rlevel) +{ + int r, sign, is_num, exist_level; + OnigCodePoint end_code; + OnigCodePoint c = 0; + OnigEncoding enc = env->enc; + UChar *name_end; + UChar *pnum_head; + UChar *p = *src; + PFETCH_READY; + + *rback_num = 0; + is_num = exist_level = 0; + sign = 1; + pnum_head = *src; + + end_code = get_name_end_code_point(start_code); + + name_end = end; + r = 0; + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { + PFETCH(c); + if (c == end_code) + return ONIGERR_EMPTY_GROUP_NAME; + + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else if (c == '-') { + is_num = 2; + sign = -1; + pnum_head = p; + } + else if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == end_code || c == ')' || c == '+' || c == '-') { + if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; + break; + } + + if (is_num != 0) { + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else { + r = ONIGERR_INVALID_GROUP_NAME; + is_num = 0; + } + } + else if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + if (r == 0 && c != end_code) { + if (c == '+' || c == '-') { + int level; + int flag = (c == '-' ? -1 : 1); + + PFETCH(c); + if (! 
ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; + PUNFETCH; + level = onig_scan_unsigned_number(&p, end, enc); + if (level < 0) return ONIGERR_TOO_BIG_NUMBER; + *rlevel = (level * flag); + exist_level = 1; + + PFETCH(c); + if (c == end_code) + goto end; + } + + err: + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + + end: + if (r == 0) { + if (is_num != 0) { + *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); + if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; + else if (*rback_num == 0) goto err; + + *rback_num *= sign; + } + + *rname_end = name_end; + *src = p; + return (exist_level ? 1 : 0); + } + else { + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#endif /* USE_BACKREF_WITH_LEVEL */ + +/* + def: 0 -> define name (don't allow number name) + 1 -> reference name (allow number name) +*/ +static int +fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, + UChar** rname_end, ScanEnv* env, int* rback_num, int ref) +{ + int r, is_num, sign; + OnigCodePoint end_code; + OnigCodePoint c = 0; + OnigEncoding enc = env->enc; + UChar *name_end; + UChar *pnum_head; + UChar *p = *src; + PFETCH_READY; + + *rback_num = 0; + + end_code = get_name_end_code_point(start_code); + + name_end = end; + pnum_head = *src; + r = 0; + is_num = 0; + sign = 1; + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { + PFETCH(c); + if (c == end_code) + return ONIGERR_EMPTY_GROUP_NAME; + + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + if (ref == 1) + is_num = 1; + else { + r = ONIGERR_INVALID_GROUP_NAME; + is_num = 0; + } + } + else if (c == '-') { + if (ref == 1) { + is_num = 2; + sign = -1; + pnum_head = p; + } + else { + r = ONIGERR_INVALID_GROUP_NAME; + is_num = 0; + } + } + else if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + if (r == 0) { + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == end_code || c == ')') { + if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; + break; + } + + if 
(is_num != 0) { + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else { + if (!ONIGENC_IS_CODE_WORD(enc, c)) + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + else + r = ONIGERR_INVALID_GROUP_NAME; + + is_num = 0; + } + } + else { + if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + } + + if (c != end_code) { + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + + if (is_num != 0) { + *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); + if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; + else if (*rback_num == 0) { + r = ONIGERR_INVALID_GROUP_NAME; + goto err; + } + + *rback_num *= sign; + } + + *rname_end = name_end; + *src = p; + return 0; + } + else { + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == end_code || c == ')') + break; + } + if (PEND) + name_end = end; + + err: + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#else +static int +fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, + UChar** rname_end, ScanEnv* env, int* rback_num, int ref) +{ + int r, is_num, sign; + OnigCodePoint end_code; + OnigCodePoint c = 0; + UChar *name_end; + OnigEncoding enc = env->enc; + UChar *pnum_head; + UChar *p = *src; + PFETCH_READY; + + *rback_num = 0; + + end_code = get_name_end_code_point(start_code); + + *rname_end = name_end = end; + r = 0; + pnum_head = *src; + is_num = 0; + sign = 1; + + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { + PFETCH(c); + if (c == end_code) + return ONIGERR_EMPTY_GROUP_NAME; + + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else if (c == '-') { + is_num = 2; + sign = -1; + pnum_head = p; + } + else { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + while (!PEND) { + name_end = p; + + PFETCH(c); + if (c == end_code || c == ')') break; + if (! 
ONIGENC_IS_CODE_DIGIT(enc, c)) + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + if (r == 0 && c != end_code) { + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + + if (r == 0) { + *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); + if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; + else if (*rback_num == 0) { + r = ONIGERR_INVALID_GROUP_NAME; + goto err; + } + *rback_num *= sign; + + *rname_end = name_end; + *src = p; + return 0; + } + else { + err: + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#endif /* USE_NAMED_GROUP */ + +static void +CC_ESC_WARN(ScanEnv* env, UChar *c) +{ + if (onig_warn == onig_null_warn) return ; + + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) { + UChar buf[WARN_BUFSIZE]; + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, + env->pattern, env->pattern_end, + (UChar* )"character class has '%s' without escape", c); + (*onig_warn)((char* )buf); + } +} + +static void +CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c) +{ + if (onig_warn == onig_null_warn) return ; + + if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) { + UChar buf[WARN_BUFSIZE]; + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc, + (env)->pattern, (env)->pattern_end, + (UChar* )"regular expression has '%s' without escape", c); + (*onig_warn)((char* )buf); + } +} + +static UChar* +find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, + UChar **next, OnigEncoding enc) +{ + int i; + OnigCodePoint x; + UChar *q; + UChar *p = from; + + while (p < to) { + x = ONIGENC_MBC_TO_CODE(enc, p, to); + q = p + enclen(enc, p, to); + if (x == s[0]) { + for (i = 1; i < n && q < to; i++) { + x = ONIGENC_MBC_TO_CODE(enc, q, to); + if (x != s[i]) break; + q += enclen(enc, q, to); + } + if (i >= n) { + if (IS_NOT_NULL(next)) + *next = q; + return p; + } + } + p = q; + } + return NULL_UCHARP; +} + +static int 
+str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, + OnigCodePoint bad, OnigEncoding enc, const OnigSyntaxType* syn) +{ + int i, in_esc; + OnigCodePoint x; + UChar *q; + UChar *p = from; + + in_esc = 0; + while (p < to) { + if (in_esc) { + in_esc = 0; + p += enclen(enc, p, to); + } + else { + x = ONIGENC_MBC_TO_CODE(enc, p, to); + q = p + enclen(enc, p, to); + if (x == s[0]) { + for (i = 1; i < n && q < to; i++) { + x = ONIGENC_MBC_TO_CODE(enc, q, to); + if (x != s[i]) break; + q += enclen(enc, q, to); + } + if (i >= n) return 1; + p += enclen(enc, p, to); + } + else { + x = ONIGENC_MBC_TO_CODE(enc, p, to); + if (x == bad) return 0; + else if (x == MC_ESC(syn)) in_esc = 1; + p = q; + } + } + } + return 0; +} + +static int +fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) +{ + int num; + OnigCodePoint c, c2; + const OnigSyntaxType* syn = env->syntax; + OnigEncoding enc = env->enc; + UChar* prev; + UChar* p = *src; + PFETCH_READY; + + if (PEND) { + tok->type = TK_EOT; + return tok->type; + } + + PFETCH(c); + tok->type = TK_CHAR; + tok->base = 0; + tok->u.c = c; + tok->escaped = 0; + + if (c == ']') { + tok->type = TK_CC_CLOSE; + } + else if (c == '-') { + tok->type = TK_CC_RANGE; + } + else if (c == MC_ESC(syn)) { + if (! 
IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) + goto end; + + if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; + + PFETCH(c); + tok->escaped = 1; + tok->u.c = c; + switch (c) { + case 'w': + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_W; + tok->u.prop.not = 0; + break; + case 'W': + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_W; + tok->u.prop.not = 1; + break; + case 'd': + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_D; + tok->u.prop.not = 0; + break; + case 'D': + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_D; + tok->u.prop.not = 1; + break; + case 's': + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_S; + tok->u.prop.not = 0; + break; + case 'S': + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_S; + tok->u.prop.not = 1; + break; + case 'h': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; + tok->u.prop.not = 0; + break; + case 'H': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; + tok->u.prop.not = 1; + break; + + case 'p': + case 'P': + c2 = PPEEK; + if (c2 == '{' && + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { + PINC; + tok->type = TK_CHAR_PROPERTY; + tok->u.prop.not = (c == 'P' ? 1 : 0); + + /* '{' may be the last char of the pattern: only look for the + optional '^' when something is left to fetch */ + if (!PEND && + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { + PFETCH(c2); + if (c2 == '^') { + tok->u.prop.not = (tok->u.prop.not == 0 ? 
1 : 0); + } + else + PUNFETCH; + } + } + break; + + case 'x': + if (PEND) break; + + prev = p; + if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { + PINC; + num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND) { + c2 = PPEEK; + if (ONIGENC_IS_CODE_XDIGIT(enc, c2)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) { + PINC; + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; + } + else { + /* can't read nothing or invalid format */ + p = prev; + } + } + else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { + num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 16; + tok->u.c = num; + } + break; + + case 'u': + if (PEND) break; + + prev = p; + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { + num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; + } + break; + + case '0': + case '1': case '2': case '3': case '4': case '5': case '6': case '7': + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { + PUNFETCH; + prev = p; + num = scan_unsigned_octal_number(&p, end, 3, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. 
*/ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 8; + tok->u.c = num; + } + break; + + default: + PUNFETCH; + num = fetch_escaped_value(&p, end, env); + if (num < 0) return num; + if (tok->u.c != num) { + tok->u.code = (OnigCodePoint )num; + tok->type = TK_CODE_POINT; + } + break; + } + } + else if (c == '[') { + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) { + OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' }; + tok->backp = p; /* point at '[' is readed */ + PINC; + if (str_exist_check_with_esc(send, 2, p, end, + (OnigCodePoint )']', enc, syn)) { + tok->type = TK_POSIX_BRACKET_OPEN; + } + else { + PUNFETCH; + goto cc_in_cc; + } + } + else { + cc_in_cc: + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) { + tok->type = TK_CC_CC_OPEN; + } + else { + CC_ESC_WARN(env, (UChar* )"["); + } + } + } + else if (c == '&') { + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) && + !PEND && (PPEEK_IS('&'))) { + PINC; + tok->type = TK_CC_AND; + } + } + + end: + *src = p; + return tok->type; +} + +static int +fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) +{ + int r, num; + OnigCodePoint c; + OnigEncoding enc = env->enc; + const OnigSyntaxType* syn = env->syntax; + UChar* prev; + UChar* p = *src; + PFETCH_READY; + + start: + if (PEND) { + tok->type = TK_EOT; + return tok->type; + } + + tok->type = TK_STRING; + tok->base = 0; + tok->backp = p; + + PFETCH(c); + if (IS_MC_ESC_CODE(c, syn)) { + if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; + + tok->backp = p; + PFETCH(c); + + tok->u.c = c; + tok->escaped = 1; + switch (c) { + case '*': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break; + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '+': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break; + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 1; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '?': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break; + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = 1; + greedy_check: + if (!PEND && PPEEK_IS('?') && + IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) { + PFETCH(c); + tok->u.repeat.greedy = 0; + tok->u.repeat.possessive = 0; + } + else { + possessive_check: + if (!PEND && PPEEK_IS('+') && + ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) && + tok->type != TK_INTERVAL) || + (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) && + tok->type == TK_INTERVAL))) { + PFETCH(c); + tok->u.repeat.greedy = 1; + tok->u.repeat.possessive = 1; + } + else { + tok->u.repeat.greedy = 1; + tok->u.repeat.possessive = 0; + } + } + break; + + case '{': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break; + r = fetch_range_quantifier(&p, end, tok, env); + if (r < 0) return r; /* error */ + if (r == 0) goto greedy_check; + else if (r == 2) { /* {n} */ + if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) + goto possessive_check; + + goto greedy_check; + } + /* r == 1 : normal char */ + break; + + case '|': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break; + tok->type = TK_ALT; + break; + + case '(': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_OPEN; + break; + + case ')': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_CLOSE; + break; + + case 'w': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_W; + tok->u.prop.not = 0; + break; + + case 'W': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_W; + tok->u.prop.not = 1; + break; + + case 'b': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_WORD_BOUND; + break; + + case 'B': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_NOT_WORD_BOUND; + break; + +#ifdef USE_WORD_BEGIN_END + case '<': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_WORD_BEGIN; + break; + + case '>': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_WORD_END; + break; +#endif + + case 's': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_S; + tok->u.prop.not = 0; + break; + + case 'S': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_S; + tok->u.prop.not = 1; + break; + + case 'd': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_D; + tok->u.prop.not = 0; + break; + + case 'D': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_D; + tok->u.prop.not = 1; + break; + + case 'h': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; + tok->u.prop.not = 0; + break; + + case 'H': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; + tok->u.prop.not = 1; + break; + + case 'A': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; + begin_buf: + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_BEGIN_BUF; + break; + + case 'Z': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_SEMI_END_BUF; + break; + + case 'z': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; + end_buf: + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_END_BUF; + break; + + case 'G': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_BEGIN_POSITION; + break; + + case '`': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; + goto begin_buf; + break; + + case '\'': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; + goto end_buf; + break; + + case 'x': + if (PEND) break; + + prev = p; + if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { + PINC; + num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND) { + if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) { + PINC; + tok->type = TK_CODE_POINT; + tok->u.code = (OnigCodePoint )num; + } + else { + /* can't read nothing or invalid format */ + p = prev; + } + } + else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { + num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 16; + tok->u.c = num; + } + break; + + case 'u': + if (PEND) break; + + prev = p; + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { + num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. 
*/ + num = 0; /* but, it's not error */ + } + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; + } + break; + + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + PUNFETCH; + prev = p; + num = onig_scan_unsigned_number(&p, end, enc); + if (num < 0 || num > ONIG_MAX_BACKREF_NUM) { + goto skip_backref; + } + + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && + (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */ + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num])) + return ONIGERR_INVALID_BACKREF; + } + + tok->type = TK_BACKREF; + tok->u.backref.num = 1; + tok->u.backref.ref1 = num; + tok->u.backref.by_name = 0; +#ifdef USE_BACKREF_WITH_LEVEL + tok->u.backref.exist_level = 0; +#endif + break; + } + + skip_backref: + if (c == '8' || c == '9') { + /* normal char */ + p = prev; PINC; + break; + } + + p = prev; + /* fall through */ + case '0': + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { + prev = p; + num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 8; + tok->u.c = num; + } + else if (c != '0') { + PINC; + } + break; + +#ifdef USE_NAMED_GROUP + case 'k': + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) { + PFETCH(c); + if (c == '<' || c == '\'') { + UChar* name_end; + int* backs; + int back_num; + + prev = p; + +#ifdef USE_BACKREF_WITH_LEVEL + name_end = NULL_UCHARP; /* no need. escape gcc warning. 
*/ + r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end, + env, &back_num, &tok->u.backref.level); + if (r == 1) tok->u.backref.exist_level = 1; + else tok->u.backref.exist_level = 0; +#else + /* fetch_name() takes the opening delimiter ('<' or '\'') as its first + argument, exactly as the \g call below passes it */ + r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &back_num, 1); +#endif + if (r < 0) return r; + + if (back_num != 0) { + if (back_num < 0) { + back_num = BACKREF_REL_TO_ABS(back_num, env); + if (back_num <= 0) + return ONIGERR_INVALID_BACKREF; + } + + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + if (back_num > env->num_mem || + IS_NULL(SCANENV_MEM_NODES(env)[back_num])) + return ONIGERR_INVALID_BACKREF; + } + tok->type = TK_BACKREF; + tok->u.backref.by_name = 0; + tok->u.backref.num = 1; + tok->u.backref.ref1 = back_num; + } + else { + num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); + if (num <= 0) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + int i; + for (i = 0; i < num; i++) { + if (backs[i] > env->num_mem || + IS_NULL(SCANENV_MEM_NODES(env)[backs[i]])) + return ONIGERR_INVALID_BACKREF; + } + } + + tok->type = TK_BACKREF; + tok->u.backref.by_name = 1; + if (num == 1) { + tok->u.backref.num = 1; + tok->u.backref.ref1 = backs[0]; + } + else { + tok->u.backref.num = num; + tok->u.backref.refs = backs; + } + } + } + else + PUNFETCH; + } + break; +#endif + +#ifdef USE_SUBEXP_CALL + case 'g': + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) { + PFETCH(c); + if (c == '<' || c == '\'') { + int gnum; + UChar* name_end; + + prev = p; + r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1); + if (r < 0) return r; + + tok->type = TK_CALL; + tok->u.call.name = prev; + tok->u.call.name_end = name_end; + tok->u.call.gnum = gnum; + } + else + PUNFETCH; + } + break; +#endif + + case 'Q': + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) { + tok->type = TK_QUOTE_OPEN; 
+ } + break; + + case 'p': + case 'P': + if (PPEEK_IS('{') && + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { + PINC; + tok->type = TK_CHAR_PROPERTY; + tok->u.prop.not = (c == 'P' ? 1 : 0); + + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { + PFETCH(c); + if (c == '^') { + tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); + } + else + PUNFETCH; + } + } + break; + + default: + PUNFETCH; + num = fetch_escaped_value(&p, end, env); + if (num < 0) return num; + /* set_raw: */ + if (tok->u.c != num) { + tok->type = TK_CODE_POINT; + tok->u.code = (OnigCodePoint )num; + } + else { /* string */ + p = tok->backp + enclen(enc, tok->backp, end); + } + break; + } + } + else { + tok->u.c = c; + tok->escaped = 0; + +#ifdef USE_VARIABLE_META_CHARS + if ((c != ONIG_INEFFECTIVE_META_CHAR) && + IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) { + if (c == MC_ANYCHAR(syn)) + goto any_char; + else if (c == MC_ANYTIME(syn)) + goto anytime; + else if (c == MC_ZERO_OR_ONE_TIME(syn)) + goto zero_or_one_time; + else if (c == MC_ONE_OR_MORE_TIME(syn)) + goto one_or_more_time; + else if (c == MC_ANYCHAR_ANYTIME(syn)) { + tok->type = TK_ANYCHAR_ANYTIME; + goto out; + } + } +#endif + + switch (c) { + case '.': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break; +#ifdef USE_VARIABLE_META_CHARS + any_char: +#endif + tok->type = TK_ANYCHAR; + break; + + case '*': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break; +#ifdef USE_VARIABLE_META_CHARS + anytime: +#endif + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '+': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break; +#ifdef USE_VARIABLE_META_CHARS + one_or_more_time: +#endif + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 1; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '?': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break; +#ifdef USE_VARIABLE_META_CHARS + zero_or_one_time: +#endif + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = 1; + goto greedy_check; + break; + + case '{': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break; + r = fetch_range_quantifier(&p, end, tok, env); + if (r < 0) return r; /* error */ + if (r == 0) goto greedy_check; + else if (r == 2) { /* {n} */ + if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) + goto possessive_check; + + goto greedy_check; + } + /* r == 1 : normal char */ + break; + + case '|': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break; + tok->type = TK_ALT; + break; + + case '(': + if (PPEEK_IS('?') && + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { + PINC; + if (PPEEK_IS('#')) { + PFETCH(c); + while (1) { + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH(c); + if (c == MC_ESC(syn)) { + if (!PEND) PFETCH(c); + } + else { + if (c == ')') break; + } + } + goto start; + } + PUNFETCH; + } + + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_OPEN; + break; + + case ')': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_CLOSE; + break; + + case '^': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = (IS_SINGLELINE(env->option) + ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE); + break; + + case '$': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = (IS_SINGLELINE(env->option) + ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE); + break; + + case '[': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break; + tok->type = TK_CC_OPEN; + break; + + case ']': + if (*src > env->pattern) /* /].../ is allowed. 
*/ + CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]"); + break; + + case '#': + if (IS_EXTEND(env->option)) { + while (!PEND) { + PFETCH(c); + if (ONIGENC_IS_CODE_NEWLINE(enc, c)) + break; + } + goto start; + break; + } + break; + + case ' ': case '\t': case '\n': case '\r': case '\f': + if (IS_EXTEND(env->option)) + goto start; + break; + + default: + /* string */ + break; + } + } + +#ifdef USE_VARIABLE_META_CHARS + out: +#endif + *src = p; + return tok->type; +} + +static int +add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, + OnigEncoding enc ARG_UNUSED, + OnigCodePoint sb_out, const OnigCodePoint mbr[]) +{ + int i, r; + OnigCodePoint j; + + int n = ONIGENC_CODE_RANGE_NUM(mbr); + + if (not == 0) { + for (i = 0; i < n; i++) { + for (j = ONIGENC_CODE_RANGE_FROM(mbr, i); + j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) { + if (j >= sb_out) { + if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) { + r = add_code_range_to_buf(&(cc->mbuf), j, + ONIGENC_CODE_RANGE_TO(mbr, i)); + if (r != 0) return r; + i++; + } + + goto sb_end; + } + BITSET_SET_BIT(cc->bs, j); + } + } + + sb_end: + for ( ; i < n; i++) { + r = add_code_range_to_buf(&(cc->mbuf), + ONIGENC_CODE_RANGE_FROM(mbr, i), + ONIGENC_CODE_RANGE_TO(mbr, i)); + if (r != 0) return r; + } + } + else { + OnigCodePoint prev = 0; + + for (i = 0; i < n; i++) { + for (j = prev; + j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) { + if (j >= sb_out) { + goto sb_end2; + } + BITSET_SET_BIT(cc->bs, j); + } + prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; + } + for (j = prev; j < sb_out; j++) { + BITSET_SET_BIT(cc->bs, j); + } + + sb_end2: + prev = sb_out; + + for (i = 0; i < n; i++) { + if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) { + r = add_code_range_to_buf(&(cc->mbuf), prev, + ONIGENC_CODE_RANGE_FROM(mbr, i) - 1); + if (r != 0) return r; + } + prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; + } + if (prev < 0x7fffffff) { + r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff); + if (r != 0) return r; + } + } + + return 0; +} + 
+static int +add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) +{ + int c, r; + const OnigCodePoint *ranges; + OnigCodePoint sb_out; + OnigEncoding enc = env->enc; + + switch (ctype) { + case ONIGENC_CTYPE_D: + case ONIGENC_CTYPE_S: + case ONIGENC_CTYPE_W: + ctype ^= ONIGENC_CTYPE_SPECIAL_MASK; + if (not != 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (! ONIGENC_IS_ASCII_CODE_CTYPE((OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (ONIGENC_IS_ASCII_CODE_CTYPE((OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + } + return 0; + break; + } + + r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges); + if (r == 0) { + return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges); + } + else if (r != ONIG_NO_SUPPORT_CONFIG) { + return r; + } + + r = 0; + switch (ctype) { + case ONIGENC_CTYPE_ALPHA: + case ONIGENC_CTYPE_BLANK: + case ONIGENC_CTYPE_CNTRL: + case ONIGENC_CTYPE_DIGIT: + case ONIGENC_CTYPE_LOWER: + case ONIGENC_CTYPE_PUNCT: + case ONIGENC_CTYPE_SPACE: + case ONIGENC_CTYPE_UPPER: + case ONIGENC_CTYPE_XDIGIT: + case ONIGENC_CTYPE_ASCII: + case ONIGENC_CTYPE_ALNUM: + if (not != 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + } + break; + + case ONIGENC_CTYPE_GRAPH: + case ONIGENC_CTYPE_PRINT: + if (not != 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (! 
ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + break; + + case ONIGENC_CTYPE_WORD: + if (not == 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */ + && ! ONIGENC_IS_CODE_WORD(enc, c)) + BITSET_SET_BIT(cc->bs, c); + } + } + break; + + default: + return ONIGERR_PARSER_BUG; + break; + } + + return r; +} + +static int +parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) +{ +#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20 +#define POSIX_BRACKET_NAME_MIN_LEN 4 + + static const PosixBracketEntryType PBS[] = { + { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 }, + { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 }, + { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 }, + { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 }, + { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 }, + { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 }, + { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 }, + { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 }, + { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 }, + { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 }, + { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 }, + { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 }, + { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 }, + { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 }, + { (UChar* )NULL, -1, 0 } + }; + + const PosixBracketEntryType *pb; + int not, i, r; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar *p = *src; + PFETCH_READY; + + if (PPEEK_IS('^')) { + PINC; + not = 1; + } + else + not = 0; + + if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3) + goto 
not_posix_bracket; + + for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { + if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) { + p = (UChar* )onigenc_step(enc, p, end, pb->len); + if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0) + return ONIGERR_INVALID_POSIX_BRACKET_TYPE; + + r = add_ctype_to_cc(cc, pb->ctype, not, env); + if (r != 0) return r; + + PINC; PINC; + *src = p; + return 0; + } + } + + not_posix_bracket: + c = 0; + i = 0; + while (!PEND && ((c = PPEEK) != ':') && c != ']') { + PINC; + if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break; + } + if (c == ':' && ! PEND) { + PINC; + if (! PEND) { + PFETCH(c); + if (c == ']') + return ONIGERR_INVALID_POSIX_BRACKET_TYPE; + } + } + + return 1; /* 1: is not POSIX bracket, but no error. */ +} + +static int +fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env) +{ + int r; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar *prev, *start, *p = *src; + PFETCH_READY; + + r = 0; + start = prev = p; + + while (!PEND) { + prev = p; + PFETCH(c); + if (c == '}') { + r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev); + if (r < 0) break; + + *src = p; + return r; + } + else if (c == '(' || c == ')' || c == '{' || c == '|') { + r = ONIGERR_INVALID_CHAR_PROPERTY_NAME; + break; + } + } + + onig_scan_env_set_error_string(env, r, *src, prev); + return r; +} + +static int +parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, + ScanEnv* env) +{ + int r, ctype; + CClassNode* cc; + + ctype = fetch_char_property_to_ctype(src, end, env); + if (ctype < 0) return ctype; + + *np = node_new_cclass(); + CHECK_NULL_RETURN_MEMERR(*np); + cc = NCCLASS(*np); + r = add_ctype_to_cc(cc, ctype, 0, env); + if (r != 0) return r; + if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); + + return 0; +} + + +enum CCSTATE { + CCS_VALUE, + CCS_RANGE, + CCS_COMPLETE, + CCS_START +}; + +enum CCVALTYPE { + CCV_SB, + CCV_CODE_POINT, + CCV_CLASS +}; + +static int +next_state_class(CClassNode* 
cc, OnigCodePoint* vs, enum CCVALTYPE* type, + enum CCSTATE* state, ScanEnv* env) +{ + int r; + + if (*state == CCS_RANGE) + return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE; + + if (*state == CCS_VALUE && *type != CCV_CLASS) { + if (*type == CCV_SB) + BITSET_SET_BIT(cc->bs, (int )(*vs)); + else if (*type == CCV_CODE_POINT) { + r = add_code_range(&(cc->mbuf), env, *vs, *vs); + if (r < 0) return r; + } + } + + *state = CCS_VALUE; + *type = CCV_CLASS; + return 0; +} + +static int +next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, + int* vs_israw, int v_israw, + enum CCVALTYPE intype, enum CCVALTYPE* type, + enum CCSTATE* state, ScanEnv* env) +{ + int r; + + switch (*state) { + case CCS_VALUE: + if (*type == CCV_SB) + BITSET_SET_BIT(cc->bs, (int )(*vs)); + else if (*type == CCV_CODE_POINT) { + r = add_code_range(&(cc->mbuf), env, *vs, *vs); + if (r < 0) return r; + } + break; + + case CCS_RANGE: + if (intype == *type) { + if (intype == CCV_SB) { + if (*vs > 0xff || v > 0xff) + return ONIGERR_INVALID_CODE_POINT_VALUE; + + if (*vs > v) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + goto ccs_range_end; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; + } + bitset_set_range(cc->bs, (int )*vs, (int )v); + } + else { + r = add_code_range(&(cc->mbuf), env, *vs, v); + if (r < 0) return r; + } + } + else { +#if 0 + if (intype == CCV_CODE_POINT && *type == CCV_SB) { +#endif + if (*vs > v) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + goto ccs_range_end; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; + } + bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? 
v : 0xff)); + r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v); + if (r < 0) return r; +#if 0 + } + else + return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE; +#endif + } + ccs_range_end: + *state = CCS_COMPLETE; + break; + + case CCS_COMPLETE: + case CCS_START: + *state = CCS_VALUE; + break; + + default: + break; + } + + *vs_israw = v_israw; + *vs = v; + *type = intype; + return 0; +} + +static int +code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped, + ScanEnv* env) +{ + int in_esc; + OnigCodePoint code; + OnigEncoding enc = env->enc; + UChar* p = from; + PFETCH_READY; + + in_esc = 0; + while (! PEND) { + if (ignore_escaped && in_esc) { + in_esc = 0; + } + else { + PFETCH(code); + if (code == c) return 1; + if (code == MC_ESC(env->syntax)) in_esc = 1; + } + } + return 0; +} + +static int +parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, + ScanEnv* env) +{ + int r, neg, len, fetched, and_start; + OnigCodePoint v, vs; + UChar *p; + Node* node; + CClassNode *cc, *prev_cc; + CClassNode work_cc; + + enum CCSTATE state; + enum CCVALTYPE val_type, in_type; + int val_israw, in_israw; + + prev_cc = (CClassNode* )NULL; + *np = NULL_NODE; + r = fetch_token_in_cc(tok, src, end, env); + if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) { + neg = 1; + r = fetch_token_in_cc(tok, src, end, env); + } + else { + neg = 0; + } + + if (r < 0) return r; + if (r == TK_CC_CLOSE) { + if (! code_exist_check((OnigCodePoint )']', + *src, env->pattern_end, 1, env)) + return ONIGERR_EMPTY_CHAR_CLASS; + + CC_ESC_WARN(env, (UChar* )"]"); + r = tok->type = TK_CHAR; /* allow []...] 
*/ + } + + *np = node = node_new_cclass(); + CHECK_NULL_RETURN_MEMERR(node); + cc = NCCLASS(node); + + and_start = 0; + state = CCS_START; + p = *src; + while (r != TK_CC_CLOSE) { + fetched = 0; + switch (r) { + case TK_CHAR: + if ((tok->u.code >= SINGLE_BYTE_SIZE) || + (len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c)) > 1) { + in_type = CCV_CODE_POINT; + } + else if (len < 0) { + r = len; + goto err; + } + else { + sb_char: + in_type = CCV_SB; + } + v = (OnigCodePoint )tok->u.c; + in_israw = 0; + goto val_entry2; + break; + + case TK_RAW_BYTE: + /* tok->base != 0 : octal or hexadec. */ + if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) { + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN; + UChar* psave = p; + int i, base = tok->base; + + buf[0] = tok->u.c; + for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) { + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + if (r != TK_RAW_BYTE || tok->base != base) { + fetched = 1; + break; + } + buf[i] = tok->u.c; + } + + if (i < ONIGENC_MBC_MINLEN(env->enc)) { + r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; + goto err; + } + + len = enclen(env->enc, buf, buf+i); + if (i < len) { + r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; + goto err; + } + else if (i > len) { /* fetch back */ + p = psave; + for (i = 1; i < len; i++) { + r = fetch_token_in_cc(tok, &p, end, env); + } + fetched = 0; + } + + if (i == 1) { + v = (OnigCodePoint )buf[0]; + goto raw_single; + } + else { + v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe); + in_type = CCV_CODE_POINT; + } + } + else { + v = (OnigCodePoint )tok->u.c; + raw_single: + in_type = CCV_SB; + } + in_israw = 1; + goto val_entry2; + break; + + case TK_CODE_POINT: + v = tok->u.code; + in_israw = 1; + val_entry: + len = ONIGENC_CODE_TO_MBCLEN(env->enc, v); + if (len < 0) { + r = len; + goto err; + } + in_type = (len == 1 ? 
CCV_SB : CCV_CODE_POINT); + val_entry2: + r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type, + &state, env); + if (r != 0) goto err; + break; + + case TK_POSIX_BRACKET_OPEN: + r = parse_posix_bracket(cc, &p, end, env); + if (r < 0) goto err; + if (r == 1) { /* is not POSIX bracket */ + CC_ESC_WARN(env, (UChar* )"["); + p = tok->backp; + v = (OnigCodePoint )tok->u.c; + in_israw = 0; + goto val_entry; + } + goto next_class; + break; + + case TK_CHAR_TYPE: + r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env); + if (r != 0) return r; + + next_class: + r = next_state_class(cc, &vs, &val_type, &state, env); + if (r != 0) goto err; + break; + + case TK_CHAR_PROPERTY: + { + int ctype; + + ctype = fetch_char_property_to_ctype(&p, end, env); + if (ctype < 0) return ctype; + r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env); + if (r != 0) return r; + goto next_class; + } + break; + + case TK_CC_RANGE: + if (state == CCS_VALUE) { + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + fetched = 1; + if (r == TK_CC_CLOSE) { /* allow [x-] */ + range_end_val: + v = (OnigCodePoint )'-'; + in_israw = 0; + goto val_entry; + } + else if (r == TK_CC_AND) { + CC_ESC_WARN(env, (UChar* )"-"); + goto range_end_val; + } + state = CCS_RANGE; + } + else if (state == CCS_START) { + /* [-xa] is allowed */ + v = (OnigCodePoint )tok->u.c; + in_israw = 0; + + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + fetched = 1; + /* [--x] or [a&&-x] is warned. 
*/ + if (r == TK_CC_RANGE || and_start != 0) + CC_ESC_WARN(env, (UChar* )"-"); + + goto val_entry; + } + else if (state == CCS_RANGE) { + CC_ESC_WARN(env, (UChar* )"-"); + goto sb_char; /* [!--x] is allowed */ + } + else { /* CCS_COMPLETE */ + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + fetched = 1; + if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */ + else if (r == TK_CC_AND) { + CC_ESC_WARN(env, (UChar* )"-"); + goto range_end_val; + } + + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { + CC_ESC_WARN(env, (UChar* )"-"); + goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */ + } + r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; + goto err; + } + break; + + case TK_CC_CC_OPEN: /* [ */ + { + Node *anode; + CClassNode* acc; + + r = parse_char_class(&anode, tok, &p, end, env); + if (r == 0) { + acc = NCCLASS(anode); + r = or_cclass(cc, acc, env->enc); + } + onig_node_free(anode); + if (r != 0) goto err; + } + break; + + case TK_CC_AND: /* && */ + { + if (state == CCS_VALUE) { + r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, + &val_type, &state, env); + if (r != 0) goto err; + } + /* initialize local variables */ + and_start = 1; + state = CCS_START; + + if (IS_NOT_NULL(prev_cc)) { + r = and_cclass(prev_cc, cc, env->enc); + if (r != 0) goto err; + bbuf_free(cc->mbuf); + } + else { + prev_cc = cc; + cc = &work_cc; + } + initialize_cclass(cc); + } + break; + + case TK_EOT: + r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS; + goto err; + break; + default: + r = ONIGERR_PARSER_BUG; + goto err; + break; + } + + if (fetched) + r = tok->type; + else { + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + } + } + + if (state == CCS_VALUE) { + r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, + &val_type, &state, env); + if (r != 0) goto err; + } + + if (IS_NOT_NULL(prev_cc)) { + r = and_cclass(prev_cc, cc, env->enc); + if (r != 0) goto err; + bbuf_free(cc->mbuf); + cc = prev_cc; + } + + if (neg 
!= 0) + NCCLASS_SET_NOT(cc); + else + NCCLASS_CLEAR_NOT(cc); + if (IS_NCCLASS_NOT(cc) && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) { + int is_empty; + + is_empty = (IS_NULL(cc->mbuf) ? 1 : 0); + if (is_empty != 0) + BITSET_IS_EMPTY(cc->bs, is_empty); + + if (is_empty == 0) { +#define NEWLINE_CODE 0x0a + + if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) { + if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1) + BITSET_SET_BIT(cc->bs, NEWLINE_CODE); + else + add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE); + } + } + } + *src = p; + return 0; + + err: + if (cc != NCCLASS(*np)) + bbuf_free(cc->mbuf); + return r; +} + +static int parse_subexp(Node** top, OnigToken* tok, int term, + UChar** src, UChar* end, ScanEnv* env); + +static int +parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, + ScanEnv* env) +{ + int r, num; + Node *target; + OnigOptionType option; + OnigCodePoint c; + OnigEncoding enc = env->enc; + +#ifdef USE_NAMED_GROUP + int list_capture; +#endif + + UChar* p = *src; + PFETCH_READY; + + *np = NULL; + if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; + + option = env->option; + if (PPEEK_IS('?') && + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { + PINC; + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + + PFETCH(c); + switch (c) { + case ':': /* (?:...) grouping only */ + group: + r = fetch_token(tok, &p, end, env); + if (r < 0) return r; + r = parse_subexp(np, tok, term, &p, end, env); + if (r < 0) return r; + *src = p; + return 1; /* group */ + break; + + case '=': + *np = onig_node_new_anchor(ANCHOR_PREC_READ); + break; + case '!': /* preceding read */ + *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT); + break; + case '>': /* (?>...) 
stop backtrack */ + *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK); + break; + +#ifdef USE_NAMED_GROUP + case '\'': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { + goto named_group1; + } + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; +#endif + + case '<': /* look behind (?<=...), (?syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { + UChar *name; + UChar *name_end; + + PUNFETCH; + c = '<'; + + named_group1: + list_capture = 0; + + named_group2: + name = p; + r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0); + if (r < 0) return r; + + num = scan_env_add_mem_entry(env); + if (num < 0) return num; + if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM) + return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; + + r = name_add(env->reg, name, name_end, num, env); + if (r != 0) return r; + *np = node_new_enclose_memory(env->option, 1); + CHECK_NULL_RETURN_MEMERR(*np); + NENCLOSE(*np)->regnum = num; + if (list_capture != 0) + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); + env->num_named++; + } + else { + return ONIGERR_UNDEFINED_GROUP_OPTION; + } + } +#else + else { + return ONIGERR_UNDEFINED_GROUP_OPTION; + } +#endif + break; + + case '@': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) { +#ifdef USE_NAMED_GROUP + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { + PFETCH(c); + if (c == '<' || c == '\'') { + list_capture = 1; + goto named_group2; /* (?@...) 
*/ + } + PUNFETCH; + } +#endif + *np = node_new_enclose_memory(env->option, 0); + CHECK_NULL_RETURN_MEMERR(*np); + num = scan_env_add_mem_entry(env); + if (num < 0) { + onig_node_free(*np); + return num; + } + else if (num >= (int )BIT_STATUS_BITS_NUM) { + onig_node_free(*np); + return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; + } + NENCLOSE(*np)->regnum = num; + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); + } + else { + return ONIGERR_UNDEFINED_GROUP_OPTION; + } + break; + +#ifdef USE_POSIXLINE_OPTION + case 'p': +#endif + case '-': case 'i': case 'm': case 's': case 'x': + { + int neg = 0; + + while (1) { + switch (c) { + case ':': + case ')': + break; + + case '-': neg = 1; break; + case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break; + case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break; + case 's': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { + ONOFF(option, ONIG_OPTION_MULTILINE, neg); + } + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; + + case 'm': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { + ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 
1 : 0)); + } + else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) { + ONOFF(option, ONIG_OPTION_MULTILINE, neg); + } + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; +#ifdef USE_POSIXLINE_OPTION + case 'p': + ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg); + break; +#endif + default: + return ONIGERR_UNDEFINED_GROUP_OPTION; + } + + if (c == ')') { + *np = node_new_option(option); + CHECK_NULL_RETURN_MEMERR(*np); + *src = p; + return 2; /* option only */ + } + else if (c == ':') { + OnigOptionType prev = env->option; + + env->option = option; + r = fetch_token(tok, &p, end, env); + if (r < 0) return r; + r = parse_subexp(&target, tok, term, &p, end, env); + env->option = prev; + if (r < 0) return r; + *np = node_new_option(option); + CHECK_NULL_RETURN_MEMERR(*np); + NENCLOSE(*np)->target = target; + *src = p; + return 0; + } + + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH(c); + } + } + break; + + default: + return ONIGERR_UNDEFINED_GROUP_OPTION; + } + } + else { + if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP)) + goto group; + + *np = node_new_enclose_memory(env->option, 0); + CHECK_NULL_RETURN_MEMERR(*np); + num = scan_env_add_mem_entry(env); + if (num < 0) return num; + NENCLOSE(*np)->regnum = num; + } + + CHECK_NULL_RETURN_MEMERR(*np); + r = fetch_token(tok, &p, end, env); + if (r < 0) return r; + r = parse_subexp(&target, tok, term, &p, end, env); + if (r < 0) { + onig_node_free(target); + return r; + } + + if (NTYPE(*np) == NT_ANCHOR) + NANCHOR(*np)->target = target; + else { + NENCLOSE(*np)->target = target; + if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) { + /* Don't move this to previous of parse_subexp() */ + r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np); + if (r != 0) return r; + } + } + + *src = p; + return 0; +} + +static const char* const PopularQStr[] = { + "?", "*", "+", "??", "*?", "+?" 
+}; + +static const char* const ReduceQStr[] = { + "", "", "*", "*?", "??", "+ and ??", "+? and ?" +}; + +static int +set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) +{ + QtfrNode* qn; + + qn = NQTFR(qnode); + if (qn->lower == 1 && qn->upper == 1) { + return 1; + } + + switch (NTYPE(target)) { + case NT_STR: + if (! group) { + StrNode* sn = NSTR(target); + if (str_node_can_be_split(sn, env->enc)) { + Node* n = str_node_split_last_char(sn, env->enc); + if (IS_NOT_NULL(n)) { + qn->target = n; + return 2; + } + } + } + break; + + case NT_QTFR: + { /* check redundant double repeat. */ + /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */ + QtfrNode* qnt = NQTFR(target); + int nestq_num = popular_quantifier_num(qn); + int targetq_num = popular_quantifier_num(qnt); + +#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR + if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { + UChar buf[WARN_BUFSIZE]; + + switch(ReduceTypeTable[targetq_num][nestq_num]) { + case RQ_ASIS: + break; + + case RQ_DEL: + if (onig_verb_warn != onig_null_warn) { + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, + env->pattern, env->pattern_end, + (UChar* )"redundant nested repeat operator"); + (*onig_verb_warn)((char* )buf); + } + goto warn_exit; + break; + + default: + if (onig_verb_warn != onig_null_warn) { + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, + env->pattern, env->pattern_end, + (UChar* )"nested repeat operator %s and %s was replaced with '%s'", + PopularQStr[targetq_num], PopularQStr[nestq_num], + ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]); + (*onig_verb_warn)((char* )buf); + } + goto warn_exit; + break; + } + } + + warn_exit: +#endif + if (targetq_num >= 0) { + if (nestq_num >= 0) { + onig_reduce_nested_quantifier(qnode, target); + goto q_exit; + } + else if (targetq_num == 1 || targetq_num == 2) { /* * or + */ + /* (?:a*){n,m}, (?:a+){n,m} => 
(?:a*){n,n}, (?:a+){n,n} */ + if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) { + qn->upper = (qn->lower == 0 ? 1 : qn->lower); + } + } + } + } + break; + + default: + break; + } + + qn->target = target; + q_exit: + return 0; +} + + +#ifdef USE_SHARED_CCLASS_TABLE + +#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8 + +/* for ctype node hash table */ + +typedef struct { + OnigEncoding enc; + int not; + int type; +} type_cclass_key; + +static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y) +{ + if (x->type != y->type) return 1; + if (x->enc != y->enc) return 1; + if (x->not != y->not) return 1; + return 0; +} + +static int type_cclass_hash(type_cclass_key* key) +{ + int i, val; + UChar *p; + + val = 0; + + p = (UChar* )&(key->enc); + for (i = 0; i < (int )sizeof(key->enc); i++) { + val = val * 997 + (int )*p++; + } + + p = (UChar* )(&key->type); + for (i = 0; i < (int )sizeof(key->type); i++) { + val = val * 997 + (int )*p++; + } + + val += key->not; + return val + (val >> 5); +} + +static const struct st_hash_type type_type_cclass_hash = { + type_cclass_cmp, + type_cclass_hash, +}; + +static st_table* OnigTypeCClassTable; + + +static int +i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED) +{ + if (IS_NOT_NULL(node)) { + CClassNode* cc = NCCLASS(node); + if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf); + xfree(node); + } + + if (IS_NOT_NULL(key)) xfree(key); + return ST_DELETE; +} + +extern int +onig_free_shared_cclass_table(void) +{ + THREAD_ATOMIC_START; + if (IS_NOT_NULL(OnigTypeCClassTable)) { + onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0); + onig_st_free_table(OnigTypeCClassTable); + OnigTypeCClassTable = NULL; + } + THREAD_ATOMIC_END; + + return 0; +} + +#endif /* USE_SHARED_CCLASS_TABLE */ + + +#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS +static int +clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc) +{ + BBuf *tbuf; + int r; + + if (IS_NCCLASS_NOT(cc)) { + bitset_invert(cc->bs); + 
+ if (! ONIGENC_IS_SINGLEBYTE(enc)) { + r = not_code_range_buf(enc, cc->mbuf, &tbuf); + if (r != 0) return r; + + bbuf_free(cc->mbuf); + cc->mbuf = tbuf; + } + + NCCLASS_CLEAR_NOT(cc); + } + + return 0; +} +#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ + +typedef struct { + ScanEnv* env; + CClassNode* cc; + Node* alt_root; + Node** ptail; +} IApplyCaseFoldArg; + +static int +i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], + int to_len, void* arg) +{ + IApplyCaseFoldArg* iarg; + ScanEnv* env; + CClassNode* cc; + BitSetRef bs; + + iarg = (IApplyCaseFoldArg* )arg; + env = iarg->env; + cc = iarg->cc; + bs = cc->bs; + + if (to_len == 1) { + int is_in = onig_is_code_in_cc(env->enc, from, cc); +#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS + if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) || + (is_in == 0 && IS_NCCLASS_NOT(cc))) { + if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { + add_code_range(&(cc->mbuf), env, *to, *to); + } + else { + BITSET_SET_BIT(bs, *to); + } + } +#else + if (is_in != 0) { + if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { + if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc); + add_code_range(&(cc->mbuf), env, *to, *to); + } + else { + if (IS_NCCLASS_NOT(cc)) { + BITSET_CLEAR_BIT(bs, *to); + } + else + BITSET_SET_BIT(bs, *to); + } + } +#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ + } + else { + int r, i, len; + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + Node *snode = NULL_NODE; + + if (onig_is_code_in_cc(env->enc, from, cc) +#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS + && !IS_NCCLASS_NOT(cc) +#endif + ) { + for (i = 0; i < to_len; i++) { + len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf); + if (i == 0) { + snode = onig_node_new_str(buf, buf + len); + CHECK_NULL_RETURN_MEMERR(snode); + + /* char-class expanded multi-char only + compare with string folded at match time. 
*/ + NSTRING_SET_AMBIG(snode); + } + else { + r = onig_node_str_cat(snode, buf, buf + len); + if (r < 0) { + onig_node_free(snode); + return r; + } + } + } + + *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE); + CHECK_NULL_RETURN_MEMERR(*(iarg->ptail)); + iarg->ptail = &(NCDR((*(iarg->ptail)))); + } + } + + return 0; +} + +static int +parse_exp(Node** np, OnigToken* tok, int term, + UChar** src, UChar* end, ScanEnv* env) +{ + int r, len, group = 0; + Node* qn; + Node** targetp; + + *np = NULL; + if (tok->type == (enum TokenSyms )term) + goto end_of_token; + + switch (tok->type) { + case TK_ALT: + case TK_EOT: + end_of_token: + *np = node_new_empty(); + return tok->type; + + case TK_SUBEXP_OPEN: + r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env); + if (r < 0) return r; + if (r == 1) group = 1; + else if (r == 2) { /* option only */ + Node* target; + OnigOptionType prev = env->option; + + env->option = NENCLOSE(*np)->option; + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + r = parse_subexp(&target, tok, term, src, end, env); + env->option = prev; + if (r < 0) { + onig_node_free(target); + return r; + } + NENCLOSE(*np)->target = target; + return tok->type; + } + break; + + case TK_SUBEXP_CLOSE: + if (! 
IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP)) + return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS; + + if (tok->escaped) goto tk_raw_byte; + else goto tk_byte; + break; + + case TK_STRING: + tk_byte: + { + *np = node_new_str(tok->backp, *src); + CHECK_NULL_RETURN_MEMERR(*np); + + while (1) { + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + if (r != TK_STRING) break; + + r = onig_node_str_cat(*np, tok->backp, *src); + if (r < 0) return r; + } + + string_end: + targetp = np; + goto repeat; + } + break; + + case TK_RAW_BYTE: + tk_raw_byte: + { + *np = node_new_str_raw_char((UChar )tok->u.c); + CHECK_NULL_RETURN_MEMERR(*np); + len = 1; + while (1) { + if (len >= ONIGENC_MBC_MINLEN(env->enc)) { + if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) { + r = fetch_token(tok, src, end, env); + NSTRING_CLEAR_RAW(*np); + goto string_end; + } + } + + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + if (r != TK_RAW_BYTE) { + /* Don't use this, it is wrong for little endian encodings. 
*/ +#ifdef USE_PAD_TO_SHORT_BYTE_CHAR + int rem; + if (len < ONIGENC_MBC_MINLEN(env->enc)) { + rem = ONIGENC_MBC_MINLEN(env->enc) - len; + (void )node_str_head_pad(NSTR(*np), rem, (UChar )0); + if (len + rem == enclen(env->enc, NSTR(*np)->s)) { + NSTRING_CLEAR_RAW(*np); + goto string_end; + } + } +#endif + return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; + } + + r = node_str_cat_char(*np, (UChar )tok->u.c); + if (r < 0) return r; + + len++; + } + } + break; + + case TK_CODE_POINT: + { + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf); + if (num < 0) return num; +#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG + *np = node_new_str_raw(buf, buf + num); +#else + *np = node_new_str(buf, buf + num); +#endif + CHECK_NULL_RETURN_MEMERR(*np); + } + break; + + case TK_QUOTE_OPEN: + { + OnigCodePoint end_op[2]; + UChar *qstart, *qend, *nextp; + + end_op[0] = (OnigCodePoint )MC_ESC(env->syntax); + end_op[1] = (OnigCodePoint )'E'; + qstart = *src; + qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc); + if (IS_NULL(qend)) { + nextp = qend = end; + } + *np = node_new_str(qstart, qend); + CHECK_NULL_RETURN_MEMERR(*np); + *src = nextp; + } + break; + + case TK_CHAR_TYPE: + { + switch (tok->u.prop.ctype) { + case ONIGENC_CTYPE_D: + case ONIGENC_CTYPE_S: + case ONIGENC_CTYPE_W: + { + CClassNode* cc; + *np = node_new_cclass(); + CHECK_NULL_RETURN_MEMERR(*np); + cc = NCCLASS(*np); + add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env); + if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); + } + break; + + case ONIGENC_CTYPE_WORD: + *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not); + CHECK_NULL_RETURN_MEMERR(*np); + break; + + case ONIGENC_CTYPE_SPACE: + case ONIGENC_CTYPE_DIGIT: + case ONIGENC_CTYPE_XDIGIT: + { + CClassNode* cc; + +#ifdef USE_SHARED_CCLASS_TABLE + const OnigCodePoint *mbr; + OnigCodePoint sb_out; + + r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype, + &sb_out, &mbr); + if (r == 0 && + 
ONIGENC_CODE_RANGE_NUM(mbr) + >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) { + type_cclass_key key; + type_cclass_key* new_key; + + key.enc = env->enc; + key.not = tok->u.prop.not; + key.type = tok->u.prop.ctype; + + THREAD_ATOMIC_START; + + if (IS_NULL(OnigTypeCClassTable)) { + OnigTypeCClassTable + = onig_st_init_table_with_size(&type_type_cclass_hash, 10); + if (IS_NULL(OnigTypeCClassTable)) { + THREAD_ATOMIC_END; + return ONIGERR_MEMORY; + } + } + else { + if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key, + (st_data_t* )np)) { + THREAD_ATOMIC_END; + break; + } + } + + *np = node_new_cclass_by_codepoint_range(tok->u.prop.not, + sb_out, mbr); + if (IS_NULL(*np)) { + THREAD_ATOMIC_END; + return ONIGERR_MEMORY; + } + + cc = NCCLASS(*np); + NCCLASS_SET_SHARE(cc); + new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key)); + xmemcpy(new_key, &key, sizeof(type_cclass_key)); + onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key, + (st_data_t )*np); + + THREAD_ATOMIC_END; + } + else { +#endif + *np = node_new_cclass(); + CHECK_NULL_RETURN_MEMERR(*np); + cc = NCCLASS(*np); + add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env); + if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); +#ifdef USE_SHARED_CCLASS_TABLE + } +#endif + } + break; + + default: + return ONIGERR_PARSER_BUG; + break; + } + } + break; + + case TK_CHAR_PROPERTY: + r = parse_char_property(np, tok, src, end, env); + if (r != 0) return r; + break; + + case TK_CC_OPEN: + { + CClassNode* cc; + + r = parse_char_class(np, tok, src, end, env); + if (r != 0) return r; + + cc = NCCLASS(*np); + if (IS_IGNORECASE(env->option)) { + IApplyCaseFoldArg iarg; + + iarg.env = env; + iarg.cc = cc; + iarg.alt_root = NULL_NODE; + iarg.ptail = &(iarg.alt_root); + + r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag, + i_apply_case_fold, &iarg); + if (r != 0) { + onig_node_free(iarg.alt_root); + return r; + } + if (IS_NOT_NULL(iarg.alt_root)) { + Node* work = onig_node_new_alt(*np, iarg.alt_root); + if 
(IS_NULL(work)) { + onig_node_free(iarg.alt_root); + return ONIGERR_MEMORY; + } + *np = work; + } + } + } + break; + + case TK_ANYCHAR: + *np = node_new_anychar(); + CHECK_NULL_RETURN_MEMERR(*np); + break; + + case TK_ANYCHAR_ANYTIME: + *np = node_new_anychar(); + CHECK_NULL_RETURN_MEMERR(*np); + qn = node_new_quantifier(0, REPEAT_INFINITE, 0); + CHECK_NULL_RETURN_MEMERR(qn); + NQTFR(qn)->target = *np; + *np = qn; + break; + + case TK_BACKREF: + len = tok->u.backref.num; + *np = node_new_backref(len, + (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)), + tok->u.backref.by_name, +#ifdef USE_BACKREF_WITH_LEVEL + tok->u.backref.exist_level, + tok->u.backref.level, +#endif + env); + CHECK_NULL_RETURN_MEMERR(*np); + break; + +#ifdef USE_SUBEXP_CALL + case TK_CALL: + { + int gnum = tok->u.call.gnum; + + if (gnum < 0) { + gnum = BACKREF_REL_TO_ABS(gnum, env); + if (gnum <= 0) + return ONIGERR_INVALID_BACKREF; + } + *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum); + CHECK_NULL_RETURN_MEMERR(*np); + env->num_call++; + } + break; +#endif + + case TK_ANCHOR: + *np = onig_node_new_anchor(tok->u.anchor); + break; + + case TK_OP_REPEAT: + case TK_INTERVAL: + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS)) + return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED; + else + *np = node_new_empty(); + } + else { + goto tk_byte; + } + break; + + default: + return ONIGERR_PARSER_BUG; + break; + } + + { + targetp = np; + + re_entry: + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + + repeat: + if (r == TK_OP_REPEAT || r == TK_INTERVAL) { + if (is_invalid_quantifier_target(*targetp)) + return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; + + qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper, + (r == TK_INTERVAL ? 
1 : 0)); + CHECK_NULL_RETURN_MEMERR(qn); + NQTFR(qn)->greedy = tok->u.repeat.greedy; + r = set_quantifier(qn, *targetp, group, env); + if (r < 0) return r; + + if (tok->u.repeat.possessive != 0) { + Node* en; + en = node_new_enclose(ENCLOSE_STOP_BACKTRACK); + if (IS_NULL(en)) { + onig_node_free(qn); + return ONIGERR_MEMORY; + } + NENCLOSE(en)->target = qn; + qn = en; + } + + if (r == 0) { + *targetp = qn; + } + else if (r == 1) { + onig_node_free(qn); + } + else if (r == 2) { /* split case: /abc+/ */ + Node *tmp; + + *targetp = node_new_list(*targetp, NULL); + CHECK_NULL_RETURN_MEMERR(*targetp); + tmp = NCDR(*targetp) = node_new_list(qn, NULL); + CHECK_NULL_RETURN_MEMERR(tmp); + targetp = &(NCAR(tmp)); + } + goto re_entry; + } + } + + return r; +} + +static int +parse_branch(Node** top, OnigToken* tok, int term, + UChar** src, UChar* end, ScanEnv* env) +{ + int r; + Node *node, **headp; + + *top = NULL; + r = parse_exp(&node, tok, term, src, end, env); + if (r < 0) { + onig_node_free(node); + return r; + } + + if (r == TK_EOT || r == term || r == TK_ALT) { + *top = node; + } + else { + *top = node_new_list(node, NULL); + headp = &(NCDR(*top)); + while (r != TK_EOT && r != term && r != TK_ALT) { + r = parse_exp(&node, tok, term, src, end, env); + if (r < 0) { + onig_node_free(node); + return r; + } + + if (NTYPE(node) == NT_LIST) { + *headp = node; + while (IS_NOT_NULL(NCDR(node))) node = NCDR(node); + headp = &(NCDR(node)); + } + else { + *headp = node_new_list(node, NULL); + headp = &(NCDR(*headp)); + } + } + } + + return r; +} + +/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */ +static int +parse_subexp(Node** top, OnigToken* tok, int term, + UChar** src, UChar* end, ScanEnv* env) +{ + int r; + Node *node, **headp; + + *top = NULL; + r = parse_branch(&node, tok, term, src, end, env); + if (r < 0) { + onig_node_free(node); + return r; + } + + if (r == term) { + *top = node; + } + else if (r == TK_ALT) { + *top = onig_node_new_alt(node, NULL); + headp = &(NCDR(*top)); + 
while (r == TK_ALT) { + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + r = parse_branch(&node, tok, term, src, end, env); + if (r < 0) { + onig_node_free(node); + return r; + } + + *headp = onig_node_new_alt(node, NULL); + headp = &(NCDR(*headp)); + } + + if (tok->type != (enum TokenSyms )term) + goto err; + } + else { + onig_node_free(node); + err: + if (term == TK_SUBEXP_CLOSE) + return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; + else + return ONIGERR_PARSER_BUG; + } + + return r; +} + +static int +parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) +{ + int r; + OnigToken tok; + + r = fetch_token(&tok, src, end, env); + if (r < 0) return r; + r = parse_subexp(top, &tok, TK_EOT, src, end, env); + if (r < 0) return r; + return 0; +} + +extern int +onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, + regex_t* reg, ScanEnv* env) +{ + int r; + UChar* p; + +#ifdef USE_NAMED_GROUP + names_clear(reg); +#endif + + scan_env_clear(env); + env->option = reg->options; + env->case_fold_flag = reg->case_fold_flag; + env->enc = reg->enc; + env->syntax = reg->syntax; + env->pattern = (UChar* )pattern; + env->pattern_end = (UChar* )end; + env->reg = reg; + + *root = NULL; + p = (UChar* )pattern; + r = parse_regexp(root, &p, (UChar* )end, env); + reg->num_mem = env->num_mem; + return r; +} + +extern void +onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED, + UChar* arg, UChar* arg_end) +{ + env->error = arg; + env->error_end = arg_end; +} diff --git a/regparse.h b/regparse.h new file mode 100644 index 0000000..cd653f5 --- /dev/null +++ b/regparse.h @@ -0,0 +1,351 @@ +#ifndef ONIGURUMA_REGPARSE_H +#define ONIGURUMA_REGPARSE_H +/********************************************************************** + regparse.h - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" + +/* node type */ +#define NT_STR 0 +#define NT_CCLASS 1 +#define NT_CTYPE 2 +#define NT_CANY 3 +#define NT_BREF 4 +#define NT_QTFR 5 +#define NT_ENCLOSE 6 +#define NT_ANCHOR 7 +#define NT_LIST 8 +#define NT_ALT 9 +#define NT_CALL 10 + +/* node type bit */ +#define NTYPE2BIT(type) (1<<(type)) + +#define BIT_NT_STR NTYPE2BIT(NT_STR) +#define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS) +#define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE) +#define BIT_NT_CANY NTYPE2BIT(NT_CANY) +#define BIT_NT_BREF NTYPE2BIT(NT_BREF) +#define BIT_NT_QTFR NTYPE2BIT(NT_QTFR) +#define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE) +#define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR) +#define BIT_NT_LIST NTYPE2BIT(NT_LIST) +#define BIT_NT_ALT NTYPE2BIT(NT_ALT) +#define BIT_NT_CALL NTYPE2BIT(NT_CALL) + +#define IS_NODE_TYPE_SIMPLE(type) \ + ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\ + BIT_NT_CANY | BIT_NT_BREF)) != 0) + +#define NTYPE(node) ((node)->u.base.type) +#define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype) + +#define NSTR(node) (&((node)->u.str)) +#define NCCLASS(node) (&((node)->u.cclass)) +#define NCTYPE(node) (&((node)->u.ctype)) +#define NBREF(node) (&((node)->u.bref)) +#define NQTFR(node) (&((node)->u.qtfr)) +#define NENCLOSE(node) (&((node)->u.enclose)) +#define NANCHOR(node) (&((node)->u.anchor)) +#define NCONS(node) (&((node)->u.cons)) +#define NCALL(node) (&((node)->u.call)) + +#define NCAR(node) (NCONS(node)->car) +#define NCDR(node) (NCONS(node)->cdr) + + + +#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML) +#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) + +#define ENCLOSE_MEMORY (1<<0) +#define ENCLOSE_OPTION (1<<1) +#define ENCLOSE_STOP_BACKTRACK (1<<2) + +#define NODE_STR_MARGIN 16 +#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ +#define NODE_BACKREFS_SIZE 6 + +#define NSTR_RAW (1<<0) /* by backslashed number */ +#define NSTR_AMBIG (1<<1) +#define NSTR_DONT_GET_OPT_INFO 
(1<<2) + +#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s) +#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW +#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW +#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG +#define NSTRING_SET_DONT_GET_OPT_INFO(node) \ + (node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO +#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0) +#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0) +#define NSTRING_IS_DONT_GET_OPT_INFO(node) \ + (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0) + +#define BACKREFS_P(br) \ + (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static); + +#define NQ_TARGET_ISNOT_EMPTY 0 +#define NQ_TARGET_IS_EMPTY 1 +#define NQ_TARGET_IS_EMPTY_MEM 2 +#define NQ_TARGET_IS_EMPTY_REC 3 + +/* status bits */ +#define NST_MIN_FIXED (1<<0) +#define NST_MAX_FIXED (1<<1) +#define NST_CLEN_FIXED (1<<2) +#define NST_MARK1 (1<<3) +#define NST_MARK2 (1<<4) +#define NST_MEM_BACKREFED (1<<5) +#define NST_STOP_BT_SIMPLE_REPEAT (1<<6) +#define NST_RECURSION (1<<7) +#define NST_CALLED (1<<8) +#define NST_ADDR_FIXED (1<<9) +#define NST_NAMED_GROUP (1<<10) +#define NST_NAME_REF (1<<11) +#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. 
*/ +#define NST_NEST_LEVEL (1<<13) +#define NST_BY_NUMBER (1<<14) /* {n,m} */ + +#define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f) +#define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f) + +#define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0) +#define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0) +#define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0) +#define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0) +#define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0) +#define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0) +#define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0) +#define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0) +#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \ + (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0) +#define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0) + +#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION +#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0) +#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0) +#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0) +#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0) +#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0) +#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0) + +#define CALLNODE_REFNUM_UNDEF -1 + +typedef struct { + NodeBase base; + UChar* s; + UChar* end; + unsigned int flag; + int capa; /* (allocated size - 1) or 0: use buf[] */ + UChar buf[NODE_STR_BUF_SIZE]; +} StrNode; + +typedef struct { + NodeBase base; + int state; + struct _Node* target; + int lower; + int upper; + int greedy; + int target_empty_info; + struct _Node* head_exact; + struct _Node* next_head_exact; + int is_refered; /* include called node. 
don't eliminate even if {0} */ +#ifdef USE_COMBINATION_EXPLOSION_CHECK + int comb_exp_check_num; /* 1,2,3...: check, 0: no check */ +#endif +} QtfrNode; + +typedef struct { + NodeBase base; + int state; + int type; + int regnum; + OnigOptionType option; + struct _Node* target; + AbsAddrType call_addr; + /* for multiple call reference */ + OnigDistance min_len; /* min length (byte) */ + OnigDistance max_len; /* max length (byte) */ + int char_len; /* character length */ + int opt_count; /* referenced count in optimize_node_left() */ +} EncloseNode; + +#ifdef USE_SUBEXP_CALL + +typedef struct { + int offset; + struct _Node* target; +} UnsetAddr; + +typedef struct { + int num; + int alloc; + UnsetAddr* us; +} UnsetAddrList; + +typedef struct { + NodeBase base; + int state; + int group_num; + UChar* name; + UChar* name_end; + struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */ + UnsetAddrList* unset_addr_list; +} CallNode; + +#endif + +typedef struct { + NodeBase base; + int state; + int back_num; + int back_static[NODE_BACKREFS_SIZE]; + int* back_dynamic; + int nest_level; +} BRefNode; + +typedef struct { + NodeBase base; + int type; + struct _Node* target; + int char_len; +} AnchorNode; + +typedef struct { + NodeBase base; + struct _Node* car; + struct _Node* cdr; +} ConsAltNode; + +typedef struct { + NodeBase base; + int ctype; + int not; +} CtypeNode; + +typedef struct _Node { + union { + NodeBase base; + StrNode str; + CClassNode cclass; + QtfrNode qtfr; + EncloseNode enclose; + BRefNode bref; + AnchorNode anchor; + ConsAltNode cons; + CtypeNode ctype; +#ifdef USE_SUBEXP_CALL + CallNode call; +#endif + } u; +} Node; + + +#define NULL_NODE ((Node* )0) + +#define SCANENV_MEMNODES_SIZE 8 +#define SCANENV_MEM_NODES(senv) \ + (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? 
\ + (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static) + +typedef struct { + OnigOptionType option; + OnigCaseFoldType case_fold_flag; + OnigEncoding enc; + const OnigSyntaxType* syntax; + BitStatusType capture_history; + BitStatusType bt_mem_start; + BitStatusType bt_mem_end; + BitStatusType backrefed_mem; + UChar* pattern; + UChar* pattern_end; + UChar* error; + UChar* error_end; + regex_t* reg; /* for reg->names only */ + int num_call; +#ifdef USE_SUBEXP_CALL + UnsetAddrList* unset_addr_list; +#endif + int num_mem; +#ifdef USE_NAMED_GROUP + int num_named; +#endif + int mem_alloc; + Node* mem_nodes_static[SCANENV_MEMNODES_SIZE]; + Node** mem_nodes_dynamic; +#ifdef USE_COMBINATION_EXPLOSION_CHECK + int num_comb_exp_check; + int comb_exp_max_regnum; + int curr_max_regnum; + int has_recursion; +#endif +} ScanEnv; + + +#define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0) +#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0) +#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0) + +#ifdef USE_NAMED_GROUP +typedef struct { + int new_val; +} GroupNumRemap; + +extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map)); +#endif + +extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n)); +extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end)); +extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end)); +extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc)); +extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode)); +extern void onig_node_conv_to_str_node P_((Node* node, int raw)); +extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end)); +extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end)); +extern void onig_node_free P_((Node* node)); +extern Node* onig_node_new_enclose P_((int type)); +extern Node* onig_node_new_anchor P_((int type)); +extern Node* 
onig_node_new_str P_((const UChar* s, const UChar* end)); +extern Node* onig_node_new_list P_((Node* left, Node* right)); +extern Node* onig_node_list_add P_((Node* list, Node* x)); +extern Node* onig_node_new_alt P_((Node* left, Node* right)); +extern void onig_node_str_clear P_((Node* node)); +extern int onig_free_node_list P_((void)); +extern int onig_names_free P_((regex_t* reg)); +extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env)); +extern int onig_free_shared_cclass_table P_((void)); + +#ifdef ONIG_DEBUG +#ifdef USE_NAMED_GROUP +extern int onig_print_names(FILE*, regex_t*); +#endif +#endif + +#endif /* ONIGURUMA_REGPARSE_H */ diff --git a/regsyntax.c b/regsyntax.c new file mode 100644 index 0000000..96348b0 --- /dev/null +++ b/regsyntax.c @@ -0,0 +1,315 @@ +/********************************************************************** + regsyntax.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regint.h" + +const OnigSyntaxType OnigSyntaxASIS = { + 0 + , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE + , 0 + , ONIG_OPTION_NONE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; + +const OnigSyntaxType OnigSyntaxPosixBasic = { + ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | + ONIG_SYN_OP_ESC_BRACE_INTERVAL ) + , 0 + , 0 + , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; + +const OnigSyntaxType OnigSyntaxPosixExtended = { + ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP | + ONIG_SYN_OP_BRACE_INTERVAL | + ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT ) + , 0 + , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | + ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | + ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP | + ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) + , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; + +const OnigSyntaxType OnigSyntaxEmacs = { + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | + ONIG_SYN_OP_ESC_BRACE_INTERVAL | + ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT | + ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | + ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF | + ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS ) + , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR + , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC + , ONIG_OPTION_NONE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; + +const OnigSyntaxType OnigSyntaxGrep = { + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET | + ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | + ONIG_SYN_OP_ESC_VBAR_ALT | + ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF | + ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR | + ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND | + ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF ) + , 0 + , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC ) + , ONIG_OPTION_NONE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; + +const OnigSyntaxType OnigSyntaxGnuRegex = { + SYN_GNU_REGEX_OP + , 0 + , SYN_GNU_REGEX_BV + , ONIG_OPTION_NONE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; + +const OnigSyntaxType OnigSyntaxJava = { + (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | + ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL | + ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 ) + & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) + , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT | + ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | + ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP | + ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 | + ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY ) + , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND ) + , ONIG_OPTION_SINGLELINE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; + +const OnigSyntaxType OnigSyntaxPerl = { + (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | + ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_C_CONTROL ) + & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) + , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | + ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | + ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | + ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT ) + , SYN_GNU_REGEX_BV + , ONIG_OPTION_SINGLELINE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; + +/* Perl + named group */ +const OnigSyntaxType OnigSyntaxPerl_NG = { + (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | + ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_C_CONTROL ) + & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) + , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | + ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | + ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | + ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | + ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | + ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | + ONIG_SYN_OP2_ESC_G_SUBEXP_CALL ) + , ( SYN_GNU_REGEX_BV | + ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | + ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME ) + , ONIG_OPTION_SINGLELINE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; + + + +extern int +onig_set_default_syntax(const OnigSyntaxType* syntax) +{ + if (IS_NULL(syntax)) + syntax = ONIG_SYNTAX_RUBY; + + OnigDefaultSyntax = syntax; + return 0; +} + +extern void +onig_copy_syntax(OnigSyntaxType* to, const OnigSyntaxType* from) +{ + *to = *from; +} + +extern void +onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op) +{ + syntax->op = op; +} + +extern void +onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2) +{ + syntax->op2 = op2; +} + +extern void +onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior) +{ + syntax->behavior = behavior; +} + +extern void +onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options) +{ + syntax->options = options; +} + +extern unsigned int +onig_get_syntax_op(OnigSyntaxType* syntax) +{ + return syntax->op; +} + +extern unsigned int +onig_get_syntax_op2(OnigSyntaxType* syntax) +{ + return syntax->op2; +} + +extern unsigned int +onig_get_syntax_behavior(OnigSyntaxType* syntax) +{ + return syntax->behavior; +} + +extern OnigOptionType +onig_get_syntax_options(OnigSyntaxType* syntax) +{ + return syntax->options; +} + +#ifdef USE_VARIABLE_META_CHARS +extern int onig_set_meta_char(OnigSyntaxType* enc, + unsigned int what, OnigCodePoint code) +{ + switch (what) { + case ONIG_META_CHAR_ESCAPE: + enc->meta_char_table.esc = code; + break; + case ONIG_META_CHAR_ANYCHAR: + enc->meta_char_table.anychar = code; + break; + case ONIG_META_CHAR_ANYTIME: + enc->meta_char_table.anytime = code; + break; + case ONIG_META_CHAR_ZERO_OR_ONE_TIME: + enc->meta_char_table.zero_or_one_time = code; + break; + case ONIG_META_CHAR_ONE_OR_MORE_TIME: + enc->meta_char_table.one_or_more_time = code; + break; + case ONIG_META_CHAR_ANYCHAR_ANYTIME: + enc->meta_char_table.anychar_anytime = code; + break; + default: + return 
ONIGERR_INVALID_ARGUMENT; + break; + } + return 0; +} +#endif /* USE_VARIABLE_META_CHARS */ diff --git a/ruby.c b/ruby.c new file mode 100644 index 0000000..2794cac --- /dev/null +++ b/ruby.c @@ -0,0 +1,1858 @@ +/********************************************************************** + + ruby.c - + + $Author: yugui $ + created at: Tue Aug 10 12:47:31 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. + Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#ifdef __CYGWIN__ +#include +#include +#endif +#include "ruby/ruby.h" +#include "ruby/encoding.h" +#include "eval_intern.h" +#include "dln.h" +#include +#include +#include + +#ifdef __hpux +#include +#endif + +#ifdef HAVE_UNISTD_H +#include +#endif +#if defined(HAVE_FCNTL_H) +#include +#elif defined(HAVE_SYS_FCNTL_H) +#include +#endif +#ifdef HAVE_SYS_PARAM_H +# include +#endif +#ifndef MAXPATHLEN +# define MAXPATHLEN 1024 +#endif + +#include "ruby/util.h" + +#ifndef HAVE_STDLIB_H +char *getenv(); +#endif + +VALUE rb_parser_get_yydebug(VALUE); +VALUE rb_parser_set_yydebug(VALUE, VALUE); + +const char *ruby_get_inplace_mode(void); +void ruby_set_inplace_mode(const char *); + +#define DISABLE_BIT(bit) (1U << disable_##bit) +enum disable_flag_bits { + disable_gems, + disable_rubyopt +}; + +#define DUMP_BIT(bit) (1U << dump_##bit) +enum dump_flag_bits { + dump_insns +}; + +struct cmdline_options { + int sflag, xflag; + int do_loop, do_print; + int do_check, do_line; + int do_split, do_search; + int usage; + int version; + int copyright; + unsigned int disable; + int verbose; + int yydebug; + int safe_level; + unsigned int setids; + unsigned int dump; + const char *script; + VALUE script_name; + VALUE e_script; + struct { + struct { + VALUE name; + int index; + } enc; + } src, ext, intern; + VALUE req_list; +}; + +static void init_ids(struct cmdline_options 
*); + +#define src_encoding_index GET_VM()->src_encoding_index + /* Zero *opt, let init_ids() record setuid/setgid state, seed the source encoding index from the VM, and return opt. */ +static struct cmdline_options * +cmdline_options_init(struct cmdline_options *opt) +{ + MEMZERO(opt, *opt, 1); + init_ids(opt); + opt->src.enc.index = src_encoding_index; + return opt; +} + /* argc/argv and the option state bundled together; process_options() receives a pointer to this cast to VALUE. */ +struct cmdline_arguments { + int argc; + char **argv; + struct cmdline_options *opt; +}; + /* NOTE: the forbid_setid(s) macro below passes the local variable named opt at the call site as the second argument. */ +static NODE *load_file(VALUE, const char *, int, struct cmdline_options *); +static void forbid_setid(const char *, struct cmdline_options *); +#define forbid_setid(s) forbid_setid(s, opt) + /* argc/argv recorded by ruby_sysinit(); len, when compiled in, is the value returned by get_arglen(). */ +static struct { + int argc; + char **argv; +#if !defined(PSTAT_SETCMD) && !defined(HAVE_SETPROCTITLE) + int len; +#endif +} origarg; + /* Print the usage banner for program name, then one line per switch, to stdout. */ +static void +usage(const char *name) +{ + /* This message really ought to be max 23 lines. + * Removed -h because the user already knows that option. Others? */ + + static const char *const usage_msg[] = { + "-0[octal] specify record separator (\\0, if no argument)", + "-a autosplit mode with -n or -p (splits $_ into $F)", + "-c check syntax only", + "-Cdirectory cd to directory, before executing your script", + "-d set debugging flags (set $DEBUG to true)", + "-e 'command' one line of script. Several -e's allowed. Omit [programfile]", + "-Eex[:in] specify the default external and internal character encodings", + "-Fpattern split() pattern for autosplit (-a)", + "-i[extension] edit ARGV files in place (make backup if extension supplied)", + "-Idirectory specify $LOAD_PATH directory (may be used more than once)", + "-l enable line ending processing", + "-n assume 'while gets(); ...
end' loop around your script", + "-p assume loop like -n but print line also like sed", + "-rlibrary require the library, before executing your script", + "-s enable some switch parsing for switches after script name", + "-S look for the script using PATH environment variable", + "-T[level] turn on tainting checks", + "-v print version number, then turn on verbose mode", + "-w turn warnings on for your script", + "-W[level] set warning level; 0=silence, 1=medium, 2=verbose (default for level)", + "-x[directory] strip off text before #!ruby line and perhaps cd to directory", + "--copyright print the copyright", + "--version print the version", + NULL + }; + const char *const *p = usage_msg; + + printf("Usage: %s [switches] [--] [programfile] [arguments]\n", name); + while (*p) + printf(" %s\n", *p++); +} + +VALUE rb_get_load_path(void); + +#ifndef CharNext /* defined as CharNext[AW] on Windows. */ +#define CharNext(p) ((p) + mblen(p, RUBY_MBCHAR_MAXSIZE)) +#endif + /* translate_char: replace every character equal to from with to, in place, stepping through the string with CharNext(). */ +#if defined DOSISH || defined __CYGWIN__ +static inline void +translate_char(char *p, int from, int to) +{ + while (*p) { + if ((unsigned char)*p == from) + *p = to; + p = CharNext(p); + } +} +#endif + /* rubylib_mangled_path: return a new string for the first l bytes of s. RUBYLIB_PREFIX is read once and cached in statics as an old prefix followed by a new prefix, separated by whitespace or a semicolon; when s starts with the old prefix (case-insensitive compare) that prefix is replaced by the new one. An empty old or new prefix is fatal. */ +#if defined _WIN32 || defined __CYGWIN__ +static VALUE +rubylib_mangled_path(const char *s, unsigned int l) +{ + static char *newp, *oldp; + static int newl, oldl, notfound; + char *ptr; + VALUE ret; + + if (!newp && !notfound) { + newp = getenv("RUBYLIB_PREFIX"); + if (newp) { + oldp = newp = strdup(newp); + while (*newp && !ISSPACE(*newp) && *newp != ';') { + newp = CharNext(newp); /* Skip over the old-prefix token. */ + } + oldl = newp - oldp; + while (*newp && (ISSPACE(*newp) || *newp == ';')) { + newp = CharNext(newp); /* Skip whitespace.
*/ + } + newl = strlen(newp); + if (newl == 0 || oldl == 0) { + rb_fatal("malformed RUBYLIB_PREFIX"); + } + translate_char(newp, '\\', '/'); + } + else { + notfound = 1; + } + } + if (!newp || l < oldl || STRNCASECMP(oldp, s, oldl) != 0) { + return rb_str_new(s, l); + } + ret = rb_str_new(0, l + newl - oldl); + ptr = RSTRING_PTR(ret); + memcpy(ptr, newp, newl); + memcpy(ptr + newl, s + oldl, l - oldl); + ptr[l + newl - oldl] = 0; + return ret; +} + /* Convenience wrapper for NUL-terminated input. */ +static VALUE +rubylib_mangled_path2(const char *s) +{ + return rubylib_mangled_path(s, strlen(s)); +} +#else +#define rubylib_mangled_path rb_str_new +#define rubylib_mangled_path2 rb_str_new_cstr +#endif + /* Split path at PATH_SEP_CHAR (empty components are skipped) and append each component to the VM load path after passing it through rubylib_mangled_path() and then filter. */ +static void +push_include(const char *path, VALUE (*filter)(VALUE)) +{ + const char sep = PATH_SEP_CHAR; + const char *p, *s; + VALUE load_path = GET_VM()->load_path; + + p = path; + while (*p) { + while (*p == sep) + p++; + if (!*p) break; + for (s = p; *s && *s != sep; s = CharNext(s)); + rb_ary_push(load_path, (*filter)(rubylib_mangled_path(p, s - p))); + p = s; + } +} + /* Cygwin variant: split path at semicolons, use the result of cygwin_conv_to_posix_path() for a component when that call returns 0 (otherwise the component as given), and hand it to push_include(). */ +#ifdef __CYGWIN__ +static void +push_include_cygwin(const char *path, VALUE (*filter)(VALUE)) +{ + const char *p, *s; + char rubylib[FILENAME_MAX]; + VALUE buf = 0; + + p = path; + while (*p) { + unsigned int len; + while (*p == ';') + p++; + if (!*p) break; + for (s = p; *s && *s != ';'; s = CharNext(s)); + len = s - p; + if (*s) { + if (!buf) { + buf = rb_str_new(p, len); + p = RSTRING_PTR(buf); + } + else { + rb_str_resize(buf, len); + p = strncpy(RSTRING_PTR(buf), p, len); + } + } + if (cygwin_conv_to_posix_path(p, rubylib) == 0) + p = rubylib; + push_include(p, filter); + if (!*s) break; + p = s + 1; + } +} + +#define push_include push_include_cygwin +#endif + /* Public entry point: does nothing when path is NULL, otherwise calls push_include() (which is the Cygwin variant when compiled there). */ +void +ruby_push_include(const char *path, VALUE (*filter)(VALUE)) +{ + if (path == 0) + return; + push_include(path, filter); +} + /* Filter that returns the path unchanged. */ +static VALUE +identical_path(VALUE path) +{ + return path; +} /* locale_path: filter that associates the path with the locale encoding. */ +static VALUE +locale_path(VALUE path) +{ + rb_enc_associate(path, rb_locale_encoding());
+ return path; +} + /* Add the components of path to the load path, tagging each with the locale encoding. */ +void +ruby_incpush(const char *path) +{ + ruby_push_include(path, locale_path); +} + /* Filter: expand path with rb_file_expand_path() unless its buffer is NULL or it begins with a dot followed by a slash. */ +static VALUE +expand_include_path(VALUE path) +{ + char *p = RSTRING_PTR(path); + if (!p) + return path; + if (*p == '.' && p[1] == '/') + return path; + return rb_file_expand_path(path, Qnil); +} + /* Add the components of path to the load path, expanding each via expand_include_path(). */ +void +ruby_incpush_expand(const char *path) +{ + ruby_push_include(path, expand_include_path); +} + +#if defined DOSISH || defined __CYGWIN__ +#define LOAD_RELATIVE 1 +#endif + /* DllMain stores the module handle of this DLL on process attach; ruby_init_loadpath_safe() passes it to GetModuleFileName(). */ +#if defined _WIN32 || defined __CYGWIN__ +static HMODULE libruby; + +BOOL WINAPI +DllMain(HINSTANCE dll, DWORD reason, LPVOID reserved) +{ + if (reason == DLL_PROCESS_ATTACH) + libruby = dll; + return TRUE; +} +#endif + +void ruby_init_loadpath_safe(int safe_level); + /* Same as ruby_init_loadpath_safe(0). */ +void +ruby_init_loadpath(void) +{ + ruby_init_loadpath_safe(0); +} + /* Populate the VM load path: RUBYLIB (only when safe_level is 0), then the compiled-in site, vendor and standard library directories, then the current directory (only when safe_level is 0). With LOAD_RELATIVE the compiled-in paths are appended to the directory of the running binary, with a trailing /bin removed. */ +void +ruby_init_loadpath_safe(int safe_level) +{ + VALUE load_path; +#if defined LOAD_RELATIVE + char libpath[MAXPATHLEN + 1]; + char *p; + int rest; + +#if defined _WIN32 || defined __CYGWIN__ + GetModuleFileName(libruby, libpath, sizeof libpath); +#elif defined(__EMX__) + _execname(libpath, sizeof(libpath) - 1); +#endif + + libpath[sizeof(libpath) - 1] = '\0'; +#if defined DOSISH + translate_char(libpath, '\\', '/'); +#elif defined __CYGWIN__ + { + char rubylib[FILENAME_MAX]; + cygwin_conv_to_posix_path(libpath, rubylib); + /* copy at most sizeof(libpath) - 1 bytes so the terminator stored above always survives */ + strncpy(libpath, rubylib, sizeof(libpath) - 1); + } +#endif + p = strrchr(libpath, '/'); + if (p) { + *p = 0; + if (p - libpath > 3 && !STRCASECMP(p - 4, "/bin")) { + p -= 4; + *p = 0; + } + } + else { + strcpy(libpath, "."); + p = libpath + 1; + } + + rest = sizeof(libpath) - 1 - (p - libpath); + +#define RUBY_RELATIVE(path) (strncpy(p, (path), rest), libpath) +#else +#define RUBY_RELATIVE(path) (path) +#endif +#define incpush(path) rb_ary_push(load_path, rubylib_mangled_path2(path)) + load_path = GET_VM()->load_path; + + if (safe_level == 0) { + ruby_push_include(getenv("RUBYLIB"), identical_path); + } + +#ifdef RUBY_SEARCH_PATH +
incpush(RUBY_RELATIVE(RUBY_SEARCH_PATH)); +#endif + + incpush(RUBY_RELATIVE(RUBY_SITE_LIB2)); +#ifdef RUBY_SITE_THIN_ARCHLIB + incpush(RUBY_RELATIVE(RUBY_SITE_THIN_ARCHLIB)); +#endif + incpush(RUBY_RELATIVE(RUBY_SITE_ARCHLIB)); + incpush(RUBY_RELATIVE(RUBY_SITE_LIB)); + + incpush(RUBY_RELATIVE(RUBY_VENDOR_LIB2)); +#ifdef RUBY_VENDOR_THIN_ARCHLIB + incpush(RUBY_RELATIVE(RUBY_VENDOR_THIN_ARCHLIB)); +#endif + incpush(RUBY_RELATIVE(RUBY_VENDOR_ARCHLIB)); + incpush(RUBY_RELATIVE(RUBY_VENDOR_LIB)); + + incpush(RUBY_RELATIVE(RUBY_LIB)); +#ifdef RUBY_THIN_ARCHLIB + incpush(RUBY_RELATIVE(RUBY_THIN_ARCHLIB)); +#endif + incpush(RUBY_RELATIVE(RUBY_ARCHLIB)); + + if (safe_level == 0) { + incpush("."); + } +} + + /* Queue library name mod (from -r) on opt->req_list as a frozen string; the array is created, with its klass cleared, on first use. */ +static void +add_modules(struct cmdline_options *opt, const char *mod) +{ + VALUE list = opt->req_list; + + if (!list) { + opt->req_list = list = rb_ary_new(); + RBASIC(list)->klass = 0; + } + rb_ary_push(list, rb_obj_freeze(rb_str_new2(mod))); +} + +extern void Init_ext(void); +extern VALUE rb_vm_top_self(void); + /* Call Init_ext(), then require each queued feature in order on the top-level self. base_block and parse_in_eval of the current thread are cleared for the duration and restored afterwards; opt->req_list is reset to 0. */ +static void +require_libraries(struct cmdline_options *opt) +{ + VALUE list = opt->req_list; + ID require; + rb_thread_t *th = GET_THREAD(); + rb_block_t *prev_base_block = th->base_block; + int prev_parse_in_eval = th->parse_in_eval; + th->base_block = 0; + th->parse_in_eval = 0; + + Init_ext(); /* should be called here for some reason :-( */ + CONST_ID(require, "require"); + while (list && RARRAY_LEN(list) > 0) { + VALUE feature = rb_ary_shift(list); + rb_funcall2(rb_vm_top_self(), require, 1, &feature); + } + opt->req_list = 0; + + th->parse_in_eval = prev_parse_in_eval; + th->base_block = prev_base_block; +} + /* -s handling: each leading ARGV entry that starts with a hyphen is turned into a global variable (name after the hyphen, hyphens in the name become underscores, value is the text after = or true). Scanning stops at the first entry not starting with a hyphen, or after consuming a double hyphen; consumed entries are shifted off ARGV. A name with other characters raises NameError. Always clears opt->sflag. */ +static void +process_sflag(struct cmdline_options *opt) +{ + if (opt->sflag) { + long n; + VALUE *args; + VALUE argv = rb_argv; + + n = RARRAY_LEN(argv); + args = RARRAY_PTR(argv); + while (n > 0) { + VALUE v = *args++; + char *s = StringValuePtr(v); + char *p; + int hyphen = Qfalse; + + if (s[0] != '-') + break; + n--; + if (s[1] == '-' && s[2]
== '\0') + break; + + v = Qtrue; + /* check if valid name before replacing - with _ */ + for (p = s + 1; *p; p++) { + if (*p == '=') { + *p++ = '\0'; + v = rb_str_new2(p); + break; + } + if (*p == '-') { + hyphen = Qtrue; + } + else if (*p != '_' && !ISALNUM(*p)) { + VALUE name_error[2]; + name_error[0] = + rb_str_new2("invalid name for global variable - "); + if (!(p = strchr(p, '='))) { + rb_str_cat2(name_error[0], s); + } + else { + rb_str_cat(name_error[0], s, p - s); + } + name_error[1] = args[-1]; + rb_exc_raise(rb_class_new_instance(2, name_error, rb_eNameError)); + } + } + s[0] = '$'; + if (hyphen) { + for (p = s + 1; *p; ++p) { + if (*p == '-') + *p = '_'; + } + } + rb_gv_set(s, v); + } + /* n counted the entries left unconsumed; turn it into the number consumed and drop that many from ARGV */ + n = RARRAY_LEN(argv) - n; + while (n--) { + rb_ary_shift(argv); + } + } + opt->sflag = 0; +} + +NODE *rb_parser_append_print(VALUE, NODE *); +NODE *rb_parser_while_loop(VALUE, NODE *, int, int); +static int proc_options(int argc, char **argv, struct cmdline_options *opt, int envopt); + /* Parse the whitespace-separated switches in s (callers pass the RUBYOPT value or the tail of a shebang line): a temporary argv is built inside two temporary strings and fed to proc_options(), to which envopt is forwarded. Does nothing when s is empty or all whitespace. */ +static void +moreswitches(const char *s, struct cmdline_options *opt, int envopt) +{ + int argc, i; + char **argv, *p; + const char *ap = 0; + VALUE argstr, argary; + + while (ISSPACE(*s)) s++; + if (!*s) return; + argstr = rb_str_tmp_new(strlen(s) + 2); + argary = rb_str_tmp_new(0); + + p = RSTRING_PTR(argstr); + *p++ = ' '; + strcpy(p, s); + /* first slot is a NULL argv[0]; proc_options() starts at argv[1] */ ap = 0; + rb_str_cat(argary, (char *)&ap, sizeof(ap)); + while (*p) { + ap = p; + rb_str_cat(argary, (char *)&ap, sizeof(ap)); + while (*p && !ISSPACE(*p)) ++p; + if (!*p) break; + *p++ = '\0'; + while (ISSPACE(*p)) ++p; + } + argc = RSTRING_LEN(argary) / sizeof(ap); + /* NULL terminator for the argv array */ ap = 0; + rb_str_cat(argary, (char *)&ap, sizeof(ap)); + argv = (char **)RSTRING_PTR(argary); + + /* when parsing stops at a word without a leading hyphen, a hyphen is written into the byte before it and parsing resumes from there */ + while ((i = proc_options(argc, argv, opt, envopt)) > 1 && (argc -= i) > 0) { + argv += i; + if (**argv != '-') { + *--*argv = '-'; + } + if ((*argv)[1]) { + ++argc; + --argv; + } + } + + /* get rid of GC */ + rb_str_resize(argary, 0); + rb_str_resize(argstr, 0); +} + +#define
NAME_MATCH_P(name, str, len) \ + ((len) < sizeof(name) && strncmp((str), name, (len)) == 0) + +#define UNSET_WHEN(name, bit, str, len) \ + if (NAME_MATCH_P(name, str, len)) { \ + *(unsigned int *)arg &= ~(bit); \ + return; \ + } + +#define SET_WHEN(name, bit, str, len) \ + if (NAME_MATCH_P(name, str, len)) { \ + *(unsigned int *)arg |= (bit); \ + return; \ + } + +static void +enable_option(const char *str, int len, void *arg) +{ +#define UNSET_WHEN_DISABLE(bit) UNSET_WHEN(#bit, DISABLE_BIT(bit), str, len) + UNSET_WHEN_DISABLE(gems); + UNSET_WHEN_DISABLE(rubyopt); + if (NAME_MATCH_P("all", str, len)) { + *(unsigned int *)arg = 0U; + return; + } + rb_warn("unknown argument for --enable: `%.*s'", len, str); +} + +static void +disable_option(const char *str, int len, void *arg) +{ +#define SET_WHEN_DISABLE(bit) SET_WHEN(#bit, DISABLE_BIT(bit), str, len) + SET_WHEN_DISABLE(gems); + SET_WHEN_DISABLE(rubyopt); + if (NAME_MATCH_P("all", str, len)) { + *(unsigned int *)arg = ~0U; + return; + } + rb_warn("unknown argument for --disable: `%.*s'", len, str); +} + +static void +dump_option(const char *str, int len, void *arg) +{ +#define SET_WHEN_DUMP(bit) SET_WHEN(#bit, DUMP_BIT(bit), str, len) + SET_WHEN_DUMP(insns); + rb_warn("don't know how to dump `%.*s', (insns)", len, str); +} + +static void +set_option_encoding_once(const char *type, VALUE *name, const char *e, int elen) +{ + VALUE ename; + + if (!elen) elen = strlen(e); + ename = rb_str_new(e, elen); + + if (*name && + rb_funcall(ename, rb_intern("casecmp"), 1, *name) != INT2FIX(0)) { + rb_raise(rb_eRuntimeError, + "%s already set to %s", type, RSTRING_PTR(*name)); + } + *name = ename; +} + +#define set_internal_encoding_once(opt, e, elen) \ + set_option_encoding_once("default_intenal", &opt->intern.enc.name, e, elen) +#define set_external_encoding_once(opt, e, elen) \ + set_option_encoding_once("default_extenal", &opt->ext.enc.name, e, elen) +#define set_source_encoding_once(opt, e, elen) \ + 
set_option_encoding_once("source", &opt->src.enc.name, e, elen) + /* Parse the switches in argv[1..] into opt and return the number of argv entries consumed, counting argv[0] (0 when argc is 0). Parsing stops at the first argument that does not start with a hyphen, at a lone hyphen, or after a double hyphen. When envopt is non-zero most switches jump to noenvopt / noenvopt_long and raise RuntimeError. */ +static int +proc_options(int argc, char **argv, struct cmdline_options *opt, int envopt) +{ + int n, argc0 = argc; + const char *s; + + if (argc == 0) + return 0; + + for (argc--, argv++; argc > 0; argc--, argv++) { + const char *const arg = argv[0]; + if (arg[0] != '-' || !arg[1]) + break; + + s = arg + 1; + /* reswitch: re-dispatch on the next character of a bundled switch group */ reswitch: + switch (*s) { + case 'a': + if (envopt) goto noenvopt; + opt->do_split = Qtrue; + s++; + goto reswitch; + + case 'p': + if (envopt) goto noenvopt; + opt->do_print = Qtrue; + /* through */ + case 'n': + if (envopt) goto noenvopt; + opt->do_loop = Qtrue; + s++; + goto reswitch; + + case 'd': + ruby_debug = Qtrue; + ruby_verbose = Qtrue; + s++; + goto reswitch; + + case 'y': + if (envopt) goto noenvopt; + opt->yydebug = 1; + s++; + goto reswitch; + + case 'v': + if (opt->verbose) { + s++; + goto reswitch; + } + ruby_show_version(); + opt->verbose = 1; /* fall through: -v also turns warnings on */ + case 'w': + ruby_verbose = Qtrue; + s++; + goto reswitch; + + case 'W': + { + int numlen; + int v = 2; /* -W as -W2 */ + + if (*++s) { + v = scan_oct(s, 1, &numlen); + if (numlen == 0) + v = 1; + s += numlen; + } + switch (v) { + case 0: + ruby_verbose = Qnil; + break; + case 1: + ruby_verbose = Qfalse; + break; + default: + ruby_verbose = Qtrue; + break; + } + } + goto reswitch; + + case 'c': + if (envopt) goto noenvopt; + opt->do_check = Qtrue; + s++; + goto reswitch; + + case 's': + if (envopt) goto noenvopt; + forbid_setid("-s"); + opt->sflag = 1; + s++; + goto reswitch; + + case 'h': + if (envopt) goto noenvopt; + usage(origarg.argv[0]); + rb_exit(EXIT_SUCCESS); + break; + + case 'l': + if (envopt) goto noenvopt; + opt->do_line = Qtrue; + rb_output_rs = rb_rs; + s++; + goto reswitch; + + case 'S': + if (envopt) goto noenvopt; + forbid_setid("-S"); + opt->do_search = Qtrue; + s++; + goto reswitch; + + case 'e': + if (envopt) goto noenvopt; + forbid_setid("-e"); + if (!*++s) { + s = argv[1]; + argc--, argv++; + } + if (!s) { +
rb_raise(rb_eRuntimeError, "no code specified for -e"); + } + if (!opt->e_script) { + opt->e_script = rb_str_new(0, 0); + if (opt->script == 0) + opt->script = "-e"; + } + rb_str_cat2(opt->e_script, s); + rb_str_cat2(opt->e_script, "\n"); + break; + + case 'r': + forbid_setid("-r"); + if (*++s) { + add_modules(opt, s); + } + else if (argv[1]) { + add_modules(opt, argv[1]); + argc--, argv++; + } + break; + + case 'i': + if (envopt) goto noenvopt; + forbid_setid("-i"); + ruby_set_inplace_mode(s + 1); + break; + + case 'x': + if (envopt) goto noenvopt; + opt->xflag = Qtrue; + s++; + if (*s && chdir(s) < 0) { + rb_fatal("Can't chdir to %s", s); + } + break; + + case 'C': + case 'X': + if (envopt) goto noenvopt; + s++; + if (!*s) { + s = argv[1]; + argc--, argv++; + } + if (!s || !*s) { + rb_fatal("Can't chdir"); + } + if (chdir(s) < 0) { + rb_fatal("Can't chdir to %s", s); + } + break; + + case 'F': + if (envopt) goto noenvopt; + if (*++s) { + rb_fs = rb_reg_new(s, strlen(s), 0); + } + break; + + case 'E': + if (!*++s && (!--argc || !(s = *++argv))) { + rb_raise(rb_eRuntimeError, "missing argument for -E"); + } + /* shares the ex[:in] parser of --encoding */ goto encoding; + + case 'U': + set_internal_encoding_once(opt, "UTF-8", 0); + ++s; + goto reswitch; + + case 'K': /* one-letter code selecting the source encoding, and the external encoding when that is still unset */ + if (*++s) { + const char *enc_name = 0; + switch (*s) { + case 'E': case 'e': + enc_name = "EUC-JP"; + break; + case 'S': case 's': + enc_name = "Windows-31J"; + break; + case 'U': case 'u': + enc_name = "UTF-8"; + break; + case 'N': case 'n': case 'A': case 'a': + enc_name = "ASCII-8BIT"; + break; + } + if (enc_name) { + opt->src.enc.name = rb_str_new2(enc_name); + if (!opt->ext.enc.name) + opt->ext.enc.name = opt->src.enc.name; + } + s++; + } + goto reswitch; + + case 'T': + { + int numlen; + int v = 1; + + if (*++s) { + v = scan_oct(s, 2, &numlen); + if (numlen == 0) + v = 1; + s += numlen; + } + /* the safe level is only ever raised here */ if (v > opt->safe_level) opt->safe_level = v; + } + goto reswitch; + + case 'I': + forbid_setid("-I"); + if (*++s) + ruby_incpush_expand(s); + else
if (argv[1]) { + ruby_incpush_expand(argv[1]); + argc--, argv++; + } + break; + + case '0': /* octal record separator: a value above 0377 sets rb_rs to nil, zero written with two or more digits selects two newlines, anything else the single byte */ + if (envopt) goto noenvopt; + { + int numlen; + int v; + char c; + + v = scan_oct(s, 4, &numlen); + s += numlen; + if (v > 0377) + rb_rs = Qnil; + else if (v == 0 && numlen >= 2) { + rb_rs = rb_str_new2("\n\n"); + } + else { + c = v & 0xff; + rb_rs = rb_str_new(&c, 1); + } + } + goto reswitch; + + case '-': + if (!s[1] || (s[1] == '\r' && !s[2])) { + argc--, argv++; + goto switch_end; + } + s++; + +# define is_option_end(c, allow_hyphen) \ + (!(c) || (allow_hyphen && (c) == '-') || (c) == '=') +# define check_envopt(name, allow_envopt) \ + ((allow_envopt || !envopt) ? (void)0 : \ + rb_raise(rb_eRuntimeError, "invalid switch in RUBYOPT: --" name)) +# define need_argument(name, s) \ + ((*s++ ? !*s : (!--argc || !(s = *++argv))) ? \ + rb_raise(rb_eRuntimeError, "missing argument for --" name) \ + : (void)0) +# define is_option_with_arg(name, allow_hyphen, allow_envopt) \ + (strncmp(name, s, n = sizeof(name) - 1) == 0 && is_option_end(s[n], allow_hyphen) ?
\ + (check_envopt(name, allow_envopt), s += n, need_argument(name, s), 1) : 0) + /* long options: s points just past the double hyphen; is_option_with_arg() leaves s at the option argument */ + if (strcmp("copyright", s) == 0) { + if (envopt) goto noenvopt_long; + opt->copyright = 1; + } + else if (strcmp("debug", s) == 0) { + ruby_debug = Qtrue; + ruby_verbose = Qtrue; + } + else if (is_option_with_arg("enable", Qtrue, Qtrue)) { + ruby_each_words(s, enable_option, &opt->disable); + } + else if (is_option_with_arg("disable", Qtrue, Qtrue)) { + ruby_each_words(s, disable_option, &opt->disable); + } + else if (is_option_with_arg("encoding", Qfalse, Qtrue)) { + char *p; + encoding: + do { +# define set_encoding_part(type) \ + if (!(p = strchr(s, ':'))) { \ + set_##type##_encoding_once(opt, s, 0); \ + break; \ + } \ + else if (p > s) { \ + set_##type##_encoding_once(opt, s, p-s); \ + } + set_encoding_part(external); + if (!*(s = ++p)) break; + set_encoding_part(internal); + if (!*(s = ++p)) break; + rb_raise(rb_eRuntimeError, "extra argument for %s: %s", + (arg[1] == '-' ? "--encoding" : "-E"), s); +# undef set_encoding_part + } while (0); + } + else if (is_option_with_arg("internal-encoding", Qfalse, Qtrue)) { + set_internal_encoding_once(opt, s, 0); + } + else if (is_option_with_arg("external-encoding", Qfalse, Qtrue)) { + set_external_encoding_once(opt, s, 0); + } + else if (strcmp("version", s) == 0) { + if (envopt) goto noenvopt_long; + opt->version = 1; + } + else if (strcmp("verbose", s) == 0) { + opt->verbose = 1; + ruby_verbose = Qtrue; + } + else if (strcmp("yydebug", s) == 0) { + if (envopt) goto noenvopt_long; + opt->yydebug = 1; + } + else if (is_option_with_arg("dump", Qfalse, Qfalse)) { + ruby_each_words(s, dump_option, &opt->dump); + } + else if (strcmp("help", s) == 0) { + if (envopt) goto noenvopt_long; + usage(origarg.argv[0]); + rb_exit(EXIT_SUCCESS); + } + else { + rb_raise(rb_eRuntimeError, + "invalid option --%s (-h will show valid options)", s); + } + break; + + case '\r': + if (!s[1]) + break; + /* fall through: a carriage return followed by more text is reported as an invalid option */ + default: + { + if (ISPRINT(*s)) { +
rb_raise(rb_eRuntimeError, + "invalid option -%c (-h will show valid options)", + (int)(unsigned char)*s); + } + else { + rb_raise(rb_eRuntimeError, + "invalid option -\\x%02X (-h will show valid options)", + (int)(unsigned char)*s); + } + } + goto switch_end; + + noenvopt: + /* "EIdvwWrKU" only */ + rb_raise(rb_eRuntimeError, "invalid switch in RUBYOPT: -%c", *s); + break; + + noenvopt_long: + rb_raise(rb_eRuntimeError, "invalid switch in RUBYOPT: --%s", s); + break; + + case 0: + break; +# undef is_option_end +# undef check_envopt +# undef need_argument +# undef is_option_with_arg + } + } + + switch_end: + return argc0 - argc; +} + +void Init_prelude(void); + /* Define the Gem module when enable is non-zero, then call Init_prelude() in either case. */ +static void +ruby_init_gems(int enable) +{ + if (enable) rb_define_module("Gem"); + Init_prelude(); +} + /* Return the encoding index for the name held in enc_name; raises RuntimeError when the name is unknown or names a dummy encoding. */ +static int +opt_enc_index(VALUE enc_name) +{ + const char *s = RSTRING_PTR(enc_name); + int i = rb_enc_find_index(s); + + if (i < 0) { + rb_raise(rb_eRuntimeError, "unknown encoding name - %s", s); + } + else if (rb_enc_dummy_p(rb_enc_from_index(i))) { + rb_raise(rb_eRuntimeError, "dummy encoding is not acceptable - %s ", s); + } + return i; +} + +#define rb_progname (GET_VM()->progname) +VALUE rb_argv0; + /* Constant getters used by rb_define_readonly_boolean() below. */ +static VALUE +false_value(void) +{ + return Qfalse; +} + +static VALUE +true_value(void) +{ + return Qtrue; +} + +#define rb_define_readonly_boolean(name, val) \ + rb_define_virtual_variable((name), (val) ? true_value : false_value, 0) + /* Return the value from rb_lastline_get(); raises TypeError unless it is a String. */ +static VALUE +uscore_get() +{ + VALUE line; + + line = rb_lastline_get(); + if (TYPE(line) != T_STRING) { + rb_raise(rb_eTypeError, "$_ value need to be String (%s given)", + NIL_P(line) ? "nil" : rb_obj_classname(line)); + } + return line; +} + +/* + * call-seq: + * sub(pattern, replacement) => $_ + * sub(pattern) { block } => $_ + * + * Equivalent to $_.sub(args), except that + * $_ will be updated if substitution occurs. + * Available only when -p/-n command line option specified.
+ */ + +static VALUE +rb_f_sub(argc, argv) + int argc; + VALUE *argv; +{ + VALUE str = rb_funcall3(uscore_get(), rb_intern("sub"), argc, argv); + rb_lastline_set(str); + return str; +} + +/* + * call-seq: + * gsub(pattern, replacement) => string + * gsub(pattern) {|...| block } => string + * + * Equivalent to $_.gsub..., except that $_ + * receives the modified result. + * Available only when -p/-n command line option specified. + * + */ + +static VALUE +rb_f_gsub(argc, argv) + int argc; + VALUE *argv; +{ + VALUE str = rb_funcall3(uscore_get(), rb_intern("gsub"), argc, argv); + rb_lastline_set(str); + return str; +} + +/* + * call-seq: + * chop => string + * + * Equivalent to ($_.dup).chop!, except nil + * is never returned. See String#chop!. + * Available only when -p/-n command line option specified. + * + */ + +static VALUE +rb_f_chop() +{ + VALUE str = rb_funcall3(uscore_get(), rb_intern("chop"), 0, 0); + rb_lastline_set(str); + return str; +} + + +/* + * call-seq: + * chomp => $_ + * chomp(string) => $_ + * + * Equivalent to $_ = $_.chomp(string). See + * String#chomp. + * Available only when -p/-n command line option specified. 
+ * + */ + +static VALUE +rb_f_chomp(int argc, VALUE *argv) +{ + VALUE str = rb_funcall3(uscore_get(), rb_intern("chomp"), argc, argv); + rb_lastline_set(str); + return str; +} + +void rb_stdio_set_default_encoding(void); + /* Worker behind ruby_process_options(): arg is a struct cmdline_arguments pointer cast to VALUE. Parses the command line and RUBYOPT, initialises the load path and default encodings, compiles either the -e text or the script file, and returns the main iseq. Returns Qtrue when nothing is left to run (--version, -v with no script, -c, --dump insns) and Qfalse when no tree was produced. */ +static VALUE +process_options(VALUE arg) +{ + struct cmdline_arguments *argp = (struct cmdline_arguments *)arg; + struct cmdline_options *opt = argp->opt; + int argc = argp->argc; + char **argv = argp->argv; + NODE *tree = 0; + VALUE parser; + VALUE iseq; + rb_encoding *enc, *lenc; + const char *s; + char fbuf[MAXPATHLEN]; + int i = proc_options(argc, argv, opt, 0); + rb_thread_t *th = GET_THREAD(); + rb_env_t *env = 0; + + argc -= i; + argv += i; + + /* RUBYOPT is honoured only at safe level 0 and when not disabled; encoding names already given on the command line are put back afterwards, so they take precedence */ + if (!(opt->disable & DISABLE_BIT(rubyopt)) && + opt->safe_level == 0 && (s = getenv("RUBYOPT"))) { + VALUE src_enc_name = opt->src.enc.name; + VALUE ext_enc_name = opt->ext.enc.name; + VALUE int_enc_name = opt->intern.enc.name; + + opt->src.enc.name = opt->ext.enc.name = opt->intern.enc.name = 0; + moreswitches(s, opt, 1); + if (src_enc_name) + opt->src.enc.name = src_enc_name; + if (ext_enc_name) + opt->ext.enc.name = ext_enc_name; + if (int_enc_name) + opt->intern.enc.name = int_enc_name; + } + + if (opt->version) { + ruby_show_version(); + return Qtrue; + } + if (opt->copyright) { + ruby_show_copyright(); + } + + if (opt->safe_level >= 4) { + OBJ_TAINT(rb_argv); + OBJ_TAINT(GET_VM()->load_path); + } + + /* without -e the script is the next argument; an empty name or no argument means standard input */ + if (!opt->e_script) { + if (argc == 0) { /* no more args */ + if (opt->verbose) + return Qtrue; + opt->script = "-"; + } + else { + opt->script = argv[0]; + if (opt->script[0] == '\0') { + opt->script = "-"; + } + else if (opt->do_search) { + char *path = getenv("RUBYPATH"); + + opt->script = 0; + if (path) { + opt->script = dln_find_file_r(argv[0], path, fbuf, sizeof(fbuf)); + } + if (!opt->script) { + opt->script = dln_find_file_r(argv[0], getenv(PATH_ENV), fbuf, sizeof(fbuf)); + } + if (!opt->script) + opt->script = argv[0]; + } + argc--; + argv++; + } + } + + opt->script_name
= rb_str_new_cstr(opt->script); + opt->script = RSTRING_PTR(opt->script_name); +#if defined DOSISH || defined __CYGWIN__ + translate_char(RSTRING_PTR(opt->script_name), '\\', '/'); +#endif + rb_obj_freeze(opt->script_name); + + ruby_init_loadpath_safe(opt->safe_level); + rb_enc_find_index("encdb"); + lenc = rb_locale_encoding(); + rb_enc_associate(rb_progname, lenc); + parser = rb_parser_new(); + if (opt->yydebug) rb_parser_set_yydebug(parser, Qtrue); + /* resolve the encoding names collected so far into indexes */ + if (opt->ext.enc.name != 0) { + opt->ext.enc.index = opt_enc_index(opt->ext.enc.name); + } + if (opt->intern.enc.name != 0) { + opt->intern.enc.index = opt_enc_index(opt->intern.enc.name); + } + if (opt->src.enc.name != 0) { + opt->src.enc.index = opt_enc_index(opt->src.enc.name); + src_encoding_index = opt->src.enc.index; + } + if (opt->ext.enc.index >= 0) { + enc = rb_enc_from_index(opt->ext.enc.index); + } + else { + enc = lenc; + } + rb_enc_set_default_external(rb_enc_from_encoding(enc)); + if (opt->intern.enc.index >= 0) { + enc = rb_enc_from_index(opt->intern.enc.index); + rb_enc_set_default_internal(rb_enc_from_encoding(enc)); + opt->intern.enc.index = -1; + } + rb_enc_associate(opt->script_name, lenc); + { + long i; + VALUE load_path = GET_VM()->load_path; + for (i = 0; i < RARRAY_LEN(load_path); ++i) { + rb_enc_associate(RARRAY_PTR(load_path)[i], lenc); + } + } + ruby_init_gems(!(opt->disable & DISABLE_BIT(gems))); + ruby_set_argv(argc, argv); + process_sflag(opt); + + { + /* set eval context */ + VALUE toplevel_binding = rb_const_get(rb_cObject, rb_intern("TOPLEVEL_BINDING")); + rb_binding_t *bind; + + GetBindingPtr(toplevel_binding, bind); + GetEnvPtr(bind->env, env); + } + /* PREPARE_PARSE_MAIN runs expr with parse_in_eval decremented and base_block pointing at the TOPLEVEL_BINDING env block, then undoes both. */ +#define PREPARE_PARSE_MAIN(expr) do { \ + th->parse_in_eval--; \ + th->base_block = &env->block; \ + expr; \ + th->parse_in_eval++; \ + th->base_block = 0; \ +} while (0) + + if (opt->e_script) { + rb_encoding *eenc; + if (opt->src.enc.index >= 0) { + eenc = rb_enc_from_index(opt->src.enc.index); + } + else { + eenc = lenc; + } +
rb_enc_associate(opt->e_script, eenc); + require_libraries(opt); + + PREPARE_PARSE_MAIN({ + tree = rb_parser_compile_string(parser, opt->script, opt->e_script, 1); + }); + } + else { + if (opt->script[0] == '-' && !opt->script[1]) { + forbid_setid("program input from stdin"); + } + + PREPARE_PARSE_MAIN({ + tree = load_file(parser, opt->script, 1, opt); + }); + } + + /* load_file() may have processed switches from a shebang line, so the default encodings are applied once more */ + if (opt->ext.enc.index >= 0) { + enc = rb_enc_from_index(opt->ext.enc.index); + } + else { + enc = lenc; + } + rb_enc_set_default_external(rb_enc_from_encoding(enc)); + if (opt->intern.enc.index >= 0) { + /* Set in the shebang line */ + enc = rb_enc_from_index(opt->intern.enc.index); + rb_enc_set_default_internal(rb_enc_from_encoding(enc)); + } + else if (!rb_default_internal_encoding()) + /* Freeze default_internal */ + rb_enc_set_default_internal(Qnil); + rb_stdio_set_default_encoding(); + + if (!tree) return Qfalse; + + process_sflag(opt); + opt->xflag = 0; + + if (opt->safe_level >= 4) { + FL_UNSET(rb_argv, FL_TAINT); + FL_UNSET(GET_VM()->load_path, FL_TAINT); + } + + if (opt->do_check) { + printf("Syntax OK\n"); + return Qtrue; + } + + if (opt->do_print) { + PREPARE_PARSE_MAIN({ + tree = rb_parser_append_print(parser, tree); + }); + } + if (opt->do_loop) { + PREPARE_PARSE_MAIN({ + tree = rb_parser_while_loop(parser, tree, opt->do_line, opt->do_split); + }); + rb_define_global_function("sub", rb_f_sub, -1); + rb_define_global_function("gsub", rb_f_gsub, -1); + rb_define_global_function("chop", rb_f_chop, 0); + rb_define_global_function("chomp", rb_f_chomp, -1); + } + + PREPARE_PARSE_MAIN({ + iseq = rb_iseq_new_main(tree, opt->script_name); + }); + + if (opt->dump & DUMP_BIT(insns)) { + rb_io_write(rb_stdout, rb_iseq_disasm(iseq)); + rb_io_flush(rb_stdout); + return Qtrue; + } + + rb_set_safe_level(opt->safe_level); + rb_progname = opt->script_name; + + return iseq; +} + /* Arguments for load_file_internal(), passed through rb_ensure() as a single VALUE. */ +struct load_file_arg { + VALUE parser; + const char *fname; + int script; + struct cmdline_options *opt; +}; + +static VALUE
+load_file_internal(VALUE arg) +{ + /* Protected body of load_file(): arg is a struct load_file_arg pointer cast to VALUE. Opens fname (a lone hyphen selects rb_stdin), and when script is non-zero handles -x skipping and the shebang line before parsing. Returns the parsed NODE tree cast to VALUE. */ extern VALUE rb_stdin; + struct load_file_arg *argp = (struct load_file_arg *)arg; + VALUE parser = argp->parser; + const char *fname = argp->fname; + int script = argp->script; + struct cmdline_options *opt = argp->opt; + VALUE f; + int line_start = 1; + NODE *tree = 0; + rb_encoding *enc; + ID set_encoding; + + if (!fname) + rb_load_fail(fname); + if (strcmp(fname, "-") == 0) { + f = rb_stdin; + } + else { + int fd, mode = O_RDONLY; +#if defined DOSISH || defined __CYGWIN__ + { + const char *ext = strrchr(fname, '.'); + if (ext && STRCASECMP(ext, ".exe") == 0) + mode |= O_BINARY; + } +#endif + if ((fd = open(fname, mode)) < 0) { + rb_load_fail(fname); + } + + f = rb_io_fdopen(fd, mode, fname); + } + + CONST_ID(set_encoding, "set_encoding"); + if (script) { + VALUE c = 1; /* something not nil */ + VALUE line; + char *p; + /* remember which encodings were still unset, so that only names introduced by the shebang switches are resolved below */ int no_src_enc = !opt->src.enc.name; + int no_ext_enc = !opt->ext.enc.name; + int no_int_enc = !opt->intern.enc.name; + + enc = rb_ascii8bit_encoding(); + rb_funcall(f, set_encoding, 1, rb_enc_from_encoding(enc)); + + if (opt->xflag) { + forbid_setid("-x"); + opt->xflag = Qfalse; + /* -x: discard lines until one starts with the shebang marker and contains the text ruby */ while (!NIL_P(line = rb_io_gets(f))) { + line_start++; + if (RSTRING_LEN(line) > 2 + && RSTRING_PTR(line)[0] == '#' + && RSTRING_PTR(line)[1] == '!') { + if ((p = strstr(RSTRING_PTR(line), "ruby")) != 0) { + goto start_read; + } + } + } + rb_raise(rb_eLoadError, "no Ruby script found in input"); + } + + c = rb_io_getbyte(f); + if (c == INT2FIX('#')) { + c = rb_io_getbyte(f); + if (c == INT2FIX('!')) { + line = rb_io_gets(f); + if (NIL_P(line)) + return 0; + + if ((p = strstr(RSTRING_PTR(line), "ruby")) == 0) { + /* not ruby script, kick the program */ + char **argv; + char *path; + char *pend = RSTRING_PTR(line) + RSTRING_LEN(line); + + p = RSTRING_PTR(line); /* skip `#!'
*/ + if (pend[-1] == '\n') + pend--; /* chomp line */ + if (pend[-1] == '\r') + pend--; + *pend = '\0'; + while (p < pend && ISSPACE(*p)) + p++; + path = p; /* interpreter path */ + while (p < pend && !ISSPACE(*p)) + p++; + *p++ = '\0'; + /* text after the interpreter path becomes one extra argument placed ahead of the original arguments */ if (p < pend) { + argv = ALLOCA_N(char *, origarg.argc + 3); + argv[1] = p; + MEMCPY(argv + 2, origarg.argv + 1, char *, origarg.argc); + } + else { + argv = origarg.argv; + } + argv[0] = path; + execv(path, argv); + + rb_fatal("Can't exec %s", path); + } + + start_read: + /* p points at the text ruby inside line; step over those four bytes */ p += 4; + RSTRING_PTR(line)[RSTRING_LEN(line) - 1] = '\0'; + if (RSTRING_PTR(line)[RSTRING_LEN(line) - 2] == '\r') + RSTRING_PTR(line)[RSTRING_LEN(line) - 2] = '\0'; + if ((p = strstr(p, " -")) != 0) { + moreswitches(p + 1, opt, 0); + } + + /* push back shebang for pragma may exist in next line */ + rb_io_ungetbyte(f, rb_str_new2("!\n")); + } + else if (!NIL_P(c)) { + rb_io_ungetbyte(f, c); + } + rb_io_ungetbyte(f, INT2FIX('#')); + if (no_src_enc && opt->src.enc.name) { + opt->src.enc.index = opt_enc_index(opt->src.enc.name); + src_encoding_index = opt->src.enc.index; + } + if (no_ext_enc && opt->ext.enc.name) { + opt->ext.enc.index = opt_enc_index(opt->ext.enc.name); + } + if (no_int_enc && opt->intern.enc.name) { + opt->intern.enc.index = opt_enc_index(opt->intern.enc.name); + } + } + else if (!NIL_P(c)) { + rb_io_ungetbyte(f, c); + } + require_libraries(opt); /* Why here?
unnatural */ + } + /* encoding used to read the source: the explicit source encoding, else the locale encoding for stdin, else US-ASCII */ if (opt->src.enc.index >= 0) { + enc = rb_enc_from_index(opt->src.enc.index); + } + else if (f == rb_stdin) { + enc = rb_locale_encoding(); + } + else { + enc = rb_usascii_encoding(); + } + rb_funcall(f, set_encoding, 2, rb_enc_from_encoding(enc), rb_str_new_cstr("-")); + tree = (NODE *)rb_parser_compile_file(parser, fname, f, line_start); + rb_funcall(f, set_encoding, 1, rb_parser_encoding(parser)); + /* the stream is kept open as the DATA constant when rb_parser_end_seen_p() reports true for a script; any other non-stdin stream is closed */ if (script && rb_parser_end_seen_p(parser)) { + rb_define_global_const("DATA", f); + } + else if (f != rb_stdin) { + rb_io_close(f); + } + else { + rb_io_ungetbyte(f, Qnil); + } + return (VALUE)tree; +} + /* Ensure handler for load_file(): store lineno back into the $. global. */ +static VALUE +restore_lineno(VALUE lineno) +{ + return rb_gv_set("$.", lineno); +} + /* Run load_file_internal() under rb_ensure() so that the value $. had on entry is restored afterwards; returns the parsed tree. */ +static NODE * +load_file(VALUE parser, const char *fname, int script, struct cmdline_options *opt) +{ + struct load_file_arg arg; + arg.parser = parser; + arg.fname = fname; + arg.script = script; + arg.opt = opt; + return (NODE *)rb_ensure(load_file_internal, (VALUE)&arg, restore_lineno, rb_gv_get("$.")); +} + /* Parse fname with a new parser and freshly initialised options; the script flag is 0, so the -x and shebang handling in load_file_internal() is skipped. */ +void * +rb_load_file(const char *fname) +{ + struct cmdline_options opt; + + return load_file(rb_parser_new(), fname, 0, cmdline_options_init(&opt)); +} + +#if !defined(PSTAT_SETCMD) && !defined(HAVE_SETPROCTITLE) +#if !defined(_WIN32) && !(defined(HAVE_SETENV) && defined(HAVE_UNSETENV)) +#define USE_ENVSPACE_FOR_ARG0 +#endif + +#ifdef USE_ENVSPACE_FOR_ARG0 +extern char **environ; +#endif + /* Return the byte distance from argv[0] to the end of the last argument that immediately follows its predecessor in memory (0 when argc is 0). With USE_ENVSPACE_FOR_ARG0 the span is extended over environ strings that continue the same run, and ruby_setenv() is then called. */ +static int +get_arglen(int argc, char **argv) +{ + char *s = argv[0]; + int i; + + if (!argc) return 0; + s += strlen(s); + /* See if all the arguments are contiguous in memory */ + for (i = 1; i < argc; i++) { + if (argv[i] == s + 1) { + s++; + s += strlen(s); /* this one is ok too */ + } + else { + break; + } + } +#if defined(USE_ENVSPACE_FOR_ARG0) + if (environ && (s == environ[0])) { + s += strlen(s); + for (i = 1; environ[i]; i++) { + if (environ[i] == s + 1) { + s++; + s += strlen(s); /* this one is ok too */ + } + } + ruby_setenv("", NULL); /* duplicate
environ vars */ + } +#endif + return s - argv[0]; +} +#endif + /* Setter hooked to $0 and $PROGRAM_NAME: publishes val as the process title through pstat(), setproctitle(), or by overwriting the recorded argv area (truncated to origarg.len bytes, the remainder filled with spaces and every later argv entry pointed at the terminator), then stores a frozen copy in rb_progname. Raises RuntimeError when origarg.argv has not been set. */ +static void +set_arg0(VALUE val, ID id) +{ + char *s; + long i; + + if (origarg.argv == 0) + rb_raise(rb_eRuntimeError, "$0 not initialized"); + StringValue(val); + s = RSTRING_PTR(val); + i = RSTRING_LEN(val); +#if defined(PSTAT_SETCMD) + if (i > PST_CLEN) { + union pstun un; + char buf[PST_CLEN + 1]; /* PST_CLEN is 64 (HP-UX 11.23) */ + strlcpy(buf, s, sizeof(buf)); + un.pst_command = buf; + pstat(PSTAT_SETCMD, un, PST_CLEN, 0, 0); + } + else { + union pstun un; + un.pst_command = s; + pstat(PSTAT_SETCMD, un, i, 0, 0); + } +#elif defined(HAVE_SETPROCTITLE) + setproctitle("%.*s", (int)i, s); +#else + + if (i >= origarg.len) { + i = origarg.len; + } + + memcpy(origarg.argv[0], s, i); + + { + int j; + char *t = origarg.argv[0] + i; + *t = '\0'; + + if (i + 1 < origarg.len) memset(t + 1, ' ', origarg.len - i - 1); + for (j = 1; j < origarg.argc; j++) { + origarg.argv[j] = t; + } + } +#endif + rb_progname = rb_obj_freeze(rb_external_str_new(s, i)); +} + /* Set rb_progname to a frozen string built from name; a NULL name is ignored. */ +void +ruby_script(const char *name) +{ + if (name) { + rb_progname = rb_obj_freeze(rb_external_str_new(name, strlen(name))); + } +} + /* Record in opt->setids whether the real and effective uid (bit 1) or gid (bit 2) differ; when either differs and the real uid is non-zero, safe_level is raised to at least 1. */ +static void +init_ids(struct cmdline_options *opt) +{ + rb_uid_t uid = getuid(); + rb_uid_t euid = geteuid(); + rb_gid_t gid = getgid(); + rb_gid_t egid = getegid(); + + if (uid != euid) opt->setids |= 1; + if (egid != gid) opt->setids |= 2; + if (uid && opt->setids) { + if (opt->safe_level < 1) opt->safe_level = 1; + } +} + /* forbid_setid: raise SecurityError naming s when a setuid or setgid bit is recorded in opt->setids or safe_level is above 0. The macro of the same name is removed first so the real two-argument function can be defined. */ +#undef forbid_setid +static void +forbid_setid(const char *s, struct cmdline_options *opt) +{ + if (opt->setids & 1) + rb_raise(rb_eSecurityError, "no %s allowed while running setuid", s); + if (opt->setids & 2) + rb_raise(rb_eSecurityError, "no %s allowed while running setgid", s); + if (opt->safe_level > 0) + rb_raise(rb_eSecurityError, "no %s allowed in tainted mode", s); +} + /* Setter hooked to $VERBOSE, $-v and $-w: a truthy val is stored as Qtrue, any other val is stored as given. */ +static void +verbose_setter(VALUE val, ID id, void *data) +{ + VALUE *variable = data; + *variable = RTEST(val) ?
Qtrue : val; +} + +static VALUE +opt_W_getter(ID id, void *data) +{ + VALUE *variable = data; + switch (*variable) { + case Qnil: + return INT2FIX(0); + case Qfalse: + return INT2FIX(1); + case Qtrue: + return INT2FIX(2); + } + return Qnil; /* not reached */ +} + +void +ruby_prog_init(void) +{ + rb_define_hooked_variable("$VERBOSE", &ruby_verbose, 0, verbose_setter); + rb_define_hooked_variable("$-v", &ruby_verbose, 0, verbose_setter); + rb_define_hooked_variable("$-w", &ruby_verbose, 0, verbose_setter); + rb_define_hooked_variable("$-W", &ruby_verbose, opt_W_getter, rb_gvar_readonly_setter); + rb_define_variable("$DEBUG", &ruby_debug); + rb_define_variable("$-d", &ruby_debug); + + rb_define_hooked_variable("$0", &rb_progname, 0, set_arg0); + rb_define_hooked_variable("$PROGRAM_NAME", &rb_progname, 0, set_arg0); + + rb_define_global_const("ARGV", rb_argv); +} + +void +ruby_set_argv(int argc, char **argv) +{ + int i; + VALUE av = rb_argv; + +#if defined(USE_DLN_A_OUT) + if (origarg.argv) + dln_argv0 = origarg.argv[0]; + else + dln_argv0 = argv[0]; +#endif + rb_ary_clear(av); + for (i = 0; i < argc; i++) { + VALUE arg = rb_external_str_new(argv[i], strlen(argv[i])); + + OBJ_FREEZE(arg); + rb_ary_push(av, arg); + } +} + +void * +ruby_process_options(int argc, char **argv) +{ + struct cmdline_arguments args; + struct cmdline_options opt; + NODE *tree; + + ruby_script(argv[0]); /* for the time being */ + rb_argv0 = rb_str_new4(rb_progname); + rb_gc_register_mark_object(rb_argv0); + args.argc = argc; + args.argv = argv; + args.opt = cmdline_options_init(&opt); + opt.ext.enc.index = -1; + opt.intern.enc.index = -1; + tree = (NODE *)rb_vm_call_cfunc(rb_vm_top_self(), + process_options, (VALUE)&args, + 0, rb_progname); + + rb_define_readonly_boolean("$-p", opt.do_print); + rb_define_readonly_boolean("$-l", opt.do_line); + rb_define_readonly_boolean("$-a", opt.do_split); + + return tree; +} + +void +ruby_sysinit(int *argc, char ***argv) +{ +#if defined(__APPLE__) && 
(defined(__MACH__) || defined(__DARWIN__)) + int i, n = *argc, len = 0; + char **v1 = *argv, **v2, *p; + + for (i = 0; i < n; ++i) { + len += strlen(v1[i]) + 1; + } + v2 = malloc((n + 1)* sizeof(char*) + len); + p = (char *)&v2[n + 1]; + for (i = 0; i < n; ++i) { + int l = strlen(v1[i]); + memcpy(p, v1[i], l + 1); + v2[i] = p; + p += l + 1; + } + v2[n] = 0; + *argv = v2; +#elif defined(_WIN32) + void rb_w32_sysinit(int *argc, char ***argv); + rb_w32_sysinit(argc, argv); +#endif + origarg.argc = *argc; + origarg.argv = *argv; +#if !defined(PSTAT_SETCMD) && !defined(HAVE_SETPROCTITLE) + origarg.len = get_arglen(origarg.argc, origarg.argv); +#endif +#if defined(USE_DLN_A_OUT) + dln_argv0 = origarg.argv[0]; +#endif +} diff --git a/rubytest.rb b/rubytest.rb new file mode 100755 index 0000000..ef23d64 --- /dev/null +++ b/rubytest.rb @@ -0,0 +1,28 @@ +#! ./miniruby + +exit if defined?(CROSS_COMPILING) +ruby = ENV["RUBY"] +unless ruby + load './rbconfig.rb' + ruby = "./#{RbConfig::CONFIG['ruby_install_name']}#{RbConfig::CONFIG['EXEEXT']}" +end +unless File.exist? 
ruby + print "#{ruby} is not found.\n" + print "Try `make' first, then `make test', please.\n" + exit false +end + +$stderr.reopen($stdout) +error = '' + +srcdir = File.dirname(__FILE__) +`#{ruby} #{srcdir}/sample/test.rb`.each_line do |line| + if line =~ /^end of test/ + print "\ntest succeeded\n" + exit true + end + error << line if %r:^(sample/test.rb|not): =~ line +end +print error +print "test failed\n" +exit false diff --git a/runruby.rb b/runruby.rb new file mode 100755 index 0000000..c439784 --- /dev/null +++ b/runruby.rb @@ -0,0 +1,79 @@ +#!./miniruby + +pure = true +while arg = ARGV[0] + break ARGV.shift if arg == '--' + /\A--([-\w]+)(?:=(.*))?\z/ =~ arg or break + arg, value = $1, $2 + re = Regexp.new('\A'+arg.gsub(/\w+\b/, '\&\\w*')+'\z', "i") + case + when re =~ "srcdir" + srcdir = value + when re =~ "archdir" + archdir = value + when re =~ "extout" + extout = value + when re =~ "pure" + pure = (value != "no") + when re =~ "debugger" + debugger = value ? (value.split unless value == "no") : %w"gdb --args" + else + break + end + ARGV.shift +end + +srcdir ||= File.dirname(__FILE__) +archdir ||= '.' 
+ +abs_archdir = File.expand_path(archdir) +$:.unshift(abs_archdir) + +require 'rbconfig' +config = RbConfig::CONFIG + +ruby = File.join(archdir, config["RUBY_INSTALL_NAME"]+config['EXEEXT']) +unless File.exist?(ruby) + abort "#{ruby} is not found.\nTry `make' first, then `make test', please.\n" +end + +libs = [abs_archdir] +if extout + abs_extout = File.expand_path(extout) + libs << File.expand_path("common", abs_extout) << File.expand_path(RUBY_PLATFORM, abs_extout) +end +libs << File.expand_path("lib", srcdir) +config["bindir"] = abs_archdir + +env = {} + +env["RUBY"] = File.expand_path(ruby) +env["PATH"] = [abs_archdir, ENV["PATH"]].compact.join(File::PATH_SEPARATOR) + +if pure + libs << File.expand_path("ext", srcdir) << "-" +elsif e = ENV["RUBYLIB"] + libs |= e.split(File::PATH_SEPARATOR) +end +env["RUBYLIB"] = $:.replace(libs).join(File::PATH_SEPARATOR) + +libruby_so = File.join(abs_archdir, config['LIBRUBY_SO']) +if File.file?(libruby_so) + if e = config['LIBPATHENV'] and !e.empty? 
+ env[e] = [abs_archdir, ENV[e]].compact.join(File::PATH_SEPARATOR) + end + if /linux/ =~ RUBY_PLATFORM + env["LD_PRELOAD"] = [libruby_so, ENV["LD_PRELOAD"]].compact.join(' ') + end +end + +ENV.update env + +cmd = [ruby] +cmd << "-rpurelib.rb" if pure +cmd.concat(ARGV) +cmd.unshift(*debugger) if debugger + +#require 'shellwords'; puts Shellwords.join(env.map {|k,v| "#{k}=#{v}" } + cmd) + +exec(*cmd) diff --git a/safe.c b/safe.c new file mode 100644 index 0000000..6a6be15 --- /dev/null +++ b/safe.c @@ -0,0 +1,129 @@ +/********************************************************************** + + eval.c - + + $Author: yugui $ + created at: Tue Sep 23 09:44:32 JST 2008 + + Copyright (C) 2008 Yukihiro Matsumoto + +**********************************************************************/ + +/* safe-level: + 0 - strings from streams/environment/ARGV are tainted (default) + 1 - no dangerous operation by tainted value + 2 - process/file operations prohibited + 3 - all generated objects are tainted + 4 - no global (non-tainted) variable modification/no direct output +*/ + +#define SAFE_LEVEL_MAX 4 + +#include "ruby/ruby.h" +#include "vm_core.h" + +/* $SAFE accessor */ + +int +rb_safe_level(void) +{ + return GET_THREAD()->safe_level; +} + +void +rb_set_safe_level_force(int safe) +{ + GET_THREAD()->safe_level = safe; +} + +void +rb_set_safe_level(int level) +{ + rb_thread_t *th = GET_THREAD(); + + if (level > th->safe_level) { + if (level > SAFE_LEVEL_MAX) { + level = SAFE_LEVEL_MAX; + } + th->safe_level = level; + } +} + +static VALUE +safe_getter(void) +{ + return INT2NUM(rb_safe_level()); +} + +static void +safe_setter(VALUE val) +{ + int level = NUM2INT(val); + rb_thread_t *th = GET_THREAD(); + + if (level < th->safe_level) { + rb_raise(rb_eSecurityError, + "tried to downgrade safe level from %d to %d", + th->safe_level, level); + } + if (level == 3) { + rb_warning("$SAFE=3 does no sandboxing; you might want to use $SAFE=4"); + } + if (level > SAFE_LEVEL_MAX) { + level = 
SAFE_LEVEL_MAX; + } + th->safe_level = level; +} + +void +rb_secure(int level) +{ + if (level <= rb_safe_level()) { + if (rb_frame_callee()) { + rb_raise(rb_eSecurityError, "Insecure operation `%s' at level %d", + rb_id2name(rb_frame_callee()), rb_safe_level()); + } + else { + rb_raise(rb_eSecurityError, "Insecure operation at level %d", + rb_safe_level()); + } + } +} + +void +rb_secure_update(VALUE obj) +{ + if (!OBJ_TAINTED(obj)) + rb_secure(4); +} + +void +rb_check_safe_obj(VALUE x) +{ + if (rb_safe_level() > 0 && OBJ_TAINTED(x)) { + if (rb_frame_callee()) { + rb_raise(rb_eSecurityError, "Insecure operation - %s", + rb_id2name(rb_frame_callee())); + } + else { + rb_raise(rb_eSecurityError, "Insecure operation: -r"); + } + } + rb_secure(4); +} + +void +rb_check_safe_str(VALUE x) +{ + rb_check_safe_obj(x); + if (TYPE(x) != T_STRING) { + rb_raise(rb_eTypeError, "wrong argument type %s (expected String)", + rb_obj_classname(x)); + } +} + +void +Init_safe(void) +{ + rb_define_virtual_variable("$SAFE", safe_getter, safe_setter); +} diff --git a/signal.c b/signal.c new file mode 100644 index 0000000..5da1769 --- /dev/null +++ b/signal.c @@ -0,0 +1,1155 @@ +/********************************************************************** + + signal.c - + + $Author: yugui $ + created at: Tue Dec 20 10:13:44 JST 1994 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "vm_core.h" +#include +#include +#include + +#ifdef _WIN32 +typedef LONG rb_atomic_t; + +# define ATOMIC_TEST(var) InterlockedExchange(&(var), 0) +# define ATOMIC_SET(var, val) InterlockedExchange(&(var), (val)) +# define ATOMIC_INC(var) InterlockedIncrement(&(var)) +# define ATOMIC_DEC(var) InterlockedDecrement(&(var)) + +#else +typedef int rb_atomic_t; + +# define ATOMIC_TEST(var) ((var) ? ((var) = 0, 1) : 0) +# define ATOMIC_SET(var, val) ((var) = (val)) +# define ATOMIC_INC(var) (++(var)) +# define ATOMIC_DEC(var) (--(var)) +#endif + +#ifdef __BEOS__ +#undef SIGBUS +#endif + +#if defined HAVE_SIGPROCMASK || defined HAVE_SIGSETMASK +#define USE_TRAP_MASK 1 +#else +#define USE_TRAP_MASK 0 +#endif + +#ifndef NSIG +# define NSIG (_SIGMAX + 1) /* For QNX */ +#endif + +static const struct signals { + const char *signm; + int signo; +} siglist [] = { + {"EXIT", 0}, +#ifdef SIGHUP + {"HUP", SIGHUP}, +#endif + {"INT", SIGINT}, +#ifdef SIGQUIT + {"QUIT", SIGQUIT}, +#endif +#ifdef SIGILL + {"ILL", SIGILL}, +#endif +#ifdef SIGTRAP + {"TRAP", SIGTRAP}, +#endif +#ifdef SIGIOT + {"IOT", SIGIOT}, +#endif +#ifdef SIGABRT + {"ABRT", SIGABRT}, +#endif +#ifdef SIGEMT + {"EMT", SIGEMT}, +#endif +#ifdef SIGFPE + {"FPE", SIGFPE}, +#endif +#ifdef SIGKILL + {"KILL", SIGKILL}, +#endif +#ifdef SIGBUS + {"BUS", SIGBUS}, +#endif +#ifdef SIGSEGV + {"SEGV", SIGSEGV}, +#endif +#ifdef SIGSYS + {"SYS", SIGSYS}, +#endif +#ifdef SIGPIPE + {"PIPE", SIGPIPE}, +#endif +#ifdef SIGALRM + {"ALRM", SIGALRM}, +#endif +#ifdef SIGTERM + {"TERM", SIGTERM}, +#endif +#ifdef SIGURG + {"URG", SIGURG}, +#endif +#ifdef SIGSTOP + {"STOP", SIGSTOP}, +#endif +#ifdef SIGTSTP + {"TSTP", SIGTSTP}, +#endif +#ifdef SIGCONT + {"CONT", SIGCONT}, +#endif +#ifdef SIGCHLD + {"CHLD", SIGCHLD}, +#endif +#ifdef SIGCLD + {"CLD", SIGCLD}, +#else +# ifdef 
SIGCHLD + {"CLD", SIGCHLD}, +# endif +#endif +#ifdef SIGTTIN + {"TTIN", SIGTTIN}, +#endif +#ifdef SIGTTOU + {"TTOU", SIGTTOU}, +#endif +#ifdef SIGIO + {"IO", SIGIO}, +#endif +#ifdef SIGXCPU + {"XCPU", SIGXCPU}, +#endif +#ifdef SIGXFSZ + {"XFSZ", SIGXFSZ}, +#endif +#ifdef SIGVTALRM + {"VTALRM", SIGVTALRM}, +#endif +#ifdef SIGPROF + {"PROF", SIGPROF}, +#endif +#ifdef SIGWINCH + {"WINCH", SIGWINCH}, +#endif +#ifdef SIGUSR1 + {"USR1", SIGUSR1}, +#endif +#ifdef SIGUSR2 + {"USR2", SIGUSR2}, +#endif +#ifdef SIGLOST + {"LOST", SIGLOST}, +#endif +#ifdef SIGMSG + {"MSG", SIGMSG}, +#endif +#ifdef SIGPWR + {"PWR", SIGPWR}, +#endif +#ifdef SIGPOLL + {"POLL", SIGPOLL}, +#endif +#ifdef SIGDANGER + {"DANGER", SIGDANGER}, +#endif +#ifdef SIGMIGRATE + {"MIGRATE", SIGMIGRATE}, +#endif +#ifdef SIGPRE + {"PRE", SIGPRE}, +#endif +#ifdef SIGGRANT + {"GRANT", SIGGRANT}, +#endif +#ifdef SIGRETRACT + {"RETRACT", SIGRETRACT}, +#endif +#ifdef SIGSOUND + {"SOUND", SIGSOUND}, +#endif +#ifdef SIGINFO + {"INFO", SIGINFO}, +#endif + {NULL, 0} +}; + +static int +signm2signo(const char *nm) +{ + const struct signals *sigs; + + for (sigs = siglist; sigs->signm; sigs++) + if (strcmp(sigs->signm, nm) == 0) + return sigs->signo; + return 0; +} + +static const char* +signo2signm(int no) +{ + const struct signals *sigs; + + for (sigs = siglist; sigs->signm; sigs++) + if (sigs->signo == no) + return sigs->signm; + return 0; +} + +const char * +ruby_signal_name(int no) +{ + return signo2signm(no); +} + +/* + * call-seq: + * SignalException.new(sig) => signal_exception + * + * Construct a new SignalException object. +sig+ should be a known + * signal name, or a signal number. 
+ */ + +static VALUE +esignal_init(int argc, VALUE *argv, VALUE self) +{ + int argnum = 1; + VALUE sig = Qnil; + int signo; + const char *signm; + + if (argc > 0) { + sig = rb_check_to_integer(argv[0], "to_int"); + if (!NIL_P(sig)) argnum = 2; + else sig = argv[0]; + } + if (argc < 1 || argnum < argc) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", + argc, argnum); + } + if (argnum == 2) { + signo = NUM2INT(sig); + if (signo < 0 || signo > NSIG) { + rb_raise(rb_eArgError, "invalid signal number (%d)", signo); + } + if (argc > 1) { + sig = argv[1]; + } + else { + signm = signo2signm(signo); + if (signm) { + sig = rb_sprintf("SIG%s", signm); + } + else { + sig = rb_sprintf("SIG%u", signo); + } + } + } + else { + signm = SYMBOL_P(sig) ? rb_id2name(SYM2ID(sig)) : StringValuePtr(sig); + if (strncmp(signm, "SIG", 3) == 0) signm += 3; + signo = signm2signo(signm); + if (!signo) { + rb_raise(rb_eArgError, "unsupported name `SIG%s'", signm); + } + sig = rb_sprintf("SIG%s", signm); + } + rb_call_super(1, &sig); + rb_iv_set(self, "signo", INT2NUM(signo)); + + return self; +} + +/* + * call-seq: + * signal_exception.signo => num + * + * Returns a signal number. + */ + +static VALUE +esignal_signo(VALUE self) +{ + return rb_iv_get(self, "signo"); +} + +static VALUE +interrupt_init(int argc, VALUE *argv, VALUE self) +{ + VALUE args[2]; + + args[0] = INT2FIX(SIGINT); + rb_scan_args(argc, argv, "01", &args[1]); + return rb_call_super(2, args); +} + +void +ruby_default_signal(int sig) +{ + signal(sig, SIG_DFL); + raise(sig); +} + +/* + * call-seq: + * Process.kill(signal, pid, ...) => fixnum + * + * Sends the given signal to the specified process id(s), or to the + * current process if _pid_ is zero. _signal_ may be an + * integer signal number or a POSIX signal name (either with or without + * a +SIG+ prefix). If _signal_ is negative (or starts + * with a minus sign), kills process groups instead of + * processes. Not all signals are available on all platforms. 
+ * + * pid = fork do + * Signal.trap("HUP") { puts "Ouch!"; exit } + * # ... do some work ... + * end + * # ... + * Process.kill("HUP", pid) + * Process.wait + * + * produces: + * + * Ouch! + */ + +VALUE +rb_f_kill(int argc, VALUE *argv) +{ +#ifndef HAS_KILLPG +#define killpg(pg, sig) kill(-(pg), sig) +#endif + int negative = 0; + int sig; + int i; + const char *s; + + rb_secure(2); + if (argc < 2) + rb_raise(rb_eArgError, "wrong number of arguments -- kill(sig, pid...)"); + switch (TYPE(argv[0])) { + case T_FIXNUM: + sig = FIX2INT(argv[0]); + break; + + case T_SYMBOL: + s = rb_id2name(SYM2ID(argv[0])); + if (!s) rb_raise(rb_eArgError, "bad signal"); + goto str_signal; + + case T_STRING: + s = RSTRING_PTR(argv[0]); + if (s[0] == '-') { + negative++; + s++; + } + str_signal: + if (strncmp("SIG", s, 3) == 0) + s += 3; + if((sig = signm2signo(s)) == 0) + rb_raise(rb_eArgError, "unsupported name `SIG%s'", s); + + if (negative) + sig = -sig; + break; + + default: + { + VALUE str; + + str = rb_check_string_type(argv[0]); + if (!NIL_P(str)) { + s = RSTRING_PTR(str); + goto str_signal; + } + rb_raise(rb_eArgError, "bad signal type %s", + rb_obj_classname(argv[0])); + } + break; + } + + if (sig < 0) { + sig = -sig; + for (i=1; i +#endif + +void +rb_disable_interrupt(void) +{ +#ifndef _WIN32 + sigset_t mask; + sigfillset(&mask); + sigdelset(&mask, SIGVTALRM); + sigdelset(&mask, SIGSEGV); + pthread_sigmask(SIG_SETMASK, &mask, NULL); +#endif +} + +void +rb_enable_interrupt(void) +{ +#ifndef _WIN32 + sigset_t mask; + sigemptyset(&mask); + pthread_sigmask(SIG_SETMASK, &mask, NULL); +#endif +} + +int +rb_get_next_signal(void) +{ + int i, sig = 0; + + for (i=1; i 0) { + rb_disable_interrupt(); + { + ATOMIC_DEC(signal_buff.cnt[i]); + ATOMIC_DEC(signal_buff.size); + } + rb_enable_interrupt(); + sig = i; + break; + } + } + return sig; +} + +#ifdef SIGBUS +static RETSIGTYPE +sigbus(int sig) +{ + rb_bug("Bus Error"); +} +#endif + +#ifdef SIGSEGV +static int segv_received = 0; +static 
RETSIGTYPE +sigsegv(int sig SIGINFO_ARG) +{ +#ifdef USE_SIGALTSTACK + int ruby_stack_overflowed_p(const rb_thread_t *, const void *); + NORETURN(void ruby_thread_stack_overflow(rb_thread_t *th)); + rb_thread_t *th = GET_THREAD(); + if (ruby_stack_overflowed_p(th, info->si_addr)) { + ruby_thread_stack_overflow(th); + } +#endif + if (segv_received) { + fprintf(stderr, "SEGV recieved in SEGV handler\n"); + exit(EXIT_FAILURE); + } + else { + extern int ruby_disable_gc_stress; + segv_received = 1; + ruby_disable_gc_stress = 1; + rb_bug("Segmentation fault"); + } +} +#endif + +#ifdef SIGPIPE +static RETSIGTYPE +sigpipe(int sig) +{ + /* do nothing */ +} +#endif + +static void +signal_exec(VALUE cmd, int safe, int sig) +{ + VALUE signum = INT2NUM(sig); + rb_eval_cmd(cmd, rb_ary_new3(1, signum), safe); +} + +void +rb_trap_exit(void) +{ + rb_vm_t *vm = GET_VM(); + VALUE trap_exit = vm->trap_list[0].cmd; + + if (trap_exit) { + vm->trap_list[0].cmd = 0; + signal_exec(trap_exit, vm->trap_list[0].safe, 0); + } +} + +void +rb_signal_exec(rb_thread_t *th, int sig) +{ + rb_vm_t *vm = GET_VM(); + VALUE cmd = vm->trap_list[sig].cmd; + int safe = vm->trap_list[sig].safe; + + if (cmd == 0) { + switch (sig) { + case SIGINT: + rb_interrupt(); + break; +#ifdef SIGHUP + case SIGHUP: +#endif +#ifdef SIGQUIT + case SIGQUIT: +#endif +#ifdef SIGTERM + case SIGTERM: +#endif +#ifdef SIGALRM + case SIGALRM: +#endif +#ifdef SIGUSR1 + case SIGUSR1: +#endif +#ifdef SIGUSR2 + case SIGUSR2: +#endif + rb_thread_signal_raise(th, sig); + break; + } + } + else if (cmd == Qundef) { + rb_thread_signal_exit(th); + } + else { + signal_exec(cmd, safe, sig); + } +} + +struct trap_arg { +#if USE_TRAP_MASK +# ifdef HAVE_SIGPROCMASK + sigset_t mask; +# else + int mask; +# endif +#endif + int sig; + sighandler_t func; + VALUE cmd; +}; + +static sighandler_t +default_handler(int sig) +{ + sighandler_t func; + switch (sig) { + case SIGINT: +#ifdef SIGHUP + case SIGHUP: +#endif +#ifdef SIGQUIT + case SIGQUIT: +#endif 
+#ifdef SIGTERM + case SIGTERM: +#endif +#ifdef SIGALRM + case SIGALRM: +#endif +#ifdef SIGUSR1 + case SIGUSR1: +#endif +#ifdef SIGUSR2 + case SIGUSR2: +#endif + func = sighandler; + break; +#ifdef SIGBUS + case SIGBUS: + func = sigbus; + break; +#endif +#ifdef SIGSEGV + case SIGSEGV: + func = (sighandler_t)sigsegv; +# ifdef USE_SIGALTSTACK + register_sigaltstack(); +# endif + break; +#endif +#ifdef SIGPIPE + case SIGPIPE: + func = sigpipe; + break; +#endif + default: + func = SIG_DFL; + break; + } + + return func; +} + +static sighandler_t +trap_handler(VALUE *cmd, int sig) +{ + sighandler_t func = sighandler; + VALUE command; + + if (NIL_P(*cmd)) { + func = SIG_IGN; + } + else { + command = rb_check_string_type(*cmd); + if (NIL_P(command) && SYMBOL_P(*cmd)) { + command = rb_id2str(SYM2ID(*cmd)); + if (!command) rb_raise(rb_eArgError, "bad handler"); + } + if (!NIL_P(command)) { + SafeStringValue(command); /* taint check */ + *cmd = command; + switch (RSTRING_LEN(command)) { + case 0: + goto sig_ign; + break; + case 14: + if (strncmp(RSTRING_PTR(command), "SYSTEM_DEFAULT", 14) == 0) { + func = SIG_DFL; + *cmd = 0; + } + break; + case 7: + if (strncmp(RSTRING_PTR(command), "SIG_IGN", 7) == 0) { +sig_ign: + func = SIG_IGN; + *cmd = 0; + } + else if (strncmp(RSTRING_PTR(command), "SIG_DFL", 7) == 0) { +sig_dfl: + func = default_handler(sig); + *cmd = 0; + } + else if (strncmp(RSTRING_PTR(command), "DEFAULT", 7) == 0) { + goto sig_dfl; + } + break; + case 6: + if (strncmp(RSTRING_PTR(command), "IGNORE", 6) == 0) { + goto sig_ign; + } + break; + case 4: + if (strncmp(RSTRING_PTR(command), "EXIT", 4) == 0) { + *cmd = Qundef; + } + break; + } + } + else { + rb_proc_t *proc; + GetProcPtr(*cmd, proc); + } + } + + return func; +} + +static int +trap_signm(VALUE vsig) +{ + int sig = -1; + const char *s; + + switch (TYPE(vsig)) { + case T_FIXNUM: + sig = FIX2INT(vsig); + if (sig < 0 || sig >= NSIG) { + rb_raise(rb_eArgError, "invalid signal number (%d)", sig); + } + break; + 
+ case T_SYMBOL: + s = rb_id2name(SYM2ID(vsig)); + if (!s) rb_raise(rb_eArgError, "bad signal"); + goto str_signal; + + default: + s = StringValuePtr(vsig); + + str_signal: + if (strncmp("SIG", s, 3) == 0) + s += 3; + sig = signm2signo(s); + if (sig == 0 && strcmp(s, "EXIT") != 0) + rb_raise(rb_eArgError, "unsupported signal SIG%s", s); + } + return sig; +} + +static VALUE +trap(struct trap_arg *arg) +{ + sighandler_t oldfunc, func = arg->func; + VALUE oldcmd, command = arg->cmd; + int sig = arg->sig; + rb_vm_t *vm = GET_VM(); + + oldfunc = ruby_signal(sig, func); + oldcmd = vm->trap_list[sig].cmd; + switch (oldcmd) { + case 0: + if (oldfunc == SIG_IGN) oldcmd = rb_str_new2("IGNORE"); + else if (oldfunc == sighandler) oldcmd = rb_str_new2("DEFAULT"); + else oldcmd = Qnil; + break; + case Qundef: + oldcmd = rb_str_new2("EXIT"); + break; + } + + vm->trap_list[sig].cmd = command; + vm->trap_list[sig].safe = rb_safe_level(); + /* enable at least specified signal. */ +#if USE_TRAP_MASK +#ifdef HAVE_SIGPROCMASK + sigdelset(&arg->mask, sig); +#else + arg->mask &= ~sigmask(sig); +#endif +#endif + return oldcmd; +} + +#if USE_TRAP_MASK +static VALUE +trap_ensure(struct trap_arg *arg) +{ + /* enable interrupt */ +#ifdef HAVE_SIGPROCMASK + sigprocmask(SIG_SETMASK, &arg->mask, NULL); +#else + sigsetmask(arg->mask); +#endif + trap_last_mask = arg->mask; + return 0; +} +#endif + +void +rb_trap_restore_mask(void) +{ +#if USE_TRAP_MASK +# ifdef HAVE_SIGPROCMASK + sigprocmask(SIG_SETMASK, &trap_last_mask, NULL); +# else + sigsetmask(trap_last_mask); +# endif +#endif +} + +/* + * call-seq: + * Signal.trap( signal, command ) => obj + * Signal.trap( signal ) {| | block } => obj + * + * Specifies the handling of signals. The first parameter is a signal + * name (a string such as ``SIGALRM'', ``SIGUSR1'', and so on) or a + * signal number. The characters ``SIG'' may be omitted from the + * signal name. The command or block specifies code to be run when the + * signal is raised. 
+ * If the command is the string ``IGNORE'' or ``SIG_IGN'', the signal + * will be ignored. + * If the command is ``DEFAULT'' or ``SIG_DFL'', the Ruby's default handler + * will be invoked. + * If the command is ``EXIT'', the script will be terminated by the signal. + * If the command is ``SYSTEM_DEFAULT'', the operating system's default + * handler will be invoked. + * Otherwise, the given command or block will be run. + * The special signal name ``EXIT'' or signal number zero will be + * invoked just prior to program termination. + * trap returns the previous handler for the given signal. + * + * Signal.trap(0, proc { puts "Terminating: #{$$}" }) + * Signal.trap("CLD") { puts "Child died" } + * fork && Process.wait + * + * produces: + * Terminating: 27461 + * Child died + * Terminating: 27460 + */ +static VALUE +sig_trap(int argc, VALUE *argv) +{ + struct trap_arg arg; + + rb_secure(2); + if (argc == 0 || argc > 2) { + rb_raise(rb_eArgError, "wrong number of arguments -- trap(sig, cmd)/trap(sig){...}"); + } + + arg.sig = trap_signm(argv[0]); + if (argc == 1) { + arg.cmd = rb_block_proc(); + arg.func = sighandler; + } + else if (argc == 2) { + arg.cmd = argv[1]; + arg.func = trap_handler(&arg.cmd, arg.sig); + } + + if (OBJ_TAINTED(arg.cmd)) { + rb_raise(rb_eSecurityError, "Insecure: tainted signal trap"); + } +#if USE_TRAP_MASK + /* disable interrupt */ +# ifdef HAVE_SIGPROCMASK + sigfillset(&arg.mask); + sigprocmask(SIG_BLOCK, &arg.mask, &arg.mask); +# else + arg.mask = sigblock(~0); +# endif + + return rb_ensure(trap, (VALUE)&arg, trap_ensure, (VALUE)&arg); +#else + return trap(&arg); +#endif +} + +/* + * call-seq: + * Signal.list => a_hash + * + * Returns a list of signal names mapped to the corresponding + * underlying signal numbers. 
+ * + * Signal.list #=> {"EXIT"=>0, "HUP"=>1, "INT"=>2, "QUIT"=>3, "ILL"=>4, "TRAP"=>5, "IOT"=>6, "ABRT"=>6, "FPE"=>8, "KILL"=>9, "BUS"=>7, "SEGV"=>11, "SYS"=>31, "PIPE"=>13, "ALRM"=>14, "TERM"=>15, "URG"=>23, "STOP"=>19, "TSTP"=>20, "CONT"=>18, "CHLD"=>17, "CLD"=>17, "TTIN"=>21, "TTOU"=>22, "IO"=>29, "XCPU"=>24, "XFSZ"=>25, "VTALRM"=>26, "PROF"=>27, "WINCH"=>28, "USR1"=>10, "USR2"=>12, "PWR"=>30, "POLL"=>29} + */ +static VALUE +sig_list(void) +{ + VALUE h = rb_hash_new(); + const struct signals *sigs; + + for (sigs = siglist; sigs->signm; sigs++) { + rb_hash_aset(h, rb_str_new2(sigs->signm), INT2FIX(sigs->signo)); + } + return h; +} + +static void +install_sighandler(int signum, sighandler_t handler) +{ + sighandler_t old; + + old = ruby_signal(signum, handler); + if (old != SIG_DFL) { + ruby_signal(signum, old); + } +} + +#if defined(SIGCLD) || defined(SIGCHLD) +static void +init_sigchld(int sig) +{ + sighandler_t oldfunc; +#if USE_TRAP_MASK +# ifdef HAVE_SIGPROCMASK + sigset_t mask; +# else + int mask; +# endif +#endif + +#if USE_TRAP_MASK + /* disable interrupt */ +# ifdef HAVE_SIGPROCMASK + sigfillset(&mask); + sigprocmask(SIG_BLOCK, &mask, &mask); +# else + mask = sigblock(~0); +# endif +#endif + + oldfunc = ruby_signal(sig, SIG_DFL); + if (oldfunc != SIG_DFL && oldfunc != SIG_IGN) { + ruby_signal(sig, oldfunc); + } else { + GET_VM()->trap_list[sig].cmd = 0; + } + +#if USE_TRAP_MASK +#ifdef HAVE_SIGPROCMASK + sigdelset(&mask, sig); + sigprocmask(SIG_SETMASK, &mask, NULL); +#else + mask &= ~sigmask(sig); + sigsetmask(mask); +#endif + trap_last_mask = mask; +#endif +} +#endif + +void +ruby_sig_finalize() +{ + sighandler_t oldfunc; + + oldfunc = ruby_signal(SIGINT, SIG_IGN); + if (oldfunc == sighandler) { + ruby_signal(SIGINT, SIG_DFL); + } +} + + +#ifdef RUBY_DEBUG_ENV +int ruby_enable_coredump = 0; +#endif + +/* + * Many operating systems allow signals to be sent to running + * processes. 
Some signals have a defined effect on the process, while + * others may be trapped at the code level and acted upon. For + * example, your process may trap the USR1 signal and use it to toggle + * debugging, and may use TERM to initiate a controlled shutdown. + * + * pid = fork do + * Signal.trap("USR1") do + * $debug = !$debug + * puts "Debug now: #$debug" + * end + * Signal.trap("TERM") do + * puts "Terminating..." + * shutdown() + * end + * # . . . do some work . . . + * end + * + * Process.detach(pid) + * + * # Controlling program: + * Process.kill("USR1", pid) + * # ... + * Process.kill("USR1", pid) + * # ... + * Process.kill("TERM", pid) + * + * produces: + * Debug now: true + * Debug now: false + * Terminating... + * + * The list of available signal names and their interpretation is + * system dependent. Signal delivery semantics may also vary between + * systems; in particular signal delivery may not always be reliable. + */ +void +Init_signal(void) +{ + VALUE mSignal = rb_define_module("Signal"); + + rb_define_global_function("trap", sig_trap, -1); + rb_define_module_function(mSignal, "trap", sig_trap, -1); + rb_define_module_function(mSignal, "list", sig_list, 0); + + rb_define_method(rb_eSignal, "initialize", esignal_init, -1); + rb_define_method(rb_eSignal, "signo", esignal_signo, 0); + rb_alias(rb_eSignal, rb_intern("signm"), rb_intern("message")); + rb_define_method(rb_eInterrupt, "initialize", interrupt_init, -1); + + install_sighandler(SIGINT, sighandler); +#ifdef SIGHUP + install_sighandler(SIGHUP, sighandler); +#endif +#ifdef SIGQUIT + install_sighandler(SIGQUIT, sighandler); +#endif +#ifdef SIGTERM + install_sighandler(SIGTERM, sighandler); +#endif +#ifdef SIGALRM + install_sighandler(SIGALRM, sighandler); +#endif +#ifdef SIGUSR1 + install_sighandler(SIGUSR1, sighandler); +#endif +#ifdef SIGUSR2 + install_sighandler(SIGUSR2, sighandler); +#endif + +#ifdef RUBY_DEBUG_ENV + if (!ruby_enable_coredump) +#endif + { +#ifdef SIGBUS + 
install_sighandler(SIGBUS, sigbus); +#endif +#ifdef SIGSEGV +# ifdef USE_SIGALTSTACK + register_sigaltstack(); +# endif + install_sighandler(SIGSEGV, (sighandler_t)sigsegv); +#endif + } +#ifdef SIGPIPE + install_sighandler(SIGPIPE, sigpipe); +#endif + +#if defined(SIGCLD) + init_sigchld(SIGCLD); +#elif defined(SIGCHLD) + init_sigchld(SIGCHLD); +#endif +} diff --git a/sprintf.c b/sprintf.c new file mode 100644 index 0000000..152f651 --- /dev/null +++ b/sprintf.c @@ -0,0 +1,1203 @@ +/********************************************************************** + + sprintf.c - + + $Author: yugui $ + created at: Fri Oct 15 10:39:26 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. + Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/re.h" +#include "ruby/encoding.h" +#include +#include + +#ifdef HAVE_IEEEFP_H +#include +#endif + +#define BIT_DIGITS(N) (((N)*146)/485 + 1) /* log2(10) =~ 146/485 */ +#define BITSPERDIG (SIZEOF_BDIGITS*CHAR_BIT) +#define EXTENDSIGN(n, l) (((~0 << (n)) >> (((n)*(l)) % BITSPERDIG)) & ~(~0 << (n))) + +static void fmt_setup(char*,size_t,int,int,int,int); + +static char* +remove_sign_bits(char *str, int base) +{ + char *s, *t; + + s = t = str; + + if (base == 16) { + while (*t == 'f') { + t++; + } + } + else if (base == 8) { + *t |= EXTENDSIGN(3, strlen(t)); + while (*t == '7') { + t++; + } + } + else if (base == 2) { + while (*t == '1') { + t++; + } + } + + return t; +} + +static char +sign_bits(int base, const char *p) +{ + char c = '.'; + + switch (base) { + case 16: + if (*p == 'X') c = 'F'; + else c = 'f'; + break; + case 8: + c = '7'; break; + case 2: + c = '1'; break; + } + return c; +} + +#define FNONE 0 +#define FSHARP 1 +#define FMINUS 2 +#define FPLUS 4 +#define FZERO 8 +#define FSPACE 16 +#define FWIDTH 32 +#define FPREC 64 +#define 
FPREC0 128 + +#define CHECK(l) do {\ + while (blen + (l) >= bsiz) {\ + bsiz*=2;\ + }\ + rb_str_resize(result, bsiz);\ + buf = RSTRING_PTR(result);\ +} while (0) + +#define PUSH(s, l) do { \ + CHECK(l);\ + memcpy(&buf[blen], s, l);\ + blen += (l);\ +} while (0) + +#define FILL(c, l) do { \ + CHECK(l);\ + memset(&buf[blen], c, l);\ + blen += (l);\ +} while (0) + +#define GETARG() (nextvalue != Qundef ? nextvalue : \ + posarg == -1 ? \ + (rb_raise(rb_eArgError, "unnumbered(%d) mixed with numbered", nextarg), 0) : \ + posarg == -2 ? \ + (rb_raise(rb_eArgError, "unnumbered(%d) mixed with named", nextarg), 0) : \ + (posarg = nextarg++, GETNTHARG(posarg))) + +#define GETPOSARG(n) (posarg > 0 ? \ + (rb_raise(rb_eArgError, "numbered(%d) after unnumbered(%d)", n, posarg), 0) : \ + posarg == -2 ? \ + (rb_raise(rb_eArgError, "numbered(%d) after named", n), 0) : \ + ((n < 1) ? (rb_raise(rb_eArgError, "invalid index - %d$", n), 0) : \ + (posarg = -1, GETNTHARG(n)))) + +#define GETNTHARG(nth) \ + ((nth >= argc) ? (rb_raise(rb_eArgError, "too few arguments"), 0) : argv[nth]) + +#define GETNAMEARG(id) (posarg > 0 ? \ + (rb_raise(rb_eArgError, "named after unnumbered(%d)", posarg), 0) : \ + posarg == -1 ? 
\ + (rb_raise(rb_eArgError, "named after numbered"), 0) : \ + rb_hash_fetch(get_hash(&hash, argc, argv), id)) + +#define GETNUM(n, val) \ + for (; p < end && rb_enc_isdigit(*p, enc); p++) { \ + int next_n = 10 * n + (*p - '0'); \ + if (next_n / 10 != n) {\ + rb_raise(rb_eArgError, #val " too big"); \ + } \ + n = next_n; \ + } \ + if (p >= end) { \ + rb_raise(rb_eArgError, "malformed format string - %%*[0-9]"); \ + } + +#define GETASTER(val) do { \ + t = p++; \ + n = 0; \ + GETNUM(n, val); \ + if (*p == '$') { \ + tmp = GETPOSARG(n); \ + } \ + else { \ + tmp = GETARG(); \ + p = t; \ + } \ + val = NUM2INT(tmp); \ +} while (0) + +static VALUE +get_hash(volatile VALUE *hash, int argc, const VALUE *argv) +{ + VALUE tmp; + + if (*hash != Qundef) return *hash; + if (argc != 2) { + rb_raise(rb_eArgError, "one hash required"); + } + tmp = rb_check_convert_type(argv[1], T_HASH, "Hash", "to_hash"); + if (NIL_P(tmp)) { + rb_raise(rb_eArgError, "one hash required"); + } + return (*hash = tmp); +} + +/* + * call-seq: + * format(format_string [, arguments...] ) => string + * sprintf(format_string [, arguments...] ) => string + * + * Returns the string resulting from applying format_string to + * any additional arguments. Within the format string, any characters + * other than format sequences are copied to the result. + * + * The syntax of a format sequence is follows. + * + * %[flags][width][.precision]type + * + * A format + * sequence consists of a percent sign, followed by optional flags, + * width, and precision indicators, then terminated with a field type + * character. The field type controls how the corresponding + * sprintf argument is to be interpreted, while the flags + * modify that interpretation. + * + * The field type characters are: + * + * Field | Integer Format + * ------+-------------------------------------------------------------- + * b | Convert argument as a binary number. 
+ * | Negative numbers will be displayed as a two's complement + * | prefixed with `..1'. + * B | Equivalent to `b', but uses an uppercase 0B for prefix + * | in the alternative format by #. + * d | Convert argument as a decimal number. + * i | Identical to `d'. + * o | Convert argument as an octal number. + * | Negative numbers will be displayed as a two's complement + * | prefixed with `..7'. + * u | Identical to `d'. + * x | Convert argument as a hexadecimal number. + * | Negative numbers will be displayed as a two's complement + * | prefixed with `..f' (representing an infinite string of + * | leading 'ff's). + * X | Equivalent to `x', but uses uppercase letters. + * + * Field | Float Format + * ------+-------------------------------------------------------------- + * e | Convert floating point argument into exponential notation + * | with one digit before the decimal point as [-]d.dddddde[+-]dd. + * | The precision specifies the number of digits after the decimal + * | point (defaulting to six). + * E | Equivalent to `e', but uses an uppercase E to indicate + * | the exponent. + * f | Convert floating point argument as [-]ddd.dddddd, + * | where the precision specifies the number of digits after + * | the decimal point. + * g | Convert a floating point number using exponential form + * | if the exponent is less than -4 or greater than or + * | equal to the precision, or in dd.dddd form otherwise. + * | The precision specifies the number of significant digits. + * G | Equivalent to `g', but uses an uppercase `E' in exponent form. + * + * Field | Other Format + * ------+-------------------------------------------------------------- + * c | Argument is the numeric code for a single character or + * | a single character string itself. + * p | The value of argument.inspect. + * s | Argument is a string to be substituted. If the format + * | sequence contains a precision, at most that many characters + * | will be copied. 
+ * % | A percent sign itself will be displayed. No argument taken. + * + * The flags modify the behavior of the formats. + * The flag characters are: + * + * Flag | Applies to | Meaning + * ---------+---------------+----------------------------------------- + * space | bBdiouxX | Leave a space at the start of + * | eEfgG | non-negative numbers. + * | (numeric fmt) | For `o', `x', `X', `b' and `B', use + * | | a minus sign with absolute value for + * | | negative values. + * ---------+---------------+----------------------------------------- + * (digit)$ | all | Specifies the absolute argument number + * | | for this field. Absolute and relative + * | | argument numbers cannot be mixed in a + * | | sprintf string. + * ---------+---------------+----------------------------------------- + * # | bBoxX | Use an alternative format. + * | eEfgG | For the conversion `o', increase the precision + * | | until the first digit will be `0' if + * | | it is not formatted as complements. + * | | For the conversions `x', `X', `b' and `B' + * | | on non-zero, prefix the result with ``0x'', + * | | ``0X'', ``0b'' and ``0B'', respectively. + * | | For `e', `E', `f', `g', and 'G', + * | | force a decimal point to be added, + * | | even if no digits follow. + * | | For `g' and 'G', do not remove trailing zeros. + * ---------+---------------+----------------------------------------- + * + | bBdiouxX | Add a leading plus sign to non-negative + * | eEfgG | numbers. + * | (numeric fmt) | For `o', `x', `X', `b' and `B', use + * | | a minus sign with absolute value for + * | | negative values. + * ---------+---------------+----------------------------------------- + * - | all | Left-justify the result of this conversion. + * ---------+---------------+----------------------------------------- + * 0 (zero) | bBdiouxX | Pad with zeros, not spaces. + * | eEfgG | For `o', `x', `X', `b' and `B', radix-1 + * | (numeric fmt) | is used for negative numbers formatted as + * | | complements. 
+ * ---------+---------------+----------------------------------------- + * * | all | Use the next argument as the field width. + * | | If negative, left-justify the result. If the + * | | asterisk is followed by a number and a dollar + * | | sign, use the indicated argument as the width. + * + * Examples of flags: + * + * # `+' and space flag specifies the sign of non-negative numbers. + * sprintf("%d", 123) #=> "123" + * sprintf("%+d", 123) #=> "+123" + * sprintf("% d", 123) #=> " 123" + * + * # `#' flag for `o' increases number of digits to show `0'. + * # `+' and space flag changes format of negative numbers. + * sprintf("%o", 123) #=> "173" + * sprintf("%#o", 123) #=> "0173" + * sprintf("%+o", -123) #=> "-173" + * sprintf("%o", -123) #=> "..7605" + * sprintf("%#o", -123) #=> "..7605" + * + * # `#' flag for `x' add a prefix `0x' for non-zero numbers. + * # `+' and space flag disables complements for negative numbers. + * sprintf("%x", 123) #=> "7b" + * sprintf("%#x", 123) #=> "0x7b" + * sprintf("%+x", -123) #=> "-7b" + * sprintf("%x", -123) #=> "..f85" + * sprintf("%#x", -123) #=> "0x..f85" + * sprintf("%#x", 0) #=> "0" + * + * # `#' for `X' uses the prefix `0X'. + * sprintf("%X", 123) #=> "7B" + * sprintf("%#X", 123) #=> "0X7B" + * + * # `#' flag for `b' add a prefix `0b' for non-zero numbers. + * # `+' and space flag disables complements for negative numbers. + * sprintf("%b", 123) #=> "1111011" + * sprintf("%#b", 123) #=> "0b1111011" + * sprintf("%+b", -123) #=> "-1111011" + * sprintf("%b", -123) #=> "..10000101" + * sprintf("%#b", -123) #=> "0b..10000101" + * sprintf("%#b", 0) #=> "0" + * + * # `#' for `B' uses the prefix `0B'. + * sprintf("%B", 123) #=> "1111011" + * sprintf("%#B", 123) #=> "0B1111011" + * + * # `#' for `e' forces to show the decimal point. + * sprintf("%.0e", 1) #=> "1e+00" + * sprintf("%#.0e", 1) #=> "1.e+00" + * + * # `#' for `f' forces to show the decimal point. 
+ * sprintf("%.0f", 1234) #=> "1234" + * sprintf("%#.0f", 1234) #=> "1234." + * + * # `#' for `g' forces to show the decimal point. + * # It also disables stripping lowest zeros. + * sprintf("%g", 123.4) #=> "123.4" + * sprintf("%#g", 123.4) #=> "123.400" + * sprintf("%g", 123456) #=> "123456" + * sprintf("%#g", 123456) #=> "123456." + * + * The field width is an optional integer, followed optionally by a + * period and a precision. The width specifies the minimum number of + * characters that will be written to the result for this field. + * + * Examples of width: + * + * # padding is done by spaces, width=20 + * # 0 or radix-1. <------------------> + * sprintf("%20d", 123) #=> " 123" + * sprintf("%+20d", 123) #=> " +123" + * sprintf("%020d", 123) #=> "00000000000000000123" + * sprintf("%+020d", 123) #=> "+0000000000000000123" + * sprintf("% 020d", 123) #=> " 0000000000000000123" + * sprintf("%-20d", 123) #=> "123 " + * sprintf("%-+20d", 123) #=> "+123 " + * sprintf("%- 20d", 123) #=> " 123 " + * sprintf("%020x", -123) #=> "..ffffffffffffffff85" + * + * For + * numeric fields, the precision controls the number of decimal places + * displayed. For string fields, the precision determines the maximum + * number of characters to be copied from the string. (Thus, the format + * sequence %10.10s will always contribute exactly ten + * characters to the result.) + * + * Examples of precisions: + * + * # precision for `d', 'o', 'x' and 'b' is + * # minimum number of digits <------> + * sprintf("%20.8d", 123) #=> " 00000123" + * sprintf("%20.8o", 123) #=> " 00000173" + * sprintf("%20.8x", 123) #=> " 0000007b" + * sprintf("%20.8b", 123) #=> " 01111011" + * sprintf("%20.8d", -123) #=> " -00000123" + * sprintf("%20.8o", -123) #=> " ..777605" + * sprintf("%20.8x", -123) #=> " ..ffff85" + * sprintf("%20.8b", -11) #=> " ..110101" + * + * # "0x" and "0b" for `#x' and `#b' is not counted for + * # precision but "0" for `#o' is counted. 
<------> + * sprintf("%#20.8d", 123) #=> " 00000123" + * sprintf("%#20.8o", 123) #=> " 00000173" + * sprintf("%#20.8x", 123) #=> " 0x0000007b" + * sprintf("%#20.8b", 123) #=> " 0b01111011" + * sprintf("%#20.8d", -123) #=> " -00000123" + * sprintf("%#20.8o", -123) #=> " ..777605" + * sprintf("%#20.8x", -123) #=> " 0x..ffff85" + * sprintf("%#20.8b", -11) #=> " 0b..110101" + * + * # precision for `e' is number of + * # digits after the decimal point <------> + * sprintf("%20.8e", 1234.56789) #=> " 1.23456789e+03" + * + * # precision for `f' is number of + * # digits after the decimal point <------> + * sprintf("%20.8f", 1234.56789) #=> " 1234.56789000" + * + * # precision for `g' is number of + * # significant digits <-------> + * sprintf("%20.8g", 1234.56789) #=> " 1234.5679" + * + * # <-------> + * sprintf("%20.8g", 123456789) #=> " 1.2345679e+08" + * + * # precision for `s' is + * # maximum number of characters <------> + * sprintf("%20.8s", "string test") #=> " string t" + * + * Examples: + * + * sprintf("%d %04x", 123, 123) #=> "123 007b" + * sprintf("%08b '%4s'", 123, 123) #=> "01111011 ' 123'" + * sprintf("%1$*2$s %2$d %1$s", "hello", 8) #=> " hello 8 hello" + * sprintf("%1$*2$s %2$d", "hello", -8) #=> "hello -8" + * sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23) #=> "+1.23: 1.23:1.23" + * sprintf("%u", -123) #=> "-123" + */ + +VALUE +rb_f_sprintf(int argc, const VALUE *argv) +{ + return rb_str_format(argc - 1, argv + 1, GETNTHARG(0)); +} + +VALUE +rb_str_format(int argc, const VALUE *argv, VALUE fmt) +{ + rb_encoding *enc; + const char *p, *end; + char *buf; + int blen, bsiz; + VALUE result; + + int width, prec, flags = FNONE; + int nextarg = 1; + int posarg = 0; + int tainted = 0; + VALUE nextvalue; + VALUE tmp; + VALUE str; + volatile VALUE hash = Qundef; + +#define CHECK_FOR_WIDTH(f) \ + if ((f) & FWIDTH) { \ + rb_raise(rb_eArgError, "width given twice"); \ + } \ + if ((f) & FPREC0) { \ + rb_raise(rb_eArgError, "width after precision"); \ + } +#define 
CHECK_FOR_FLAGS(f) \ + if ((f) & FWIDTH) { \ + rb_raise(rb_eArgError, "flag after width"); \ + } \ + if ((f) & FPREC0) { \ + rb_raise(rb_eArgError, "flag after precision"); \ + } + + ++argc; + --argv; + if (OBJ_TAINTED(fmt)) tainted = 1; + StringValue(fmt); + enc = rb_enc_get(fmt); + fmt = rb_str_new4(fmt); + p = RSTRING_PTR(fmt); + end = p + RSTRING_LEN(fmt); + blen = 0; + bsiz = 120; + result = rb_str_buf_new(bsiz); + rb_enc_copy(result, fmt); + buf = RSTRING_PTR(result); + memset(buf, 0, bsiz); + + for (; p < end; p++) { + const char *t; + int n; + + for (t = p; t < end && *t != '%'; t++) ; + PUSH(p, t - p); + if (t >= end) { + /* end of fmt string */ + goto sprint_exit; + } + p = t + 1; /* skip `%' */ + + width = prec = -1; + nextvalue = Qundef; + retry: + switch (*p) { + default: + if (rb_enc_isprint(*p, enc)) + rb_raise(rb_eArgError, "malformed format string - %%%c", *p); + else + rb_raise(rb_eArgError, "malformed format string"); + break; + + case ' ': + CHECK_FOR_FLAGS(flags); + flags |= FSPACE; + p++; + goto retry; + + case '#': + CHECK_FOR_FLAGS(flags); + flags |= FSHARP; + p++; + goto retry; + + case '+': + CHECK_FOR_FLAGS(flags); + flags |= FPLUS; + p++; + goto retry; + + case '-': + CHECK_FOR_FLAGS(flags); + flags |= FMINUS; + p++; + goto retry; + + case '0': + CHECK_FOR_FLAGS(flags); + flags |= FZERO; + p++; + goto retry; + + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + n = 0; + GETNUM(n, width); + if (*p == '$') { + if (nextvalue != Qundef) { + rb_raise(rb_eArgError, "value given twice - %d$", n); + } + nextvalue = GETPOSARG(n); + p++; + goto retry; + } + CHECK_FOR_WIDTH(flags); + width = n; + flags |= FWIDTH; + goto retry; + + case '<': + case '{': + { + const char *start = p; + char term = (*p == '<') ? 
'>' : '}'; + ID id; + + for (; p < end && *p != term; ) { + p += rb_enc_mbclen(p, end, enc); + } + if (p >= end) { + rb_raise(rb_eArgError, "malformed name - unmatched parenthesis"); + } + id = rb_intern3(start + 1, p - start - 1, enc); + nextvalue = GETNAMEARG(ID2SYM(id)); + if (term == '}') goto format_s; + p++; + goto retry; + } + + case '*': + CHECK_FOR_WIDTH(flags); + flags |= FWIDTH; + GETASTER(width); + if (width < 0) { + flags |= FMINUS; + width = -width; + } + p++; + goto retry; + + case '.': + if (flags & FPREC0) { + rb_raise(rb_eArgError, "precision given twice"); + } + flags |= FPREC|FPREC0; + + prec = 0; + p++; + if (*p == '*') { + GETASTER(prec); + if (prec < 0) { /* ignore negative precision */ + flags &= ~FPREC; + } + p++; + goto retry; + } + + GETNUM(prec, precision); + goto retry; + + case '\n': + case '\0': + p--; + case '%': + if (flags != FNONE) { + rb_raise(rb_eArgError, "invalid format character - %%"); + } + PUSH("%", 1); + break; + + case 'c': + { + VALUE val = GETARG(); + VALUE tmp; + unsigned int c; + int n; + + tmp = rb_check_string_type(val); + if (!NIL_P(tmp)) { + if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) { + rb_raise(rb_eArgError, "%%c requires a character"); + } + c = rb_enc_codepoint(RSTRING_PTR(tmp), RSTRING_END(tmp), enc); + } + else { + c = NUM2INT(val); + } + n = rb_enc_codelen(c, enc); + if (n <= 0) { + rb_raise(rb_eArgError, "invalid character"); + } + if (!(flags & FWIDTH)) { + CHECK(n); + rb_enc_mbcput(c, &buf[blen], enc); + blen += n; + } + else if ((flags & FMINUS)) { + CHECK(n); + rb_enc_mbcput(c, &buf[blen], enc); + blen += n; + FILL(' ', width-1); + } + else { + FILL(' ', width-1); + CHECK(n); + rb_enc_mbcput(c, &buf[blen], enc); + blen += n; + } + } + break; + + case 's': + case 'p': + format_s: + { + VALUE arg = GETARG(); + long len, slen; + + if (*p == 'p') arg = rb_inspect(arg); + str = rb_obj_as_string(arg); + if (OBJ_TAINTED(str)) tainted = 1; + len = RSTRING_LEN(str); + rb_str_set_len(result, 
blen); + enc = rb_enc_check(result, str); + if (flags&(FPREC|FWIDTH)) { + slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc); + if (slen < 0) { + rb_raise(rb_eArgError, "invalid mbstring sequence"); + } + if ((flags&FPREC) && (prec < slen)) { + char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str), + prec, enc); + slen = prec; + len = p - RSTRING_PTR(str); + } + /* need to adjust multi-byte string pos */ + if ((flags&FWIDTH) && (width > slen)) { + width -= slen; + if (!(flags&FMINUS)) { + CHECK(width); + while (width--) { + buf[blen++] = ' '; + } + } + CHECK(len); + memcpy(&buf[blen], RSTRING_PTR(str), len); + blen += len; + if (flags&FMINUS) { + CHECK(width); + while (width--) { + buf[blen++] = ' '; + } + } + rb_enc_associate(result, enc); + break; + } + } + PUSH(RSTRING_PTR(str), len); + rb_enc_associate(result, enc); + } + break; + + case 'd': + case 'i': + case 'o': + case 'x': + case 'X': + case 'b': + case 'B': + case 'u': + { + volatile VALUE tmp1; + volatile VALUE val = GETARG(); + char fbuf[32], nbuf[64], *s; + const char *prefix = 0; + int sign = 0, dots = 0; + char sc = 0; + long v = 0; + int base, bignum = 0; + int len, pos; + + switch (*p) { + case 'd': + case 'i': + case 'u': + sign = 1; break; + case 'o': + case 'x': + case 'X': + case 'b': + case 'B': + if (flags&(FPLUS|FSPACE)) sign = 1; + break; + } + if (flags & FSHARP) { + switch (*p) { + case 'o': + prefix = "0"; break; + case 'x': + prefix = "0x"; break; + case 'X': + prefix = "0X"; break; + case 'b': + prefix = "0b"; break; + case 'B': + prefix = "0B"; break; + } + } + + bin_retry: + switch (TYPE(val)) { + case T_FLOAT: + if (FIXABLE(RFLOAT_VALUE(val))) { + val = LONG2FIX((long)RFLOAT_VALUE(val)); + goto bin_retry; + } + val = rb_dbl2big(RFLOAT_VALUE(val)); + if (FIXNUM_P(val)) goto bin_retry; + bignum = 1; + break; + case T_STRING: + val = rb_str_to_inum(val, 0, Qtrue); + goto bin_retry; + case T_BIGNUM: + bignum = 1; + break; + case T_FIXNUM: + v = FIX2LONG(val); + break; + 
default: + val = rb_Integer(val); + goto bin_retry; + } + + switch (*p) { + case 'o': + base = 8; break; + case 'x': + case 'X': + base = 16; break; + case 'b': + case 'B': + base = 2; break; + case 'u': + case 'd': + case 'i': + default: + base = 10; break; + } + + if (!bignum) { + if (base == 2) { + val = rb_int2big(v); + goto bin_retry; + } + if (sign) { + char c = *p; + if (c == 'i') c = 'd'; /* %d and %i are identical */ + if (v < 0) { + v = -v; + sc = '-'; + width--; + } + else if (flags & FPLUS) { + sc = '+'; + width--; + } + else if (flags & FSPACE) { + sc = ' '; + width--; + } + snprintf(fbuf, sizeof(fbuf), "%%l%c", c); + snprintf(nbuf, sizeof(nbuf), fbuf, v); + s = nbuf; + } + else { + s = nbuf; + if (v < 0) { + dots = 1; + } + snprintf(fbuf, sizeof(fbuf), "%%l%c", *p == 'X' ? 'x' : *p); + snprintf(++s, sizeof(nbuf) - 1, fbuf, v); + if (v < 0) { + char d = 0; + + s = remove_sign_bits(s, base); + switch (base) { + case 16: + d = 'f'; break; + case 8: + d = '7'; break; + } + if (d && *s != d) { + *--s = d; + } + } + } + } + else { + if (sign) { + tmp = rb_big2str(val, base); + s = RSTRING_PTR(tmp); + if (s[0] == '-') { + s++; + sc = '-'; + width--; + } + else if (flags & FPLUS) { + sc = '+'; + width--; + } + else if (flags & FSPACE) { + sc = ' '; + width--; + } + } + else { + if (!RBIGNUM_SIGN(val)) { + val = rb_big_clone(val); + rb_big_2comp(val); + } + tmp1 = tmp = rb_big2str0(val, base, RBIGNUM_SIGN(val)); + s = RSTRING_PTR(tmp); + if (*s == '-') { + dots = 1; + if (base == 10) { + rb_warning("negative number for %%u specifier"); + } + s = remove_sign_bits(++s, base); + switch (base) { + case 16: + if (s[0] != 'f') *--s = 'f'; break; + case 8: + if (s[0] != '7') *--s = '7'; break; + case 2: + if (s[0] != '1') *--s = '1'; break; + } + } + } + } + + pos = -1; + len = strlen(s); + if (dots) { + prec -= 2; + width -= 2; + } + + if (*p == 'X') { + char *pp = s; + int c; + while ((c = (int)(unsigned char)*pp) != 0) { + *pp = rb_enc_toupper(c, enc); + pp++; + } 
+ } + if (prefix && !prefix[1]) { /* octal */ + if (dots) { + prefix = 0; + } + else if (len == 1 && *s == '0') { + len = 0; + if (flags & FPREC) prec--; + } + else if ((flags & FPREC) && (prec > len)) { + prefix = 0; + } + } + else if (len == 1 && *s == '0') { + prefix = 0; + } + if (prefix) { + width -= strlen(prefix); + } + if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) { + prec = width; + width = 0; + } + else { + if (prec < len) { + if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0; + prec = len; + } + width -= prec; + } + if (!(flags&FMINUS)) { + CHECK(width); + while (width-- > 0) { + buf[blen++] = ' '; + } + } + if (sc) PUSH(&sc, 1); + if (prefix) { + int plen = strlen(prefix); + PUSH(prefix, plen); + } + CHECK(prec - len); + if (dots) PUSH("..", 2); + if (!bignum && v < 0) { + char c = sign_bits(base, p); + while (len < prec--) { + buf[blen++] = c; + } + } + else if ((flags & (FMINUS|FPREC)) != FMINUS) { + char c; + + if (!sign && bignum && !RBIGNUM_SIGN(val)) + c = sign_bits(base, p); + else + c = '0'; + while (len < prec--) { + buf[blen++] = c; + } + } + PUSH(s, len); + CHECK(width); + while (width-- > 0) { + buf[blen++] = ' '; + } + } + break; + + case 'f': + case 'g': + case 'G': + case 'e': + case 'E': + { + VALUE val = GETARG(); + double fval; + int i, need = 6; + char fbuf[32]; + + fval = RFLOAT_VALUE(rb_Float(val)); + if (isnan(fval) || isinf(fval)) { + const char *expr; + + if (isnan(fval)) { + expr = "NaN"; + } + else { + expr = "Inf"; + } + need = strlen(expr); + if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS)) + need++; + if ((flags & FWIDTH) && need < width) + need = width; + + CHECK(need + 1); + snprintf(&buf[blen], need + 1, "%*s", need, ""); + if (flags & FMINUS) { + if (!isnan(fval) && fval < 0.0) + buf[blen++] = '-'; + else if (flags & FPLUS) + buf[blen++] = '+'; + else if (flags & FSPACE) + blen++; + strncpy(&buf[blen], expr, strlen(expr)); + } + else { + if (!isnan(fval) && fval < 0.0) + buf[blen + need - strlen(expr) - 1] = 
'-'; + else if (flags & FPLUS) + buf[blen + need - strlen(expr) - 1] = '+'; + else if ((flags & FSPACE) && need > width) + blen++; + strncpy(&buf[blen + need - strlen(expr)], expr, + strlen(expr)); + } + blen += strlen(&buf[blen]); + break; + } + + fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec); + need = 0; + if (*p != 'e' && *p != 'E') { + i = INT_MIN; + frexp(fval, &i); + if (i > 0) + need = BIT_DIGITS(i); + } + need += (flags&FPREC) ? prec : 6; + if ((flags&FWIDTH) && need < width) + need = width; + need += 20; + + CHECK(need); + snprintf(&buf[blen], need, fbuf, fval); + blen += strlen(&buf[blen]); + } + break; + } + flags = FNONE; + } + + sprint_exit: + /* XXX - We cannot validate the number of arguments if (digit)$ style used. + */ + if (posarg >= 0 && nextarg < argc) { + const char *mesg = "too many arguments for format string"; + if (RTEST(ruby_debug)) rb_raise(rb_eArgError, "%s", mesg); + if (RTEST(ruby_verbose)) rb_warn("%s", mesg); + } + rb_str_resize(result, blen); + + if (tainted) OBJ_TAINT(result); + return result; +} + +static void +fmt_setup(char *buf, size_t size, int c, int flags, int width, int prec) +{ + char *end = buf + size; + *buf++ = '%'; + if (flags & FSHARP) *buf++ = '#'; + if (flags & FPLUS) *buf++ = '+'; + if (flags & FMINUS) *buf++ = '-'; + if (flags & FZERO) *buf++ = '0'; + if (flags & FSPACE) *buf++ = ' '; + + if (flags & FWIDTH) { + snprintf(buf, end - buf, "%d", width); + buf += strlen(buf); + } + + if (flags & FPREC) { + snprintf(buf, end - buf, ".%d", prec); + buf += strlen(buf); + } + + *buf++ = c; + *buf = '\0'; +} + +#undef FILE +#define FILE rb_printf_buffer +#define __sbuf rb_printf_sbuf +#define __sFILE rb_printf_sfile +#undef feof +#undef ferror +#undef clearerr +#undef fileno +#if SIZEOF_LONG < SIZEOF_VOIDP +# if SIZEOF_LONG_LONG == SIZEOF_VOIDP +# define _HAVE_SANE_QUAD_ +# define _HAVE_LLP64_ +# define quad_t LONG_LONG +# define u_quad_t unsigned LONG_LONG +# endif +#endif +#undef snprintf +#define FLOATING_POINT 
1 +#define BSD__dtoa ruby_dtoa +#include "missing/vsnprintf.c" + +static int +ruby__sfvwrite(register rb_printf_buffer *fp, register struct __suio *uio) +{ + struct __siov *iov; + VALUE result = (VALUE)fp->_bf._base; + char *buf = (char*)fp->_p; + size_t len, n; + size_t blen = buf - RSTRING_PTR(result), bsiz = fp->_w; + + if (RBASIC(result)->klass) { + rb_raise(rb_eRuntimeError, "rb_vsprintf reentered"); + } + if ((len = uio->uio_resid) == 0) + return 0; + CHECK(len); + buf += blen; + fp->_w = bsiz; + for (iov = uio->uio_iov; len > 0; ++iov) { + MEMCPY(buf, iov->iov_base, char, n = iov->iov_len); + buf += n; + len -= n; + } + fp->_p = (unsigned char *)buf; + return 0; +} + +VALUE +rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap) +{ + rb_printf_buffer f; + VALUE result; + + f._flags = __SWR | __SSTR; + f._bf._size = 0; + f._w = 120; + result = rb_str_buf_new(f._w); + if (enc) rb_enc_associate(result, enc); + f._bf._base = (unsigned char *)result; + f._p = (unsigned char *)RSTRING_PTR(result); + RBASIC(result)->klass = 0; + f.vwrite = ruby__sfvwrite; + BSD_vfprintf(&f, fmt, ap); + RBASIC(result)->klass = rb_cString; + rb_str_resize(result, (char *)f._p - RSTRING_PTR(result)); + + return result; +} + +VALUE +rb_enc_sprintf(rb_encoding *enc, const char *format, ...) +{ + VALUE result; + va_list ap; + + va_start(ap, format); + result = rb_enc_vsprintf(enc, format, ap); + va_end(ap); + + return result; +} + +VALUE +rb_vsprintf(const char *fmt, va_list ap) +{ + return rb_enc_vsprintf(NULL, fmt, ap); +} + +VALUE +rb_sprintf(const char *format, ...) 
+{ + VALUE result; + va_list ap; + + va_start(ap, format); + result = rb_vsprintf(format, ap); + va_end(ap); + + return result; +} + +VALUE +rb_str_vcatf(VALUE str, const char *fmt, va_list ap) +{ + rb_printf_buffer f; + VALUE klass; + + StringValue(str); + rb_str_modify(str); + f._flags = __SWR | __SSTR; + f._bf._size = 0; + f._w = rb_str_capacity(str); + f._bf._base = (unsigned char *)str; + f._p = (unsigned char *)RSTRING_END(str); + klass = RBASIC(str)->klass; + RBASIC(str)->klass = 0; + f.vwrite = ruby__sfvwrite; + BSD_vfprintf(&f, fmt, ap); + RBASIC(str)->klass = klass; + rb_str_resize(str, (char *)f._p - RSTRING_PTR(str)); + + return str; +} + +VALUE +rb_str_catf(VALUE str, const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + str = rb_str_vcatf(str, format, ap); + va_end(ap); + + return str; +} diff --git a/st.c b/st.c new file mode 100644 index 0000000..c682146 --- /dev/null +++ b/st.c @@ -0,0 +1,975 @@ +/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. 
*/ + +/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */ + +#ifdef NOT_RUBY +#include "regint.h" +#include "st.h" +#else +#include "ruby/ruby.h" +#endif + +#include <stdio.h> +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#include <string.h> + +typedef struct st_table_entry st_table_entry; + +struct st_table_entry { + unsigned int hash; + st_data_t key; + st_data_t record; + st_table_entry *next; + st_table_entry *fore, *back; +}; + +#define ST_DEFAULT_MAX_DENSITY 5 +#define ST_DEFAULT_INIT_TABLE_SIZE 11 + + /* + * DEFAULT_MAX_DENSITY is the default for the largest we allow the + * average number of items per bin before increasing the number of + * bins + * + * DEFAULT_INIT_TABLE_SIZE is the default for the number of bins + * allocated initially + * + */ + +static const struct st_hash_type type_numhash = { + st_numcmp, + st_numhash, +}; + +/* extern int strcmp(const char *, const char *); */ +static int strhash(const char *); +static const struct st_hash_type type_strhash = { + strcmp, + strhash, +}; + +static int strcasehash(const char *); +static const struct st_hash_type type_strcasehash = { + st_strcasecmp, + strcasehash, +}; + +static void rehash(st_table *); + +#ifdef RUBY +#define malloc xmalloc +#define calloc xcalloc +#define free(x) xfree(x) +#endif + +#define alloc(type) (type*)malloc((size_t)sizeof(type)) +#define Calloc(n,s) (char*)calloc((n),(s)) + +#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0) + +#define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key)) +#define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins) + +/* + * MINSIZE is the minimum size of a dictionary. + */ + +#define MINSIZE 8 + +/* +Table of prime numbers 2^n+a, 2<=n<=30. 
+*/ +static const long primes[] = { + 8 + 3, + 16 + 3, + 32 + 5, + 64 + 3, + 128 + 3, + 256 + 27, + 512 + 9, + 1024 + 9, + 2048 + 5, + 4096 + 3, + 8192 + 27, + 16384 + 43, + 32768 + 3, + 65536 + 45, + 131072 + 29, + 262144 + 3, + 524288 + 21, + 1048576 + 7, + 2097152 + 17, + 4194304 + 15, + 8388608 + 9, + 16777216 + 43, + 33554432 + 35, + 67108864 + 15, + 134217728 + 29, + 268435456 + 3, + 536870912 + 11, + 1073741824 + 85, + 0 +}; + +static int +new_size(int size) +{ + int i; + +#if 0 + for (i=3; i<31; i++) { + if ((1<<i) > size) return 1<<i; + } + return -1; +#else + int newsize; + + for (i = 0, newsize = MINSIZE; + i < sizeof(primes)/sizeof(primes[0]); + i++, newsize <<= 1) + { + if (newsize > size) return primes[i]; + } + /* Ran out of polynomials */ +#ifndef NOT_RUBY + rb_raise(rb_eRuntimeError, "st_table too big"); +#endif + return -1; /* should raise exception */ +#endif +} + +#ifdef HASH_LOG +static int collision = 0; +static int init_st = 0; + +static void +stat_col() +{ + FILE *f = fopen("/tmp/col", "w"); + fprintf(f, "collision: %d\n", collision); + fclose(f); +} +#endif + +#define MAX_PACKED_NUMHASH 5 + +st_table* +st_init_table_with_size(const struct st_hash_type *type, int size) +{ + st_table *tbl; + +#ifdef HASH_LOG + if (init_st == 0) { + init_st = 1; + atexit(stat_col); + } +#endif + + size = new_size(size); /* round up to prime number */ + + tbl = alloc(st_table); + tbl->type = type; + tbl->num_entries = 0; + tbl->entries_packed = type == &type_numhash && size/2 <= MAX_PACKED_NUMHASH; + tbl->num_bins = size; + tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*)); + tbl->head = 0; + tbl->tail = 0; + + return tbl; +} + +st_table* +st_init_table(const struct st_hash_type *type) +{ + return st_init_table_with_size(type, 0); +} + +st_table* +st_init_numtable(void) +{ + return st_init_table(&type_numhash); +} + +st_table* +st_init_numtable_with_size(int size) +{ + return st_init_table_with_size(&type_numhash, size); +} + +st_table* +st_init_strtable(void) +{ + return st_init_table(&type_strhash); +} + +st_table* +st_init_strtable_with_size(int size) +{ + return 
st_init_table_with_size(&type_strhash, size); +} + +st_table* +st_init_strcasetable(void) +{ + return st_init_table(&type_strcasehash); +} + +st_table* +st_init_strcasetable_with_size(int size) +{ + return st_init_table_with_size(&type_strcasehash, size); +} + +void +st_clear(st_table *table) +{ + register st_table_entry *ptr, *next; + int i; + + if (table->entries_packed) { + table->num_entries = 0; + return; + } + + for(i = 0; i < table->num_bins; i++) { + ptr = table->bins[i]; + table->bins[i] = 0; + while (ptr != 0) { + next = ptr->next; + free(ptr); + ptr = next; + } + } + table->num_entries = 0; + table->head = 0; + table->tail = 0; +} + +void +st_free_table(st_table *table) +{ + st_clear(table); + free(table->bins); + free(table); +} + +#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \ +((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key))) + +#ifdef HASH_LOG +#define COLLISION collision++ +#else +#define COLLISION +#endif + +#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\ + bin_pos = hash_val%(table)->num_bins;\ + ptr = (table)->bins[bin_pos];\ + if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\ + COLLISION;\ + while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\ + ptr = ptr->next;\ + }\ + ptr = ptr->next;\ + }\ +} while (0) + +int +st_lookup(st_table *table, register st_data_t key, st_data_t *value) +{ + unsigned int hash_val, bin_pos; + register st_table_entry *ptr; + + if (table->entries_packed) { + int i; + for (i = 0; i < table->num_entries; i++) { + if ((st_data_t)table->bins[i*2] == key) { + if (value !=0) *value = (st_data_t)table->bins[i*2+1]; + return 1; + } + } + return 0; + } + + hash_val = do_hash(key, table); + FIND_ENTRY(table, ptr, hash_val, bin_pos); + + if (ptr == 0) { + return 0; + } + else { + if (value != 0) *value = ptr->record; + return 1; + } +} + +int +st_get_key(st_table *table, register st_data_t key, st_data_t *result) +{ + unsigned int hash_val, bin_pos; + register st_table_entry *ptr; + + 
if (table->entries_packed) { + int i; + for (i = 0; i < table->num_entries; i++) { + if ((st_data_t)table->bins[i*2] == key) { + if (result !=0) *result = (st_data_t)table->bins[i*2]; + return 1; + } + } + return 0; + } + + hash_val = do_hash(key, table); + FIND_ENTRY(table, ptr, hash_val, bin_pos); + + if (ptr == 0) { + return 0; + } + else { + if (result != 0) *result = ptr->key; + return 1; + } +} + +#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\ +do {\ + st_table_entry *entry;\ + if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\ + rehash(table);\ + bin_pos = hash_val % table->num_bins;\ + }\ + \ + entry = alloc(st_table_entry);\ + \ + entry->hash = hash_val;\ + entry->key = key;\ + entry->record = value;\ + entry->next = table->bins[bin_pos];\ + if (table->head != 0) {\ + entry->fore = 0;\ + (entry->back = table->tail)->fore = entry;\ + table->tail = entry;\ + }\ + else {\ + table->head = table->tail = entry;\ + entry->fore = entry->back = 0;\ + }\ + table->bins[bin_pos] = entry;\ + table->num_entries++;\ +} while (0) + +static void +unpack_entries(register st_table *table) +{ + int i; + struct st_table_entry *packed_bins[MAX_PACKED_NUMHASH*2]; + int num_entries = table->num_entries; + + memcpy(packed_bins, table->bins, sizeof(struct st_table_entry *) * num_entries*2); + table->entries_packed = 0; + table->num_entries = 0; + memset(table->bins, 0, sizeof(struct st_table_entry *) * table->num_bins); + for (i = 0; i < num_entries; i++) { + st_insert(table, (st_data_t)packed_bins[i*2], (st_data_t)packed_bins[i*2+1]); + } +} + +int +st_insert(register st_table *table, register st_data_t key, st_data_t value) +{ + unsigned int hash_val, bin_pos; + register st_table_entry *ptr; + + if (table->entries_packed) { + int i; + for (i = 0; i < table->num_entries; i++) { + if ((st_data_t)table->bins[i*2] == key) { + table->bins[i*2+1] = (struct st_table_entry*)value; + return 1; + } + } + if ((table->num_entries+1) * 2 <= table->num_bins && 
table->num_entries+1 <= MAX_PACKED_NUMHASH) { + i = table->num_entries++; + table->bins[i*2] = (struct st_table_entry*)key; + table->bins[i*2+1] = (struct st_table_entry*)value; + return 0; + } + else { + unpack_entries(table); + } + } + + hash_val = do_hash(key, table); + FIND_ENTRY(table, ptr, hash_val, bin_pos); + + if (ptr == 0) { + ADD_DIRECT(table, key, value, hash_val, bin_pos); + return 0; + } + else { + ptr->record = value; + return 1; + } +} + +void +st_add_direct(st_table *table, st_data_t key, st_data_t value) +{ + unsigned int hash_val, bin_pos; + + if (table->entries_packed) { + int i; + if ((table->num_entries+1) * 2 <= table->num_bins && table->num_entries+1 <= MAX_PACKED_NUMHASH) { + i = table->num_entries++; + table->bins[i*2] = (struct st_table_entry*)key; + table->bins[i*2+1] = (struct st_table_entry*)value; + return; + } + else { + unpack_entries(table); + } + } + + hash_val = do_hash(key, table); + bin_pos = hash_val % table->num_bins; + ADD_DIRECT(table, key, value, hash_val, bin_pos); +} + +static void +rehash(register st_table *table) +{ + register st_table_entry *ptr, **new_bins; + int i, new_num_bins; + unsigned int hash_val; + + new_num_bins = new_size(table->num_bins+1); + new_bins = (st_table_entry**) + xrealloc(table->bins, new_num_bins * sizeof(st_table_entry*)); + for (i = 0; i < new_num_bins; ++i) new_bins[i] = 0; + table->num_bins = new_num_bins; + table->bins = new_bins; + + if ((ptr = table->head) != 0) { + do { + hash_val = ptr->hash % new_num_bins; + ptr->next = new_bins[hash_val]; + new_bins[hash_val] = ptr; + } while ((ptr = ptr->fore) != 0); + } +} + +st_table* +st_copy(st_table *old_table) +{ + st_table *new_table; + st_table_entry *ptr, *entry, *prev, **tail; + int num_bins = old_table->num_bins; + unsigned int hash_val; + + new_table = alloc(st_table); + if (new_table == 0) { + return 0; + } + + *new_table = *old_table; + new_table->bins = (st_table_entry**) + Calloc((unsigned)num_bins, sizeof(st_table_entry*)); + + if 
(new_table->bins == 0) { + free(new_table); + return 0; + } + + if (old_table->entries_packed) { + memcpy(new_table->bins, old_table->bins, sizeof(struct st_table_entry *) * old_table->num_bins); + return new_table; + } + + if ((ptr = old_table->head) != 0) { + prev = 0; + tail = &new_table->head; + do { + entry = alloc(st_table_entry); + if (entry == 0) { + st_free_table(new_table); + return 0; + } + *entry = *ptr; + hash_val = entry->hash % num_bins; + entry->next = new_table->bins[hash_val]; + new_table->bins[hash_val] = entry; + entry->back = prev; + *tail = prev = entry; + tail = &entry->fore; + } while ((ptr = ptr->fore) != 0); + new_table->tail = prev; + } + + return new_table; +} + +#define REMOVE_ENTRY(table, ptr) do \ + { \ + if (ptr->fore == 0 && ptr->back == 0) { \ + table->head = 0; \ + table->tail = 0; \ + } \ + else { \ + st_table_entry *fore = ptr->fore, *back = ptr->back; \ + if (fore) fore->back = back; \ + if (back) back->fore = fore; \ + if (ptr == table->head) table->head = fore; \ + if (ptr == table->tail) table->tail = back; \ + } \ + table->num_entries--; \ + } while (0) + +int +st_delete(register st_table *table, register st_data_t *key, st_data_t *value) +{ + unsigned int hash_val; + st_table_entry **prev; + register st_table_entry *ptr; + + if (table->entries_packed) { + int i; + for (i = 0; i < table->num_entries; i++) { + if ((st_data_t)table->bins[i*2] == *key) { + if (value != 0) *value = (st_data_t)table->bins[i*2+1]; + table->num_entries--; + memmove(&table->bins[i*2], &table->bins[(i+1)*2], + sizeof(struct st_table_entry*) * 2*(table->num_entries-i)); + return 1; + } + } + if (value != 0) *value = 0; + return 0; + } + + hash_val = do_hash_bin(*key, table); + + for (prev = &table->bins[hash_val]; (ptr = *prev) != 0; prev = &ptr->next) { + if (EQUAL(table, *key, ptr->key)) { + *prev = ptr->next; + REMOVE_ENTRY(table, ptr); + if (value != 0) *value = ptr->record; + *key = ptr->key; + free(ptr); + return 1; + } + } + + if (value != 0) 
*value = 0; + return 0; +} + +int +st_delete_safe(register st_table *table, register st_data_t *key, st_data_t *value, st_data_t never) +{ + unsigned int hash_val; + register st_table_entry *ptr; + + if (table->entries_packed) { + st_index_t i; + for (i = 0; i < table->num_entries; i++) { + if ((st_data_t)table->bins[i*2] == *key) { + if (value != 0) *value = (st_data_t)table->bins[i*2+1]; + table->bins[i*2] = (void *)never; + return 1; + } + } + if (value != 0) *value = 0; + return 0; + } + + hash_val = do_hash_bin(*key, table); + ptr = table->bins[hash_val]; + + for (; ptr != 0; ptr = ptr->next) { + if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) { + REMOVE_ENTRY(table, ptr); + *key = ptr->key; + if (value != 0) *value = ptr->record; + ptr->key = ptr->record = never; + return 1; + } + } + + if (value != 0) *value = 0; + return 0; +} + +void +st_cleanup_safe(st_table *table, st_data_t never) +{ + st_table_entry *ptr, **last, *tmp; + int i; + + if (table->entries_packed) { + st_index_t i = 0, j = 0; + while ((st_data_t)table->bins[i*2] != never) { + if (i++ == table->num_entries) return; + } + for (j = i; ++i < table->num_entries;) { + if ((st_data_t)table->bins[i*2] == never) continue; + table->bins[j*2] = table->bins[i*2]; + table->bins[j*2+1] = table->bins[i*2+1]; + j++; + } + table->num_entries = j; + return; + } + + for (i = 0; i < table->num_bins; i++) { + ptr = *(last = &table->bins[i]); + while (ptr != 0) { + if (ptr->key == never) { + tmp = ptr; + *last = ptr = ptr->next; + free(tmp); + } + else { + ptr = *(last = &ptr->next); + } + } + } +} + +int +st_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg) +{ + st_table_entry *ptr, **last, *tmp; + enum st_retval retval; + int i; + + if (table->entries_packed) { + for (i = 0; i < table->num_entries; i++) { + int j; + st_data_t key, val; + key = (st_data_t)table->bins[i*2]; + val = (st_data_t)table->bins[i*2+1]; + retval = (*func)(key, val, arg); + switch (retval) { + case ST_CHECK: /* check 
if hash is modified during iteration */ + for (j = 0; j < table->num_entries; j++) { + if ((st_data_t)table->bins[j*2] == key) + break; + } + if (j == table->num_entries) { + /* call func with error notice */ + retval = (*func)(0, 0, arg, 1); + return 1; + } + /* fall through */ + case ST_CONTINUE: + break; + case ST_STOP: + return 0; + case ST_DELETE: + table->num_entries--; + memmove(&table->bins[i*2], &table->bins[(i+1)*2], + sizeof(struct st_table_entry*) * 2*(table->num_entries-i)); + i--; + break; + } + } + return 0; + } + + if ((ptr = table->head) != 0) { + do { + retval = (*func)(ptr->key, ptr->record, arg); + switch (retval) { + case ST_CHECK: /* check if hash is modified during iteration */ + i = ptr->hash % table->num_bins; + for (tmp = table->bins[i]; tmp != ptr; tmp = tmp->next) { + if (!tmp) { + /* call func with error notice */ + retval = (*func)(0, 0, arg, 1); + return 1; + } + } + /* fall through */ + case ST_CONTINUE: + ptr = ptr->fore; + break; + case ST_STOP: + return 0; + case ST_DELETE: + last = &table->bins[ptr->hash % table->num_bins]; + for (; (tmp = *last) != 0; last = &tmp->next) { + if (ptr == tmp) { + tmp = ptr->fore; + *last = ptr->next; + REMOVE_ENTRY(table, ptr); + free(ptr); + if (ptr == tmp) return 0; + ptr = tmp; + break; + } + } + } + } while (ptr && table->head); + } + return 0; +} + +#if 0 /* unused right now */ +int +st_reverse_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg) +{ + st_table_entry *ptr, **last, *tmp; + enum st_retval retval; + int i; + + if (table->entries_packed) { + for (i = table->num_entries-1; 0 <= i; i--) { + int j; + st_data_t key, val; + key = (st_data_t)table->bins[i*2]; + val = (st_data_t)table->bins[i*2+1]; + retval = (*func)(key, val, arg); + switch (retval) { + case ST_CHECK: /* check if hash is modified during iteration */ + for (j = 0; j < table->num_entries; j++) { + if ((st_data_t)table->bins[j*2] == key) + break; + } + if (j == table->num_entries) { + /* call func with error notice 
*/ + retval = (*func)(0, 0, arg, 1); + return 1; + } + /* fall through */ + case ST_CONTINUE: + break; + case ST_STOP: + return 0; + case ST_DELETE: + table->num_entries--; + memmove(&table->bins[i*2], &table->bins[(i+1)*2], + sizeof(struct st_table_entry*) * 2*(table->num_entries-i)); + break; + } + } + return 0; + } + + if ((ptr = table->head) != 0) { + ptr = ptr->back; + do { + retval = (*func)(ptr->key, ptr->record, arg, 0); + switch (retval) { + case ST_CHECK: /* check if hash is modified during iteration */ + i = ptr->hash % table->num_bins; + for (tmp = table->bins[i]; tmp != ptr; tmp = tmp->next) { + if (!tmp) { + /* call func with error notice */ + retval = (*func)(0, 0, arg, 1); + return 1; + } + } + /* fall through */ + case ST_CONTINUE: + ptr = ptr->back; + break; + case ST_STOP: + return 0; + case ST_DELETE: + last = &table->bins[ptr->hash % table->num_bins]; + for (; (tmp = *last) != 0; last = &tmp->next) { + if (ptr == tmp) { + tmp = ptr->back; + *last = ptr->next; + REMOVE_ENTRY(table, ptr); + free(ptr); + ptr = tmp; + break; + } + } + ptr = ptr->next; + free(tmp); + table->num_entries--; + } + } while (ptr && table->head); + } + return 0; +} +#endif + +/* + * hash_32 - 32 bit Fowler/Noll/Vo FNV-1a hash code + * + * @(#) $Hash32: Revision: 1.1 $ + * @(#) $Hash32: Id: hash_32a.c,v 1.1 2003/10/03 20:38:53 chongo Exp $ + * @(#) $Hash32: Source: /usr/local/src/cmd/fnv/RCS/hash_32a.c,v $ + * + *** + * + * Fowler/Noll/Vo hash + * + * The basis of this hash algorithm was taken from an idea sent + * as reviewer comments to the IEEE POSIX P1003.2 committee by: + * + * Phong Vo (http://www.research.att.com/info/kpv/) + * Glenn Fowler (http://www.research.att.com/~gsf/) + * + * In a subsequent ballot round: + * + * Landon Curt Noll (http://www.isthe.com/chongo/) + * + * improved on their algorithm. Some people tried this hash + * and found that it worked rather well. In an EMail message + * to Landon, they named it the ``Fowler/Noll/Vo'' or FNV hash. 
+ * + * FNV hashes are designed to be fast while maintaining a low + * collision rate. The FNV speed allows one to quickly hash lots + * of data while maintaining a reasonable collision rate. See: + * + * http://www.isthe.com/chongo/tech/comp/fnv/index.html + * + * for more details as well as other forms of the FNV hash. + *** + * + * To use the recommended 32 bit FNV-1a hash, pass FNV1_32A_INIT as the + * Fnv32_t hashval argument to fnv_32a_buf() or fnv_32a_str(). + * + *** + * + * Please do not copyright this code. This code is in the public domain. + * + * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO + * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF + * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + * + * By: + * chongo /\oo/\ + * http://www.isthe.com/chongo/ + * + * Share and Enjoy! :-) + */ + +/* + * 32 bit FNV-1 and FNV-1a non-zero initial basis + * + * The FNV-1 initial basis is the FNV-0 hash of the following 32 octets: + * + * chongo /\../\ + * + * NOTE: The \'s above are not back-slashing escape characters. + * They are literal ASCII backslash 0x5c characters. + * + * NOTE: The FNV-1a initial basis is the same value as FNV-1 by definition. 
+ */ +#define FNV1_32A_INIT 0x811c9dc5 + +/* + * 32 bit magic FNV-1a prime + */ +#define FNV_32_PRIME 0x01000193 + +static int +strhash(register const char *string) +{ + register unsigned int hval = FNV1_32A_INIT; + + /* + * FNV-1a hash each octet in the buffer + */ + while (*string) { + /* xor the bottom with the current octet */ + hval ^= (unsigned int)*string++; + + /* multiply by the 32 bit FNV magic prime mod 2^32 */ + hval *= FNV_32_PRIME; + } + return hval; +} + +int +st_strcasecmp(const char *s1, const char *s2) +{ + unsigned int c1, c2; + + while (1) { + c1 = (unsigned char)*s1++; + c2 = (unsigned char)*s2++; + if (c1 == '\0' || c2 == '\0') { + if (c1 != '\0') return 1; + if (c2 != '\0') return -1; + return 0; + } + if ((unsigned int)(c1 - 'A') <= ('Z' - 'A')) c1 += 'a' - 'A'; + if ((unsigned int)(c2 - 'A') <= ('Z' - 'A')) c2 += 'a' - 'A'; + if (c1 != c2) { + if (c1 > c2) + return 1; + else + return -1; + } + } +} + +int +st_strncasecmp(const char *s1, const char *s2, size_t n) +{ + unsigned int c1, c2; + + while (n--) { + c1 = (unsigned char)*s1++; + c2 = (unsigned char)*s2++; + if (c1 == '\0' || c2 == '\0') { + if (c1 != '\0') return 1; + if (c2 != '\0') return -1; + return 0; + } + if ((unsigned int)(c1 - 'A') <= ('Z' - 'A')) c1 += 'a' - 'A'; + if ((unsigned int)(c2 - 'A') <= ('Z' - 'A')) c2 += 'a' - 'A'; + if (c1 != c2) { + if (c1 > c2) + return 1; + else + return -1; + } + } + return 0; +} + +static int +strcasehash(register const char *string) +{ + register unsigned int hval = FNV1_32A_INIT; + + /* + * FNV-1a hash each octet in the buffer + */ + while (*string) { + unsigned int c = (unsigned char)*string++; + if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A'; + hval ^= c; + + /* multiply by the 32 bit FNV magic prime mod 2^32 */ + hval *= FNV_32_PRIME; + } + return hval; +} + +int +st_numcmp(st_data_t x, st_data_t y) +{ + return x != y; +} + +int +st_numhash(st_data_t n) +{ + return (int)n; +} diff --git a/strftime.c b/strftime.c new file 
mode 100644 index 0000000..38167fd --- /dev/null +++ b/strftime.c @@ -0,0 +1,1108 @@ +/* -*- c-file-style: "linux" -*- */ + +/* + * strftime.c + * + * Public-domain implementation of ANSI C library routine. + * + * It's written in old-style C for maximal portability. + * However, since I'm used to prototypes, I've included them too. + * + * If you want stuff in the System V ascftime routine, add the SYSV_EXT define. + * For extensions from SunOS, add SUNOS_EXT. + * For stuff needed to implement the P1003.2 date command, add POSIX2_DATE. + * For VMS dates, add VMS_EXT. + * For a an RFC822 time format, add MAILHEADER_EXT. + * For ISO week years, add ISO_DATE_EXT. + * For complete POSIX semantics, add POSIX_SEMANTICS. + * + * The code for %c, %x, and %X now follows the 1003.2 specification for + * the POSIX locale. + * This version ignores LOCALE information. + * It also doesn't worry about multi-byte characters. + * So there. + * + * This file is also shipped with GAWK (GNU Awk), gawk specific bits of + * code are included if GAWK is defined. 
+ * + * Arnold Robbins + * January, February, March, 1991 + * Updated March, April 1992 + * Updated April, 1993 + * Updated February, 1994 + * Updated May, 1994 + * Updated January, 1995 + * Updated September, 1995 + * Updated January, 1996 + * + * Fixes from ado@elsie.nci.nih.gov + * February 1991, May 1992 + * Fixes from Tor Lillqvist tml@tik.vtt.fi + * May, 1993 + * Further fixes from ado@elsie.nci.nih.gov + * February 1994 + * %z code from chip@chinacat.unicom.com + * Applied September 1995 + * %V code fixed (again) and %G, %g added, + * January 1996 + */ + +#include "ruby/ruby.h" + +#ifndef GAWK +#include +#include +#include +#include +#include +#include +#endif +#if defined(TM_IN_SYS_TIME) || !defined(GAWK) +#include +#if HAVE_SYS_TIME_H +#include +#endif +#endif +#include + +/* defaults: season to taste */ +#define SYSV_EXT 1 /* stuff in System V ascftime routine */ +#define SUNOS_EXT 1 /* stuff in SunOS strftime routine */ +#define POSIX2_DATE 1 /* stuff in Posix 1003.2 date command */ +#define VMS_EXT 1 /* include %v for VMS date format */ +#define MAILHEADER_EXT 1 /* add %z for HHMM format */ +#define ISO_DATE_EXT 1 /* %G and %g for year of ISO week */ +#ifndef GAWK +#define POSIX_SEMANTICS 1 /* call tzset() if TZ changes */ +#endif + +#if defined(ISO_DATE_EXT) +#if ! defined(POSIX2_DATE) +#define POSIX2_DATE 1 +#endif +#endif + +#if defined(POSIX2_DATE) +#if ! defined(SYSV_EXT) +#define SYSV_EXT 1 +#endif +#if ! 
defined(SUNOS_EXT) +#define SUNOS_EXT 1 +#endif +#endif + +#if defined(POSIX2_DATE) +#define adddecl(stuff) stuff +#else +#define adddecl(stuff) +#endif + +#undef strchr /* avoid AIX weirdness */ + +#if !defined __STDC__ && !defined _WIN32 +#define const /**/ +static int weeknumber(); +adddecl(static int iso8601wknum();) +#else +static int weeknumber(const struct tm *timeptr, int firstweekday); +adddecl(static int iso8601wknum(const struct tm *timeptr);) +#endif + +#ifdef STDC_HEADERS +#include +#include +#else +extern void *malloc(); +extern void *realloc(); +extern char *getenv(); +extern char *strchr(); +#endif + +#define range(low, item, hi) max(low, min(item, hi)) + +#if defined __WIN32__ || defined _WIN32 +#define DLL_IMPORT __declspec(dllimport) +#endif +#ifndef DLL_IMPORT +#define DLL_IMPORT +#endif +#if !defined(OS2) && defined(HAVE_TZNAME) +extern DLL_IMPORT char *tzname[2]; +#ifdef HAVE_DAYLIGHT +extern DLL_IMPORT int daylight; +#endif +#ifdef HAVE_VAR_TIMEZONE +extern DLL_IMPORT TYPEOF_VAR_TIMEZONE timezone; +#endif +#ifdef HAVE_VAR_ALTZONE +extern DLL_IMPORT TYPEOF_VAR_ALTZONE altzone; +#endif +#endif + +#undef min /* just in case */ + +/* min --- return minimum of two numbers */ + +#ifndef __STDC__ +static inline int +min(a, b) +int a, b; +#else +static inline int +min(int a, int b) +#endif +{ + return (a < b ? a : b); +} + +#undef max /* also, just in case */ + +/* max --- return maximum of two numbers */ + +#ifndef __STDC__ +static inline int +max(a, b) +int a, b; +#else +static inline int +max(int a, int b) +#endif +{ + return (a > b ? 
a : b); +} + +#ifdef NO_STRING_LITERAL_CONCATENATION +#error No string literal concatenation +#endif + +/* strftime --- produce formatted time */ + +size_t +rb_strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr, const struct timespec *ts, int gmt) +{ + char *endp = s + maxsize; + char *start = s; + const char *sp, *tp; + auto char tbuf[100]; + long off; + int i, w; + long y; + static short first = 1; +#ifdef POSIX_SEMANTICS + static char *savetz = NULL; + static int savetzlen = 0; + char *tz; +#endif /* POSIX_SEMANTICS */ +#ifndef HAVE_TM_ZONE +#ifndef HAVE_TM_NAME +#if ((defined(MAILHEADER_EXT) && !HAVE_VAR_TIMEZONE && HAVE_GETTIMEOFDAY) || \ + (!HAVE_TZNAME && HAVE_TIMEZONE)) + struct timeval tv; + struct timezone zone; +#endif +#endif /* HAVE_TM_NAME */ +#endif /* HAVE_TM_ZONE */ + int precision, flags; + char padding; + enum {LEFT, CHCASE, LOWER, UPPER, LOCALE_O, LOCALE_E}; +#define BIT_OF(n) (1U<<(n)) + + /* various tables, useful in North America */ + static const char days_l[][10] = { + "Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday", + }; + static const char months_l[][10] = { + "January", "February", "March", "April", + "May", "June", "July", "August", "September", + "October", "November", "December", + }; + static const char ampm[][3] = { "AM", "PM", }; + + if (s == NULL || format == NULL || timeptr == NULL || maxsize == 0) + return 0; + + /* quick check if we even need to bother */ + if (strchr(format, '%') == NULL && strlen(format) + 1 >= maxsize) { + err: + errno = ERANGE; + return 0; + } + +#ifndef POSIX_SEMANTICS + if (first) { + tzset(); + first = 0; + } +#else /* POSIX_SEMANTICS */ + tz = getenv("TZ"); + if (first) { + if (tz != NULL) { + int tzlen = strlen(tz); + + savetz = (char *) malloc(tzlen + 1); + if (savetz != NULL) { + savetzlen = tzlen + 1; + strcpy(savetz, tz); + } + } + tzset(); + first = 0; + } + /* if we have a saved TZ, and it is different, recapture and reset */ + if (tz && 
savetz && (tz[0] != savetz[0] || strcmp(tz, savetz) != 0)) { + i = strlen(tz) + 1; + if (i > savetzlen) { + savetz = (char *) realloc(savetz, i); + if (savetz) { + savetzlen = i; + strcpy(savetz, tz); + } + } else + strcpy(savetz, tz); + tzset(); + } +#endif /* POSIX_SEMANTICS */ + + for (; *format && s < endp - 1; format++) { +#define FLAG_FOUND() do { \ + if (precision > 0 || flags & (BIT_OF(LOCALE_E)|BIT_OF(LOCALE_O))) \ + goto unknown; \ + } while (0) +#define NEEDS(n) do if (s + (n) >= endp - 1) goto err; while (0) +#define FILL_PADDING(i) do { \ + if (!(flags & BIT_OF(LEFT)) && precision > i) { \ + NEEDS(precision); \ + memset(s, padding ? padding : ' ', precision - i); \ + s += precision - i; \ + } \ + else { \ + NEEDS(i); \ + } \ +} while (0); +#define FMT(def_pad, def_prec, fmt, val) \ + do { \ + int l; \ + if (precision <= 0) precision = (def_prec); \ + if (flags & BIT_OF(LEFT)) precision = 1; \ + l = snprintf(s, endp - s, \ + ((padding == '0' || (!padding && def_pad == '0')) ? "%0*"fmt : "%*"fmt), \ + precision, val); \ + if (l < 0) goto err; \ + s += l; \ + } while (0) +#define STRFTIME(fmt) \ + do { \ + i = rb_strftime(s, endp - s, fmt, timeptr, ts, gmt); \ + if (!i) return 0; \ + if (precision > i) {\ + memmove(s + precision - i, s, i);\ + memset(s, padding ? 
padding : ' ', precision - i); \ + s += precision; \ + }\ + else s += i; \ + } while (0) + + if (*format != '%') { + *s++ = *format; + continue; + } + tp = tbuf; + sp = format; + precision = -1; + flags = 0; + padding = 0; + again: + switch (*++format) { + case '\0': + format--; + goto unknown; + + case '%': + FILL_PADDING(1); + *s++ = '%'; + continue; + + case 'a': /* abbreviated weekday name */ + if (flags & BIT_OF(CHCASE)) { + flags &= ~(BIT_OF(LOWER)|BIT_OF(CHCASE)); + flags |= BIT_OF(UPPER); + } + if (timeptr->tm_wday < 0 || timeptr->tm_wday > 6) + i = 1, tp = "?"; + else + i = 3, tp = days_l[timeptr->tm_wday]; + break; + + case 'A': /* full weekday name */ + if (flags & BIT_OF(CHCASE)) { + flags &= ~(BIT_OF(LOWER)|BIT_OF(CHCASE)); + flags |= BIT_OF(UPPER); + } + if (timeptr->tm_wday < 0 || timeptr->tm_wday > 6) + i = 1, tp = "?"; + else + i = strlen(tp = days_l[timeptr->tm_wday]); + break; + +#ifdef SYSV_EXT + case 'h': /* abbreviated month name */ +#endif + case 'b': /* abbreviated month name */ + if (flags & BIT_OF(CHCASE)) { + flags &= ~(BIT_OF(LOWER)|BIT_OF(CHCASE)); + flags |= BIT_OF(UPPER); + } + if (timeptr->tm_mon < 0 || timeptr->tm_mon > 11) + i = 1, tp = "?"; + else + i = 3, tp = months_l[timeptr->tm_mon]; + break; + + case 'B': /* full month name */ + if (flags & BIT_OF(CHCASE)) { + flags &= ~(BIT_OF(LOWER)|BIT_OF(CHCASE)); + flags |= BIT_OF(UPPER); + } + if (timeptr->tm_mon < 0 || timeptr->tm_mon > 11) + i = 1, tp = "?"; + else + i = strlen(tp = months_l[timeptr->tm_mon]); + break; + + case 'c': /* appropriate date and time representation */ + STRFTIME("%a %b %e %H:%M:%S %Y"); + continue; + + case 'd': /* day of the month, 01 - 31 */ + i = range(1, timeptr->tm_mday, 31); + FMT('0', 2, "d", i); + continue; + + case 'H': /* hour, 24-hour clock, 00 - 23 */ + i = range(0, timeptr->tm_hour, 23); + FMT('0', 2, "d", i); + continue; + + case 'I': /* hour, 12-hour clock, 01 - 12 */ + i = range(0, timeptr->tm_hour, 23); + if (i == 0) + i = 12; + else if (i 
> 12) + i -= 12; + FMT('0', 2, "d", i); + continue; + + case 'j': /* day of the year, 001 - 366 */ + FMT('0', 3, "d", timeptr->tm_yday + 1); + continue; + + case 'm': /* month, 01 - 12 */ + i = range(0, timeptr->tm_mon, 11); + FMT('0', 2, "d", i + 1); + continue; + + case 'M': /* minute, 00 - 59 */ + i = range(0, timeptr->tm_min, 59); + FMT('0', 2, "d", i); + continue; + + case 'p': /* AM or PM based on 12-hour clock */ + case 'P': /* am or pm based on 12-hour clock */ + if ((*format == 'p' && (flags & BIT_OF(CHCASE))) || + (*format == 'P' && !(flags & (BIT_OF(CHCASE)|BIT_OF(UPPER))))) { + flags &= ~(BIT_OF(UPPER)|BIT_OF(CHCASE)); + flags |= BIT_OF(LOWER); + } + i = range(0, timeptr->tm_hour, 23); + if (i < 12) + tp = ampm[0]; + else + tp = ampm[1]; + i = 2; + break; + + case 's': + FMT('0', 1, "d", (int) ts->tv_sec); + continue; + + case 'S': /* second, 00 - 60 */ + i = range(0, timeptr->tm_sec, 60); + FMT('0', 2, "d", i); + continue; + + case 'U': /* week of year, Sunday is first day of week */ + FMT('0', 2, "d", weeknumber(timeptr, 0)); + continue; + + case 'w': /* weekday, Sunday == 0, 0 - 6 */ + i = range(0, timeptr->tm_wday, 6); + FMT('0', 1, "d", i); + continue; + + case 'W': /* week of year, Monday is first day of week */ + FMT('0', 2, "d", weeknumber(timeptr, 1)); + continue; + + case 'x': /* appropriate date representation */ + STRFTIME("%m/%d/%y"); + continue; + + case 'X': /* appropriate time representation */ + STRFTIME("%H:%M:%S"); + continue; + + case 'y': /* year without a century, 00 - 99 */ + i = timeptr->tm_year % 100; + FMT('0', 2, "d", i); + continue; + + case 'Y': /* year with century */ + FMT('0', 1, "ld", 1900L + timeptr->tm_year); + continue; + +#ifdef MAILHEADER_EXT + /* + * From: Chip Rosenthal + * Date: Sun, 19 Mar 1995 00:33:29 -0600 (CST) + * + * Warning: the %z [code] is implemented by inspecting the + * timezone name conditional compile settings, and + * inferring a method to get timezone offsets. 
I've tried + * this code on a couple of machines, but I don't doubt + * there is some system out there that won't like it. + * Maybe the easiest thing to do would be to bracket this + * with an #ifdef that can turn it off. The %z feature + * would be an admittedly obscure one that most folks can + * live without, but it would be a great help to those of + * us that muck around with various message processors. + */ + case 'z': /* time zone offset east of GMT e.g. -0600 */ + if (precision < 4) precision = 4; + NEEDS(precision + 1); + if (gmt) { + off = 0; + } + else { +#ifdef HAVE_TM_NAME + /* + * Systems with tm_name probably have tm_tzadj as + * secs west of GMT. Convert to mins east of GMT. + */ + off = -timeptr->tm_tzadj / 60; +#else /* !HAVE_TM_NAME */ +#ifdef HAVE_TM_ZONE + /* + * Systems with tm_zone probably have tm_gmtoff as + * secs east of GMT. Convert to mins east of GMT. + */ + off = timeptr->tm_gmtoff / 60; +#else /* !HAVE_TM_ZONE */ +#if HAVE_VAR_TIMEZONE +#if HAVE_VAR_ALTZONE + off = -(daylight ? timezone : altzone) / 60; +#else + off = -timezone / 60; +#endif +#else /* !HAVE_VAR_TIMEZONE */ +#ifdef HAVE_GETTIMEOFDAY + gettimeofday(&tv, &zone); + off = -zone.tz_minuteswest; +#else + /* no timezone info, then calc by myself */ + { + struct tm utc; + time_t now; + time(&now); + utc = *gmtime(&now); + off = (now - mktime(&utc)) / 60; + } +#endif +#endif /* !HAVE_VAR_TIMEZONE */ +#endif /* !HAVE_TM_ZONE */ +#endif /* !HAVE_TM_NAME */ + } + if (off < 0) { + off = -off; + *s++ = '-'; + } else { + *s++ = '+'; + } + off = off/60*100 + off%60; + i = snprintf(s, endp - s, (padding == ' ' ? 
"%*ld" : "%.*ld"), + precision - (precision > 4), off); + if (i < 0) goto err; + s += i; + continue; +#endif /* MAILHEADER_EXT */ + + case 'Z': /* time zone name or abbrevation */ + if (flags & BIT_OF(CHCASE)) { + flags &= ~(BIT_OF(UPPER)|BIT_OF(CHCASE)); + flags |= BIT_OF(LOWER); + } + if (gmt) { + i = 3; + tp = "UTC"; + break; + } +#ifdef HAVE_TZNAME + i = (daylight && timeptr->tm_isdst > 0); /* 0 or 1 */ + tp = tzname[i]; +#else +#ifdef HAVE_TM_ZONE + tp = timeptr->tm_zone; +#else +#ifdef HAVE_TM_NAME + tp = timeptr->tm_name; +#else +#ifdef HAVE_TIMEZONE + gettimeofday(& tv, & zone); +#ifdef TIMEZONE_VOID + tp = timezone(); +#else + tp = timezone(zone.tz_minuteswest, timeptr->tm_isdst > 0); +#endif /* TIMEZONE_VOID */ +#endif /* HAVE_TIMEZONE */ +#endif /* HAVE_TM_NAME */ +#endif /* HAVE_TM_ZONE */ +#endif /* HAVE_TZNAME */ + i = strlen(tp); + break; + +#ifdef SYSV_EXT + case 'n': /* same as \n */ + FILL_PADDING(1); + *s++ = '\n'; + continue; + + case 't': /* same as \t */ + FILL_PADDING(1); + *s++ = '\t'; + continue; + + case 'D': /* date as %m/%d/%y */ + STRFTIME("%m/%d/%y"); + continue; + + case 'e': /* day of month, blank padded */ + FMT(' ', 2, "d", range(1, timeptr->tm_mday, 31)); + continue; + + case 'r': /* time as %I:%M:%S %p */ + STRFTIME("%I:%M:%S %p"); + continue; + + case 'R': /* time as %H:%M */ + STRFTIME("%H:%M"); + continue; + + case 'T': /* time as %H:%M:%S */ + STRFTIME("%H:%M:%S"); + continue; +#endif + +#ifdef SUNOS_EXT + case 'k': /* hour, 24-hour clock, blank pad */ + i = range(0, timeptr->tm_hour, 23); + FMT(' ', 2, "d", i); + continue; + + case 'l': /* hour, 12-hour clock, 1 - 12, blank pad */ + i = range(0, timeptr->tm_hour, 23); + if (i == 0) + i = 12; + else if (i > 12) + i -= 12; + FMT(' ', 2, "d", i); + continue; +#endif + + +#ifdef VMS_EXT + case 'v': /* date as dd-bbb-YYYY */ + w = snprintf(s, endp - s, "%2d-%3.3s-%4ld", + range(1, timeptr->tm_mday, 31), + months_l[range(0, timeptr->tm_mon, 11)], + timeptr->tm_year + 1900L); + if 
(w < 0) goto err; + for (i = 3; i < 6; i++) + if (ISLOWER(s[i])) + s[i] = TOUPPER(s[i]); + s += w; + continue; +#endif + + +#ifdef POSIX2_DATE + case 'C': + FMT('0', 2, "ld", (timeptr->tm_year + 1900L) / 100); + continue; + + + case 'E': + /* POSIX locale extensions, ignored for now */ + flags |= BIT_OF(LOCALE_E); + goto again; + case 'O': + /* POSIX locale extensions, ignored for now */ + flags |= BIT_OF(LOCALE_O); + goto again; + + case 'V': /* week of year according ISO 8601 */ + FMT('0', 2, "d", iso8601wknum(timeptr)); + continue; + + case 'u': + /* ISO 8601: Weekday as a decimal number [1 (Monday) - 7] */ + FMT('0', 1, "d", timeptr->tm_wday == 0 ? 7 : timeptr->tm_wday); + continue; +#endif /* POSIX2_DATE */ + +#ifdef ISO_DATE_EXT + case 'G': + case 'g': + /* + * Year of ISO week. + * + * If it's December but the ISO week number is one, + * that week is in next year. + * If it's January but the ISO week number is 52 or + * 53, that week is in last year. + * Otherwise, it's this year. + */ + w = iso8601wknum(timeptr); + if (timeptr->tm_mon == 11 && w == 1) + y = 1900L + timeptr->tm_year + 1; + else if (timeptr->tm_mon == 0 && w >= 52) + y = 1900L + timeptr->tm_year - 1; + else + y = 1900L + timeptr->tm_year; + + if (*format == 'G') + FMT('0', 1, "ld", y); + else + FMT('0', 2, "ld", y % 100); + continue; +#endif /* ISO_DATE_EXT */ + + + case 'L': + w = 3; + goto subsec; + + case 'N': + /* + * fractional second digits. default is 9 digits + * (nanosecond). 
+ * + * %3N millisecond (3 digits) + * %6N microsecond (6 digits) + * %9N nanosecond (9 digits) + */ + w = 9; + subsec: + { + long n = ts->tv_nsec; + + if (precision <= 0) { + precision = w; + } + NEEDS(precision); + if (precision < w) { + snprintf(tbuf, w + 1, "%.*ld", w, n); + memcpy(s, tbuf, precision); + } + else { + snprintf(s, endp - s, "%.*ld", w, n); + memset(s + w, '0', precision - w); + } + s += precision; + } + continue; + + case 'F': /* Equivalent to %Y-%m-%d */ + { + int mon, mday; + mon = range(0, timeptr->tm_mon, 11) + 1; + mday = range(1, timeptr->tm_mday, 31); + i = snprintf(s, endp - s, "%ld-%02d-%02d", + 1900L + timeptr->tm_year, mon, mday); + if (i < 0) + goto err; + s += i; + } + continue; + + case '-': + FLAG_FOUND(); + flags |= BIT_OF(LEFT); + padding = precision = 0; + goto again; + + case '^': + FLAG_FOUND(); + flags |= BIT_OF(UPPER); + goto again; + + case '#': + FLAG_FOUND(); + flags |= BIT_OF(CHCASE); + goto again; + + case '_': + FLAG_FOUND(); + padding = ' '; + goto again; + + case '0': + padding = '0'; + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + { + char *e; + precision = (int)strtoul(format, &e, 10); + format = e - 1; + goto again; + } + + default: + unknown: + i = format - sp + 1; + tp = sp; + precision = -1; + flags = 0; + padding = 0; + break; + } + if (i) { + FILL_PADDING(i); + memcpy(s, tp, i); + switch (flags & (BIT_OF(UPPER)|BIT_OF(LOWER))) { + case BIT_OF(UPPER): + do { + if (ISLOWER(*s)) *s = TOUPPER(*s); + } while (s++, --i); + break; + case BIT_OF(LOWER): + do { + if (ISUPPER(*s)) *s = TOLOWER(*s); + } while (s++, --i); + break; + default: + s += i; + break; + } + } + } + if (s >= endp) { + goto err; + } + if (*format == '\0') { + *s = '\0'; + return (s - start); + } else + return 0; +} + +/* isleap --- is a year a leap year? 
*/ + +#ifndef __STDC__ +static int +isleap(year) +long year; +#else +static int +isleap(long year) +#endif +{ + return ((year % 4 == 0 && year % 100 != 0) || year % 400 == 0); +} + + +#ifdef POSIX2_DATE +/* iso8601wknum --- compute week number according to ISO 8601 */ + +#ifndef __STDC__ +static int +iso8601wknum(timeptr) +const struct tm *timeptr; +#else +static int +iso8601wknum(const struct tm *timeptr) +#endif +{ + /* + * From 1003.2: + * If the week (Monday to Sunday) containing January 1 + * has four or more days in the new year, then it is week 1; + * otherwise it is the highest numbered week of the previous + * year (52 or 53), and the next week is week 1. + * + * ADR: This means if Jan 1 was Monday through Thursday, + * it was week 1, otherwise week 52 or 53. + * + * XPG4 erroneously included POSIX.2 rationale text in the + * main body of the standard. Thus it requires week 53. + */ + + int weeknum, jan1day; + + /* get week number, Monday as first day of the week */ + weeknum = weeknumber(timeptr, 1); + + /* + * With thanks and tip of the hatlo to tml@tik.vtt.fi + * + * What day of the week does January 1 fall on? + * We know that + * (timeptr->tm_yday - jan1.tm_yday) MOD 7 == + * (timeptr->tm_wday - jan1.tm_wday) MOD 7 + * and that + * jan1.tm_yday == 0 + * and that + * timeptr->tm_wday MOD 7 == timeptr->tm_wday + * from which it follows that. . . + */ + jan1day = timeptr->tm_wday - (timeptr->tm_yday % 7); + if (jan1day < 0) + jan1day += 7; + + /* + * If Jan 1 was a Monday through Thursday, it was in + * week 1. Otherwise it was last year's highest week, which is + * this year's week 0. + * + * What does that mean? + * If Jan 1 was Monday, the week number is exactly right, it can + * never be 0. + * If it was Tuesday through Thursday, the weeknumber is one + * less than it should be, so we add one. + * Otherwise, Friday, Saturday or Sunday, the week number is + * OK, but if it is 0, it needs to be 52 or 53. 
+ */ + switch (jan1day) { + case 1: /* Monday */ + break; + case 2: /* Tuesday */ + case 3: /* Wednesday */ + case 4: /* Thursday */ + weeknum++; + break; + case 5: /* Friday */ + case 6: /* Saturday */ + case 0: /* Sunday */ + if (weeknum == 0) { +#ifdef USE_BROKEN_XPG4 + /* XPG4 (as of March 1994) says 53 unconditionally */ + weeknum = 53; +#else + /* get week number of last week of last year */ + struct tm dec31ly; /* 12/31 last year */ + dec31ly = *timeptr; + dec31ly.tm_year--; + dec31ly.tm_mon = 11; + dec31ly.tm_mday = 31; + dec31ly.tm_wday = (jan1day == 0) ? 6 : jan1day - 1; + dec31ly.tm_yday = 364 + isleap(dec31ly.tm_year + 1900L); + weeknum = iso8601wknum(& dec31ly); +#endif + } + break; + } + + if (timeptr->tm_mon == 11) { + /* + * The last week of the year + * can be in week 1 of next year. + * Sigh. + * + * This can only happen if + * M T W + * 29 30 31 + * 30 31 + * 31 + */ + int wday, mday; + + wday = timeptr->tm_wday; + mday = timeptr->tm_mday; + if ( (wday == 1 && (mday >= 29 && mday <= 31)) + || (wday == 2 && (mday == 30 || mday == 31)) + || (wday == 3 && mday == 31)) + weeknum = 1; + } + + return weeknum; +} +#endif + +/* weeknumber --- figure how many weeks into the year */ + +/* With thanks and tip of the hatlo to ado@elsie.nci.nih.gov */ + +#ifndef __STDC__ +static int +weeknumber(timeptr, firstweekday) +const struct tm *timeptr; +int firstweekday; +#else +static int +weeknumber(const struct tm *timeptr, int firstweekday) +#endif +{ + int wday = timeptr->tm_wday; + int ret; + + if (firstweekday == 1) { + if (wday == 0) /* sunday */ + wday = 6; + else + wday--; + } + ret = ((timeptr->tm_yday + 7 - wday) / 7); + if (ret < 0) + ret = 0; + return ret; +} + +#if 0 +/* ADR --- I'm loathe to mess with ado's code ... */ + +Date: Wed, 24 Apr 91 20:54:08 MDT +From: Michal Jaegermann +To: arnold@audiofax.com + +Hi Arnold, +in a process of fixing of strftime() in libraries on Atari ST I grabbed +some pieces of code from your own strftime. 
When doing that it came +to mind that your weeknumber() function compiles a little bit nicer +in the following form: +/* + * firstweekday is 0 if starting in Sunday, non-zero if in Monday + */ +{ + return (timeptr->tm_yday - timeptr->tm_wday + + (firstweekday ? (timeptr->tm_wday ? 8 : 1) : 7)) / 7; +} +How nicer it depends on a compiler, of course, but always a tiny bit. + + Cheers, + Michal + ntomczak@vm.ucs.ualberta.ca +#endif + +#ifdef TEST_STRFTIME + +/* + * NAME: + * tst + * + * SYNOPSIS: + * tst + * + * DESCRIPTION: + * "tst" is a test driver for the function "strftime". + * + * OPTIONS: + * None. + * + * AUTHOR: + * Karl Vogel + * Control Data Systems, Inc. + * vogelke@c-17igp.wpafb.af.mil + * + * BUGS: + * None noticed yet. + * + * COMPILE: + * cc -o tst -DTEST_STRFTIME strftime.c + */ + +/* ADR: I reformatted this to my liking, and deleted some unneeded code. */ + +#ifndef NULL +#include +#endif +#include +#include + +#define MAXTIME 132 + +/* + * Array of time formats. + */ + +static char *array[] = +{ + "(%%A) full weekday name, var length (Sunday..Saturday) %A", + "(%%B) full month name, var length (January..December) %B", + "(%%C) Century %C", + "(%%D) date (%%m/%%d/%%y) %D", + "(%%E) Locale extensions (ignored) %E", + "(%%H) hour (24-hour clock, 00..23) %H", + "(%%I) hour (12-hour clock, 01..12) %I", + "(%%M) minute (00..59) %M", + "(%%O) Locale extensions (ignored) %O", + "(%%R) time, 24-hour (%%H:%%M) %R", + "(%%S) second (00..60) %S", + "(%%T) time, 24-hour (%%H:%%M:%%S) %T", + "(%%U) week of year, Sunday as first day of week (00..53) %U", + "(%%V) week of year according to ISO 8601 %V", + "(%%W) week of year, Monday as first day of week (00..53) %W", + "(%%X) appropriate locale time representation (%H:%M:%S) %X", + "(%%Y) year with century (1970...) 
%Y", + "(%%Z) timezone (EDT), or blank if timezone not determinable %Z", + "(%%a) locale's abbreviated weekday name (Sun..Sat) %a", + "(%%b) locale's abbreviated month name (Jan..Dec) %b", + "(%%c) full date (Sat Nov 4 12:02:33 1989)%n%t%t%t %c", + "(%%d) day of the month (01..31) %d", + "(%%e) day of the month, blank-padded ( 1..31) %e", + "(%%h) should be same as (%%b) %h", + "(%%j) day of the year (001..366) %j", + "(%%k) hour, 24-hour clock, blank pad ( 0..23) %k", + "(%%l) hour, 12-hour clock, blank pad ( 0..12) %l", + "(%%m) month (01..12) %m", + "(%%p) locale's AM or PM based on 12-hour clock %p", + "(%%r) time, 12-hour (same as %%I:%%M:%%S %%p) %r", + "(%%u) ISO 8601: Weekday as decimal number [1 (Monday) - 7] %u", + "(%%v) VMS date (dd-bbb-YYYY) %v", + "(%%w) day of week (0..6, Sunday == 0) %w", + "(%%x) appropriate locale date representation %x", + "(%%y) last two digits of year (00..99) %y", + "(%%z) timezone offset east of GMT as HHMM (e.g. -0500) %z", + (char *) NULL +}; + +/* main routine. */ + +int +main(argc, argv) +int argc; +char **argv; +{ + long time(); + + char *next; + char string[MAXTIME]; + + int k; + int length; + + struct tm *tm; + + long clock; + + /* Call the function. */ + + clock = time((long *) 0); + tm = localtime(&clock); + + for (k = 0; next = array[k]; k++) { + length = strftime(string, MAXTIME, next, tm); + printf("%s\n", string); + } + + exit(0); +} +#endif /* TEST_STRFTIME */ diff --git a/string.c b/string.c new file mode 100644 index 0000000..122d80f --- /dev/null +++ b/string.c @@ -0,0 +1,7275 @@ +/********************************************************************** + + string.c - + + $Author: yugui $ + created at: Mon Aug 9 17:12:58 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/re.h" +#include "ruby/encoding.h" + +#define BEG(no) regs->beg[no] +#define END(no) regs->end[no] + +#include +#include + +#ifdef HAVE_UNISTD_H +#include +#endif + +#undef rb_str_new_cstr +#undef rb_tainted_str_new_cstr +#undef rb_usascii_str_new_cstr +#undef rb_external_str_new_cstr +#undef rb_locale_str_new_cstr +#undef rb_str_new2 +#undef rb_str_new3 +#undef rb_str_new4 +#undef rb_str_new5 +#undef rb_tainted_str_new2 +#undef rb_usascii_str_new2 +#undef rb_str_dup_frozen +#undef rb_str_buf_new_cstr +#undef rb_str_buf_new2 +#undef rb_str_buf_cat2 +#undef rb_str_cat2 + +VALUE rb_cString; +VALUE rb_cSymbol; + +#define RUBY_MAX_CHAR_LEN 16 +#define STR_TMPLOCK FL_USER7 +#define STR_NOEMBED FL_USER1 +#define STR_SHARED FL_USER2 /* = ELTS_SHARED */ +#define STR_ASSOC FL_USER3 +#define STR_SHARED_P(s) FL_ALL(s, STR_NOEMBED|ELTS_SHARED) +#define STR_ASSOC_P(s) FL_ALL(s, STR_NOEMBED|STR_ASSOC) +#define STR_NOCAPA (STR_NOEMBED|ELTS_SHARED|STR_ASSOC) +#define STR_NOCAPA_P(s) (FL_TEST(s,STR_NOEMBED) && FL_ANY(s,ELTS_SHARED|STR_ASSOC)) +#define STR_UNSET_NOCAPA(s) do {\ + if (FL_TEST(s,STR_NOEMBED)) FL_UNSET(s,(ELTS_SHARED|STR_ASSOC));\ +} while (0) + + +#define STR_SET_NOEMBED(str) do {\ + FL_SET(str, STR_NOEMBED);\ + STR_SET_EMBED_LEN(str, 0);\ +} while (0) +#define STR_SET_EMBED(str) FL_UNSET(str, STR_NOEMBED) +#define STR_EMBED_P(str) (!FL_TEST(str, STR_NOEMBED)) +#define STR_SET_EMBED_LEN(str, n) do { \ + long tmp_n = (n);\ + RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\ + RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\ +} while (0) + +#define STR_SET_LEN(str, n) do { \ + if (STR_EMBED_P(str)) {\ + STR_SET_EMBED_LEN(str, n);\ + }\ + else {\ + RSTRING(str)->as.heap.len = (n);\ + }\ +} while (0) + +#define STR_DEC_LEN(str) do {\ + if (STR_EMBED_P(str)) {\ + long n = 
RSTRING_LEN(str);\ + n--;\ + STR_SET_EMBED_LEN(str, n);\ + }\ + else {\ + RSTRING(str)->as.heap.len--;\ + }\ +} while (0) + +#define RESIZE_CAPA(str,capacity) do {\ + if (STR_EMBED_P(str)) {\ + if ((capacity) > RSTRING_EMBED_LEN_MAX) {\ + char *tmp = ALLOC_N(char, capacity+1);\ + memcpy(tmp, RSTRING_PTR(str), RSTRING_LEN(str));\ + RSTRING(str)->as.heap.ptr = tmp;\ + RSTRING(str)->as.heap.len = RSTRING_LEN(str);\ + STR_SET_NOEMBED(str);\ + RSTRING(str)->as.heap.aux.capa = (capacity);\ + }\ + }\ + else {\ + REALLOC_N(RSTRING(str)->as.heap.ptr, char, (capacity)+1);\ + if (!STR_NOCAPA_P(str))\ + RSTRING(str)->as.heap.aux.capa = (capacity);\ + }\ +} while (0) + +#define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) +#define is_broken_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) + +#define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str)) + +static inline int +single_byte_optimizable(VALUE str) +{ + rb_encoding *enc; + + /* Conservative. It may be ENC_CODERANGE_UNKNOWN. */ + if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) + return 1; + + enc = STR_ENC_GET(str); + if (rb_enc_mbmaxlen(enc) == 1) + return 1; + + /* Conservative. Possibly single byte. + * "\xa1" in Shift_JIS for example. 
*/ + return 0; +} + +VALUE rb_fs; + +static inline const char * +search_nonascii(const char *p, const char *e) +{ +#if SIZEOF_VALUE == 8 +# define NONASCII_MASK 0x8080808080808080LL +#elif SIZEOF_VALUE == 4 +# define NONASCII_MASK 0x80808080UL +#endif +#ifdef NONASCII_MASK + if (sizeof(VALUE) * 2 < e - p) { + const VALUE *s, *t; + const VALUE lowbits = sizeof(VALUE) - 1; + s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits)); + while (p < (const char *)s) { + if (!ISASCII(*p)) + return p; + p++; + } + t = (const VALUE*)(~lowbits & (VALUE)e); + while (s < t) { + if (*s & NONASCII_MASK) { + t = s; + break; + } + s++; + } + p = (const char *)t; + } +#endif + while (p < e) { + if (!ISASCII(*p)) + return p; + p++; + } + return NULL; +} + +static int +coderange_scan(const char *p, long len, rb_encoding *enc) +{ + const char *e = p + len; + + if (rb_enc_to_index(enc) == 0) { + /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */ + p = search_nonascii(p, e); + return p ? ENC_CODERANGE_VALID : ENC_CODERANGE_7BIT; + } + + if (rb_enc_asciicompat(enc)) { + p = search_nonascii(p, e); + if (!p) { + return ENC_CODERANGE_7BIT; + } + while (p < e) { + int ret = rb_enc_precise_mbclen(p, e, enc); + if (!MBCLEN_CHARFOUND_P(ret)) { + return ENC_CODERANGE_BROKEN; + } + p += MBCLEN_CHARFOUND_LEN(ret); + if (p < e) { + p = search_nonascii(p, e); + if (!p) { + return ENC_CODERANGE_VALID; + } + } + } + if (e < p) { + return ENC_CODERANGE_BROKEN; + } + return ENC_CODERANGE_VALID; + } + + while (p < e) { + int ret = rb_enc_precise_mbclen(p, e, enc); + + if (!MBCLEN_CHARFOUND_P(ret)) { + return ENC_CODERANGE_BROKEN; + } + p += MBCLEN_CHARFOUND_LEN(ret); + } + if (e < p) { + return ENC_CODERANGE_BROKEN; + } + return ENC_CODERANGE_VALID; +} + +long +rb_str_coderange_scan_restartable(const char *s, const char *e, rb_encoding *enc, int *cr) +{ + const char *p = s; + + if (*cr == ENC_CODERANGE_BROKEN) + return e - s; + + if (rb_enc_to_index(enc) == 0) { + /* enc is ASCII-8BIT. 
ASCII-8BIT string never be broken. */ + p = search_nonascii(p, e); + *cr = (!p && *cr != ENC_CODERANGE_VALID) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID; + return e - s; + } + else if (rb_enc_asciicompat(enc)) { + p = search_nonascii(p, e); + if (!p) { + if (*cr != ENC_CODERANGE_VALID) *cr = ENC_CODERANGE_7BIT; + return e - s; + } + while (p < e) { + int ret = rb_enc_precise_mbclen(p, e, enc); + if (!MBCLEN_CHARFOUND_P(ret)) { + *cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_UNKNOWN; + return p - s; + } + p += MBCLEN_CHARFOUND_LEN(ret); + if (p < e) { + p = search_nonascii(p, e); + if (!p) { + *cr = ENC_CODERANGE_VALID; + return e - s; + } + } + } + *cr = e < p ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_VALID; + return p - s; + } + else { + while (p < e) { + int ret = rb_enc_precise_mbclen(p, e, enc); + if (!MBCLEN_CHARFOUND_P(ret)) { + *cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_UNKNOWN; + return p - s; + } + p += MBCLEN_CHARFOUND_LEN(ret); + } + *cr = e < p ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_VALID; + return p - s; + } +} + +static inline void +str_enc_copy(VALUE str1, VALUE str2) +{ + rb_enc_set_index(str1, ENCODING_GET(str2)); +} + +static void +rb_enc_cr_str_copy_for_substr(VALUE dest, VALUE src) +{ + /* this function is designed for copying encoding and coderange + * from src to new string "dest" which is made from the part of src. 
+ */ + str_enc_copy(dest, src); + switch (ENC_CODERANGE(src)) { + case ENC_CODERANGE_7BIT: + ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT); + break; + case ENC_CODERANGE_VALID: + if (!rb_enc_asciicompat(STR_ENC_GET(src)) || + search_nonascii(RSTRING_PTR(dest), RSTRING_END(dest))) + ENC_CODERANGE_SET(dest, ENC_CODERANGE_VALID); + else + ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT); + break; + default: + if (RSTRING_LEN(dest) == 0) { + if (!rb_enc_asciicompat(STR_ENC_GET(src))) + ENC_CODERANGE_SET(dest, ENC_CODERANGE_VALID); + else + ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT); + } + break; + } +} + +static void +rb_enc_cr_str_exact_copy(VALUE dest, VALUE src) +{ + str_enc_copy(dest, src); + ENC_CODERANGE_SET(dest, ENC_CODERANGE(src)); +} + +int +rb_enc_str_coderange(VALUE str) +{ + int cr = ENC_CODERANGE(str); + + if (cr == ENC_CODERANGE_UNKNOWN) { + rb_encoding *enc = STR_ENC_GET(str); + cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc); + ENC_CODERANGE_SET(str, cr); + } + return cr; +} + +int +rb_enc_str_asciionly_p(VALUE str) +{ + rb_encoding *enc = STR_ENC_GET(str); + + if (!rb_enc_asciicompat(enc)) + return Qfalse; + else if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) + return Qtrue; + return Qfalse; +} + +static inline void +str_mod_check(VALUE s, const char *p, long len) +{ + if (RSTRING_PTR(s) != p || RSTRING_LEN(s) != len){ + rb_raise(rb_eRuntimeError, "string modified"); + } +} + +static inline void +str_frozen_check(VALUE s) +{ + if (OBJ_FROZEN(s)) { + rb_raise(rb_eRuntimeError, "string frozen"); + } +} + +size_t +rb_str_capacity(VALUE str) +{ + if (STR_EMBED_P(str)) { + return RSTRING_EMBED_LEN_MAX; + } + else if (STR_NOCAPA_P(str)) { + return RSTRING(str)->as.heap.len; + } + else { + return RSTRING(str)->as.heap.aux.capa; + } +} + +static inline VALUE +str_alloc(VALUE klass) +{ + NEWOBJ(str, struct RString); + OBJSETUP(str, klass, T_STRING); + + str->as.heap.ptr = 0; + str->as.heap.len = 0; + str->as.heap.aux.capa = 0; + + return 
(VALUE)str; +} + +static VALUE +str_new(VALUE klass, const char *ptr, long len) +{ + VALUE str; + + if (len < 0) { + rb_raise(rb_eArgError, "negative string size (or size too big)"); + } + + str = str_alloc(klass); + if (len > RSTRING_EMBED_LEN_MAX) { + RSTRING(str)->as.heap.aux.capa = len; + RSTRING(str)->as.heap.ptr = ALLOC_N(char,len+1); + STR_SET_NOEMBED(str); + } + else if (len == 0) { + ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT); + } + if (ptr) { + memcpy(RSTRING_PTR(str), ptr, len); + } + STR_SET_LEN(str, len); + RSTRING_PTR(str)[len] = '\0'; + return str; +} + +VALUE +rb_str_new(const char *ptr, long len) +{ + return str_new(rb_cString, ptr, len); +} + +VALUE +rb_usascii_str_new(const char *ptr, long len) +{ + VALUE str = rb_str_new(ptr, len); + ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT); + return str; +} + +VALUE +rb_enc_str_new(const char *ptr, long len, rb_encoding *enc) +{ + VALUE str = rb_str_new(ptr, len); + rb_enc_associate(str, enc); + return str; +} + +VALUE +rb_str_new_cstr(const char *ptr) +{ + if (!ptr) { + rb_raise(rb_eArgError, "NULL pointer given"); + } + return rb_str_new(ptr, strlen(ptr)); +} + +RUBY_ALIAS_FUNCTION(rb_str_new2(const char *ptr), rb_str_new_cstr, (ptr)) +#define rb_str_new2 rb_str_new_cstr + +VALUE +rb_usascii_str_new_cstr(const char *ptr) +{ + VALUE str = rb_str_new2(ptr); + ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT); + return str; +} + +RUBY_ALIAS_FUNCTION(rb_usascii_str_new2(const char *ptr), rb_usascii_str_new_cstr, (ptr)) +#define rb_usascii_str_new2 rb_usascii_str_new_cstr + +VALUE +rb_tainted_str_new(const char *ptr, long len) +{ + VALUE str = rb_str_new(ptr, len); + + OBJ_TAINT(str); + return str; +} + +VALUE +rb_tainted_str_new_cstr(const char *ptr) +{ + VALUE str = rb_str_new2(ptr); + + OBJ_TAINT(str); + return str; +} + +RUBY_ALIAS_FUNCTION(rb_tainted_str_new2(const char *ptr), rb_tainted_str_new_cstr, (ptr)) +#define rb_tainted_str_new2 
rb_tainted_str_new_cstr + +VALUE +rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts) +{ + rb_econv_t *ec; + rb_econv_result_t ret; + long len; + VALUE newstr; + const unsigned char *sp; + unsigned char *dp; + + if (!to) return str; + if (from == to) return str; + if ((rb_enc_asciicompat(to) && ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) || + to == rb_ascii8bit_encoding()) { + if (STR_ENC_GET(str) != to) { + str = rb_str_dup(str); + rb_enc_associate(str, to); + } + return str; + } + + len = RSTRING_LEN(str); + newstr = rb_str_new(0, len); + + retry: + ec = rb_econv_open_opts(from->name, to->name, ecflags, ecopts); + if (!ec) return str; + + sp = (unsigned char*)RSTRING_PTR(str); + dp = (unsigned char*)RSTRING_PTR(newstr); + ret = rb_econv_convert(ec, &sp, (unsigned char*)RSTRING_END(str), + &dp, (unsigned char*)RSTRING_END(newstr), 0); + rb_econv_close(ec); + switch (ret) { + case econv_destination_buffer_full: + /* destination buffer short */ + len *= 2; + rb_str_resize(newstr, len); + goto retry; + + case econv_finished: + len = dp - (unsigned char*)RSTRING_PTR(newstr); + rb_str_set_len(newstr, len); + rb_enc_associate(newstr, to); + return newstr; + + default: + /* some error, return original */ + return str; + } +} + +VALUE +rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to) +{ + return rb_str_conv_enc_opts(str, from, to, 0, Qnil); +} + +VALUE +rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc) +{ + VALUE str; + + str = rb_tainted_str_new(ptr, len); + if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT && + eenc == rb_usascii_encoding()) { + rb_enc_associate(str, rb_ascii8bit_encoding()); + return str; + } + rb_enc_associate(str, eenc); + return rb_str_conv_enc(str, eenc, rb_default_internal_encoding()); +} + +VALUE +rb_external_str_new(const char *ptr, long len) +{ + return rb_external_str_new_with_enc(ptr, len, rb_default_external_encoding()); +} + +VALUE 
+rb_external_str_new_cstr(const char *ptr) +{ + return rb_external_str_new_with_enc(ptr, strlen(ptr), rb_default_external_encoding()); +} + +VALUE +rb_locale_str_new(const char *ptr, long len) +{ + return rb_external_str_new_with_enc(ptr, len, rb_locale_encoding()); +} + +VALUE +rb_locale_str_new_cstr(const char *ptr) +{ + return rb_external_str_new_with_enc(ptr, strlen(ptr), rb_locale_encoding()); +} + +VALUE +rb_str_export(VALUE str) +{ + return rb_str_conv_enc(str, STR_ENC_GET(str), rb_default_external_encoding()); +} + +VALUE +rb_str_export_locale(VALUE str) +{ + return rb_str_conv_enc(str, STR_ENC_GET(str), rb_locale_encoding()); +} + +VALUE +rb_str_export_to_enc(VALUE str, rb_encoding *enc) +{ + return rb_str_conv_enc(str, STR_ENC_GET(str), enc); +} + +static VALUE +str_replace_shared(VALUE str2, VALUE str) +{ + if (RSTRING_LEN(str) <= RSTRING_EMBED_LEN_MAX) { + STR_SET_EMBED(str2); + memcpy(RSTRING_PTR(str2), RSTRING_PTR(str), RSTRING_LEN(str)+1); + STR_SET_EMBED_LEN(str2, RSTRING_LEN(str)); + } + else { + FL_SET(str2, STR_NOEMBED); + RSTRING(str2)->as.heap.len = RSTRING_LEN(str); + RSTRING(str2)->as.heap.ptr = RSTRING_PTR(str); + RSTRING(str2)->as.heap.aux.shared = str; + FL_SET(str2, ELTS_SHARED); + } + rb_enc_cr_str_exact_copy(str2, str); + + return str2; +} + +static VALUE +str_new_shared(VALUE klass, VALUE str) +{ + return str_replace_shared(str_alloc(klass), str); +} + +static VALUE +str_new3(VALUE klass, VALUE str) +{ + return str_new_shared(klass, str); +} + +VALUE +rb_str_new_shared(VALUE str) +{ + VALUE str2 = str_new3(rb_obj_class(str), str); + + OBJ_INFECT(str2, str); + return str2; +} + +RUBY_ALIAS_FUNCTION(rb_str_new3(VALUE str), rb_str_new_shared, (str)) +#define rb_str_new3 rb_str_new_shared + +static VALUE +str_new4(VALUE klass, VALUE str) +{ + VALUE str2; + + str2 = str_alloc(klass); + STR_SET_NOEMBED(str2); + RSTRING(str2)->as.heap.len = RSTRING_LEN(str); + RSTRING(str2)->as.heap.ptr = RSTRING_PTR(str); + if (STR_SHARED_P(str)) { + 
FL_SET(str2, ELTS_SHARED); + RSTRING(str2)->as.heap.aux.shared = RSTRING(str)->as.heap.aux.shared; + } + else { + FL_SET(str, ELTS_SHARED); + RSTRING(str)->as.heap.aux.shared = str2; + } + rb_enc_cr_str_exact_copy(str2, str); + OBJ_INFECT(str2, str); + return str2; +} + +VALUE +rb_str_new_frozen(VALUE orig) +{ + VALUE klass, str; + + if (OBJ_FROZEN(orig)) return orig; + klass = rb_obj_class(orig); + if (STR_SHARED_P(orig) && (str = RSTRING(orig)->as.heap.aux.shared)) { + long ofs; + ofs = RSTRING_LEN(str) - RSTRING_LEN(orig); + if ((ofs > 0) || (klass != RBASIC(str)->klass) || + (!OBJ_TAINTED(str) && OBJ_TAINTED(orig)) || + ENCODING_GET(str) != ENCODING_GET(orig)) { + str = str_new3(klass, str); + RSTRING(str)->as.heap.ptr += ofs; + RSTRING(str)->as.heap.len -= ofs; + rb_enc_cr_str_exact_copy(str, orig); + OBJ_INFECT(str, orig); + } + } + else if (STR_EMBED_P(orig)) { + str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig)); + rb_enc_cr_str_exact_copy(str, orig); + OBJ_INFECT(str, orig); + } + else if (STR_ASSOC_P(orig)) { + VALUE assoc = RSTRING(orig)->as.heap.aux.shared; + FL_UNSET(orig, STR_ASSOC); + str = str_new4(klass, orig); + FL_SET(str, STR_ASSOC); + RSTRING(str)->as.heap.aux.shared = assoc; + } + else { + str = str_new4(klass, orig); + } + OBJ_FREEZE(str); + return str; +} + +RUBY_ALIAS_FUNCTION(rb_str_new4(VALUE orig), rb_str_new_frozen, (orig)) +#define rb_str_new4 rb_str_new_frozen + +VALUE +rb_str_new_with_class(VALUE obj, const char *ptr, long len) +{ + return str_new(rb_obj_class(obj), ptr, len); +} + +RUBY_ALIAS_FUNCTION(rb_str_new5(VALUE obj, const char *ptr, long len), + rb_str_new_with_class, (obj, ptr, len)) +#define rb_str_new5 rb_str_new_with_class + +#define STR_BUF_MIN_SIZE 128 + +VALUE +rb_str_buf_new(long capa) +{ + VALUE str = str_alloc(rb_cString); + + if (capa < STR_BUF_MIN_SIZE) { + capa = STR_BUF_MIN_SIZE; + } + FL_SET(str, STR_NOEMBED); + RSTRING(str)->as.heap.aux.capa = capa; + RSTRING(str)->as.heap.ptr = ALLOC_N(char, capa+1); 
+ RSTRING(str)->as.heap.ptr[0] = '\0'; + + return str; +} + +VALUE +rb_str_buf_new_cstr(const char *ptr) +{ + VALUE str; + long len = strlen(ptr); + + str = rb_str_buf_new(len); + rb_str_buf_cat(str, ptr, len); + + return str; +} + +RUBY_ALIAS_FUNCTION(rb_str_buf_new2(const char *ptr), rb_str_buf_new_cstr, (ptr)) +#define rb_str_buf_new2 rb_str_buf_new_cstr + +VALUE +rb_str_tmp_new(long len) +{ + return str_new(0, 0, len); +} + +void +rb_str_free(VALUE str) +{ + if (!STR_EMBED_P(str) && !STR_SHARED_P(str)) { + xfree(RSTRING(str)->as.heap.ptr); + } +} + +VALUE +rb_str_to_str(VALUE str) +{ + return rb_convert_type(str, T_STRING, "String", "to_str"); +} + +void +rb_str_shared_replace(VALUE str, VALUE str2) +{ + rb_encoding *enc; + int cr; + if (str == str2) return; + enc = STR_ENC_GET(str2); + cr = ENC_CODERANGE(str2); + rb_str_modify(str); + OBJ_INFECT(str, str2); + if (!STR_SHARED_P(str) && !STR_EMBED_P(str)) { + xfree(RSTRING_PTR(str)); + } + if (RSTRING_LEN(str2) <= RSTRING_EMBED_LEN_MAX) { + STR_SET_EMBED(str); + memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), RSTRING_LEN(str2)+1); + STR_SET_EMBED_LEN(str, RSTRING_LEN(str2)); + rb_enc_associate(str, enc); + ENC_CODERANGE_SET(str, cr); + return; + } + STR_SET_NOEMBED(str); + STR_UNSET_NOCAPA(str); + RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2); + RSTRING(str)->as.heap.len = RSTRING_LEN(str2); + if (STR_NOCAPA_P(str2)) { + FL_SET(str, RBASIC(str2)->flags & STR_NOCAPA); + RSTRING(str)->as.heap.aux.shared = RSTRING(str2)->as.heap.aux.shared; + } + else { + RSTRING(str)->as.heap.aux.capa = RSTRING(str2)->as.heap.aux.capa; + } + STR_SET_EMBED(str2); /* abandon str2 */ + RSTRING_PTR(str2)[0] = 0; + STR_SET_EMBED_LEN(str2, 0); + rb_enc_associate(str, enc); + ENC_CODERANGE_SET(str, cr); +} + +static ID id_to_s; + +VALUE +rb_obj_as_string(VALUE obj) +{ + VALUE str; + + if (TYPE(obj) == T_STRING) { + return obj; + } + str = rb_funcall(obj, id_to_s, 0); + if (TYPE(str) != T_STRING) + return rb_any_to_s(obj); + if 
(OBJ_TAINTED(obj)) OBJ_TAINT(str); + return str; +} + +static VALUE +str_duplicate(VALUE klass, VALUE str) +{ + VALUE dup = str_alloc(klass); + rb_str_replace(dup, str); + return dup; +} + +VALUE +rb_str_dup(VALUE str) +{ + return str_duplicate(rb_obj_class(str), str); +} + + +/* + * call-seq: + * String.new(str="") => new_str + * + * Returns a new string object containing a copy of str. + */ + +static VALUE +rb_str_init(int argc, VALUE *argv, VALUE str) +{ + VALUE orig; + + if (argc > 0 && rb_scan_args(argc, argv, "01", &orig) == 1) + rb_str_replace(str, orig); + return str; +} + +long +rb_enc_strlen(const char *p, const char *e, rb_encoding *enc) +{ + long c; + const char *q; + + if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) { + return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc); + } + else if (rb_enc_asciicompat(enc)) { + c = 0; + while (p < e) { + if (ISASCII(*p)) { + q = search_nonascii(p, e); + if (!q) + return c + (e - p); + c += q - p; + p = q; + } + p += rb_enc_mbclen(p, e, enc); + c++; + } + return c; + } + + for (c=0; p>1); + d >>= 6; + d &= NONASCII_MASK >> 7; + d += (d>>8); + d += (d>>16); +#if SIZEOF_VALUE == 8 + d += (d>>32); +#endif + return (d&0xF); +} +#endif + +static long +str_strlen(VALUE str, rb_encoding *enc) +{ + const char *p, *e; + int n, cr; + + if (single_byte_optimizable(str)) return RSTRING_LEN(str); + if (!enc) enc = STR_ENC_GET(str); + p = RSTRING_PTR(str); + e = RSTRING_END(str); +#ifdef NONASCII_MASK + if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID && + enc == rb_utf8_encoding()) { + VALUE len = 0; + if (sizeof(VALUE) * 2 < e - p) { + const VALUE *s, *t; + const VALUE lowbits = sizeof(VALUE) - 1; + s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits)); + t = (const VALUE*)(~lowbits & (VALUE)e); + while (p < (const char *)s) { + if (is_utf8_lead_byte(*p)) len++; + p++; + } + while (s < t) { + len += count_utf8_lead_bytes_with_word(s); + s++; + } + p = (const char *)s; + } + while (p < e) { + if (is_utf8_lead_byte(*p)) 
len++; + p++; + } + return (long)len; + } +#endif + n = rb_enc_strlen_cr(p, e, enc, &cr); + if (cr) { + ENC_CODERANGE_SET(str, cr); + } + return n; +} + +/* + * call-seq: + * str.length => integer + * str.size => integer + * + * Returns the character length of str. + */ + +VALUE +rb_str_length(VALUE str) +{ + int len; + + len = str_strlen(str, STR_ENC_GET(str)); + return INT2NUM(len); +} + +/* + * call-seq: + * str.bytesize => integer + * + * Returns the length of str in bytes. + */ + +static VALUE +rb_str_bytesize(VALUE str) +{ + return INT2NUM(RSTRING_LEN(str)); +} + +/* + * call-seq: + * str.empty? => true or false + * + * Returns true if str has a length of zero. + * + * "hello".empty? #=> false + * "".empty? #=> true + */ + +static VALUE +rb_str_empty(VALUE str) +{ + if (RSTRING_LEN(str) == 0) + return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * str + other_str => new_str + * + * Concatenation---Returns a new String containing + * other_str concatenated to str. + * + * "Hello from " + self.to_s #=> "Hello from main" + */ + +VALUE +rb_str_plus(VALUE str1, VALUE str2) +{ + VALUE str3; + rb_encoding *enc; + + StringValue(str2); + enc = rb_enc_check(str1, str2); + str3 = rb_str_new(0, RSTRING_LEN(str1)+RSTRING_LEN(str2)); + memcpy(RSTRING_PTR(str3), RSTRING_PTR(str1), RSTRING_LEN(str1)); + memcpy(RSTRING_PTR(str3) + RSTRING_LEN(str1), + RSTRING_PTR(str2), RSTRING_LEN(str2)); + RSTRING_PTR(str3)[RSTRING_LEN(str3)] = '\0'; + + if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2)) + OBJ_TAINT(str3); + ENCODING_CODERANGE_SET(str3, rb_enc_to_index(enc), + ENC_CODERANGE_AND(ENC_CODERANGE(str1), ENC_CODERANGE(str2))); + return str3; +} + +/* + * call-seq: + * str * integer => new_str + * + * Copy---Returns a new String containing integer copies of + * the receiver. + * + * "Ho! " * 3 #=> "Ho! Ho! Ho! 
" + */ + +VALUE +rb_str_times(VALUE str, VALUE times) +{ + VALUE str2; + long n, len; + + len = NUM2LONG(times); + if (len < 0) { + rb_raise(rb_eArgError, "negative argument"); + } + if (len && LONG_MAX/len < RSTRING_LEN(str)) { + rb_raise(rb_eArgError, "argument too big"); + } + + str2 = rb_str_new5(str, 0, len *= RSTRING_LEN(str)); + if (len) { + n = RSTRING_LEN(str); + memcpy(RSTRING_PTR(str2), RSTRING_PTR(str), n); + while (n <= len/2) { + memcpy(RSTRING_PTR(str2) + n, RSTRING_PTR(str2), n); + n *= 2; + } + memcpy(RSTRING_PTR(str2) + n, RSTRING_PTR(str2), len-n); + } + RSTRING_PTR(str2)[RSTRING_LEN(str2)] = '\0'; + OBJ_INFECT(str2, str); + rb_enc_cr_str_copy_for_substr(str2, str); + + return str2; +} + +/* + * call-seq: + * str % arg => new_str + * + * Format---Uses str as a format specification, and returns the result + * of applying it to arg. If the format specification contains more than + * one substitution, then arg must be an Array containing + * the values to be substituted. See Kernel::sprintf for details + * of the format string. 
+ * + * "%05d" % 123 #=> "00123" + * "%-5s: %08x" % [ "ID", self.object_id ] #=> "ID : 200e14d6" + */ + +static VALUE +rb_str_format_m(VALUE str, VALUE arg) +{ + volatile VALUE tmp = rb_check_array_type(arg); + + if (!NIL_P(tmp)) { + return rb_str_format(RARRAY_LEN(tmp), RARRAY_PTR(tmp), str); + } + return rb_str_format(1, &arg, str); +} + +static inline void +str_modifiable(VALUE str) +{ + if (FL_TEST(str, STR_TMPLOCK)) { + rb_raise(rb_eRuntimeError, "can't modify string; temporarily locked"); + } + if (OBJ_FROZEN(str)) rb_error_frozen("string"); + if (!OBJ_UNTRUSTED(str) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't modify string"); +} + +static inline int +str_independent(VALUE str) +{ + str_modifiable(str); + if (!STR_SHARED_P(str)) return 1; + if (STR_EMBED_P(str)) return 1; + return 0; +} + +static void +str_make_independent(VALUE str) +{ + char *ptr; + long len = RSTRING_LEN(str); + + ptr = ALLOC_N(char, len+1); + if (RSTRING_PTR(str)) { + memcpy(ptr, RSTRING_PTR(str), len); + } + STR_SET_NOEMBED(str); + ptr[len] = 0; + RSTRING(str)->as.heap.ptr = ptr; + RSTRING(str)->as.heap.len = len; + RSTRING(str)->as.heap.aux.capa = len; + STR_UNSET_NOCAPA(str); +} + +void +rb_str_modify(VALUE str) +{ + if (!str_independent(str)) + str_make_independent(str); + ENC_CODERANGE_CLEAR(str); +} + +/* As rb_str_modify(), but don't clear coderange */ +static void +str_modify_keep_cr(VALUE str) +{ + if (!str_independent(str)) + str_make_independent(str); + if (ENC_CODERANGE(str) == ENC_CODERANGE_BROKEN) + /* Force re-scan later */ + ENC_CODERANGE_CLEAR(str); +} + +void +rb_str_associate(VALUE str, VALUE add) +{ + /* sanity check */ + if (OBJ_FROZEN(str)) rb_error_frozen("string"); + if (STR_ASSOC_P(str)) { + /* already associated */ + rb_ary_concat(RSTRING(str)->as.heap.aux.shared, add); + } + else { + if (STR_SHARED_P(str)) { + VALUE assoc = RSTRING(str)->as.heap.aux.shared; + str_make_independent(str); + if (STR_ASSOC_P(assoc)) { + assoc = 
RSTRING(assoc)->as.heap.aux.shared; + rb_ary_concat(assoc, add); + add = assoc; + } + } + else if (STR_EMBED_P(str)) { + str_make_independent(str); + } + else if (RSTRING(str)->as.heap.aux.capa != RSTRING_LEN(str)) { + RESIZE_CAPA(str, RSTRING_LEN(str)); + } + FL_SET(str, STR_ASSOC); + RBASIC(add)->klass = 0; + RSTRING(str)->as.heap.aux.shared = add; + } +} + +VALUE +rb_str_associated(VALUE str) +{ + if (STR_SHARED_P(str)) str = RSTRING(str)->as.heap.aux.shared; + if (STR_ASSOC_P(str)) { + return RSTRING(str)->as.heap.aux.shared; + } + return Qfalse; +} + +VALUE +rb_string_value(volatile VALUE *ptr) +{ + VALUE s = *ptr; + if (TYPE(s) != T_STRING) { + if (SYMBOL_P(s)) { + s = rb_sym_to_s(s); + } + else { + s = rb_str_to_str(s); + } + *ptr = s; + } + return s; +} + +char * +rb_string_value_ptr(volatile VALUE *ptr) +{ + VALUE str = rb_string_value(ptr); + return RSTRING_PTR(str); +} + +char * +rb_string_value_cstr(volatile VALUE *ptr) +{ + VALUE str = rb_string_value(ptr); + char *s = RSTRING_PTR(str); + + if (!s || RSTRING_LEN(str) != strlen(s)) { + rb_raise(rb_eArgError, "string contains null byte"); + } + return s; +} + +VALUE +rb_check_string_type(VALUE str) +{ + str = rb_check_convert_type(str, T_STRING, "String", "to_str"); + return str; +} + +/* + * call-seq: + * String.try_convert(obj) -> string or nil + * + * Try to convert obj into a String, using to_str method. + * Returns converted string or nil if obj cannot be converted + * for any reason. 
+ * + * String.try_convert("str") # => str + * String.try_convert(/re/) # => nil + */ +static VALUE +rb_str_s_try_convert(VALUE dummy, VALUE str) +{ + return rb_check_string_type(str); +} + +char* +rb_enc_nth(const char *p, const char *e, int nth, rb_encoding *enc) +{ + if (rb_enc_mbmaxlen(enc) == 1) { + p += nth; + } + else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) { + p += nth * rb_enc_mbmaxlen(enc); + } + else if (rb_enc_asciicompat(enc)) { + const char *p2, *e2; + int n; + + while (p < e && 0 < nth) { + e2 = p + nth; + if (e < e2) + return (char *)e; + if (ISASCII(*p)) { + p2 = search_nonascii(p, e2); + if (!p2) + return (char *)e2; + nth -= p2 - p; + p = p2; + } + n = rb_enc_mbclen(p, e, enc); + p += n; + nth--; + } + if (nth != 0) + return (char *)e; + return (char *)p; + } + else { + while (p e) p = e; + return (char*)p; +} + +static char* +str_nth(const char *p, const char *e, int nth, rb_encoding *enc, int singlebyte) +{ + if (singlebyte) + p += nth; + else { + p = rb_enc_nth(p, e, nth, enc); + } + if (!p) return 0; + if (p > e) p = e; + return (char *)p; +} + +/* char offset to byte offset */ +static int +str_offset(const char *p, const char *e, int nth, rb_encoding *enc, int singlebyte) +{ + const char *pp = str_nth(p, e, nth, enc, singlebyte); + if (!pp) return e - p; + return pp - p; +} + +long +rb_str_offset(VALUE str, long pos) +{ + return str_offset(RSTRING_PTR(str), RSTRING_END(str), pos, + STR_ENC_GET(str), single_byte_optimizable(str)); +} + +#ifdef NONASCII_MASK +static char * +str_utf8_nth(const char *p, const char *e, int nth) +{ + if (sizeof(VALUE) * 2 < nth) { + const VALUE *s, *t; + const VALUE lowbits = sizeof(VALUE) - 1; + s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits)); + t = (const VALUE*)(~lowbits & (VALUE)e); + while (p < (const char *)s) { + if (is_utf8_lead_byte(*p)) nth--; + p++; + } + do { + nth -= count_utf8_lead_bytes_with_word(s); + s++; + } while (s < t && sizeof(VALUE) <= nth); + p = (char *)s; + } + while (p < e) 
{ + if (is_utf8_lead_byte(*p)) { + if (nth == 0) break; + nth--; + } + p++; + } + return (char *)p; +} + +static int +str_utf8_offset(const char *p, const char *e, int nth) +{ + const char *pp = str_utf8_nth(p, e, nth); + if (!pp) return e - p; + return pp - p; +} +#endif + +/* byte offset to char offset */ +long +rb_str_sublen(VALUE str, long pos) +{ + if (single_byte_optimizable(str) || pos < 0) + return pos; + else { + char *p = RSTRING_PTR(str); + return rb_enc_strlen(p, p + pos, STR_ENC_GET(str)); + } +} + +VALUE +rb_str_subseq(VALUE str, long beg, long len) +{ + VALUE str2; + + if (RSTRING_LEN(str) == beg + len && + RSTRING_EMBED_LEN_MAX < len) { + str2 = rb_str_new_shared(rb_str_new_frozen(str)); + rb_str_drop_bytes(str2, beg); + } + else { + str2 = rb_str_new5(str, RSTRING_PTR(str)+beg, len); + } + + rb_enc_cr_str_copy_for_substr(str2, str); + OBJ_INFECT(str2, str); + + return str2; +} + +VALUE +rb_str_substr(VALUE str, long beg, long len) +{ + rb_encoding *enc = STR_ENC_GET(str); + VALUE str2; + char *p, *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str); + + if (len < 0) return Qnil; + if (!RSTRING_LEN(str)) { + len = 0; + } + if (single_byte_optimizable(str)) { + if (beg > RSTRING_LEN(str)) return Qnil; + if (beg < 0) { + beg += RSTRING_LEN(str); + if (beg < 0) return Qnil; + } + if (beg + len > RSTRING_LEN(str)) + len = RSTRING_LEN(str) - beg; + if (len <= 0) { + len = 0; + p = 0; + } + else + p = s + beg; + goto sub; + } + if (beg < 0) { + if (len > -beg) len = -beg; + if (-beg * rb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) { + beg = -beg; + while (beg-- > len && (e = rb_enc_prev_char(s, e, e, enc)) != 0); + p = e; + if (!p) return Qnil; + while (len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0); + if (!p) return Qnil; + len = e - p; + goto sub; + } + else { + beg += str_strlen(str, enc); + if (beg < 0) return Qnil; + } + } + else if (beg > 0 && beg > str_strlen(str, enc)) { + return Qnil; + } + if (len == 0) { + p = 0; + } +#ifdef NONASCII_MASK + 
else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID && + enc == rb_utf8_encoding()) { + p = str_utf8_nth(s, e, beg); + len = str_utf8_offset(p, e, len); + } +#endif + else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) { + int char_sz = rb_enc_mbmaxlen(enc); + + p = s + beg * char_sz; + if (p > e) { + p = e; + len = 0; + } + else if (len * char_sz > e - p) + len = e - p; + else + len *= char_sz; + } + else if ((p = str_nth(s, e, beg, enc, 0)) == e) { + len = 0; + } + else { + len = str_offset(p, e, len, enc, 0); + } + sub: + if (len > RSTRING_EMBED_LEN_MAX && beg + len == RSTRING_LEN(str)) { + str2 = rb_str_new4(str); + str2 = str_new3(rb_obj_class(str2), str2); + RSTRING(str2)->as.heap.ptr += RSTRING(str2)->as.heap.len - len; + RSTRING(str2)->as.heap.len = len; + } + else { + str2 = rb_str_new5(str, p, len); + rb_enc_cr_str_copy_for_substr(str2, str); + OBJ_INFECT(str2, str); + } + + return str2; +} + +VALUE +rb_str_freeze(VALUE str) +{ + if (STR_ASSOC_P(str)) { + VALUE ary = RSTRING(str)->as.heap.aux.shared; + OBJ_FREEZE(ary); + } + return rb_obj_freeze(str); +} + +RUBY_ALIAS_FUNCTION(rb_str_dup_frozen(VALUE str), rb_str_new_frozen, (str)) +#define rb_str_dup_frozen rb_str_new_frozen + +VALUE +rb_str_locktmp(VALUE str) +{ + if (FL_TEST(str, STR_TMPLOCK)) { + rb_raise(rb_eRuntimeError, "temporal locking already locked string"); + } + FL_SET(str, STR_TMPLOCK); + return str; +} + +VALUE +rb_str_unlocktmp(VALUE str) +{ + if (!FL_TEST(str, STR_TMPLOCK)) { + rb_raise(rb_eRuntimeError, "temporal unlocking already unlocked string"); + } + FL_UNSET(str, STR_TMPLOCK); + return str; +} + +void +rb_str_set_len(VALUE str, long len) +{ + STR_SET_LEN(str, len); + RSTRING_PTR(str)[len] = '\0'; +} + +VALUE +rb_str_resize(VALUE str, long len) +{ + long slen; + + if (len < 0) { + rb_raise(rb_eArgError, "negative string size (or size too big)"); + } + + rb_str_modify(str); + slen = RSTRING_LEN(str); + if (len != slen) { + if (STR_EMBED_P(str)) { + char *ptr; + if (len <= 
RSTRING_EMBED_LEN_MAX) { + STR_SET_EMBED_LEN(str, len); + RSTRING(str)->as.ary[len] = '\0'; + return str; + } + ptr = ALLOC_N(char,len+1); + MEMCPY(ptr, RSTRING(str)->as.ary, char, slen); + RSTRING(str)->as.heap.ptr = ptr; + STR_SET_NOEMBED(str); + } + else if (len <= RSTRING_EMBED_LEN_MAX) { + char *ptr = RSTRING(str)->as.heap.ptr; + STR_SET_EMBED(str); + if (slen > 0) MEMCPY(RSTRING(str)->as.ary, ptr, char, len); + RSTRING(str)->as.ary[len] = '\0'; + STR_SET_EMBED_LEN(str, len); + xfree(ptr); + return str; + } + else if (slen < len || slen - len > 1024) { + REALLOC_N(RSTRING(str)->as.heap.ptr, char, len+1); + } + if (!STR_NOCAPA_P(str)) { + RSTRING(str)->as.heap.aux.capa = len; + } + RSTRING(str)->as.heap.len = len; + RSTRING(str)->as.heap.ptr[len] = '\0'; /* sentinel */ + } + return str; +} + +static VALUE +str_buf_cat(VALUE str, const char *ptr, long len) +{ + long capa, total, off = -1; + + if (ptr >= RSTRING_PTR(str) && ptr <= RSTRING_END(str)) { + off = ptr - RSTRING_PTR(str); + } + rb_str_modify(str); + if (len == 0) return 0; + if (STR_ASSOC_P(str)) { + FL_UNSET(str, STR_ASSOC); + capa = RSTRING(str)->as.heap.aux.capa = RSTRING_LEN(str); + } + else if (STR_EMBED_P(str)) { + capa = RSTRING_EMBED_LEN_MAX; + } + else { + capa = RSTRING(str)->as.heap.aux.capa; + } + if (RSTRING_LEN(str) >= LONG_MAX - len) { + rb_raise(rb_eArgError, "string sizes too big"); + } + total = RSTRING_LEN(str)+len; + if (capa <= total) { + while (total > capa) { + if (capa + 1 >= LONG_MAX / 2) { + capa = (total + 4095) / 4096; + break; + } + capa = (capa + 1) * 2; + } + RESIZE_CAPA(str, capa); + } + if (off != -1) { + ptr = RSTRING_PTR(str) + off; + } + memcpy(RSTRING_PTR(str) + RSTRING_LEN(str), ptr, len); + STR_SET_LEN(str, total); + RSTRING_PTR(str)[total] = '\0'; /* sentinel */ + + return str; +} + +VALUE +rb_str_buf_cat(VALUE str, const char *ptr, long len) +{ + if (len == 0) return str; + if (len < 0) { + rb_raise(rb_eArgError, "negative string size (or size too big)"); + } + 
return str_buf_cat(str, ptr, len); +} + +VALUE +rb_str_buf_cat2(VALUE str, const char *ptr) +{ + return rb_str_buf_cat(str, ptr, strlen(ptr)); +} + +VALUE +rb_str_cat(VALUE str, const char *ptr, long len) +{ + if (len < 0) { + rb_raise(rb_eArgError, "negative string size (or size too big)"); + } + if (STR_ASSOC_P(str)) { + rb_str_modify(str); + if (STR_EMBED_P(str)) str_make_independent(str); + REALLOC_N(RSTRING(str)->as.heap.ptr, char, RSTRING(str)->as.heap.len+len+1); + memcpy(RSTRING(str)->as.heap.ptr + RSTRING(str)->as.heap.len, ptr, len); + RSTRING(str)->as.heap.len += len; + RSTRING(str)->as.heap.ptr[RSTRING(str)->as.heap.len] = '\0'; /* sentinel */ + return str; + } + + return rb_str_buf_cat(str, ptr, len); +} + +VALUE +rb_str_cat2(VALUE str, const char *ptr) +{ + return rb_str_cat(str, ptr, strlen(ptr)); +} + +static VALUE +rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len, + int ptr_encindex, int ptr_cr, int *ptr_cr_ret) +{ + int str_encindex = ENCODING_GET(str); + int res_encindex; + int str_cr, res_cr; + int str_a8 = ENCODING_IS_ASCII8BIT(str); + int ptr_a8 = ptr_encindex == 0; + + str_cr = ENC_CODERANGE(str); + + if (str_encindex == ptr_encindex) { + if (str_cr == ENC_CODERANGE_UNKNOWN || + (ptr_a8 && str_cr != ENC_CODERANGE_7BIT)) { + ptr_cr = ENC_CODERANGE_UNKNOWN; + } + else if (ptr_cr == ENC_CODERANGE_UNKNOWN) { + ptr_cr = coderange_scan(ptr, len, rb_enc_from_index(ptr_encindex)); + } + } + else { + rb_encoding *str_enc = rb_enc_from_index(str_encindex); + rb_encoding *ptr_enc = rb_enc_from_index(ptr_encindex); + if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) { + if (len == 0) + return str; + if (RSTRING_LEN(str) == 0) { + rb_str_buf_cat(str, ptr, len); + ENCODING_CODERANGE_SET(str, ptr_encindex, ptr_cr); + return str; + } + goto incompatible; + } + if (ptr_cr == ENC_CODERANGE_UNKNOWN) { + ptr_cr = coderange_scan(ptr, len, ptr_enc); + } + if (str_cr == ENC_CODERANGE_UNKNOWN) { + if (str_a8 || ptr_cr != ENC_CODERANGE_7BIT) 
{ + str_cr = rb_enc_str_coderange(str); + } + } + } + if (ptr_cr_ret) + *ptr_cr_ret = ptr_cr; + + if (str_encindex != ptr_encindex && + str_cr != ENC_CODERANGE_7BIT && + ptr_cr != ENC_CODERANGE_7BIT) { + incompatible: + rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s", + rb_enc_name(rb_enc_from_index(str_encindex)), + rb_enc_name(rb_enc_from_index(ptr_encindex))); + } + + if (str_cr == ENC_CODERANGE_UNKNOWN) { + res_encindex = str_encindex; + res_cr = ENC_CODERANGE_UNKNOWN; + } + else if (str_cr == ENC_CODERANGE_7BIT) { + if (ptr_cr == ENC_CODERANGE_7BIT) { + res_encindex = !str_a8 ? str_encindex : ptr_encindex; + res_cr = ENC_CODERANGE_7BIT; + } + else { + res_encindex = ptr_encindex; + res_cr = ptr_cr; + } + } + else if (str_cr == ENC_CODERANGE_VALID) { + res_encindex = str_encindex; + res_cr = str_cr; + } + else { /* str_cr == ENC_CODERANGE_BROKEN */ + res_encindex = str_encindex; + res_cr = str_cr; + if (0 < len) res_cr = ENC_CODERANGE_UNKNOWN; + } + + if (len < 0) { + rb_raise(rb_eArgError, "negative string size (or size too big)"); + } + str_buf_cat(str, ptr, len); + ENCODING_CODERANGE_SET(str, res_encindex, res_cr); + return str; +} + +VALUE +rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc) +{ + return rb_enc_cr_str_buf_cat(str, ptr, len, + rb_enc_to_index(ptr_enc), ENC_CODERANGE_UNKNOWN, NULL); +} + +VALUE +rb_str_buf_cat_ascii(VALUE str, const char *ptr) +{ + /* ptr must reference NUL terminated ASCII string. 
*/ + int encindex = ENCODING_GET(str); + rb_encoding *enc = rb_enc_from_index(encindex); + if (rb_enc_asciicompat(enc)) { + return rb_enc_cr_str_buf_cat(str, ptr, strlen(ptr), + encindex, ENC_CODERANGE_7BIT, 0); + } + else { + char *buf = ALLOCA_N(char, rb_enc_mbmaxlen(enc)); + while (*ptr) { + unsigned int c = (unsigned char)*ptr; + int len = rb_enc_codelen(c, enc); + rb_enc_mbcput(c, buf, enc); + rb_enc_cr_str_buf_cat(str, buf, len, + encindex, ENC_CODERANGE_VALID, 0); + ptr++; + } + return str; + } +} + +VALUE +rb_str_buf_append(VALUE str, VALUE str2) +{ + int str2_cr; + + str2_cr = ENC_CODERANGE(str2); + + rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2), + ENCODING_GET(str2), str2_cr, &str2_cr); + + OBJ_INFECT(str, str2); + ENC_CODERANGE_SET(str2, str2_cr); + + return str; +} + +VALUE +rb_str_append(VALUE str, VALUE str2) +{ + rb_encoding *enc; + int cr, cr2; + + StringValue(str2); + if (RSTRING_LEN(str2) > 0 && STR_ASSOC_P(str)) { + long len = RSTRING_LEN(str)+RSTRING_LEN(str2); + enc = rb_enc_check(str, str2); + cr = ENC_CODERANGE(str); + if ((cr2 = ENC_CODERANGE(str2)) > cr) cr = cr2; + rb_str_modify(str); + REALLOC_N(RSTRING(str)->as.heap.ptr, char, len+1); + memcpy(RSTRING(str)->as.heap.ptr + RSTRING(str)->as.heap.len, + RSTRING_PTR(str2), RSTRING_LEN(str2)+1); + RSTRING(str)->as.heap.len = len; + rb_enc_associate(str, enc); + ENC_CODERANGE_SET(str, cr); + OBJ_INFECT(str, str2); + return str; + } + return rb_str_buf_append(str, str2); +} + + +/* + * call-seq: + * str << integer => str + * str.concat(integer) => str + * str << obj => str + * str.concat(obj) => str + * + * Append---Concatenates the given object to str. If the object is a + * Integer, it is considered as a codepoint, and is converted + * to a character before concatenation. + * + * a = "hello " + * a << "world" #=> "hello world" + * a.concat(33) #=> "hello world!" 
+ */ + +VALUE +rb_str_concat(VALUE str1, VALUE str2) +{ + if (FIXNUM_P(str2) || TYPE(str2) == T_BIGNUM) { + rb_encoding *enc = STR_ENC_GET(str1); + unsigned int c = NUM2UINT(str2); + int pos = RSTRING_LEN(str1); + int len = rb_enc_codelen(c, enc); + int cr = ENC_CODERANGE(str1); + + rb_str_resize(str1, pos+len); + rb_enc_mbcput(c, RSTRING_PTR(str1)+pos, enc); + ENC_CODERANGE_SET(str1, cr); + return str1; + } + return rb_str_append(str1, str2); +} + +#if defined __i386__ || defined _M_IX86 +#define UNALIGNED_WORD_ACCESS 1 +#endif +#ifndef UNALIGNED_WORD_ACCESS +#define UNALIGNED_WORD_ACCESS 0 +#endif + +/* MurmurHash described in http://murmurhash.googlepages.com/ */ +static unsigned int +hash(const unsigned char * data, int len, unsigned int h) +{ + const unsigned int m = 0x7fd652ad; + const int r = 16; + + h += 0xdeadbeef; + + if (len >= 4) { +#if !UNALIGNED_WORD_ACCESS + int align = (VALUE)data & 3; + if (align) { + uint32_t t = 0, d = 0; + int sl, sr, pack; + + switch (align) { +#ifdef WORDS_BIGENDIAN + case 1: t |= data[2]; + case 2: t |= data[1] << 8; + case 3: t |= data[0] << 16; +#else + case 1: t |= data[2] << 16; + case 2: t |= data[1] << 8; + case 3: t |= data[0]; +#endif + } + +#ifdef WORDS_BIGENDIAN + t >>= (8 * align) - 8; +#else + t <<= (8 * align); +#endif + + data += 4-align; + len -= 4-align; + + sl = 8 * (4-align); + sr = 8 * align; + + while (len >= 4) { + d = *(uint32_t *)data; +#ifdef WORDS_BIGENDIAN + t = (t << sr) | (d >> sl); +#else + t = (t >> sr) | (d << sl); +#endif + h += t; + h *= m; + h ^= h >> r; + t = d; + + data += 4; + len -= 4; + } + + pack = len < align ? 
len : align; + d = 0; + switch (pack) { +#ifdef WORDS_BIGENDIAN + case 3: d |= data[2] << 8; + case 2: d |= data[1] << 16; + case 1: d |= data[0] << 24; + case 0: + h += (t << sr) | (d >> sl); +#else + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + case 0: + h += (t >> sr) | (d << sl); +#endif + h *= m; + h ^= h >> r; + } + + data += pack; + len -= pack; + } + else +#endif + { + do { + h += *(uint32_t *)data; + h *= m; + h ^= h >> r; + + data += 4; + len -= 4; + } while (len >= 4); + } + } + + switch(len) { +#ifdef WORDS_BIGENDIAN + case 3: + h += data[2] << 8; + case 2: + h += data[1] << 16; + case 1: + h += data[0] << 24; +#else + case 3: + h += data[2] << 16; + case 2: + h += data[1] << 8; + case 1: + h += data[0]; +#endif + h *= m; + h ^= h >> r; + } + + h *= m; + h ^= h >> 10; + h *= m; + h ^= h >> 17; + + return h; +} + +int +rb_memhash(const void *ptr, long len) +{ + static int hashseed_init = 0; + static unsigned int hashseed; + + if (!hashseed_init) { + hashseed = rb_genrand_int32(); + hashseed_init = 1; + } + + return hash(ptr, len, hashseed); +} + +int +rb_str_hash(VALUE str) +{ + int e = ENCODING_GET(str); + if (e) { + if (rb_enc_str_asciionly_p(str)) e = 0; + } + return rb_memhash((const void *)RSTRING_PTR(str), RSTRING_LEN(str)) ^ e; +} + +int +rb_str_hash_cmp(VALUE str1, VALUE str2) +{ + int len; + + if (!rb_str_comparable(str1, str2)) return 1; + if (RSTRING_LEN(str1) == (len = RSTRING_LEN(str2)) && + memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0) { + return 0; + } + return 1; +} + +/* + * call-seq: + * str.hash => fixnum + * + * Return a hash based on the string's length and content. 
+ */ + +static VALUE +rb_str_hash_m(VALUE str) +{ + int hval = rb_str_hash(str); + return INT2FIX(hval); +} + +#define lesser(a,b) (((a)>(b))?(b):(a)) + +int +rb_str_comparable(VALUE str1, VALUE str2) +{ + int idx1, idx2; + int rc1, rc2; + + if (RSTRING_LEN(str1) == 0) return Qtrue; + if (RSTRING_LEN(str2) == 0) return Qtrue; + idx1 = ENCODING_GET(str1); + idx2 = ENCODING_GET(str2); + if (idx1 == idx2) return Qtrue; + rc1 = rb_enc_str_coderange(str1); + rc2 = rb_enc_str_coderange(str2); + if (rc1 == ENC_CODERANGE_7BIT) { + if (rc2 == ENC_CODERANGE_7BIT) return Qtrue; + if (rb_enc_asciicompat(rb_enc_from_index(idx2))) + return Qtrue; + } + if (rc2 == ENC_CODERANGE_7BIT) { + if (rb_enc_asciicompat(rb_enc_from_index(idx1))) + return Qtrue; + } + return Qfalse; +} + +int +rb_str_cmp(VALUE str1, VALUE str2) +{ + long len; + int retval; + + len = lesser(RSTRING_LEN(str1), RSTRING_LEN(str2)); + retval = memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len); + if (retval == 0) { + if (RSTRING_LEN(str1) == RSTRING_LEN(str2)) { + if (!rb_str_comparable(str1, str2)) { + if (ENCODING_GET(str1) > ENCODING_GET(str2)) + return 1; + return -1; + } + return 0; + } + if (RSTRING_LEN(str1) > RSTRING_LEN(str2)) return 1; + return -1; + } + if (retval > 0) return 1; + return -1; +} + + +/* + * call-seq: + * str == obj => true or false + * + * Equality---If obj is not a String, returns + * false. Otherwise, returns true if str + * <=> obj returns zero. 
+ */ + +VALUE +rb_str_equal(VALUE str1, VALUE str2) +{ + int len; + + if (str1 == str2) return Qtrue; + if (TYPE(str2) != T_STRING) { + if (!rb_respond_to(str2, rb_intern("to_str"))) { + return Qfalse; + } + return rb_equal(str2, str1); + } + if (!rb_str_comparable(str1, str2)) return Qfalse; + if (RSTRING_LEN(str1) == (len = RSTRING_LEN(str2)) && + memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0) { + return Qtrue; + } + return Qfalse; +} + +/* + * call-seq: + * str.eql?(other) => true or false + * + * Two strings are equal if the have the same length and content. + */ + +static VALUE +rb_str_eql(VALUE str1, VALUE str2) +{ + if (TYPE(str2) != T_STRING || RSTRING_LEN(str1) != RSTRING_LEN(str2)) + return Qfalse; + + if (!rb_str_comparable(str1, str2)) return Qfalse; + if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), + lesser(RSTRING_LEN(str1), RSTRING_LEN(str2))) == 0) + return Qtrue; + + return Qfalse; +} + +/* + * call-seq: + * str <=> other_str => -1, 0, +1 + * + * Comparison---Returns -1 if other_str is greater than, 0 if + * other_str is equal to, and +1 if other_str is less than + * str. If the strings are of different lengths, and the strings are + * equal when compared up to the shortest length, then the longer string is + * considered greater than the shorter one. In older versions of Ruby, setting + * $= allowed case-insensitive comparisons; this is now deprecated + * in favor of using String#casecmp. + * + * <=> is the basis for the methods <, + * <=, >, >=, and between?, + * included from module Comparable. The method + * String#== does not use Comparable#==. 
+ * + * "abcdef" <=> "abcde" #=> 1 + * "abcdef" <=> "abcdef" #=> 0 + * "abcdef" <=> "abcdefg" #=> -1 + * "abcdef" <=> "ABCDEF" #=> 1 + */ + +static VALUE +rb_str_cmp_m(VALUE str1, VALUE str2) +{ + long result; + + if (TYPE(str2) != T_STRING) { + if (!rb_respond_to(str2, rb_intern("to_str"))) { + return Qnil; + } + else if (!rb_respond_to(str2, rb_intern("<=>"))) { + return Qnil; + } + else { + VALUE tmp = rb_funcall(str2, rb_intern("<=>"), 1, str1); + + if (NIL_P(tmp)) return Qnil; + if (!FIXNUM_P(tmp)) { + return rb_funcall(LONG2FIX(0), '-', 1, tmp); + } + result = -FIX2LONG(tmp); + } + } + else { + result = rb_str_cmp(str1, str2); + } + return LONG2NUM(result); +} + +/* + * call-seq: + * str.casecmp(other_str) => -1, 0, +1 + * + * Case-insensitive version of String#<=>. + * + * "abcdef".casecmp("abcde") #=> 1 + * "aBcDeF".casecmp("abcdef") #=> 0 + * "abcdef".casecmp("abcdefg") #=> -1 + * "abcdef".casecmp("ABCDEF") #=> 0 + */ + +static VALUE +rb_str_casecmp(VALUE str1, VALUE str2) +{ + long len; + rb_encoding *enc; + char *p1, *p1end, *p2, *p2end; + + StringValue(str2); + enc = rb_enc_compatible(str1, str2); + if (!enc) { + return Qnil; + } + + p1 = RSTRING_PTR(str1); p1end = RSTRING_END(str1); + p2 = RSTRING_PTR(str2); p2end = RSTRING_END(str2); + if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) { + while (p1 < p1end && p2 < p2end) { + if (*p1 != *p2) { + unsigned int c1 = TOUPPER(*p1 & 0xff); + unsigned int c2 = TOUPPER(*p2 & 0xff); + if (c1 != c2) + return INT2FIX(c1 < c2 ? -1 : 1); + } + p1++; + p2++; + } + } + else { + while (p1 < p1end && p2 < p2end) { + int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc); + int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc); + + if (0 <= c1 && 0 <= c2) { + c1 = TOUPPER(c1); + c2 = TOUPPER(c2); + if (c1 != c2) + return INT2FIX(c1 < c2 ? -1 : 1); + } + else { + int r; + l1 = rb_enc_mbclen(p1, p1end, enc); + l2 = rb_enc_mbclen(p2, p2end, enc); + len = l1 < l2 ? 
l1 : l2; + r = memcmp(p1, p2, len); + if (r != 0) + return INT2FIX(r < 0 ? -1 : 1); + if (l1 != l2) + return INT2FIX(l1 < l2 ? -1 : 1); + } + p1 += l1; + p2 += l2; + } + } + if (RSTRING_LEN(str1) == RSTRING_LEN(str2)) return INT2FIX(0); + if (RSTRING_LEN(str1) > RSTRING_LEN(str2)) return INT2FIX(1); + return INT2FIX(-1); +} + +static long +rb_str_index(VALUE str, VALUE sub, long offset) +{ + long pos; + char *s, *sptr, *e; + long len, slen; + rb_encoding *enc; + + enc = rb_enc_check(str, sub); + if (is_broken_string(sub)) { + return -1; + } + len = str_strlen(str, enc); + slen = str_strlen(sub, enc); + if (offset < 0) { + offset += len; + if (offset < 0) return -1; + } + if (len - offset < slen) return -1; + s = RSTRING_PTR(str); + e = s + RSTRING_LEN(str); + if (offset) { + offset = str_offset(s, RSTRING_END(str), offset, enc, single_byte_optimizable(str)); + s += offset; + } + if (slen == 0) return offset; + /* need proceed one character at a time */ + sptr = RSTRING_PTR(sub); + slen = RSTRING_LEN(sub); + len = RSTRING_LEN(str) - offset; + for (;;) { + char *t; + pos = rb_memsearch(sptr, slen, s, len, enc); + if (pos < 0) return pos; + t = rb_enc_right_char_head(s, s+pos, e, enc); + if (t == s + pos) break; + if ((len -= t - s) <= 0) return -1; + offset += t - s; + s = t; + } + return pos + offset; +} + + +/* + * call-seq: + * str.index(substring [, offset]) => fixnum or nil + * str.index(regexp [, offset]) => fixnum or nil + * + * Returns the index of the first occurrence of the given substring or + * pattern (regexp) in str. Returns nil if not + * found. If the second parameter is present, it specifies the position in the + * string to begin the search. 
+ * + * "hello".index('e') #=> 1 + * "hello".index('lo') #=> 3 + * "hello".index('a') #=> nil + * "hello".index(?e) #=> 1 + * "hello".index(/[aeiou]/, -3) #=> 4 + */ + +static VALUE +rb_str_index_m(int argc, VALUE *argv, VALUE str) +{ + VALUE sub; + VALUE initpos; + long pos; + + if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) { + pos = NUM2LONG(initpos); + } + else { + pos = 0; + } + if (pos < 0) { + pos += str_strlen(str, STR_ENC_GET(str)); + if (pos < 0) { + if (TYPE(sub) == T_REGEXP) { + rb_backref_set(Qnil); + } + return Qnil; + } + } + + switch (TYPE(sub)) { + case T_REGEXP: + if (pos > str_strlen(str, STR_ENC_GET(str))) + return Qnil; + pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos, + rb_enc_check(str, sub), single_byte_optimizable(str)); + + pos = rb_reg_search(sub, str, pos, 0); + pos = rb_str_sublen(str, pos); + break; + + default: { + VALUE tmp; + + tmp = rb_check_string_type(sub); + if (NIL_P(tmp)) { + rb_raise(rb_eTypeError, "type mismatch: %s given", + rb_obj_classname(sub)); + } + sub = tmp; + } + /* fall through */ + case T_STRING: + pos = rb_str_index(str, sub, pos); + pos = rb_str_sublen(str, pos); + break; + } + + if (pos == -1) return Qnil; + return LONG2NUM(pos); +} + +static long +rb_str_rindex(VALUE str, VALUE sub, long pos) +{ + long len, slen; + char *s, *sbeg, *e, *t; + rb_encoding *enc; + int singlebyte = single_byte_optimizable(str); + + enc = rb_enc_check(str, sub); + if (is_broken_string(sub)) { + return -1; + } + len = str_strlen(str, enc); + slen = str_strlen(sub, enc); + /* substring longer than string */ + if (len < slen) return -1; + if (len - pos < slen) { + pos = len - slen; + } + if (len == 0) { + return pos; + } + sbeg = RSTRING_PTR(str); + e = RSTRING_END(str); + t = RSTRING_PTR(sub); + slen = RSTRING_LEN(sub); + for (;;) { + s = str_nth(sbeg, e, pos, enc, singlebyte); + if (!s) return -1; + if (memcmp(s, t, slen) == 0) { + return pos; + } + if (pos == 0) break; + pos--; + } + return -1; +} + + +/* + * 
call-seq: + * str.rindex(substring [, fixnum]) => fixnum or nil + * str.rindex(regexp [, fixnum]) => fixnum or nil + * + * Returns the index of the last occurrence of the given substring or + * pattern (regexp) in str. Returns nil if not + * found. If the second parameter is present, it specifies the position in the + * string to end the search---characters beyond this point will not be + * considered. + * + * "hello".rindex('e') #=> 1 + * "hello".rindex('l') #=> 3 + * "hello".rindex('a') #=> nil + * "hello".rindex(?e) #=> 1 + * "hello".rindex(/[aeiou]/, -2) #=> 1 + */ + +static VALUE +rb_str_rindex_m(int argc, VALUE *argv, VALUE str) +{ + VALUE sub; + VALUE vpos; + rb_encoding *enc = STR_ENC_GET(str); + long pos, len = str_strlen(str, enc); + + if (rb_scan_args(argc, argv, "11", &sub, &vpos) == 2) { + pos = NUM2LONG(vpos); + if (pos < 0) { + pos += len; + if (pos < 0) { + if (TYPE(sub) == T_REGEXP) { + rb_backref_set(Qnil); + } + return Qnil; + } + } + if (pos > len) pos = len; + } + else { + pos = len; + } + + switch (TYPE(sub)) { + case T_REGEXP: + /* enc = rb_get_check(str, sub); */ + pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos, + STR_ENC_GET(str), single_byte_optimizable(str)); + + if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) { + pos = rb_reg_search(sub, str, pos, 1); + pos = rb_str_sublen(str, pos); + } + if (pos >= 0) return LONG2NUM(pos); + break; + + default: { + VALUE tmp; + + tmp = rb_check_string_type(sub); + if (NIL_P(tmp)) { + rb_raise(rb_eTypeError, "type mismatch: %s given", + rb_obj_classname(sub)); + } + sub = tmp; + } + /* fall through */ + case T_STRING: + pos = rb_str_rindex(str, sub, pos); + if (pos >= 0) return LONG2NUM(pos); + break; + } + return Qnil; +} + +/* + * call-seq: + * str =~ obj => fixnum or nil + * + * Match---If obj is a Regexp, use it as a pattern to match + * against str,and returns the position the match starts, or + * nil if there is no match. Otherwise, invokes + * obj.=~, passing str as an argument. 
The default + * =~ in Object returns false. + * + * "cat o' 9 tails" =~ /\d/ #=> 7 + * "cat o' 9 tails" =~ 9 #=> nil + */ + +static VALUE +rb_str_match(VALUE x, VALUE y) +{ + switch (TYPE(y)) { + case T_STRING: + rb_raise(rb_eTypeError, "type mismatch: String given"); + + case T_REGEXP: + return rb_reg_match(y, x); + + default: + return rb_funcall(y, rb_intern("=~"), 1, x); + } +} + + +static VALUE get_pat(VALUE, int); + + +/* + * call-seq: + * str.match(pattern) => matchdata or nil + * + * Converts pattern to a Regexp (if it isn't already one), + * then invokes its match method on str. If the second + * parameter is present, it specifies the position in the string to begin the + * search. + * + * 'hello'.match('(.)\1') #=> # + * 'hello'.match('(.)\1')[0] #=> "ll" + * 'hello'.match(/(.)\1/)[0] #=> "ll" + * 'hello'.match('xx') #=> nil + * + * If a block is given, invoke the block with MatchData if match succeed, so + * that you can write + * + * str.match(pat) {|m| ...} + * + * instead of + * + * if m = str.match(pat) + * ... + * end + * + * The return value is a value from block execution in this case. 
+ */ + +static VALUE +rb_str_match_m(int argc, VALUE *argv, VALUE str) +{ + VALUE re, result; + if (argc < 1) + rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc); + re = argv[0]; + argv[0] = str; + result = rb_funcall2(get_pat(re, 0), rb_intern("match"), argc, argv); + if (!NIL_P(result) && rb_block_given_p()) { + return rb_yield(result); + } + return result; +} + +enum neighbor_char { + NEIGHBOR_NOT_CHAR, + NEIGHBOR_FOUND, + NEIGHBOR_WRAPPED +}; + +static enum neighbor_char +enc_succ_char(char *p, int len, rb_encoding *enc) +{ + int i, l; + while (1) { + for (i = len-1; 0 <= i && (unsigned char)p[i] == 0xff; i--) + p[i] = '\0'; + if (i < 0) + return NEIGHBOR_WRAPPED; + ++((unsigned char*)p)[i]; + l = rb_enc_precise_mbclen(p, p+len, enc); + if (MBCLEN_CHARFOUND_P(l)) { + l = MBCLEN_CHARFOUND_LEN(l); + if (l == len) { + return NEIGHBOR_FOUND; + } + else { + memset(p+l, 0xff, len-l); + } + } + if (MBCLEN_INVALID_P(l) && i < len-1) { + int len2, l2; + for (len2 = len-1; 0 < len2; len2--) { + l2 = rb_enc_precise_mbclen(p, p+len2, enc); + if (!MBCLEN_INVALID_P(l2)) + break; + } + memset(p+len2+1, 0xff, len-(len2+1)); + } + } +} + +static enum neighbor_char +enc_pred_char(char *p, int len, rb_encoding *enc) +{ + int i, l; + while (1) { + for (i = len-1; 0 <= i && (unsigned char)p[i] == 0; i--) + p[i] = '\xff'; + if (i < 0) + return NEIGHBOR_WRAPPED; + --((unsigned char*)p)[i]; + l = rb_enc_precise_mbclen(p, p+len, enc); + if (MBCLEN_CHARFOUND_P(l)) { + l = MBCLEN_CHARFOUND_LEN(l); + if (l == len) { + return NEIGHBOR_FOUND; + } + else { + memset(p+l, 0, len-l); + } + } + if (MBCLEN_INVALID_P(l) && i < len-1) { + int len2, l2; + for (len2 = len-1; 0 < len2; len2--) { + l2 = rb_enc_precise_mbclen(p, p+len2, enc); + if (!MBCLEN_INVALID_P(l2)) + break; + } + memset(p+len2+1, 0, len-(len2+1)); + } + } +} + +/* + overwrite +p+ by succeeding letter in +enc+ and returns + NEIGHBOR_FOUND or NEIGHBOR_WRAPPED. 
+ When NEIGHBOR_WRAPPED, carried-out letter is stored into carry. + assuming each ranges are successive, and mbclen + never change in each ranges. + NEIGHBOR_NOT_CHAR is returned if invalid character or the range has only one + character. + */ +static enum neighbor_char +enc_succ_alnum_char(char *p, int len, rb_encoding *enc, char *carry) +{ + enum neighbor_char ret; + unsigned int c; + int ctype; + int range; + char save[ONIGENC_CODE_TO_MBC_MAXLEN]; + + c = rb_enc_mbc_to_codepoint(p, p+len, enc); + if (rb_enc_isctype(c, ONIGENC_CTYPE_DIGIT, enc)) + ctype = ONIGENC_CTYPE_DIGIT; + else if (rb_enc_isctype(c, ONIGENC_CTYPE_ALPHA, enc)) + ctype = ONIGENC_CTYPE_ALPHA; + else + return NEIGHBOR_NOT_CHAR; + + MEMCPY(save, p, char, len); + ret = enc_succ_char(p, len, enc); + if (ret == NEIGHBOR_FOUND) { + c = rb_enc_mbc_to_codepoint(p, p+len, enc); + if (rb_enc_isctype(c, ctype, enc)) + return NEIGHBOR_FOUND; + } + MEMCPY(p, save, char, len); + range = 1; + while (1) { + MEMCPY(save, p, char, len); + ret = enc_pred_char(p, len, enc); + if (ret == NEIGHBOR_FOUND) { + c = rb_enc_mbc_to_codepoint(p, p+len, enc); + if (!rb_enc_isctype(c, ctype, enc)) { + MEMCPY(p, save, char, len); + break; + } + } + else { + MEMCPY(p, save, char, len); + break; + } + range++; + } + if (range == 1) { + return NEIGHBOR_NOT_CHAR; + } + + if (ctype != ONIGENC_CTYPE_DIGIT) { + MEMCPY(carry, p, char, len); + return NEIGHBOR_WRAPPED; + } + + MEMCPY(carry, p, char, len); + enc_succ_char(carry, len, enc); + return NEIGHBOR_WRAPPED; +} + + +/* + * call-seq: + * str.succ => new_str + * str.next => new_str + * + * Returns the successor to str. The successor is calculated by + * incrementing characters starting from the rightmost alphanumeric (or + * the rightmost character if there are no alphanumerics) in the + * string. Incrementing a digit always results in another digit, and + * incrementing a letter results in another letter of the same case. 
+ * Incrementing nonalphanumerics uses the underlying character set's + * collating sequence. + * + * If the increment generates a ``carry,'' the character to the left of + * it is incremented. This process repeats until there is no carry, + * adding an additional character if necessary. + * + * "abcd".succ #=> "abce" + * "THX1138".succ #=> "THX1139" + * "<>".succ #=> "<>" + * "1999zzz".succ #=> "2000aaa" + * "ZZZ9999".succ #=> "AAAA0000" + * "***".succ #=> "**+" + */ + +VALUE +rb_str_succ(VALUE orig) +{ + rb_encoding *enc; + VALUE str; + char *sbeg, *s, *e, *last_alnum = 0; + int c = -1; + long l; + char carry[ONIGENC_CODE_TO_MBC_MAXLEN] = "\1"; + int carry_pos = 0, carry_len = 1; + enum neighbor_char neighbor = NEIGHBOR_FOUND; + + str = rb_str_new5(orig, RSTRING_PTR(orig), RSTRING_LEN(orig)); + rb_enc_cr_str_copy_for_substr(str, orig); + OBJ_INFECT(str, orig); + if (RSTRING_LEN(str) == 0) return str; + + enc = STR_ENC_GET(orig); + sbeg = RSTRING_PTR(str); + s = e = sbeg + RSTRING_LEN(str); + + while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) { + if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) { + if (ISALPHA(*last_alnum) ? ISDIGIT(*s) : + ISDIGIT(*last_alnum) ? ISALPHA(*s) : 0) { + s = last_alnum; + break; + } + } + if ((l = rb_enc_precise_mbclen(s, e, enc)) <= 0) continue; + neighbor = enc_succ_alnum_char(s, l, enc, carry); + switch (neighbor) { + case NEIGHBOR_NOT_CHAR: + continue; + case NEIGHBOR_FOUND: + return str; + case NEIGHBOR_WRAPPED: + last_alnum = s; + break; + } + c = 1; + carry_pos = s - sbeg; + carry_len = l; + } + if (c == -1) { /* str contains no alnum */ + s = e; + while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) { + enum neighbor_char neighbor; + if ((l = rb_enc_precise_mbclen(s, e, enc)) <= 0) continue; + neighbor = enc_succ_char(s, l, enc); + if (neighbor == NEIGHBOR_FOUND) + return str; + if (rb_enc_precise_mbclen(s, s+l, enc) != l) { + /* wrapped to \0...\0. search next valid char. 
*/ + enc_succ_char(s, l, enc); + } + if (!rb_enc_asciicompat(enc)) { + MEMCPY(carry, s, char, l); + carry_len = l; + } + carry_pos = s - sbeg; + } + } + RESIZE_CAPA(str, RSTRING_LEN(str) + carry_len); + s = RSTRING_PTR(str) + carry_pos; + memmove(s + carry_len, s, RSTRING_LEN(str) - carry_pos); + memmove(s, carry, carry_len); + STR_SET_LEN(str, RSTRING_LEN(str) + carry_len); + RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0'; + rb_enc_str_coderange(str); + return str; +} + + +/* + * call-seq: + * str.succ! => str + * str.next! => str + * + * Equivalent to String#succ, but modifies the receiver in + * place. + */ + +static VALUE +rb_str_succ_bang(VALUE str) +{ + rb_str_shared_replace(str, rb_str_succ(str)); + + return str; +} + + +/* + * call-seq: + * str.upto(other_str, exclusive=false) {|s| block } => str + * + * Iterates through successive values, starting at str and + * ending at other_str inclusive, passing each value in turn to + * the block. The String#succ method is used to generate + * each value. If optional second argument exclusive is omitted or is false, + * the last value will be included; otherwise it will be excluded. 
+ * + * "a8".upto("b6") {|s| print s, ' ' } + * for s in "a8".."b6" + * print s, ' ' + * end + * + * produces: + * + * a8 a9 b0 b1 b2 b3 b4 b5 b6 + * a8 a9 b0 b1 b2 b3 b4 b5 b6 + */ + +static VALUE +rb_str_upto(int argc, VALUE *argv, VALUE beg) +{ + VALUE end, exclusive; + VALUE current, after_end; + ID succ; + int n, excl; + rb_encoding *enc; + + rb_scan_args(argc, argv, "11", &end, &exclusive); + RETURN_ENUMERATOR(beg, argc, argv); + excl = RTEST(exclusive); + CONST_ID(succ, "succ"); + StringValue(end); + enc = rb_enc_check(beg, end); + if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && + is_ascii_string(beg) && is_ascii_string(end)) { + char c = RSTRING_PTR(beg)[0]; + char e = RSTRING_PTR(end)[0]; + + if (c > e || (excl && c == e)) return beg; + for (;;) { + rb_yield(rb_enc_str_new(&c, 1, enc)); + if (!excl && c == e) break; + c++; + if (excl && c == e) break; + } + return beg; + } + n = rb_str_cmp(beg, end); + if (n > 0 || (excl && n == 0)) return beg; + + after_end = rb_funcall(end, succ, 0, 0); + current = beg; + while (!rb_str_equal(current, after_end)) { + rb_yield(current); + if (!excl && rb_str_equal(current, end)) break; + current = rb_funcall(current, succ, 0, 0); + StringValue(current); + if (excl && rb_str_equal(current, end)) break; + if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0) + break; + } + + return beg; +} + +static VALUE +rb_str_subpat(VALUE str, VALUE re, int nth) +{ + if (rb_reg_search(re, str, 0, 0) >= 0) { + return rb_reg_nth_match(nth, rb_backref_get()); + } + return Qnil; +} + +static VALUE +rb_str_aref(VALUE str, VALUE indx) +{ + long idx; + + switch (TYPE(indx)) { + case T_FIXNUM: + idx = FIX2LONG(indx); + + num_index: + str = rb_str_substr(str, idx, 1); + if (!NIL_P(str) && RSTRING_LEN(str) == 0) return Qnil; + return str; + + case T_REGEXP: + return rb_str_subpat(str, indx, 0); + + case T_STRING: + if (rb_str_index(str, indx, 0) != -1) + return rb_str_dup(indx); + return Qnil; + + default: + /* check if 
indx is Range */ + { + long beg, len; + VALUE tmp; + + len = str_strlen(str, STR_ENC_GET(str)); + switch (rb_range_beg_len(indx, &beg, &len, len, 0)) { + case Qfalse: + break; + case Qnil: + return Qnil; + default: + tmp = rb_str_substr(str, beg, len); + return tmp; + } + } + idx = NUM2LONG(indx); + goto num_index; + } + return Qnil; /* not reached */ +} + + +/* + * call-seq: + * str[fixnum] => new_str or nil + * str[fixnum, fixnum] => new_str or nil + * str[range] => new_str or nil + * str[regexp] => new_str or nil + * str[regexp, fixnum] => new_str or nil + * str[other_str] => new_str or nil + * str.slice(fixnum) => new_str or nil + * str.slice(fixnum, fixnum) => new_str or nil + * str.slice(range) => new_str or nil + * str.slice(regexp) => new_str or nil + * str.slice(regexp, fixnum) => new_str or nil + * str.slice(other_str) => new_str or nil + * + * Element Reference---If passed a single Fixnum, returns a + * substring of one character at that position. If passed two Fixnum + * objects, returns a substring starting at the offset given by the first, and + * a length given by the second. If given a range, a substring containing + * characters at offsets given by the range is returned. In all three cases, if + * an offset is negative, it is counted from the end of str. Returns + * nil if the initial offset falls outside the string, the length + * is negative, or the beginning of the range is greater than the end. + * + * If a Regexp is supplied, the matching portion of str is + * returned. If a numeric parameter follows the regular expression, that + * component of the MatchData is returned instead. If a + * String is given, that string is returned if it occurs in + * str. In both cases, nil is returned if there is no + * match. 
+ * + * a = "hello there" + * a[1] #=> "e" + * a[1,3] #=> "ell" + * a[1..3] #=> "ell" + * a[-3,2] #=> "er" + * a[-4..-2] #=> "her" + * a[12..-1] #=> nil + * a[-2..-4] #=> "" + * a[/[aeiou](.)\1/] #=> "ell" + * a[/[aeiou](.)\1/, 0] #=> "ell" + * a[/[aeiou](.)\1/, 1] #=> "l" + * a[/[aeiou](.)\1/, 2] #=> nil + * a["lo"] #=> "lo" + * a["bye"] #=> nil + */ + +static VALUE +rb_str_aref_m(int argc, VALUE *argv, VALUE str) +{ + if (argc == 2) { + if (TYPE(argv[0]) == T_REGEXP) { + return rb_str_subpat(str, argv[0], NUM2INT(argv[1])); + } + return rb_str_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1])); + } + if (argc != 1) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc); + } + return rb_str_aref(str, argv[0]); +} + +VALUE +rb_str_drop_bytes(VALUE str, long len) +{ + char *ptr = RSTRING_PTR(str); + long olen = RSTRING_LEN(str), nlen; + + str_modifiable(str); + if (len > olen) len = olen; + nlen = olen - len; + if (nlen <= RSTRING_EMBED_LEN_MAX) { + char *oldptr = ptr; + int fl = (RBASIC(str)->flags & (STR_NOEMBED|ELTS_SHARED)); + STR_SET_EMBED(str); + STR_SET_EMBED_LEN(str, nlen); + ptr = RSTRING(str)->as.ary; + memmove(ptr, oldptr + len, nlen); + if (fl == STR_NOEMBED) xfree(oldptr); + } + else { + if (!STR_SHARED_P(str)) rb_str_new4(str); + ptr = RSTRING(str)->as.heap.ptr += len; + RSTRING(str)->as.heap.len = nlen; + } + ptr[nlen] = 0; + ENC_CODERANGE_CLEAR(str); + return str; +} + +static void +rb_str_splice_0(VALUE str, long beg, long len, VALUE val) +{ + if (beg == 0 && RSTRING_LEN(val) == 0) { + rb_str_drop_bytes(str, len); + OBJ_INFECT(str, val); + return; + } + + rb_str_modify(str); + if (len < RSTRING_LEN(val)) { + /* expand string */ + RESIZE_CAPA(str, RSTRING_LEN(str) + RSTRING_LEN(val) - len + 1); + } + + if (RSTRING_LEN(val) != len) { + memmove(RSTRING_PTR(str) + beg + RSTRING_LEN(val), + RSTRING_PTR(str) + beg + len, + RSTRING_LEN(str) - (beg + len)); + } + if (RSTRING_LEN(val) < beg && len < 0) { + MEMZERO(RSTRING_PTR(str) + 
RSTRING_LEN(str), char, -len); + } + if (RSTRING_LEN(val) > 0) { + memmove(RSTRING_PTR(str)+beg, RSTRING_PTR(val), RSTRING_LEN(val)); + } + STR_SET_LEN(str, RSTRING_LEN(str) + RSTRING_LEN(val) - len); + if (RSTRING_PTR(str)) { + RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0'; + } + OBJ_INFECT(str, val); +} + +static void +rb_str_splice(VALUE str, long beg, long len, VALUE val) +{ + long slen; + char *p, *e; + rb_encoding *enc; + int singlebyte = single_byte_optimizable(str); + int cr; + + if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len); + + StringValue(val); + enc = rb_enc_check(str, val); + slen = str_strlen(str, enc); + + if (slen < beg) { + out_of_range: + rb_raise(rb_eIndexError, "index %ld out of string", beg); + } + if (beg < 0) { + if (-beg > slen) { + goto out_of_range; + } + beg += slen; + } + if (slen < len || slen < beg + len) { + len = slen - beg; + } + str_modify_keep_cr(str); + p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc, singlebyte); + if (!p) p = RSTRING_END(str); + e = str_nth(p, RSTRING_END(str), len, enc, singlebyte); + if (!e) e = RSTRING_END(str); + /* error check */ + beg = p - RSTRING_PTR(str); /* physical position */ + len = e - p; /* physical length */ + rb_str_splice_0(str, beg, len, val); + rb_enc_associate(str, enc); + cr = ENC_CODERANGE_AND(ENC_CODERANGE(str), ENC_CODERANGE(val)); + if (cr != ENC_CODERANGE_BROKEN) + ENC_CODERANGE_SET(str, cr); +} + +void +rb_str_update(VALUE str, long beg, long len, VALUE val) +{ + rb_str_splice(str, beg, len, val); +} + +static void +rb_str_subpat_set(VALUE str, VALUE re, int nth, VALUE val) +{ + VALUE match; + long start, end, len; + rb_encoding *enc; + struct re_registers *regs; + + if (rb_reg_search(re, str, 0, 0) < 0) { + rb_raise(rb_eIndexError, "regexp not matched"); + } + match = rb_backref_get(); + regs = RMATCH_REGS(match); + if (nth >= regs->num_regs) { + out_of_range: + rb_raise(rb_eIndexError, "index %d out of regexp", nth); + } + if (nth < 0) { + if (-nth >= 
regs->num_regs) { + goto out_of_range; + } + nth += regs->num_regs; + } + + start = BEG(nth); + if (start == -1) { + rb_raise(rb_eIndexError, "regexp group %d not matched", nth); + } + end = END(nth); + len = end - start; + StringValue(val); + enc = rb_enc_check(str, val); + rb_str_splice_0(str, start, len, val); + rb_enc_associate(str, enc); +} + +static VALUE +rb_str_aset(VALUE str, VALUE indx, VALUE val) +{ + long idx, beg; + + switch (TYPE(indx)) { + case T_FIXNUM: + idx = FIX2LONG(indx); + num_index: + rb_str_splice(str, idx, 1, val); + return val; + + case T_REGEXP: + rb_str_subpat_set(str, indx, 0, val); + return val; + + case T_STRING: + beg = rb_str_index(str, indx, 0); + if (beg < 0) { + rb_raise(rb_eIndexError, "string not matched"); + } + beg = rb_str_sublen(str, beg); + rb_str_splice(str, beg, str_strlen(indx, 0), val); + return val; + + default: + /* check if indx is Range */ + { + long beg, len; + if (rb_range_beg_len(indx, &beg, &len, str_strlen(str, 0), 2)) { + rb_str_splice(str, beg, len, val); + return val; + } + } + idx = NUM2LONG(indx); + goto num_index; + } +} + +/* + * call-seq: + * str[fixnum] = new_str + * str[fixnum, fixnum] = new_str + * str[range] = aString + * str[regexp] = new_str + * str[regexp, fixnum] = new_str + * str[other_str] = new_str + * + * Element Assignment---Replaces some or all of the content of str. The + * portion of the string affected is determined using the same criteria as + * String#[]. If the replacement string is not the same length as + * the text it is replacing, the string will be adjusted accordingly. If the + * regular expression or string is used as the index doesn't match a position + * in the string, IndexError is raised. If the regular expression + * form is used, the optional second Fixnum allows you to specify + * which portion of the match to replace (effectively using the + * MatchData indexing rules. 
The forms that take a + * Fixnum will raise an IndexError if the value is + * out of range; the Range form will raise a + * RangeError, and the Regexp and String + * forms will silently ignore the assignment. + */ + +static VALUE +rb_str_aset_m(int argc, VALUE *argv, VALUE str) +{ + if (argc == 3) { + if (TYPE(argv[0]) == T_REGEXP) { + rb_str_subpat_set(str, argv[0], NUM2INT(argv[1]), argv[2]); + } + else { + rb_str_splice(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]); + } + return argv[2]; + } + if (argc != 2) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc); + } + return rb_str_aset(str, argv[0], argv[1]); +} + +/* + * call-seq: + * str.insert(index, other_str) => str + * + * Inserts other_str before the character at the given + * index, modifying str. Negative indices count from the + * end of the string, and insert after the given character. + * The intent is insert aString so that it starts at the given + * index. + * + * "abcd".insert(0, 'X') #=> "Xabcd" + * "abcd".insert(3, 'X') #=> "abcXd" + * "abcd".insert(4, 'X') #=> "abcdX" + * "abcd".insert(-3, 'X') #=> "abXcd" + * "abcd".insert(-1, 'X') #=> "abcdX" + */ + +static VALUE +rb_str_insert(VALUE str, VALUE idx, VALUE str2) +{ + long pos = NUM2LONG(idx); + + if (pos == -1) { + return rb_str_append(str, str2); + } + else if (pos < 0) { + pos++; + } + rb_str_splice(str, pos, 0, str2); + return str; +} + + +/* + * call-seq: + * str.slice!(fixnum) => fixnum or nil + * str.slice!(fixnum, fixnum) => new_str or nil + * str.slice!(range) => new_str or nil + * str.slice!(regexp) => new_str or nil + * str.slice!(other_str) => new_str or nil + * + * Deletes the specified portion from str, and returns the portion + * deleted. 
+ * + * string = "this is a string" + * string.slice!(2) #=> "i" + * string.slice!(3..6) #=> " is " + * string.slice!(/s.*t/) #=> "sa st" + * string.slice!("r") #=> "r" + * string #=> "thing" + */ + +static VALUE +rb_str_slice_bang(int argc, VALUE *argv, VALUE str) +{ + VALUE result; + VALUE buf[3]; + int i; + + if (argc < 1 || 2 < argc) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc); + } + for (i=0; i str or nil + * str.sub!(pattern) {|match| block } => str or nil + * + * Performs the substitutions of String#sub in place, + * returning str, or nil if no substitutions were + * performed. + */ + +static VALUE +rb_str_sub_bang(int argc, VALUE *argv, VALUE str) +{ + VALUE pat, repl, hash = Qnil; + int iter = 0; + int tainted = 0; + int untrusted = 0; + long plen; + + if (argc == 1 && rb_block_given_p()) { + iter = 1; + } + else if (argc == 2) { + repl = argv[1]; + hash = rb_check_convert_type(argv[1], T_HASH, "Hash", "to_hash"); + if (NIL_P(hash)) { + StringValue(repl); + } + if (OBJ_TAINTED(repl)) tainted = 1; + if (OBJ_UNTRUSTED(repl)) untrusted = 1; + } + else { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc); + } + + pat = get_pat(argv[0], 1); + if (rb_reg_search(pat, str, 0, 0) >= 0) { + rb_encoding *enc; + int cr = ENC_CODERANGE(str); + VALUE match = rb_backref_get(); + struct re_registers *regs = RMATCH_REGS(match); + long beg0 = BEG(0); + long end0 = END(0); + + if (iter || !NIL_P(hash)) { + char *p = RSTRING_PTR(str); long len = RSTRING_LEN(str); + + if (iter) { + repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match))); + } + else { + repl = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0)); + repl = rb_obj_as_string(repl); + } + str_mod_check(str, p, len); + str_frozen_check(str); + } + else { + repl = rb_reg_regsub(repl, str, regs, pat); + } + enc = rb_enc_compatible(str, repl); + if (!enc) { + rb_encoding *str_enc = STR_ENC_GET(str); + if (coderange_scan(RSTRING_PTR(str), beg0, str_enc) != 
ENC_CODERANGE_7BIT || + coderange_scan(RSTRING_PTR(str)+end0, + RSTRING_LEN(str)-end0, str_enc) != ENC_CODERANGE_7BIT) { + rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s", + rb_enc_name(str_enc), + rb_enc_name(STR_ENC_GET(repl))); + } + enc = STR_ENC_GET(repl); + } + rb_str_modify(str); + rb_enc_associate(str, enc); + if (OBJ_TAINTED(repl)) tainted = 1; + if (OBJ_UNTRUSTED(repl)) untrusted = 1; + if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) { + int cr2 = ENC_CODERANGE(repl); + if (cr2 == ENC_CODERANGE_BROKEN || + (cr == ENC_CODERANGE_VALID && cr2 == ENC_CODERANGE_7BIT)) + cr = ENC_CODERANGE_UNKNOWN; + else + cr = cr2; + } + plen = end0 - beg0; + if (RSTRING_LEN(repl) > plen) { + RESIZE_CAPA(str, RSTRING_LEN(str) + RSTRING_LEN(repl) - plen); + } + if (RSTRING_LEN(repl) != plen) { + memmove(RSTRING_PTR(str) + beg0 + RSTRING_LEN(repl), + RSTRING_PTR(str) + beg0 + plen, + RSTRING_LEN(str) - beg0 - plen); + } + memcpy(RSTRING_PTR(str) + beg0, + RSTRING_PTR(repl), RSTRING_LEN(repl)); + STR_SET_LEN(str, RSTRING_LEN(str) + RSTRING_LEN(repl) - plen); + RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0'; + ENC_CODERANGE_SET(str, cr); + if (tainted) OBJ_TAINT(str); + if (untrusted) OBJ_UNTRUST(str); + + return str; + } + return Qnil; +} + + +/* + * call-seq: + * str.sub(pattern, replacement) => new_str + * str.sub(pattern) {|match| block } => new_str + * + * Returns a copy of str with the first occurrence of + * pattern replaced with either replacement or the value of the + * block. The pattern will typically be a Regexp; if it is + * a String then no regular expression metacharacters will be + * interpreted (that is /\d/ will match a digit, but + * '\d' will match a backslash followed by a 'd'). + * + * If the method call specifies replacement, special variables such as + * $& will not be useful, as substitution into the string occurs + * before the pattern match starts. However, the sequences \1, + * \2, \k, etc., may be used. 
+ * + * In the block form, the current match string is passed in as a parameter, and + * variables such as $1, $2, $`, + * $&, and $' will be set appropriately. The value + * returned by the block will be substituted for the match on each call. + * + * The result inherits any tainting in the original string or any supplied + * replacement string. + * + * "hello".sub(/[aeiou]/, '*') #=> "h*llo" + * "hello".sub(/([aeiou])/, '<\1>') #=> "hllo" + * "hello".sub(/./) {|s| s[0].ord.to_s + ' ' } #=> "104 ello" + * "hello".sub(/(?[aeiou])/, '*\k*') #=> "h*e*llo" + */ + +static VALUE +rb_str_sub(int argc, VALUE *argv, VALUE str) +{ + str = rb_str_dup(str); + rb_str_sub_bang(argc, argv, str); + return str; +} + +static VALUE +str_gsub(int argc, VALUE *argv, VALUE str, int bang) +{ + VALUE pat, val, repl, match, dest, hash = Qnil; + struct re_registers *regs; + long beg, n; + long beg0, end0; + long offset, blen, slen, len, last; + int iter = 0; + char *sp, *cp; + int tainted = 0; + rb_encoding *str_enc; + + switch (argc) { + case 1: + RETURN_ENUMERATOR(str, argc, argv); + iter = 1; + break; + case 2: + repl = argv[1]; + hash = rb_check_convert_type(argv[1], T_HASH, "Hash", "to_hash"); + if (NIL_P(hash)) { + StringValue(repl); + } + if (OBJ_TAINTED(repl)) tainted = 1; + break; + default: + rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc); + } + + pat = get_pat(argv[0], 1); + beg = rb_reg_search(pat, str, 0, 0); + if (beg < 0) { + if (bang) return Qnil; /* no match, no substitution */ + return rb_str_dup(str); + } + + offset = 0; + n = 0; + blen = RSTRING_LEN(str) + 30; /* len + margin */ + dest = rb_str_buf_new(blen); + sp = RSTRING_PTR(str); + slen = RSTRING_LEN(str); + cp = sp; + str_enc = STR_ENC_GET(str); + + do { + n++; + match = rb_backref_get(); + regs = RMATCH_REGS(match); + beg0 = BEG(0); + end0 = END(0); + if (iter || !NIL_P(hash)) { + if (iter) { + val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match))); + } + else { + val = 
rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0))); + val = rb_obj_as_string(val); + } + str_mod_check(str, sp, slen); + if (bang) str_frozen_check(str); + if (val == dest) { /* paranoid check [ruby-dev:24827] */ + rb_raise(rb_eRuntimeError, "block should not cheat"); + } + } + else { + val = rb_reg_regsub(repl, str, regs, pat); + } + + if (OBJ_TAINTED(val)) tainted = 1; + + len = beg - offset; /* copy pre-match substr */ + if (len) { + rb_enc_str_buf_cat(dest, cp, len, str_enc); + } + + rb_str_buf_append(dest, val); + + last = offset; + offset = end0; + if (beg0 == end0) { + /* + * Always consume at least one character of the input string + * in order to prevent infinite loops. + */ + if (RSTRING_LEN(str) <= end0) break; + len = rb_enc_mbclen(RSTRING_PTR(str)+end0, RSTRING_END(str), str_enc); + rb_enc_str_buf_cat(dest, RSTRING_PTR(str)+end0, len, str_enc); + offset = end0 + len; + } + cp = RSTRING_PTR(str) + offset; + if (offset > RSTRING_LEN(str)) break; + beg = rb_reg_search(pat, str, offset, 0); + } while (beg >= 0); + if (RSTRING_LEN(str) > offset) { + rb_enc_str_buf_cat(dest, cp, RSTRING_LEN(str) - offset, str_enc); + } + rb_reg_search(pat, str, last, 0); + if (bang) { + rb_str_shared_replace(str, dest); + } + else { + RBASIC(dest)->klass = rb_obj_class(str); + OBJ_INFECT(dest, str); + str = dest; + } + + if (tainted) OBJ_TAINT(str); + return str; +} + + +/* + * call-seq: + * str.gsub!(pattern, replacement) => str or nil + * str.gsub!(pattern) {|match| block } => str or nil + * + * Performs the substitutions of String#gsub in place, returning + * str, or nil if no substitutions were performed. 
+ */ + +static VALUE +rb_str_gsub_bang(int argc, VALUE *argv, VALUE str) +{ + return str_gsub(argc, argv, str, 1); +} + + +/* + * call-seq: + * str.gsub(pattern, replacement) => new_str + * str.gsub(pattern) {|match| block } => new_str + * + * Returns a copy of str with all occurrences of pattern + * replaced with either replacement or the value of the block. The + * pattern will typically be a Regexp; if it is a + * String then no regular expression metacharacters will be + * interpreted (that is /\d/ will match a digit, but + * '\d' will match a backslash followed by a 'd'). + * + * If a string is used as the replacement, special variables from the match + * (such as $& and $1) cannot be substituted into it, + * as substitution into the string occurs before the pattern match + * starts. However, the sequences \1, \2, + * \k, and so on may be used to interpolate + * successive groups in the match. + * + * In the block form, the current match string is passed in as a parameter, and + * variables such as $1, $2, $`, + * $&, and $' will be set appropriately. The value + * returned by the block will be substituted for the match on each call. + * + * The result inherits any tainting in the original string or any supplied + * replacement string. + * + * "hello".gsub(/[aeiou]/, '*') #=> "h*ll*" + * "hello".gsub(/([aeiou])/, '<\1>') #=> "hll" + * "hello".gsub(/./) {|s| s[0].ord.to_s + ' '} #=> "104 101 108 108 111 " + * "hello".gsub(/(?[aeiou])/, '{\k}') #=> "h{e}ll{o}" + */ + +static VALUE +rb_str_gsub(int argc, VALUE *argv, VALUE str) +{ + return str_gsub(argc, argv, str, 0); +} + + +/* + * call-seq: + * str.replace(other_str) => str + * + * Replaces the contents and taintedness of str with the corresponding + * values in other_str. 
+ * + * s = "hello" #=> "hello" + * s.replace "world" #=> "world" + */ + +VALUE +rb_str_replace(VALUE str, VALUE str2) +{ + long len; + if (str == str2) return str; + + StringValue(str2); + len = RSTRING_LEN(str2); + if (STR_ASSOC_P(str2)) { + str2 = rb_str_new4(str2); + } + if (str_independent(str) && !STR_EMBED_P(str)) { + xfree(RSTRING_PTR(str)); + } + if (STR_SHARED_P(str2)) { + STR_SET_NOEMBED(str); + RSTRING(str)->as.heap.len = len; + RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2); + FL_SET(str, ELTS_SHARED); + FL_UNSET(str, STR_ASSOC); + RSTRING(str)->as.heap.aux.shared = RSTRING(str2)->as.heap.aux.shared; + } + else { + str_replace_shared(str, rb_str_new4(str2)); + } + + OBJ_INFECT(str, str2); + rb_enc_cr_str_exact_copy(str, str2); + return str; +} + +/* + * call-seq: + * string.clear -> string + * + * Makes string empty. + * + * a = "abcde" + * a.clear #=> "" + */ + +static VALUE +rb_str_clear(VALUE str) +{ + /* rb_str_modify() */ /* no need for str_make_independent */ + if (str_independent(str) && !STR_EMBED_P(str)) { + xfree(RSTRING_PTR(str)); + } + STR_SET_EMBED(str); + STR_SET_EMBED_LEN(str, 0); + RSTRING_PTR(str)[0] = 0; + if (rb_enc_asciicompat(STR_ENC_GET(str))) + ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT); + else + ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID); + return str; +} + +/* + * call-seq: + * string.chr -> string + * + * Returns a one-character string at the beginning of the string. + * + * a = "abcde" + * a.chr #=> "a" + */ + +static VALUE +rb_str_chr(VALUE str) +{ + return rb_str_substr(str, 0, 1); +} + +/* + * call-seq: + * str.getbyte(index) => 0 .. 255 + * + * returns the indexth byte as an integer. 
+ */ +static VALUE +rb_str_getbyte(VALUE str, VALUE index) +{ + long pos = NUM2LONG(index); + + if (pos < 0) + pos += RSTRING_LEN(str); + if (pos < 0 || RSTRING_LEN(str) <= pos) + return Qnil; + + return INT2FIX((unsigned char)RSTRING_PTR(str)[pos]); +} + +/* + * call-seq: + * str.setbyte(index, int) => int + * + * modifies the indexth byte as int. + */ +static VALUE +rb_str_setbyte(VALUE str, VALUE index, VALUE value) +{ + long pos = NUM2LONG(index); + int byte = NUM2INT(value); + + rb_str_modify(str); + + if (pos < -RSTRING_LEN(str) || RSTRING_LEN(str) <= pos) + rb_raise(rb_eIndexError, "index %ld out of string", pos); + if (pos < 0) + pos += RSTRING_LEN(str); + + RSTRING_PTR(str)[pos] = byte; + + return value; +} + +/* + * call-seq: + * str.reverse => new_str + * + * Returns a new string with the characters from str in reverse order. + * + * "stressed".reverse #=> "desserts" + */ + +static VALUE +rb_str_reverse(VALUE str) +{ + rb_encoding *enc; + VALUE rev; + char *s, *e, *p; + int single = 1; + + if (RSTRING_LEN(str) <= 1) return rb_str_dup(str); + enc = STR_ENC_GET(str); + rev = rb_str_new5(str, 0, RSTRING_LEN(str)); + s = RSTRING_PTR(str); e = RSTRING_END(str); + p = RSTRING_END(rev); + + if (RSTRING_LEN(str) > 1) { + if (single_byte_optimizable(str)) { + while (s < e) { + *--p = *s++; + } + } + else { + while (s < e) { + int clen = rb_enc_mbclen(s, e, enc); + + if (clen > 1 || (*s & 0x80)) single = 0; + p -= clen; + memcpy(p, s, clen); + s += clen; + } + } + } + STR_SET_LEN(rev, RSTRING_LEN(str)); + OBJ_INFECT(rev, str); + if (ENC_CODERANGE(str) == ENC_CODERANGE_UNKNOWN) { + if (single) { + ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT); + } + else { + ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID); + } + } + rb_enc_cr_str_copy_for_substr(rev, str); + + return rev; +} + + +/* + * call-seq: + * str.reverse! => str + * + * Reverses str in place. 
+ */ + +static VALUE +rb_str_reverse_bang(VALUE str) +{ + if (RSTRING_LEN(str) > 1) { + if (single_byte_optimizable(str)) { + char *s, *e, c; + + str_modify_keep_cr(str); + s = RSTRING_PTR(str); + e = RSTRING_END(str) - 1; + while (s < e) { + c = *s; + *s++ = *e; + *e-- = c; + } + } + else { + rb_str_shared_replace(str, rb_str_reverse(str)); + } + } + return str; +} + + +/* + * call-seq: + * str.include? other_str => true or false + * + * Returns true if str contains the given string or + * character. + * + * "hello".include? "lo" #=> true + * "hello".include? "ol" #=> false + * "hello".include? ?h #=> true + */ + +static VALUE +rb_str_include(VALUE str, VALUE arg) +{ + long i; + + StringValue(arg); + i = rb_str_index(str, arg, 0); + + if (i == -1) return Qfalse; + return Qtrue; +} + + +/* + * call-seq: + * str.to_i(base=10) => integer + * + * Returns the result of interpreting leading characters in str as an + * integer base base (between 2 and 36). Extraneous characters past the + * end of a valid number are ignored. If there is not a valid number at the + * start of str, 0 is returned. This method never raises an + * exception. + * + * "12345".to_i #=> 12345 + * "99 red balloons".to_i #=> 99 + * "0a".to_i #=> 0 + * "0a".to_i(16) #=> 10 + * "hello".to_i #=> 0 + * "1100101".to_i(2) #=> 101 + * "1100101".to_i(8) #=> 294977 + * "1100101".to_i(10) #=> 1100101 + * "1100101".to_i(16) #=> 17826049 + */ + +static VALUE +rb_str_to_i(int argc, VALUE *argv, VALUE str) +{ + int base; + + if (argc == 0) base = 10; + else { + VALUE b; + + rb_scan_args(argc, argv, "01", &b); + base = NUM2INT(b); + } + if (base < 0) { + rb_raise(rb_eArgError, "invalid radix %d", base); + } + return rb_str_to_inum(str, base, Qfalse); +} + + +/* + * call-seq: + * str.to_f => float + * + * Returns the result of interpreting leading characters in str as a + * floating point number. Extraneous characters past the end of a valid number + * are ignored. 
If there is not a valid number at the start of str, + * 0.0 is returned. This method never raises an exception. + * + * "123.45e1".to_f #=> 1234.5 + * "45.67 degrees".to_f #=> 45.67 + * "thx1138".to_f #=> 0.0 + */ + +static VALUE +rb_str_to_f(VALUE str) +{ + return DBL2NUM(rb_str_to_dbl(str, Qfalse)); +} + + +/* + * call-seq: + * str.to_s => str + * str.to_str => str + * + * Returns the receiver. + */ + +static VALUE +rb_str_to_s(VALUE str) +{ + if (rb_obj_class(str) != rb_cString) { + return str_duplicate(rb_cString, str); + } + return str; +} + +static void +str_cat_char(VALUE str, unsigned int c, rb_encoding *enc) +{ + char s[RUBY_MAX_CHAR_LEN]; + int n = rb_enc_codelen(c, enc); + + rb_enc_mbcput(c, s, enc); + rb_enc_str_buf_cat(str, s, n, enc); +} + +static void +prefix_escape(VALUE str, unsigned int c, rb_encoding *enc) +{ + str_cat_char(str, '\\', enc); + str_cat_char(str, c, enc); +} + +/* + * call-seq: + * str.inspect => string + * + * Returns a printable version of _str_, surrounded by quote marks, + * with special characters escaped. 
+ * + * str = "hello" + * str[3] = "\b" + * str.inspect #=> "\"hel\\bo\"" + */ + +VALUE +rb_str_inspect(VALUE str) +{ + rb_encoding *enc = STR_ENC_GET(str); + char *p, *pend; + VALUE result = rb_str_buf_new(0); + + if (!rb_enc_asciicompat(enc)) enc = rb_usascii_encoding(); + rb_enc_associate(result, enc); + str_cat_char(result, '"', enc); + p = RSTRING_PTR(str); pend = RSTRING_END(str); + while (p < pend) { + unsigned int c, cc; + int n; + + n = rb_enc_precise_mbclen(p, pend, enc); + if (!MBCLEN_CHARFOUND_P(n)) { + p++; + n = 1; + goto escape_codepoint; + } + n = MBCLEN_CHARFOUND_LEN(n); + + c = rb_enc_codepoint(p, pend, enc); + n = rb_enc_codelen(c, enc); + + p += n; + if (c == '"'|| c == '\\' || + (c == '#' && + p < pend && + MBCLEN_CHARFOUND_P(rb_enc_precise_mbclen(p,pend,enc)) && + (cc = rb_enc_codepoint(p,pend,enc), + (cc == '$' || cc == '@' || cc == '{')))) { + prefix_escape(result, c, enc); + } + else if (c == '\n') { + prefix_escape(result, 'n', enc); + } + else if (c == '\r') { + prefix_escape(result, 'r', enc); + } + else if (c == '\t') { + prefix_escape(result, 't', enc); + } + else if (c == '\f') { + prefix_escape(result, 'f', enc); + } + else if (c == '\013') { + prefix_escape(result, 'v', enc); + } + else if (c == '\010') { + prefix_escape(result, 'b', enc); + } + else if (c == '\007') { + prefix_escape(result, 'a', enc); + } + else if (c == 033) { + prefix_escape(result, 'e', enc); + } + else if (rb_enc_isprint(c, enc)) { + rb_enc_str_buf_cat(result, p-n, n, enc); + } + else { + char buf[5]; + char *s; + char *q; + + escape_codepoint: + for (q = p-n; q < p; q++) { + s = buf; + sprintf(buf, "\\x%02X", *q & 0377); + while (*s) { + str_cat_char(result, *s++, enc); + } + } + } + } + str_cat_char(result, '"', enc); + + OBJ_INFECT(result, str); + return result; +} + +#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{')) + +/* + * call-seq: + * str.dump => new_str + * + * Produces a version of str with all nonprinting characters 
replaced by + * \nnn notation and all special characters escaped. + */ + +VALUE +rb_str_dump(VALUE str) +{ + rb_encoding *enc = rb_enc_get(str); + long len; + const char *p, *pend; + char *q, *qend; + VALUE result; + int u8 = (enc == rb_utf8_encoding()); + + len = 2; /* "" */ + p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); + while (p < pend) { + unsigned char c = *p++; + switch (c) { + case '"': case '\\': + case '\n': case '\r': + case '\t': case '\f': + case '\013': case '\010': case '\007': case '\033': + len += 2; + break; + + case '#': + len += IS_EVSTR(p, pend) ? 2 : 1; + break; + + default: + if (ISPRINT(c)) { + len++; + } + else { + if (u8) { /* \u{NN} */ + char buf[32]; + int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1; + if (MBCLEN_CHARFOUND_P(n)) { + int cc = rb_enc_codepoint(p-1, pend, enc); + sprintf(buf, "%x", cc); + len += strlen(buf)+4; + p += n; + break; + } + } + len += 4; /* \xNN */ + } + break; + } + } + if (!rb_enc_asciicompat(enc)) { + len += 19; /* ".force_encoding('')" */ + len += strlen(enc->name); + } + + result = rb_str_new5(str, 0, len); + p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); + q = RSTRING_PTR(result); qend = q + len + 1; + + *q++ = '"'; + while (p < pend) { + unsigned char c = *p++; + + if (c == '"' || c == '\\') { + *q++ = '\\'; + *q++ = c; + } + else if (c == '#') { + if (IS_EVSTR(p, pend)) *q++ = '\\'; + *q++ = '#'; + } + else if (c == '\n') { + *q++ = '\\'; + *q++ = 'n'; + } + else if (c == '\r') { + *q++ = '\\'; + *q++ = 'r'; + } + else if (c == '\t') { + *q++ = '\\'; + *q++ = 't'; + } + else if (c == '\f') { + *q++ = '\\'; + *q++ = 'f'; + } + else if (c == '\013') { + *q++ = '\\'; + *q++ = 'v'; + } + else if (c == '\010') { + *q++ = '\\'; + *q++ = 'b'; + } + else if (c == '\007') { + *q++ = '\\'; + *q++ = 'a'; + } + else if (c == '\033') { + *q++ = '\\'; + *q++ = 'e'; + } + else if (ISPRINT(c)) { + *q++ = c; + } + else { + *q++ = '\\'; + if (u8) { + int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1; + if 
(MBCLEN_CHARFOUND_P(n)) { + int cc = rb_enc_codepoint(p-1, pend, enc); + p += n; + sprintf(q, "u{%x}", cc); + q += strlen(q); + continue; + } + } + sprintf(q, "x%02X", c); + q += 3; + } + } + *q++ = '"'; + *q = '\0'; + if (!rb_enc_asciicompat(enc)) { + sprintf(q, ".force_encoding(\"%s\")", enc->name); + enc = rb_ascii8bit_encoding(); + } + OBJ_INFECT(result, str); + /* result from dump is ASCII */ + rb_enc_associate(result, enc); + ENC_CODERANGE_SET(result, ENC_CODERANGE_7BIT); + return result; +} + + +static void +rb_str_check_dummy_enc(rb_encoding *enc) +{ + if (rb_enc_dummy_p(enc)) { + rb_raise(rb_eEncCompatError, "incompatible encoding with this operation: %s", + rb_enc_name(enc)); + } +} + +/* + * call-seq: + * str.upcase! => str or nil + * + * Upcases the contents of str, returning nil if no changes + * were made. + * Note: case replacement is effective only in ASCII region. + */ + +static VALUE +rb_str_upcase_bang(VALUE str) +{ + rb_encoding *enc; + char *s, *send; + int modify = 0; + + str_modify_keep_cr(str); + enc = STR_ENC_GET(str); + rb_str_check_dummy_enc(enc); + s = RSTRING_PTR(str); send = RSTRING_END(str); + if (single_byte_optimizable(str)) { + while (s < send) { + unsigned int c = *(unsigned char*)s; + + if (rb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') { + *s = 'A' + (c - 'a'); + modify = 1; + } + s++; + } + } + else { + int ascompat = rb_enc_asciicompat(enc); + + while (s < send) { + unsigned int c; + + if (ascompat && (c = *(unsigned char*)s) < 0x80) { + if (rb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') { + *s = 'A' + (c - 'a'); + modify = 1; + } + s++; + } + else { + c = rb_enc_codepoint(s, send, enc); + if (rb_enc_islower(c, enc)) { + /* assuming toupper returns codepoint with same size */ + rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc); + modify = 1; + } + s += rb_enc_codelen(c, enc); + } + } + } + + if (modify) return str; + return Qnil; +} + + +/* + * call-seq: + * str.upcase => new_str + * + * Returns a copy of str with all lowercase 
letters replaced with their + * uppercase counterparts. The operation is locale insensitive---only + * characters ``a'' to ``z'' are affected. + * Note: case replacement is effective only in ASCII region. + * + * "hEllO".upcase #=> "HELLO" + */ + +static VALUE +rb_str_upcase(VALUE str) +{ + str = rb_str_dup(str); + rb_str_upcase_bang(str); + return str; +} + + +/* + * call-seq: + * str.downcase! => str or nil + * + * Downcases the contents of str, returning nil if no + * changes were made. + * Note: case replacement is effective only in ASCII region. + */ + +static VALUE +rb_str_downcase_bang(VALUE str) +{ + rb_encoding *enc; + char *s, *send; + int modify = 0; + + str_modify_keep_cr(str); + enc = STR_ENC_GET(str); + rb_str_check_dummy_enc(enc); + s = RSTRING_PTR(str); send = RSTRING_END(str); + if (single_byte_optimizable(str)) { + while (s < send) { + unsigned int c = *(unsigned char*)s; + + if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') { + *s = 'a' + (c - 'A'); + modify = 1; + } + s++; + } + } + else { + int ascompat = rb_enc_asciicompat(enc); + + while (s < send) { + unsigned int c; + + if (ascompat && (c = *(unsigned char*)s) < 0x80) { + if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') { + *s = 'a' + (c - 'A'); + modify = 1; + } + s++; + } + else { + c = rb_enc_codepoint(s, send, enc); + if (rb_enc_isupper(c, enc)) { + /* assuming toupper returns codepoint with same size */ + rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc); + modify = 1; + } + s += rb_enc_codelen(c, enc); + } + } + } + + if (modify) return str; + return Qnil; +} + + +/* + * call-seq: + * str.downcase => new_str + * + * Returns a copy of str with all uppercase letters replaced with their + * lowercase counterparts. The operation is locale insensitive---only + * characters ``A'' to ``Z'' are affected. + * Note: case replacement is effective only in ASCII region. 
+ * + * "hEllO".downcase #=> "hello" + */ + +static VALUE +rb_str_downcase(VALUE str) +{ + str = rb_str_dup(str); + rb_str_downcase_bang(str); + return str; +} + + +/* + * call-seq: + * str.capitalize! => str or nil + * + * Modifies str by converting the first character to uppercase and the + * remainder to lowercase. Returns nil if no changes are made. + * Note: case conversion is effective only in ASCII region. + * + * a = "hello" + * a.capitalize! #=> "Hello" + * a #=> "Hello" + * a.capitalize! #=> nil + */ + +static VALUE +rb_str_capitalize_bang(VALUE str) +{ + rb_encoding *enc; + char *s, *send; + int modify = 0; + unsigned int c; + + str_modify_keep_cr(str); + enc = STR_ENC_GET(str); + rb_str_check_dummy_enc(enc); + if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil; + s = RSTRING_PTR(str); send = RSTRING_END(str); + + c = rb_enc_codepoint(s, send, enc); + if (rb_enc_islower(c, enc)) { + rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc); + modify = 1; + } + s += rb_enc_codelen(c, enc); + while (s < send) { + c = rb_enc_codepoint(s, send, enc); + if (rb_enc_isupper(c, enc)) { + rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc); + modify = 1; + } + s += rb_enc_codelen(c, enc); + } + + if (modify) return str; + return Qnil; +} + + +/* + * call-seq: + * str.capitalize => new_str + * + * Returns a copy of str with the first character converted to uppercase + * and the remainder to lowercase. + * Note: case conversion is effective only in ASCII region. + * + * "hello".capitalize #=> "Hello" + * "HELLO".capitalize #=> "Hello" + * "123ABC".capitalize #=> "123abc" + */ + +static VALUE +rb_str_capitalize(VALUE str) +{ + str = rb_str_dup(str); + rb_str_capitalize_bang(str); + return str; +} + + +/* + * call-seq: +* str.swapcase! => str or nil + * + * Equivalent to String#swapcase, but modifies the receiver in + * place, returning str, or nil if no changes were made. + * Note: case conversion is effective only in ASCII region. 
+ */ + +static VALUE +rb_str_swapcase_bang(VALUE str) +{ + rb_encoding *enc; + char *s, *send; + int modify = 0; + + str_modify_keep_cr(str); + enc = STR_ENC_GET(str); + rb_str_check_dummy_enc(enc); + s = RSTRING_PTR(str); send = RSTRING_END(str); + while (s < send) { + unsigned int c = rb_enc_codepoint(s, send, enc); + + if (rb_enc_isupper(c, enc)) { + /* assuming toupper returns codepoint with same size */ + rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc); + modify = 1; + } + else if (rb_enc_islower(c, enc)) { + /* assuming tolower returns codepoint with same size */ + rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc); + modify = 1; + } + s += rb_enc_mbclen(s, send, enc); + } + + if (modify) return str; + return Qnil; +} + + +/* + * call-seq: + * str.swapcase => new_str + * + * Returns a copy of str with uppercase alphabetic characters converted + * to lowercase and lowercase characters converted to uppercase. + * Note: case conversion is effective only in ASCII region. + * + * "Hello".swapcase #=> "hELLO" + * "cYbEr_PuNk11".swapcase #=> "CyBeR_pUnK11" + */ + +static VALUE +rb_str_swapcase(VALUE str) +{ + str = rb_str_dup(str); + rb_str_swapcase_bang(str); + return str; +} + +typedef unsigned char *USTR; + +struct tr { + int gen; + unsigned int now, max; + char *p, *pend; +}; + +static unsigned int +trnext(struct tr *t, rb_encoding *enc) +{ + for (;;) { + if (!t->gen) { + if (t->p == t->pend) return -1; + if (t->p < t->pend - 1 && *t->p == '\\') { + t->p++; + } + t->now = rb_enc_codepoint(t->p, t->pend, enc); + t->p += rb_enc_codelen(t->now, enc); + if (t->p < t->pend - 1 && *t->p == '-') { + t->p++; + if (t->p < t->pend) { + unsigned int c = rb_enc_codepoint(t->p, t->pend, enc); + t->p += rb_enc_codelen(c, enc); + if (t->now > c) continue; + t->gen = 1; + t->max = c; + } + } + return t->now; + } + else if (++t->now < t->max) { + return t->now; + } + else { + t->gen = 0; + return t->max; + } + } +} + +static VALUE rb_str_delete_bang(int,VALUE*,VALUE); + +static VALUE 
+tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) +{ + const unsigned int errc = -1; + unsigned int trans[256]; + rb_encoding *enc, *e1, *e2; + struct tr trsrc, trrepl; + int cflag = 0; + unsigned int c, c0; + int last = 0, modify = 0, i, l; + char *s, *send; + VALUE hash = 0; + int singlebyte = single_byte_optimizable(str); + int cr; + +#define CHECK_IF_ASCII(c) \ + (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \ + (cr = ENC_CODERANGE_VALID) : 0) + + StringValue(src); + StringValue(repl); + if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil; + if (RSTRING_LEN(repl) == 0) { + return rb_str_delete_bang(1, &src, str); + } + + cr = ENC_CODERANGE(str); + e1 = rb_enc_check(str, src); + e2 = rb_enc_check(str, repl); + if (e1 == e2) { + enc = e1; + } + else { + enc = rb_enc_check(src, repl); + } + trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src); + if (RSTRING_LEN(src) > 1 && + rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) == '^' && + trsrc.p + l < trsrc.pend) { + cflag = 1; + trsrc.p += l; + } + trrepl.p = RSTRING_PTR(repl); + trrepl.pend = trrepl.p + RSTRING_LEN(repl); + trsrc.gen = trrepl.gen = 0; + trsrc.now = trrepl.now = 0; + trsrc.max = trrepl.max = 0; + + if (cflag) { + for (i=0; i<256; i++) { + trans[i] = 1; + } + while ((c = trnext(&trsrc, enc)) != errc) { + if (c < 256) { + trans[c] = errc; + } + else { + if (!hash) hash = rb_hash_new(); + rb_hash_aset(hash, UINT2NUM(c), Qtrue); + } + } + while ((c = trnext(&trrepl, enc)) != errc) + /* retrieve last replacer */; + last = trrepl.now; + for (i=0; i<256; i++) { + if (trans[i] != errc) { + trans[i] = last; + } + } + } + else { + unsigned int r; + + for (i=0; i<256; i++) { + trans[i] = errc; + } + while ((c = trnext(&trsrc, enc)) != errc) { + r = trnext(&trrepl, enc); + if (r == errc) r = trrepl.now; + if (c < 256) { + trans[c] = r; + if (rb_enc_codelen(r, enc) != 1) singlebyte = 0; + } + else { + if (!hash) hash = rb_hash_new(); + rb_hash_aset(hash, UINT2NUM(c), UINT2NUM(r)); 
+ } + } + } + + if (cr == ENC_CODERANGE_VALID) + cr = ENC_CODERANGE_7BIT; + str_modify_keep_cr(str); + s = RSTRING_PTR(str); send = RSTRING_END(str); + if (sflag) { + int clen, tlen, max = RSTRING_LEN(str); + int offset, save = -1; + char *buf = ALLOC_N(char, max), *t = buf; + + while (s < send) { + int may_modify = 0; + c0 = c = rb_enc_codepoint(s, send, e1); + clen = rb_enc_codelen(c, e1); + tlen = enc == e1 ? clen : rb_enc_codelen(c, enc); + + s += clen; + if (c < 256) { + c = trans[c]; + } + else if (hash) { + VALUE tmp = rb_hash_lookup(hash, UINT2NUM(c)); + if (NIL_P(tmp)) { + if (cflag) c = last; + else c = errc; + } + else if (cflag) c = errc; + else c = NUM2INT(tmp); + } + else { + c = errc; + } + if (c != -1) { + if (save == c) { + CHECK_IF_ASCII(c); + continue; + } + save = c; + tlen = rb_enc_codelen(c, enc); + modify = 1; + } + else { + save = -1; + c = c0; + if (enc != e1) may_modify = 1; + } + while (t - buf + tlen >= max) { + offset = t - buf; + max *= 2; + REALLOC_N(buf, char, max); + t = buf + offset; + } + rb_enc_mbcput(c, t, enc); + if (may_modify && memcmp(s, t, tlen) != 0) { + modify = 1; + } + CHECK_IF_ASCII(c); + t += tlen; + } + *t = '\0'; + RSTRING(str)->as.heap.ptr = buf; + RSTRING(str)->as.heap.len = t - buf; + STR_SET_NOEMBED(str); + RSTRING(str)->as.heap.aux.capa = max; + } + else if (rb_enc_mbmaxlen(enc) == 1 || (singlebyte && !hash)) { + while (s < send) { + c = (unsigned char)*s; + if (trans[c] != errc) { + if (!cflag) { + c = trans[c]; + *s = c; + modify = 1; + } + else { + *s = last; + modify = 1; + } + } + CHECK_IF_ASCII(c); + s++; + } + } + else { + int clen, tlen, max = RSTRING_LEN(str) * 1.2; + int offset; + char *buf = ALLOC_N(char, max), *t = buf; + + while (s < send) { + int may_modify = 0; + c0 = c = rb_enc_codepoint(s, send, e1); + clen = rb_enc_codelen(c, e1); + tlen = enc == e1 ? 
clen : rb_enc_codelen(c, enc); + + if (c < 256) { + c = trans[c]; + } + else if (hash) { + VALUE tmp = rb_hash_lookup(hash, UINT2NUM(c)); + if (NIL_P(tmp)) { + if (cflag) c = last; + else c = errc; + } + else if (cflag) c = errc; + else c = NUM2INT(tmp); + } + else { + c = errc; + } + if (c != errc) { + tlen = rb_enc_codelen(c, enc); + modify = 1; + } + else { + c = c0; + if (enc != e1) may_modify = 1; + } + while (t - buf + tlen >= max) { + offset = t - buf; + max *= 2; + REALLOC_N(buf, char, max); + t = buf + offset; + } + if (s != t) { + rb_enc_mbcput(c, t, enc); + if (may_modify && memcmp(s, t, tlen) != 0) { + modify = 1; + } + } + CHECK_IF_ASCII(c); + s += clen; + t += tlen; + } + if (!STR_EMBED_P(str)) { + xfree(RSTRING(str)->as.heap.ptr); + } + *t = '\0'; + RSTRING(str)->as.heap.ptr = buf; + RSTRING(str)->as.heap.len = t - buf; + STR_SET_NOEMBED(str); + RSTRING(str)->as.heap.aux.capa = max; + } + + if (modify) { + if (cr != ENC_CODERANGE_BROKEN) + ENC_CODERANGE_SET(str, cr); + rb_enc_associate(str, enc); + return str; + } + return Qnil; +} + + +/* + * call-seq: + * str.tr!(from_str, to_str) => str or nil + * + * Translates str in place, using the same rules as + * String#tr. Returns str, or nil if no + * changes were made. + */ + +static VALUE +rb_str_tr_bang(VALUE str, VALUE src, VALUE repl) +{ + return tr_trans(str, src, repl, 0); +} + + +/* + * call-seq: + * str.tr(from_str, to_str) => new_str + * + * Returns a copy of str with the characters in from_str replaced + * by the corresponding characters in to_str. If to_str is + * shorter than from_str, it is padded with its last character. Both + * strings may use the c1--c2 notation to denote ranges of characters, and + * from_str may start with a ^, which denotes all + * characters except those listed. 
+ * + * "hello".tr('aeiou', '*') #=> "h*ll*" + * "hello".tr('^aeiou', '*') #=> "*e**o" + * "hello".tr('el', 'ip') #=> "hippo" + * "hello".tr('a-y', 'b-z') #=> "ifmmp" + */ + +static VALUE +rb_str_tr(VALUE str, VALUE src, VALUE repl) +{ + str = rb_str_dup(str); + tr_trans(str, src, repl, 0); + return str; +} + +static void +tr_setup_table(VALUE str, char stable[256], int first, + VALUE *tablep, VALUE *ctablep, rb_encoding *enc) +{ + const unsigned int errc = -1; + char buf[256]; + struct tr tr; + unsigned int c; + VALUE table = 0, ptable = 0; + int i, l, cflag = 0; + + tr.p = RSTRING_PTR(str); tr.pend = tr.p + RSTRING_LEN(str); + tr.gen = tr.now = tr.max = 0; + + if (RSTRING_LEN(str) > 1 && rb_enc_ascget(tr.p, tr.pend, &l, enc) == '^') { + cflag = 1; + tr.p += l; + } + if (first) { + for (i=0; i<256; i++) { + stable[i] = 1; + } + } + for (i=0; i<256; i++) { + buf[i] = cflag; + } + + while ((c = trnext(&tr, enc)) != errc) { + if (c < 256) { + buf[c & 0xff] = !cflag; + } + else { + VALUE key = UINT2NUM(c); + + if (!table) { + table = rb_hash_new(); + if (cflag) { + ptable = *ctablep; + *ctablep = table; + } + else { + ptable = *tablep; + *tablep = table; + } + } + if (!ptable || !NIL_P(rb_hash_aref(ptable, key))) { + rb_hash_aset(table, key, Qtrue); + } + } + } + for (i=0; i<256; i++) { + stable[i] = stable[i] && buf[i]; + } +} + + +static int +tr_find(unsigned int c, char table[256], VALUE del, VALUE nodel) +{ + if (c < 256) { + return table[c] ? Qtrue : Qfalse; + } + else { + VALUE v = UINT2NUM(c); + + if (del && !NIL_P(rb_hash_lookup(del, v))) { + if (!nodel || NIL_P(rb_hash_lookup(nodel, v))) { + return Qtrue; + } + } + return Qfalse; + } +} + +/* + * call-seq: + * str.delete!([other_str]+) => str or nil + * + * Performs a delete operation in place, returning str, or + * nil if str was not modified. 
+ */ + +static VALUE +rb_str_delete_bang(int argc, VALUE *argv, VALUE str) +{ + char squeez[256]; + rb_encoding *enc = 0; + char *s, *send, *t; + VALUE del = 0, nodel = 0; + int modify = 0; + int i, ascompat, cr; + + if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil; + if (argc < 1) { + rb_raise(rb_eArgError, "wrong number of arguments"); + } + for (i=0; i new_str + * + * Returns a copy of str with all characters in the intersection of its + * arguments deleted. Uses the same rules for building the set of characters as + * String#count. + * + * "hello".delete "l","lo" #=> "heo" + * "hello".delete "lo" #=> "he" + * "hello".delete "aeiou", "^e" #=> "hell" + * "hello".delete "ej-m" #=> "ho" + */ + +static VALUE +rb_str_delete(int argc, VALUE *argv, VALUE str) +{ + str = rb_str_dup(str); + rb_str_delete_bang(argc, argv, str); + return str; +} + + +/* + * call-seq: + * str.squeeze!([other_str]*) => str or nil + * + * Squeezes str in place, returning either str, or + * nil if no changes were made. 
+ */ + +static VALUE +rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str) +{ + char squeez[256]; + rb_encoding *enc = 0; + VALUE del = 0, nodel = 0; + char *s, *send, *t; + int save, modify = 0; + int i; + int ascompat, singlebyte = single_byte_optimizable(str); + + if (argc == 0) { + enc = STR_ENC_GET(str); + } + else { + for (i=0; i 0 && !squeez[c])) { + *t++ = save = c; + } + } + } else { + while (s < send) { + unsigned int c; + int clen; + + if (ascompat && (c = *(unsigned char*)s) < 0x80) { + if (c != save || (argc > 0 && !squeez[c])) { + *t++ = save = c; + } + s++; + } + else { + c = rb_enc_codepoint(s, send, enc); + clen = rb_enc_codelen(c, enc); + + if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) { + if (t != s) rb_enc_mbcput(c, t, enc); + save = c; + t += clen; + } + s += clen; + } + } + } + + *t = '\0'; + if (t - RSTRING_PTR(str) != RSTRING_LEN(str)) { + STR_SET_LEN(str, t - RSTRING_PTR(str)); + modify = 1; + } + + if (modify) return str; + return Qnil; +} + + +/* + * call-seq: + * str.squeeze([other_str]*) => new_str + * + * Builds a set of characters from the other_str parameter(s) using the + * procedure described for String#count. Returns a new string + * where runs of the same character that occur in this set are replaced by a + * single character. If no arguments are given, all runs of identical + * characters are replaced by a single character. + * + * "yellow moon".squeeze #=> "yelow mon" + * " now is the".squeeze(" ") #=> " now is the" + * "putters shoot balls".squeeze("m-z") #=> "puters shot balls" + */ + +static VALUE +rb_str_squeeze(int argc, VALUE *argv, VALUE str) +{ + str = rb_str_dup(str); + rb_str_squeeze_bang(argc, argv, str); + return str; +} + + +/* + * call-seq: + * str.tr_s!(from_str, to_str) => str or nil + * + * Performs String#tr_s processing on str in place, + * returning str, or nil if no changes were made. 
+ */ + +static VALUE +rb_str_tr_s_bang(VALUE str, VALUE src, VALUE repl) +{ + return tr_trans(str, src, repl, 1); +} + + +/* + * call-seq: + * str.tr_s(from_str, to_str) => new_str + * + * Processes a copy of str as described under String#tr, + * then removes duplicate characters in regions that were affected by the + * translation. + * + * "hello".tr_s('l', 'r') #=> "hero" + * "hello".tr_s('el', '*') #=> "h*o" + * "hello".tr_s('el', 'hx') #=> "hhxo" + */ + +static VALUE +rb_str_tr_s(VALUE str, VALUE src, VALUE repl) +{ + str = rb_str_dup(str); + tr_trans(str, src, repl, 1); + return str; +} + + +/* + * call-seq: + * str.count([other_str]+) => fixnum + * + * Each other_str parameter defines a set of characters to count. The + * intersection of these sets defines the characters to count in + * str. Any other_str that starts with a caret (^) is + * negated. The sequence c1--c2 means all characters between c1 and c2. + * + * a = "hello world" + * a.count "lo" #=> 5 + * a.count "lo", "o" #=> 2 + * a.count "hello", "^l" #=> 4 + * a.count "ej-m" #=> 4 + */ + +static VALUE +rb_str_count(int argc, VALUE *argv, VALUE str) +{ + char table[256]; + rb_encoding *enc = 0; + VALUE del = 0, nodel = 0; + char *s, *send; + int i; + int ascompat; + + if (argc < 1) { + rb_raise(rb_eArgError, "wrong number of arguments"); + } + for (i=0; i anArray + * + * Divides str into substrings based on a delimiter, returning an array + * of these substrings. + * + * If pattern is a String, then its contents are used as + * the delimiter when splitting str. If pattern is a single + * space, str is split on whitespace, with leading whitespace and runs + * of contiguous whitespace characters ignored. + * + * If pattern is a Regexp, str is divided where the + * pattern matches. Whenever the pattern matches a zero-length string, + * str is split into individual characters. If pattern contains + * groups, the respective matches will be returned in the array as well. 
+ * + * If pattern is omitted, the value of $; is used. If + * $; is nil (which is the default), str is + * split on whitespace as if ` ' were specified. + * + * If the limit parameter is omitted, trailing null fields are + * suppressed. If limit is a positive number, at most that number of + * fields will be returned (if limit is 1, the entire + * string is returned as the only entry in an array). If negative, there is no + * limit to the number of fields returned, and trailing null fields are not + * suppressed. + * + * " now's the time".split #=> ["now's", "the", "time"] + * " now's the time".split(' ') #=> ["now's", "the", "time"] + * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"] + * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"] + * "hello".split(//) #=> ["h", "e", "l", "l", "o"] + * "hello".split(//, 3) #=> ["h", "e", "llo"] + * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"] + * + * "mellow yellow".split("ello") #=> ["m", "w y", "w"] + * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"] + * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"] + * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""] + */ + +static VALUE +rb_str_split_m(int argc, VALUE *argv, VALUE str) +{ + rb_encoding *enc; + VALUE spat; + VALUE limit; + enum {awk, string, regexp} split_type; + long beg, end, i = 0; + int lim = 0; + VALUE result, tmp; + + if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) { + lim = NUM2INT(limit); + if (lim <= 0) limit = Qnil; + else if (lim == 1) { + if (RSTRING_LEN(str) == 0) + return rb_ary_new2(0); + return rb_ary_new3(1, str); + } + i = 1; + } + + enc = STR_ENC_GET(str); + if (NIL_P(spat)) { + if (!NIL_P(rb_fs)) { + spat = rb_fs; + goto fs_set; + } + split_type = awk; + } + else { + fs_set: + if (TYPE(spat) == T_STRING) { + rb_encoding *enc2 = STR_ENC_GET(spat); + + split_type = string; + if (RSTRING_LEN(spat) == 0) { + /* Special case - split into chars */ + spat = rb_reg_regcomp(spat); + 
split_type = regexp; + } + else if (rb_enc_asciicompat(enc2) == 1) { + if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){ + split_type = awk; + } + } + else { + int l; + if (rb_enc_ascget(RSTRING_PTR(spat), RSTRING_END(spat), &l, enc2) == ' ' && + RSTRING_LEN(spat) == l) { + split_type = awk; + } + } + } + else { + spat = get_pat(spat, 1); + split_type = regexp; + } + } + + result = rb_ary_new(); + beg = 0; + if (split_type == awk) { + char *ptr = RSTRING_PTR(str); + char *eptr = RSTRING_END(str); + char *bptr = ptr; + int skip = 1; + unsigned int c; + + end = beg; + while (ptr < eptr) { + c = rb_enc_codepoint(ptr, eptr, enc); + ptr += rb_enc_mbclen(ptr, eptr, enc); + if (skip) { + if (rb_enc_isspace(c, enc)) { + beg = ptr - bptr; + } + else { + end = ptr - bptr; + skip = 0; + if (!NIL_P(limit) && lim <= i) break; + } + } + else { + if (rb_enc_isspace(c, enc)) { + rb_ary_push(result, rb_str_subseq(str, beg, end-beg)); + skip = 1; + beg = ptr - bptr; + if (!NIL_P(limit)) ++i; + } + else { + end = ptr - bptr; + } + } + } + } + else if (split_type == string) { + char *ptr = RSTRING_PTR(str); + char *eptr = RSTRING_END(str); + char *sptr = RSTRING_PTR(spat); + int slen = RSTRING_LEN(spat); + + if (is_broken_string(str)) { + rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str))); + } + if (is_broken_string(spat)) { + rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(spat))); + } + enc = rb_enc_check(str, spat); + while (ptr < eptr && + (end = rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) { + /* Check we are at the start of a char */ + char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc); + if (t != ptr + end) { + ptr = t; + continue; + } + rb_ary_push(result, rb_str_subseq(str, ptr - RSTRING_PTR(str), end)); + ptr += end + slen; + if (!NIL_P(limit) && lim <= ++i) break; + } + beg = ptr - RSTRING_PTR(str); + } + else { + long start = beg; + long idx; + int last_null = 0; + struct 
re_registers *regs; + + while ((end = rb_reg_search(spat, str, start, 0)) >= 0) { + regs = RMATCH_REGS(rb_backref_get()); + if (start == end && BEG(0) == END(0)) { + if (!RSTRING_PTR(str)) { + rb_ary_push(result, rb_str_new("", 0)); + break; + } + else if (last_null == 1) { + rb_ary_push(result, rb_str_subseq(str, beg, + rb_enc_mbclen(RSTRING_PTR(str)+beg, + RSTRING_END(str), + enc))); + beg = start; + } + else { + if (RSTRING_PTR(str)+start == RSTRING_END(str)) + start++; + else + start += rb_enc_mbclen(RSTRING_PTR(str)+start,RSTRING_END(str),enc); + last_null = 1; + continue; + } + } + else { + rb_ary_push(result, rb_str_subseq(str, beg, end-beg)); + beg = start = END(0); + } + last_null = 0; + + for (idx=1; idx < regs->num_regs; idx++) { + if (BEG(idx) == -1) continue; + if (BEG(idx) == END(idx)) + tmp = rb_str_new5(str, 0, 0); + else + tmp = rb_str_subseq(str, BEG(idx), END(idx)-BEG(idx)); + rb_ary_push(result, tmp); + } + if (!NIL_P(limit) && lim <= ++i) break; + } + } + if (RSTRING_LEN(str) > 0 && (!NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) { + if (RSTRING_LEN(str) == beg) + tmp = rb_str_new5(str, 0, 0); + else + tmp = rb_str_subseq(str, beg, RSTRING_LEN(str)-beg); + rb_ary_push(result, tmp); + } + if (NIL_P(limit) && lim == 0) { + long len; + while ((len = RARRAY_LEN(result)) > 0 && + (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0)) + rb_ary_pop(result); + } + + return result; +} + +VALUE +rb_str_split(VALUE str, const char *sep0) +{ + VALUE sep; + + StringValue(str); + sep = rb_str_new2(sep0); + return rb_str_split_m(1, &sep, str); +} + + +/* + * Document-method: lines + * call-seq: + * str.lines(separator=$/) => anEnumerator + * str.lines(separator=$/) {|substr| block } => str + * + * Returns an enumerator that gives each line in the string. If a block is + * given, it iterates over each line in the string. 
+ * + * "foo\nbar\n".lines.to_a #=> ["foo\n", "bar\n"] + * "foo\nb ar".lines.sort #=> ["b ar", "foo\n"] + */ + +/* + * Document-method: each_line + * call-seq: + * str.each_line(separator=$/) {|substr| block } => str + * + * Splits str using the supplied parameter as the record separator + * ($/ by default), passing each substring in turn to the supplied + * block. If a zero-length record separator is supplied, the string is split + * into paragraphs delimited by multiple successive newlines. + * + * print "Example one\n" + * "hello\nworld".each {|s| p s} + * print "Example two\n" + * "hello\nworld".each('l') {|s| p s} + * print "Example three\n" + * "hello\n\n\nworld".each('') {|s| p s} + * + * produces: + * + * Example one + * "hello\n" + * "world" + * Example two + * "hel" + * "l" + * "o\nworl" + * "d" + * Example three + * "hello\n\n\n" + * "world" + */ + +static VALUE +rb_str_each_line(int argc, VALUE *argv, VALUE str) +{ + rb_encoding *enc; + VALUE rs; + unsigned int newline; + char *p, *pend, *s, *ptr; + long len, rslen; + VALUE line; + int n; + VALUE orig = str; + + if (argc == 0) { + rs = rb_rs; + } + else { + rb_scan_args(argc, argv, "01", &rs); + } + RETURN_ENUMERATOR(str, argc, argv); + if (NIL_P(rs)) { + rb_yield(str); + return orig; + } + str = rb_str_new4(str); + ptr = p = s = RSTRING_PTR(str); + pend = p + RSTRING_LEN(str); + len = RSTRING_LEN(str); + StringValue(rs); + if (rs == rb_default_rs) { + enc = rb_enc_get(str); + while (p < pend) { + char *p0; + + p = memchr(p, '\n', pend - p); + if (!p) break; + p0 = rb_enc_left_char_head(s, p, pend, enc); + if (!rb_enc_is_newline(p0, pend, enc)) { + p++; + continue; + } + p = p0 + rb_enc_mbclen(p0, pend, enc); + line = rb_str_new5(str, s, p - s); + OBJ_INFECT(line, str); + rb_enc_cr_str_copy_for_substr(line, str); + rb_yield(line); + str_mod_check(str, ptr, len); + s = p; + } + goto finish; + } + + enc = rb_enc_check(str, rs); + rslen = RSTRING_LEN(rs); + if (rslen == 0) { + newline = '\n'; + } + else { 
+ newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc); + } + + while (p < pend) { + unsigned int c = rb_enc_codepoint(p, pend, enc); + + again: + n = rb_enc_codelen(c, enc); + if (rslen == 0 && c == newline) { + p += n; + if (p < pend && (c = rb_enc_codepoint(p, pend, enc)) != newline) { + goto again; + } + while (p < pend && rb_enc_codepoint(p, pend, enc) == newline) { + p += n; + } + p -= n; + } + if (c == newline && + (rslen <= 1 || memcmp(RSTRING_PTR(rs), p, rslen) == 0)) { + line = rb_str_new5(str, s, p - s + (rslen ? rslen : n)); + OBJ_INFECT(line, str); + rb_enc_cr_str_copy_for_substr(line, str); + rb_yield(line); + str_mod_check(str, ptr, len); + s = p + (rslen ? rslen : n); + } + p += n; + } + + finish: + if (s != pend) { + line = rb_str_new5(str, s, pend - s); + OBJ_INFECT(line, str); + rb_enc_cr_str_copy_for_substr(line, str); + rb_yield(line); + } + + return orig; +} + + +/* + * Document-method: bytes + * call-seq: + * str.bytes => anEnumerator + * str.bytes {|fixnum| block } => str + * + * Returns an enumerator that gives each byte in the string. If a block is + * given, it iterates over each byte in the string. + * + * "hello".bytes.to_a #=> [104, 101, 108, 108, 111] + */ + +/* + * Document-method: each_byte + * call-seq: + * str.each_byte {|fixnum| block } => str + * + * Passes each byte in str to the given block. + * + * "hello".each_byte {|c| print c, ' ' } + * + * produces: + * + * 104 101 108 108 111 + */ + +static VALUE +rb_str_each_byte(VALUE str) +{ + long i; + + RETURN_ENUMERATOR(str, 0, 0); + for (i=0; i anEnumerator + * str.chars {|substr| block } => str + * + * Returns an enumerator that gives each character in the string. + * If a block is given, it iterates over each character in the string. + * + * "foo".chars.to_a #=> ["f","o","o"] + */ + +/* + * Document-method: each_char + * call-seq: + * str.each_char {|cstr| block } => str + * + * Passes each character in str to the given block. 
+ * + * "hello".each_char {|c| print c, ' ' } + * + * produces: + * + * h e l l o + */ + +static VALUE +rb_str_each_char(VALUE str) +{ + long i, len, n; + const char *ptr; + rb_encoding *enc; + + RETURN_ENUMERATOR(str, 0, 0); + str = rb_str_new4(str); + ptr = RSTRING_PTR(str); + len = RSTRING_LEN(str); + enc = rb_enc_get(str); + for (i = 0; i < len; i += n) { + n = rb_enc_mbclen(ptr + i, ptr + len, enc); + rb_yield(rb_str_subseq(str, i, n)); + } + return str; +} + +/* + * Document-method: codepoints + * call-seq: + * str.codepoints => anEnumerator + * str.codepoints {|fixnum| block } => str + * + * Returns an enumerator that gives the Integer ordinal + * of each character in the string, also known as a codepoint + * when applied to Unicode strings. If a block is given, it iterates + * over each character in the string. + * + * "foo\u0635".codepoints.to_a #=> [102, 111, 111, 1589] + */ + +/* + * Document-method: each_codepoint + * call-seq: + * str.each_codepoint {|integer| block } => str + * + * Passes the Integer ordinal of each character in str, + * also known as a codepoint when applied to Unicode strings to the + * given block. 
+ * + * "hello\u0639".each_codepoint {|c| print c, ' ' } + * + * produces: + * + * 104 101 108 108 111 1593 + */ + +static VALUE +rb_str_each_codepoint(VALUE str) +{ + long len; + int n; + unsigned int c; + const char *ptr, *end; + rb_encoding *enc; + + if (single_byte_optimizable(str)) return rb_str_each_byte(str); + RETURN_ENUMERATOR(str, 0, 0); + ptr = RSTRING_PTR(str); + len = RSTRING_LEN(str); + end = RSTRING_END(str); + enc = STR_ENC_GET(str); + while (ptr < end) { + c = rb_enc_codepoint(ptr, end, enc); + n = rb_enc_codelen(c, enc); + rb_yield(UINT2NUM(c)); + ptr += n; + } + return str; +} + +static long +chopped_length(VALUE str) +{ + rb_encoding *enc = STR_ENC_GET(str); + const char *p, *p2, *beg, *end; + + beg = RSTRING_PTR(str); + end = beg + RSTRING_LEN(str); + if (beg > end) return 0; + p = rb_enc_prev_char(beg, end, end, enc); + if (!p) return 0; + if (p > beg && rb_enc_codepoint(p, end, enc) == '\n') { + p2 = rb_enc_prev_char(beg, p, end, enc); + if (p2 && rb_enc_codepoint(p2, end, enc) == '\r') p = p2; + } + return p - beg; +} + +/* + * call-seq: + * str.chop! => str or nil + * + * Processes str as for String#chop, returning str, + * or nil if str is the empty string. See also + * String#chomp!. + */ + +static VALUE +rb_str_chop_bang(VALUE str) +{ + if (RSTRING_LEN(str) > 0) { + long len; + rb_str_modify(str); + len = chopped_length(str); + STR_SET_LEN(str, len); + RSTRING_PTR(str)[len] = '\0'; + return str; + } + return Qnil; +} + + +/* + * call-seq: + * str.chop => new_str + * + * Returns a new String with the last character removed. If the + * string ends with \r\n, both characters are removed. Applying + * chop to an empty string returns an empty + * string. String#chomp is often a safer alternative, as it leaves + * the string unchanged if it doesn't end in a record separator. 
+ * + * "string\r\n".chop #=> "string" + * "string\n\r".chop #=> "string\n" + * "string\n".chop #=> "string" + * "string".chop #=> "strin" + * "x".chop.chop #=> "" + */ + +static VALUE +rb_str_chop(VALUE str) +{ + VALUE str2 = rb_str_new5(str, RSTRING_PTR(str), chopped_length(str)); + rb_enc_cr_str_copy_for_substr(str2, str); + OBJ_INFECT(str2, str); + return str2; +} + + +/* + * call-seq: + * str.chomp!(separator=$/) => str or nil + * + * Modifies str in place as described for String#chomp, + * returning str, or nil if no modifications were made. + */ + +static VALUE +rb_str_chomp_bang(int argc, VALUE *argv, VALUE str) +{ + rb_encoding *enc; + VALUE rs; + int newline; + char *p, *pp, *e; + long len, rslen; + + len = RSTRING_LEN(str); + if (len == 0) return Qnil; + p = RSTRING_PTR(str); + e = p + len; + if (argc == 0) { + rs = rb_rs; + if (rs == rb_default_rs) { + smart_chomp: + str_modify_keep_cr(str); + enc = rb_enc_get(str); + if (rb_enc_mbminlen(enc) > 1) { + pp = rb_enc_left_char_head(p, e-rb_enc_mbminlen(enc), e, enc); + if (rb_enc_is_newline(pp, e, enc)) { + e = pp; + } + pp = e - rb_enc_mbminlen(enc); + if (pp >= p) { + pp = rb_enc_left_char_head(p, pp, e, enc); + if (rb_enc_ascget(pp, e, 0, enc) == '\r') { + e = pp; + } + } + if (e == RSTRING_END(str)) { + return Qnil; + } + len = e - RSTRING_PTR(str); + STR_SET_LEN(str, len); + } + else { + if (RSTRING_PTR(str)[len-1] == '\n') { + STR_DEC_LEN(str); + if (RSTRING_LEN(str) > 0 && + RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') { + STR_DEC_LEN(str); + } + } + else if (RSTRING_PTR(str)[len-1] == '\r') { + STR_DEC_LEN(str); + } + else { + return Qnil; + } + } + RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0'; + return str; + } + } + else { + rb_scan_args(argc, argv, "01", &rs); + } + if (NIL_P(rs)) return Qnil; + StringValue(rs); + rslen = RSTRING_LEN(rs); + if (rslen == 0) { + while (len>0 && p[len-1] == '\n') { + len--; + if (len>0 && p[len-1] == '\r') + len--; + } + if (len < RSTRING_LEN(str)) { + 
str_modify_keep_cr(str); + STR_SET_LEN(str, len); + RSTRING_PTR(str)[len] = '\0'; + return str; + } + return Qnil; + } + if (rslen > len) return Qnil; + newline = RSTRING_PTR(rs)[rslen-1]; + if (rslen == 1 && newline == '\n') + goto smart_chomp; + + enc = rb_enc_check(str, rs); + if (is_broken_string(rs)) { + return Qnil; + } + pp = e - rslen; + if (p[len-1] == newline && + (rslen <= 1 || + memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) { + if (rb_enc_left_char_head(p, pp, e, enc) != pp) + return Qnil; + rb_str_modify(str); + STR_SET_LEN(str, RSTRING_LEN(str) - rslen); + RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0'; + return str; + } + return Qnil; +} + + +/* + * call-seq: + * str.chomp(separator=$/) => new_str + * + * Returns a new String with the given record separator removed + * from the end of str (if present). If $/ has not been + * changed from the default Ruby record separator, then chomp also + * removes carriage return characters (that is it will remove \n, + * \r, and \r\n). + * + * "hello".chomp #=> "hello" + * "hello\n".chomp #=> "hello" + * "hello\r\n".chomp #=> "hello" + * "hello\n\r".chomp #=> "hello\n" + * "hello\r".chomp #=> "hello" + * "hello \n there".chomp #=> "hello \n there" + * "hello".chomp("llo") #=> "he" + */ + +static VALUE +rb_str_chomp(int argc, VALUE *argv, VALUE str) +{ + str = rb_str_dup(str); + rb_str_chomp_bang(argc, argv, str); + return str; +} + +/* + * call-seq: + * str.lstrip! => self or nil + * + * Removes leading whitespace from str, returning nil if no + * change was made. See also String#rstrip! and + * String#strip!. + * + * " hello ".lstrip #=> "hello " + * "hello".lstrip! 
#=> nil
+ */
+
+static VALUE
+rb_str_lstrip_bang(VALUE str)
+{
+    rb_encoding *enc;
+    char *s, *t, *e;
+
+    str_modify_keep_cr(str); /* runs before the emptiness check, i.e. even when nothing ends up stripped */
+    enc = STR_ENC_GET(str);
+    s = RSTRING_PTR(str);
+    if (!s || RSTRING_LEN(str) == 0) return Qnil;
+    e = t = RSTRING_END(str); /* e bounds the scan; t is used below to compute the remaining length */
+    /* remove spaces at head */
+    while (s < e) {
+        unsigned int cc = rb_enc_codepoint(s, e, enc);
+
+        if (!rb_enc_isspace(cc, enc)) break;
+        s += rb_enc_codelen(cc, enc); /* advance by the encoded length of cc */
+    }
+
+    if (s > RSTRING_PTR(str)) { /* at least one leading character was skipped */
+        STR_SET_LEN(str, t-s);
+        memmove(RSTRING_PTR(str), s, RSTRING_LEN(str)); /* memmove: s points into the same buffer */
+        RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
+        return str;
+    }
+    return Qnil;
+}
+
+
+/*
+ *  call-seq:
+ *     str.lstrip   => new_str
+ *
+ *  Returns a copy of str with leading whitespace removed. See also
+ *  String#rstrip and String#strip.
+ *
+ *     " hello ".lstrip   #=> "hello "
+ *     "hello".lstrip     #=> "hello"
+ *
+ *  Implemented as rb_str_dup followed by the in-place lstrip! on the copy;
+ *  the copy is returned whether or not anything was stripped.
+ */
+
+static VALUE
+rb_str_lstrip(VALUE str)
+{
+    str = rb_str_dup(str);
+    rb_str_lstrip_bang(str);
+    return str;
+}
+
+
+/*
+ *  call-seq:
+ *     str.rstrip!   => self or nil
+ *
+ *  Removes trailing whitespace from str, returning nil if
+ *  no change was made. See also String#lstrip! and
+ *  String#strip!.
+ *
+ *     " hello ".rstrip   #=> " hello"
+ *     "hello".rstrip!
#=> nil + */ + +static VALUE +rb_str_rstrip_bang(VALUE str) +{ + rb_encoding *enc; + char *s, *t, *e; + + enc = STR_ENC_GET(str); + rb_str_check_dummy_enc(enc); + s = RSTRING_PTR(str); + if (!s || RSTRING_LEN(str) == 0) return Qnil; + t = e = RSTRING_END(str); + + /* remove trailing spaces or '\0's */ + if (single_byte_optimizable(str)) { + unsigned char c; + while (s < t && ((c = *(t-1)) == '\0' || rb_enc_isspace(c, enc))) t--; + } + else { + char *tp; + + while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) { + unsigned int c = rb_enc_codepoint(tp, e, enc); + if (c && !rb_enc_isspace(c, enc)) break; + t = tp; + } + } + if (t < e) { + int len = t-RSTRING_PTR(str); + + str_modify_keep_cr(str); + STR_SET_LEN(str, len); + RSTRING_PTR(str)[len] = '\0'; + return str; + } + return Qnil; +} + + +/* + * call-seq: + * str.rstrip => new_str + * + * Returns a copy of str with trailing whitespace removed. See also + * String#lstrip and String#strip. + * + * " hello ".rstrip #=> " hello" + * "hello".rstrip #=> "hello" + */ + +static VALUE +rb_str_rstrip(VALUE str) +{ + str = rb_str_dup(str); + rb_str_rstrip_bang(str); + return str; +} + + +/* + * call-seq: + * str.strip! => str or nil + * + * Removes leading and trailing whitespace from str. Returns + * nil if str was not altered. + */ + +static VALUE +rb_str_strip_bang(VALUE str) +{ + VALUE l = rb_str_lstrip_bang(str); + VALUE r = rb_str_rstrip_bang(str); + + if (NIL_P(l) && NIL_P(r)) return Qnil; + return str; +} + + +/* + * call-seq: + * str.strip => new_str + * + * Returns a copy of str with leading and trailing whitespace removed. 
+ *
+ *     " hello ".strip         #=> "hello"
+ *     "\tgoodbye\r\n".strip   #=> "goodbye"
+ */
+
+static VALUE
+rb_str_strip(VALUE str)
+{
+    str = rb_str_dup(str);
+    rb_str_strip_bang(str);
+    return str;
+}
+/*
+ * One step of String#scan.  Searches str for pat from offset *start.
+ * On a match, *start is moved to the end of the match (one character
+ * further when the match is empty) and the result is returned: the whole
+ * match when the pattern has a single register, otherwise an array
+ * holding registers 1..num_regs-1.  Returns Qnil when nothing matches.
+ */
+static VALUE
+scan_once(VALUE str, VALUE pat, long *start)
+{
+    VALUE result, match;
+    struct re_registers *regs;
+    long i;
+
+    if (rb_reg_search(pat, str, *start, 0) >= 0) {
+        match = rb_backref_get();
+        regs = RMATCH_REGS(match);
+        if (BEG(0) == END(0)) { /* empty match */
+            rb_encoding *enc = STR_ENC_GET(str);
+            /*
+             * Always consume at least one character of the input string
+             */
+            if (RSTRING_LEN(str) > END(0))
+                *start = END(0)+rb_enc_mbclen(RSTRING_PTR(str)+END(0),
+                                              RSTRING_END(str), enc);
+            else
+                *start = END(0)+1; /* already at the end: step past it so the next search fails */
+        }
+        else {
+            *start = END(0);
+        }
+        if (regs->num_regs == 1) { /* no capture groups */
+            return rb_reg_nth_match(0, match);
+        }
+        result = rb_ary_new2(regs->num_regs);
+        for (i=1; i < regs->num_regs; i++) { /* register 0 (the whole match) is skipped */
+            rb_ary_push(result, rb_reg_nth_match(i, match));
+        }
+
+        return result;
+    }
+    return Qnil;
+}
+
+
+/*
+ *  call-seq:
+ *     str.scan(pattern)                         => array
+ *     str.scan(pattern) {|match, ...| block }   => str
+ *
+ *  Both forms iterate through str, matching the pattern (which may be a
+ *  Regexp or a String). For each match, a result is
+ *  generated and either added to the result array or passed to the block. If
+ *  the pattern contains no groups, each individual result consists of the
+ *  matched string, $&.  If the pattern contains groups, each
+ *  individual result is itself an array containing one entry per group.
+ *
+ *     a = "cruel world"
+ *     a.scan(/\w+/)        #=> ["cruel", "world"]
+ *     a.scan(/.../)        #=> ["cru", "el ", "wor"]
+ *     a.scan(/(...)/)      #=> [["cru"], ["el "], ["wor"]]
+ *     a.scan(/(..)(..)/)   #=> [["cr", "ue"], ["l ", "wo"]]
+ *
+ *  And the block form:
+ *
+ *     a.scan(/\w+/) {|w| print "<<#{w}>> " }
+ *     print "\n"
+ *     a.scan(/(.)(.)/) {|x,y| print y, x }
+ *     print "\n"
+ *
+ *  produces:
+ *
+ *     <<cruel>> <<world>>
+ *     rceu lowlr
+ */
+
+static VALUE
+rb_str_scan(VALUE str, VALUE pat)
+{
+    VALUE result;
+    long start = 0;
+    long last = -1, prev = 0; /* last: offset the most recent successful search started from */
+    char *p = RSTRING_PTR(str); long len = RSTRING_LEN(str); /* snapshot handed to str_mod_check() */
+
+    pat = get_pat(pat, 1);
+    if (!rb_block_given_p()) {
+        VALUE ary = rb_ary_new();
+
+        while (!NIL_P(result = scan_once(str, pat, &start))) {
+            last = prev;
+            prev = start;
+            rb_ary_push(ary, result);
+        }
+        if (last >= 0) rb_reg_search(pat, str, last, 0); /* re-run the last successful search; presumably to restore the match data -- TODO confirm */
+        return ary;
+    }
+
+    while (!NIL_P(result = scan_once(str, pat, &start))) {
+        last = prev;
+        prev = start;
+        rb_yield(result);
+        str_mod_check(str, p, len); /* checked after every yield, since the block may have touched str */
+    }
+    if (last >= 0) rb_reg_search(pat, str, last, 0);
+    return str;
+}
+
+
+/*
+ *  call-seq:
+ *     str.hex   => integer
+ *
+ *  Treats leading characters from str as a string of hexadecimal digits
+ *  (with an optional sign and an optional 0x) and returns the
+ *  corresponding number. Zero is returned on error.
+ *
+ *     "0x0a".hex     #=> 10
+ *     "-1234".hex    #=> -4660
+ *     "0".hex        #=> 0
+ *     "wombat".hex   #=> 0
+ *
+ *  Raises Encoding::CompatibilityError (rb_eEncCompatError) when the
+ *  encoding of str is not ASCII compatible.
+ */
+
+static VALUE
+rb_str_hex(VALUE str)
+{
+    rb_encoding *enc = rb_enc_get(str);
+
+    if (!rb_enc_asciicompat(enc)) {
+        rb_raise(rb_eEncCompatError, "ASCII incompatible encoding: %s", rb_enc_name(enc));
+    }
+    return rb_str_to_inum(str, 16, Qfalse);
+}
+
+
+/*
+ *  call-seq:
+ *     str.oct   => integer
+ *
+ *  Treats leading characters of str as a string of octal digits (with an
+ *  optional sign) and returns the corresponding number.  Returns 0 if the
+ *  conversion fails.
+ *
+ *     "123".oct       #=> 83
+ *     "-377".oct      #=> -255
+ *     "bad".oct       #=> 0
+ *     "0377bad".oct   #=> 255
+ *
+ *  Raises Encoding::CompatibilityError (rb_eEncCompatError) when the
+ *  encoding of str is not ASCII compatible.
+ */
+
+static VALUE
+rb_str_oct(VALUE str)
+{
+    rb_encoding *enc = rb_enc_get(str);
+
+    if (!rb_enc_asciicompat(enc)) {
+        rb_raise(rb_eEncCompatError, "ASCII incompatible encoding: %s", rb_enc_name(enc));
+    }
+    return rb_str_to_inum(str, -8, Qfalse); /* NOTE(review): the negative base presumably lets radix prefixes override octal -- confirm in rb_str_to_inum */
+}
+
+
+/*
+ *  call-seq:
+ *     str.crypt(other_str)   => new_str
+ *
+ *  Applies a one-way cryptographic hash to str by invoking the standard
+ *  library function crypt. The argument is the salt string, which
+ *  should be two characters long, each character drawn from
+ *  [a-zA-Z0-9./].
+ *
+ *  Raises ArgumentError when the salt is shorter than two bytes.
+ */
+
+static VALUE
+rb_str_crypt(VALUE str, VALUE salt)
+{
+    extern char *crypt(const char *, const char *);
+    VALUE result;
+    const char *s, *saltp;
+#ifdef BROKEN_CRYPT
+    char salt_8bit_clean[3];
+#endif
+
+    StringValue(salt);
+    if (RSTRING_LEN(salt) < 2)
+        rb_raise(rb_eArgError, "salt too short (need >=2 bytes)");
+
+    s = RSTRING_PTR(str);
+    if (!s) s = ""; /* crypt() must not be handed a null pointer */
+    saltp = RSTRING_PTR(salt);
+#ifdef BROKEN_CRYPT
+    if (!ISASCII((unsigned char)saltp[0]) || !ISASCII((unsigned char)saltp[1])) {
+        salt_8bit_clean[0] = saltp[0] & 0x7f; /* clear the high bit of each salt byte */
+        salt_8bit_clean[1] = saltp[1] & 0x7f;
+        salt_8bit_clean[2] = '\0';
+        saltp = salt_8bit_clean;
+    }
+#endif
+    result = rb_str_new2(crypt(s, saltp)); /* NOTE(review): crypt()'s return value is not checked here -- confirm how rb_str_new2 treats NULL */
+    OBJ_INFECT(result, str);
+    OBJ_INFECT(result, salt);
+    return result;
+}
+
+
+/*
+ *  call-seq:
+ *     str.intern   => symbol
+ *     str.to_sym   => symbol
+ *
+ *  Returns the Symbol corresponding to str, creating the
+ *  symbol if it did not previously exist. See Symbol#id2name.
+ *
+ *     "Koala".intern         #=> :Koala
+ *     s = 'cat'.to_sym       #=> :cat
+ *     s == :cat              #=> true
+ *     s = '@cat'.to_sym      #=> :@cat
+ *     s == :@cat             #=> true
+ *
+ *  This can also be used to create symbols that cannot be represented using the
+ *  :xxx notation.
+ * + * 'cat and dog'.to_sym #=> :"cat and dog" + */ + +VALUE +rb_str_intern(VALUE s) +{ + VALUE str = RB_GC_GUARD(s); + ID id; + + id = rb_intern_str(str); + return ID2SYM(id); +} + + +/* + * call-seq: + * str.ord => integer + * + * Return the Integer ordinal of a one-character string. + * + * "a".ord #=> 97 + */ + +VALUE +rb_str_ord(VALUE s) +{ + unsigned int c; + + c = rb_enc_codepoint(RSTRING_PTR(s), RSTRING_END(s), STR_ENC_GET(s)); + return UINT2NUM(c); +} +/* + * call-seq: + * str.sum(n=16) => integer + * + * Returns a basic n-bit checksum of the characters in str, + * where n is the optional Fixnum parameter, defaulting + * to 16. The result is simply the sum of the binary value of each character in + * str modulo 2n - 1. This is not a particularly good + * checksum. + */ + +static VALUE +rb_str_sum(int argc, VALUE *argv, VALUE str) +{ + VALUE vbits; + int bits; + char *ptr, *p, *pend; + long len; + + if (argc == 0) { + bits = 16; + } + else { + rb_scan_args(argc, argv, "01", &vbits); + bits = NUM2INT(vbits); + } + ptr = p = RSTRING_PTR(str); + len = RSTRING_LEN(str); + pend = p + len; + if (bits >= sizeof(long)*CHAR_BIT) { + VALUE sum = INT2FIX(0); + + while (p < pend) { + str_mod_check(str, ptr, len); + sum = rb_funcall(sum, '+', 1, INT2FIX((unsigned char)*p)); + p++; + } + if (bits != 0) { + VALUE mod; + + mod = rb_funcall(INT2FIX(1), rb_intern("<<"), 1, INT2FIX(bits)); + mod = rb_funcall(mod, '-', 1, INT2FIX(1)); + sum = rb_funcall(sum, '&', 1, mod); + } + return sum; + } + else { + unsigned long sum = 0; + + while (p < pend) { + str_mod_check(str, ptr, len); + sum += (unsigned char)*p; + p++; + } + if (bits != 0) { + sum &= (((unsigned long)1)<= width) return rb_str_dup(str); + n = width - len; + llen = (jflag == 'l') ? 0 : ((jflag == 'r') ? 
n : n/2); + rlen = n - llen; + cr = ENC_CODERANGE(str); + if (flen > 1) { + llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte); + rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte); + } + size = RSTRING_LEN(str); + if ((len = llen / fclen + rlen / fclen) >= LONG_MAX / flen || + (len *= flen) >= LONG_MAX - llen2 - rlen2 || + (len += llen2 + rlen2) >= LONG_MAX - size) { + rb_raise(rb_eArgError, "argument too big"); + } + len += size; + res = rb_str_new5(str, 0, len); + p = RSTRING_PTR(res); + if (flen <= 1) { + memset(p, *f, llen); + p += llen; + } + else { + while (llen > fclen) { + memcpy(p,f,flen); + p += flen; + llen -= fclen; + } + if (llen > 0) { + memcpy(p, f, llen2); + p += llen2; + } + } + memcpy(p, RSTRING_PTR(str), size); + p += size; + if (flen <= 1) { + memset(p, *f, rlen); + p += rlen; + } + else { + while (rlen > fclen) { + memcpy(p,f,flen); + p += flen; + rlen -= fclen; + } + if (rlen > 0) { + memcpy(p, f, rlen2); + p += rlen2; + } + } + *p = '\0'; + STR_SET_LEN(res, p-RSTRING_PTR(res)); + OBJ_INFECT(res, str); + if (!NIL_P(pad)) OBJ_INFECT(res, pad); + rb_enc_associate(res, enc); + if (argc == 2) + cr = ENC_CODERANGE_AND(cr, ENC_CODERANGE(pad)); + if (cr != ENC_CODERANGE_BROKEN) + ENC_CODERANGE_SET(res, cr); + return res; +} + + +/* + * call-seq: + * str.ljust(integer, padstr=' ') => new_str + * + * If integer is greater than the length of str, returns a new + * String of length integer with str left justified + * and padded with padstr; otherwise, returns str. 
+ *
+ *     "hello".ljust(4)            #=> "hello"
+ *     "hello".ljust(20)           #=> "hello               "
+ *     "hello".ljust(20, '1234')   #=> "hello123412341234123"
+ */
+
+static VALUE
+rb_str_ljust(int argc, VALUE *argv, VALUE str)
+{
+    return rb_str_justify(argc, argv, str, 'l'); /* 'l' selects left justification */
+}
+
+
+/*
+ *  call-seq:
+ *     str.rjust(integer, padstr=' ')   => new_str
+ *
+ *  If integer is greater than the length of str, returns a new
+ *  String of length integer with str right justified
+ *  and padded with padstr; otherwise, returns str.
+ *
+ *     "hello".rjust(4)            #=> "hello"
+ *     "hello".rjust(20)           #=> "               hello"
+ *     "hello".rjust(20, '1234')   #=> "123412341234123hello"
+ */
+
+static VALUE
+rb_str_rjust(int argc, VALUE *argv, VALUE str)
+{
+    return rb_str_justify(argc, argv, str, 'r'); /* 'r' selects right justification */
+}
+
+
+/*
+ *  call-seq:
+ *     str.center(integer, padstr=' ')   => new_str
+ *
+ *  If integer is greater than the length of str, returns a new
+ *  String of length integer with str centered and
+ *  padded with padstr; otherwise, returns str.
+ *
+ *     "hello".center(4)         #=> "hello"
+ *     "hello".center(20)        #=> "       hello        "
+ *     "hello".center(20, '123') #=> "1231231hello12312312"
+ */
+
+static VALUE
+rb_str_center(int argc, VALUE *argv, VALUE str)
+{
+    return rb_str_justify(argc, argv, str, 'c'); /* 'c' selects centering */
+}
+
+/*
+ *  call-seq:
+ *     str.partition(sep)              => [head, sep, tail]
+ *     str.partition(regexp)           => [head, match, tail]
+ *
+ *  Searches sep or pattern (regexp) in the string
+ *  and returns the part before it, the match, and the part
+ *  after it.
+ *  If it is not found, returns str followed by two empty strings.
+ *
+ *     "hello".partition("l")         #=> ["he", "l", "lo"]
+ *     "hello".partition("x")         #=> ["hello", "", ""]
+ *     "hello".partition(/.l/)        #=> ["h", "el", "lo"]
+ *
+ *  Raises TypeError when sep is neither a Regexp nor convertible to a
+ *  String.
+ */
+
+static VALUE
+rb_str_partition(VALUE str, VALUE sep)
+{
+    long pos;
+    int regex = Qfalse;
+
+    if (TYPE(sep) == T_REGEXP) {
+        pos = rb_reg_search(sep, str, 0, 0);
+        regex = Qtrue;
+    }
+    else {
+        VALUE tmp;
+
+        tmp = rb_check_string_type(sep);
+        if (NIL_P(tmp)) {
+            rb_raise(rb_eTypeError, "type mismatch: %s given",
+                     rb_obj_classname(sep));
+        }
+        sep = tmp;
+        pos = rb_str_index(str, sep, 0);
+    }
+    if (pos < 0) {
+      failed:
+        return rb_ary_new3(3, str, rb_str_new(0,0),rb_str_new(0,0)); /* not found: the receiver itself plus two empty strings */
+    }
+    if (regex) {
+        sep = rb_str_subpat(str, sep, 0); /* from here on sep is the matched text, not the Regexp */
+        if (pos == 0 && RSTRING_LEN(sep) == 0) goto failed; /* an empty match at offset 0 is reported as "not found" */
+    }
+    return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
+                          sep,
+                          rb_str_subseq(str, pos+RSTRING_LEN(sep),
+                                             RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
+}
+
+/*
+ *  call-seq:
+ *     str.rpartition(sep)            => [head, sep, tail]
+ *     str.rpartition(regexp)         => [head, match, tail]
+ *
+ *  Searches sep or pattern (regexp) in the string from the end
+ *  of the string, and returns the part before it, the match, and the part
+ *  after it.
+ *  If it is not found, returns two empty strings and str.
+ * + * "hello".rpartition("l") #=> ["hel", "l", "o"] + * "hello".rpartition("x") #=> ["", "", "hello"] + * "hello".rpartition(/.l/) #=> ["he", "ll", "o"] + */ + +static VALUE +rb_str_rpartition(VALUE str, VALUE sep) +{ + long pos = RSTRING_LEN(str); + int regex = Qfalse; + + if (TYPE(sep) == T_REGEXP) { + pos = rb_reg_search(sep, str, pos, 1); + regex = Qtrue; + } + else { + VALUE tmp; + + tmp = rb_check_string_type(sep); + if (NIL_P(tmp)) { + rb_raise(rb_eTypeError, "type mismatch: %s given", + rb_obj_classname(sep)); + } + sep = tmp; + pos = rb_str_sublen(str, pos); + pos = rb_str_rindex(str, sep, pos); + } + if (pos < 0) { + return rb_ary_new3(3, rb_str_new(0,0),rb_str_new(0,0), str); + } + if (regex) { + sep = rb_reg_nth_match(0, rb_backref_get()); + } + return rb_ary_new3(3, rb_str_substr(str, 0, pos), + sep, + rb_str_substr(str,pos+str_strlen(sep,STR_ENC_GET(sep)),RSTRING_LEN(str))); +} + +/* + * call-seq: + * str.start_with?([prefix]+) => true or false + * + * Returns true if str starts with the prefix given. + */ + +static VALUE +rb_str_start_with(int argc, VALUE *argv, VALUE str) +{ + int i; + + for (i=0; i true or false + * + * Returns true if str ends with the suffix given. + */ + +static VALUE +rb_str_end_with(int argc, VALUE *argv, VALUE str) +{ + int i; + char *p, *s, *e; + rb_encoding *enc; + + for (i=0; i str + * + * Changes the encoding to +encoding+ and returns self. + */ + +static VALUE +rb_str_force_encoding(VALUE str, VALUE enc) +{ + str_modifiable(str); + rb_enc_associate(str, rb_to_encoding(enc)); + ENC_CODERANGE_CLEAR(str); + return str; +} + +/* + * call-seq: + * str.valid_encoding? => true or false + * + * Returns true for a string which encoded correctly. + * + * "\xc2\xa1".force_encoding("UTF-8").valid_encoding? => true + * "\xc2".force_encoding("UTF-8").valid_encoding? => false + * "\x80".force_encoding("UTF-8").valid_encoding? 
=> false
+ */
+
+static VALUE
+rb_str_valid_encoding_p(VALUE str)
+{
+    /* Anything but a "broken" code range counts as validly encoded. */
+    if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) {
+        return Qfalse;
+    }
+    return Qtrue;
+}
+
+/*
+ *  call-seq:
+ *     str.ascii_only?  => true or false
+ *
+ *  Returns true for a string which has only ASCII characters.
+ *
+ *    "abc".force_encoding("UTF-8").ascii_only? => true
+ *    "abc\u{6666}".force_encoding("UTF-8").ascii_only? => false
+ */
+
+static VALUE
+rb_str_is_ascii_only_p(VALUE str)
+{
+    /* Only the 7-bit code range qualifies. */
+    if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+
+/**********************************************************************
+ * Document-class: Symbol
+ *
+ *  Symbol objects represent names and some strings
+ *  inside the Ruby
+ *  interpreter. They are generated using the :name and
+ *  :"string" literals
+ *  syntax, and by the various to_sym methods. The same
+ *  Symbol object will be created for a given name or string
+ *  for the duration of a program's execution, regardless of the context
+ *  or meaning of that name. Thus if Fred is a constant in
+ *  one context, a method in another, and a class in a third, the
+ *  Symbol :Fred will be the same object in
+ *  all three contexts.
+ *
+ *     module One
+ *       class Fred
+ *       end
+ *       $f1 = :Fred
+ *     end
+ *     module Two
+ *       Fred = 1
+ *       $f2 = :Fred
+ *     end
+ *     def Fred()
+ *     end
+ *     $f3 = :Fred
+ *     $f1.object_id   #=> 2514190
+ *     $f2.object_id   #=> 2514190
+ *     $f3.object_id   #=> 2514190
+ *
+ */
+
+
+/*
+ *  call-seq:
+ *     sym == obj   => true or false
+ *
+ *  Equality---If sym and obj are exactly the same
+ *  symbol, returns true.
+ */ + +static VALUE +sym_equal(VALUE sym1, VALUE sym2) +{ + if (sym1 == sym2) return Qtrue; + return Qfalse; +} + + +static int +sym_printable(const char *s, const char *send, rb_encoding *enc) +{ + while (s < send) { + int c = rb_enc_codepoint(s, send, enc); + int n = rb_enc_codelen(c, enc); + if (!rb_enc_isprint(c, enc)) return Qfalse; + s += n; + } + return Qtrue; +} + +/* + * call-seq: + * sym.inspect => string + * + * Returns the representation of sym as a symbol literal. + * + * :fred.inspect #=> ":fred" + */ + +static VALUE +sym_inspect(VALUE sym) +{ + VALUE str; + ID id = SYM2ID(sym); + rb_encoding *enc; + + sym = rb_id2str(id); + enc = STR_ENC_GET(sym); + str = rb_enc_str_new(0, RSTRING_LEN(sym)+1, enc); + RSTRING_PTR(str)[0] = ':'; + memcpy(RSTRING_PTR(str)+1, RSTRING_PTR(sym), RSTRING_LEN(sym)); + if (RSTRING_LEN(sym) != strlen(RSTRING_PTR(sym)) || + !rb_enc_symname_p(RSTRING_PTR(sym), enc) || + !sym_printable(RSTRING_PTR(sym), RSTRING_END(sym), enc)) { + str = rb_str_inspect(str); + strncpy(RSTRING_PTR(str), ":\"", 2); + } + return str; +} + + +/* + * call-seq: + * sym.id2name => string + * sym.to_s => string + * + * Returns the name or string corresponding to sym. + * + * :fred.id2name #=> "fred" + */ + + +VALUE +rb_sym_to_s(VALUE sym) +{ + ID id = SYM2ID(sym); + + return str_new3(rb_cString, rb_id2str(id)); +} + + +/* + * call-seq: + * sym.to_sym => sym + * sym.intern => sym + * + * In general, to_sym returns the Symbol corresponding + * to an object. As sym is already a symbol, self is returned + * in this case. + */ + +static VALUE +sym_to_sym(VALUE sym) +{ + return sym; +} + +static VALUE +sym_call(VALUE args, VALUE sym, int argc, VALUE *argv) +{ + VALUE obj; + + if (argc < 1) { + rb_raise(rb_eArgError, "no receiver given"); + } + obj = argv[0]; + return rb_funcall3(obj, (ID)sym, argc - 1, argv + 1); +} + +/* + * call-seq: + * sym.to_proc + * + * Returns a _Proc_ object which respond to the given method by _sym_. 
+ * + * (1..3).collect(&:to_s) #=> ["1", "2", "3"] + */ + +static VALUE +sym_to_proc(VALUE sym) +{ + return rb_proc_new(sym_call, (VALUE)SYM2ID(sym)); +} + + +static VALUE +sym_succ(VALUE sym) +{ + return rb_str_intern(rb_str_succ(rb_sym_to_s(sym))); +} + +static VALUE +sym_cmp(VALUE sym, VALUE other) +{ + if (!SYMBOL_P(other)) { + return Qnil; + } + return rb_str_cmp_m(rb_sym_to_s(sym), rb_sym_to_s(other)); +} + +static VALUE +sym_casecmp(VALUE sym, VALUE other) +{ + if (!SYMBOL_P(other)) { + return Qnil; + } + return rb_str_casecmp(rb_sym_to_s(sym), rb_sym_to_s(other)); +} + +static VALUE +sym_match(VALUE sym, VALUE other) +{ + return rb_str_match(rb_sym_to_s(sym), other); +} + +static VALUE +sym_aref(int argc, VALUE *argv, VALUE sym) +{ + return rb_str_aref_m(argc, argv, rb_sym_to_s(sym)); +} + +static VALUE +sym_length(VALUE sym) +{ + return rb_str_length(rb_id2str(SYM2ID(sym))); +} + +static VALUE +sym_empty(VALUE sym) +{ + return rb_str_empty(rb_id2str(SYM2ID(sym))); +} + +static VALUE +sym_upcase(VALUE sym) +{ + return rb_str_intern(rb_str_upcase(rb_id2str(SYM2ID(sym)))); +} + +static VALUE +sym_downcase(VALUE sym) +{ + return rb_str_intern(rb_str_downcase(rb_id2str(SYM2ID(sym)))); +} + +static VALUE +sym_capitalize(VALUE sym) +{ + return rb_str_intern(rb_str_capitalize(rb_id2str(SYM2ID(sym)))); +} + +static VALUE +sym_swapcase(VALUE sym) +{ + return rb_str_intern(rb_str_swapcase(rb_id2str(SYM2ID(sym)))); +} + +static VALUE +sym_encoding(VALUE sym) +{ + return rb_obj_encoding(rb_id2str(SYM2ID(sym))); +} + +ID +rb_to_id(VALUE name) +{ + VALUE tmp; + ID id; + + switch (TYPE(name)) { + default: + tmp = rb_check_string_type(name); + if (NIL_P(tmp)) { + tmp = rb_inspect(name); + rb_raise(rb_eTypeError, "%s is not a symbol", + RSTRING_PTR(tmp)); + } + name = tmp; + /* fall through */ + case T_STRING: + name = rb_str_intern(name); + /* fall through */ + case T_SYMBOL: + return SYM2ID(name); + } + return id; +} + +/* + * A String object holds and manipulates an 
arbitrary sequence of + * bytes, typically representing characters. String objects may be created + * using String::new or as literals. + * + * Because of aliasing issues, users of strings should be aware of the methods + * that modify the contents of a String object. Typically, + * methods with names ending in ``!'' modify their receiver, while those + * without a ``!'' return a new String. However, there are + * exceptions, such as String#[]=. + * + */ + +void +Init_String(void) +{ +#undef rb_intern +#define rb_intern(str) rb_intern_const(str) + + rb_cString = rb_define_class("String", rb_cObject); + rb_include_module(rb_cString, rb_mComparable); + rb_define_alloc_func(rb_cString, str_alloc); + rb_define_singleton_method(rb_cString, "try_convert", rb_str_s_try_convert, 1); + rb_define_method(rb_cString, "initialize", rb_str_init, -1); + rb_define_method(rb_cString, "initialize_copy", rb_str_replace, 1); + rb_define_method(rb_cString, "<=>", rb_str_cmp_m, 1); + rb_define_method(rb_cString, "==", rb_str_equal, 1); + rb_define_method(rb_cString, "===", rb_str_equal, 1); + rb_define_method(rb_cString, "eql?", rb_str_eql, 1); + rb_define_method(rb_cString, "hash", rb_str_hash_m, 0); + rb_define_method(rb_cString, "casecmp", rb_str_casecmp, 1); + rb_define_method(rb_cString, "+", rb_str_plus, 1); + rb_define_method(rb_cString, "*", rb_str_times, 1); + rb_define_method(rb_cString, "%", rb_str_format_m, 1); + rb_define_method(rb_cString, "[]", rb_str_aref_m, -1); + rb_define_method(rb_cString, "[]=", rb_str_aset_m, -1); + rb_define_method(rb_cString, "insert", rb_str_insert, 2); + rb_define_method(rb_cString, "length", rb_str_length, 0); + rb_define_method(rb_cString, "size", rb_str_length, 0); + rb_define_method(rb_cString, "bytesize", rb_str_bytesize, 0); + rb_define_method(rb_cString, "empty?", rb_str_empty, 0); + rb_define_method(rb_cString, "=~", rb_str_match, 1); + rb_define_method(rb_cString, "match", rb_str_match_m, -1); + rb_define_method(rb_cString, "succ", 
rb_str_succ, 0); + rb_define_method(rb_cString, "succ!", rb_str_succ_bang, 0); + rb_define_method(rb_cString, "next", rb_str_succ, 0); + rb_define_method(rb_cString, "next!", rb_str_succ_bang, 0); + rb_define_method(rb_cString, "upto", rb_str_upto, -1); + rb_define_method(rb_cString, "index", rb_str_index_m, -1); + rb_define_method(rb_cString, "rindex", rb_str_rindex_m, -1); + rb_define_method(rb_cString, "replace", rb_str_replace, 1); + rb_define_method(rb_cString, "clear", rb_str_clear, 0); + rb_define_method(rb_cString, "chr", rb_str_chr, 0); + rb_define_method(rb_cString, "getbyte", rb_str_getbyte, 1); + rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2); + + rb_define_method(rb_cString, "to_i", rb_str_to_i, -1); + rb_define_method(rb_cString, "to_f", rb_str_to_f, 0); + rb_define_method(rb_cString, "to_s", rb_str_to_s, 0); + rb_define_method(rb_cString, "to_str", rb_str_to_s, 0); + rb_define_method(rb_cString, "inspect", rb_str_inspect, 0); + rb_define_method(rb_cString, "dump", rb_str_dump, 0); + + rb_define_method(rb_cString, "upcase", rb_str_upcase, 0); + rb_define_method(rb_cString, "downcase", rb_str_downcase, 0); + rb_define_method(rb_cString, "capitalize", rb_str_capitalize, 0); + rb_define_method(rb_cString, "swapcase", rb_str_swapcase, 0); + + rb_define_method(rb_cString, "upcase!", rb_str_upcase_bang, 0); + rb_define_method(rb_cString, "downcase!", rb_str_downcase_bang, 0); + rb_define_method(rb_cString, "capitalize!", rb_str_capitalize_bang, 0); + rb_define_method(rb_cString, "swapcase!", rb_str_swapcase_bang, 0); + + rb_define_method(rb_cString, "hex", rb_str_hex, 0); + rb_define_method(rb_cString, "oct", rb_str_oct, 0); + rb_define_method(rb_cString, "split", rb_str_split_m, -1); + rb_define_method(rb_cString, "lines", rb_str_each_line, -1); + rb_define_method(rb_cString, "bytes", rb_str_each_byte, 0); + rb_define_method(rb_cString, "chars", rb_str_each_char, 0); + rb_define_method(rb_cString, "codepoints", rb_str_each_codepoint, 0); + 
rb_define_method(rb_cString, "reverse", rb_str_reverse, 0); + rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0); + rb_define_method(rb_cString, "concat", rb_str_concat, 1); + rb_define_method(rb_cString, "<<", rb_str_concat, 1); + rb_define_method(rb_cString, "crypt", rb_str_crypt, 1); + rb_define_method(rb_cString, "intern", rb_str_intern, 0); + rb_define_method(rb_cString, "to_sym", rb_str_intern, 0); + rb_define_method(rb_cString, "ord", rb_str_ord, 0); + + rb_define_method(rb_cString, "include?", rb_str_include, 1); + rb_define_method(rb_cString, "start_with?", rb_str_start_with, -1); + rb_define_method(rb_cString, "end_with?", rb_str_end_with, -1); + + rb_define_method(rb_cString, "scan", rb_str_scan, 1); + + rb_define_method(rb_cString, "ljust", rb_str_ljust, -1); + rb_define_method(rb_cString, "rjust", rb_str_rjust, -1); + rb_define_method(rb_cString, "center", rb_str_center, -1); + + rb_define_method(rb_cString, "sub", rb_str_sub, -1); + rb_define_method(rb_cString, "gsub", rb_str_gsub, -1); + rb_define_method(rb_cString, "chop", rb_str_chop, 0); + rb_define_method(rb_cString, "chomp", rb_str_chomp, -1); + rb_define_method(rb_cString, "strip", rb_str_strip, 0); + rb_define_method(rb_cString, "lstrip", rb_str_lstrip, 0); + rb_define_method(rb_cString, "rstrip", rb_str_rstrip, 0); + + rb_define_method(rb_cString, "sub!", rb_str_sub_bang, -1); + rb_define_method(rb_cString, "gsub!", rb_str_gsub_bang, -1); + rb_define_method(rb_cString, "chop!", rb_str_chop_bang, 0); + rb_define_method(rb_cString, "chomp!", rb_str_chomp_bang, -1); + rb_define_method(rb_cString, "strip!", rb_str_strip_bang, 0); + rb_define_method(rb_cString, "lstrip!", rb_str_lstrip_bang, 0); + rb_define_method(rb_cString, "rstrip!", rb_str_rstrip_bang, 0); + + rb_define_method(rb_cString, "tr", rb_str_tr, 2); + rb_define_method(rb_cString, "tr_s", rb_str_tr_s, 2); + rb_define_method(rb_cString, "delete", rb_str_delete, -1); + rb_define_method(rb_cString, "squeeze", 
rb_str_squeeze, -1); + rb_define_method(rb_cString, "count", rb_str_count, -1); + + rb_define_method(rb_cString, "tr!", rb_str_tr_bang, 2); + rb_define_method(rb_cString, "tr_s!", rb_str_tr_s_bang, 2); + rb_define_method(rb_cString, "delete!", rb_str_delete_bang, -1); + rb_define_method(rb_cString, "squeeze!", rb_str_squeeze_bang, -1); + + rb_define_method(rb_cString, "each_line", rb_str_each_line, -1); + rb_define_method(rb_cString, "each_byte", rb_str_each_byte, 0); + rb_define_method(rb_cString, "each_char", rb_str_each_char, 0); + rb_define_method(rb_cString, "each_codepoint", rb_str_each_codepoint, 0); + + rb_define_method(rb_cString, "sum", rb_str_sum, -1); + + rb_define_method(rb_cString, "slice", rb_str_aref_m, -1); + rb_define_method(rb_cString, "slice!", rb_str_slice_bang, -1); + + rb_define_method(rb_cString, "partition", rb_str_partition, 1); + rb_define_method(rb_cString, "rpartition", rb_str_rpartition, 1); + + rb_define_method(rb_cString, "encoding", rb_obj_encoding, 0); /* in encoding.c */ + rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1); + rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0); + rb_define_method(rb_cString, "ascii_only?", rb_str_is_ascii_only_p, 0); + + id_to_s = rb_intern("to_s"); + + rb_fs = Qnil; + rb_define_variable("$;", &rb_fs); + rb_define_variable("$-F", &rb_fs); + + rb_cSymbol = rb_define_class("Symbol", rb_cObject); + rb_include_module(rb_cSymbol, rb_mComparable); + rb_undef_alloc_func(rb_cSymbol); + rb_undef_method(CLASS_OF(rb_cSymbol), "new"); + rb_define_singleton_method(rb_cSymbol, "all_symbols", rb_sym_all_symbols, 0); /* in parse.y */ + + rb_define_method(rb_cSymbol, "==", sym_equal, 1); + rb_define_method(rb_cSymbol, "===", sym_equal, 1); + rb_define_method(rb_cSymbol, "inspect", sym_inspect, 0); + rb_define_method(rb_cSymbol, "to_s", rb_sym_to_s, 0); + rb_define_method(rb_cSymbol, "id2name", rb_sym_to_s, 0); + rb_define_method(rb_cSymbol, "intern", sym_to_sym, 0); + 
rb_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0); + rb_define_method(rb_cSymbol, "to_proc", sym_to_proc, 0); + rb_define_method(rb_cSymbol, "succ", sym_succ, 0); + rb_define_method(rb_cSymbol, "next", sym_succ, 0); + + rb_define_method(rb_cSymbol, "<=>", sym_cmp, 1); + rb_define_method(rb_cSymbol, "casecmp", sym_casecmp, 1); + rb_define_method(rb_cSymbol, "=~", sym_match, 1); + + rb_define_method(rb_cSymbol, "[]", sym_aref, -1); + rb_define_method(rb_cSymbol, "slice", sym_aref, -1); + rb_define_method(rb_cSymbol, "length", sym_length, 0); + rb_define_method(rb_cSymbol, "size", sym_length, 0); + rb_define_method(rb_cSymbol, "empty?", sym_empty, 0); + rb_define_method(rb_cSymbol, "match", sym_match, 1); + + rb_define_method(rb_cSymbol, "upcase", sym_upcase, 0); + rb_define_method(rb_cSymbol, "downcase", sym_downcase, 0); + rb_define_method(rb_cSymbol, "capitalize", sym_capitalize, 0); + rb_define_method(rb_cSymbol, "swapcase", sym_swapcase, 0); + + rb_define_method(rb_cSymbol, "encoding", sym_encoding, 0); +} diff --git a/struct.c b/struct.c new file mode 100644 index 0000000..4d74bc7 --- /dev/null +++ b/struct.c @@ -0,0 +1,924 @@ +/********************************************************************** + + struct.c - + + $Author: yugui $ + created at: Tue Mar 22 18:44:30 JST 1995 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby/ruby.h" + +VALUE rb_cStruct; + +static VALUE struct_alloc(VALUE); + +VALUE +rb_struct_iv_get(VALUE c, const char *name) +{ + ID id; + + id = rb_intern(name); + for (;;) { + if (rb_ivar_defined(c, id)) + return rb_ivar_get(c, id); + c = RCLASS_SUPER(c); + if (c == 0 || c == rb_cStruct) + return Qnil; + } +} + +VALUE +rb_struct_s_members(VALUE klass) +{ + VALUE members = rb_struct_iv_get(klass, "__members__"); + + if (NIL_P(members)) { + rb_raise(rb_eTypeError, "uninitialized struct"); + } + if (TYPE(members) != T_ARRAY) { + rb_raise(rb_eTypeError, 
"corrupted struct"); + } + return members; +} + +VALUE +rb_struct_members(VALUE s) +{ + VALUE members = rb_struct_s_members(rb_obj_class(s)); + + if (RSTRUCT_LEN(s) != RARRAY_LEN(members)) { + rb_raise(rb_eTypeError, "struct size differs (%ld required %ld given)", + RARRAY_LEN(members), RSTRUCT_LEN(s)); + } + return members; +} + +static VALUE +rb_struct_s_members_m(VALUE klass) +{ + VALUE members, ary; + VALUE *p, *pend; + + members = rb_struct_s_members(klass); + ary = rb_ary_new2(RARRAY_LEN(members)); + p = RARRAY_PTR(members); pend = p + RARRAY_LEN(members); + while (p < pend) { + rb_ary_push(ary, *p); + p++; + } + + return ary; +} + +/* + * call-seq: + * struct.members => array + * + * Returns an array of strings representing the names of the instance + * variables. + * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * joe.members #=> [:name, :address, :zip] + */ + +static VALUE +rb_struct_members_m(VALUE obj) +{ + return rb_struct_s_members_m(rb_obj_class(obj)); +} + +VALUE +rb_struct_getmember(VALUE obj, ID id) +{ + VALUE members, slot; + long i; + + members = rb_struct_members(obj); + slot = ID2SYM(id); + for (i=0; i= 4) + rb_raise(rb_eSecurityError, "Insecure: can't modify Struct"); +} + +static VALUE +rb_struct_set(VALUE obj, VALUE val) +{ + VALUE members, slot; + long i; + + members = rb_struct_members(obj); + rb_struct_modify(obj); + for (i=0; iklass); + rb_class_inherited(klass, nstr); + } + else { + /* old style: should we warn? 
*/ + name = rb_str_to_str(name); + id = rb_to_id(name); + if (!rb_is_const_id(id)) { + rb_name_error(id, "identifier %s needs to be constant", StringValuePtr(name)); + } + if (rb_const_defined_at(klass, id)) { + rb_warn("redefining constant Struct::%s", StringValuePtr(name)); + rb_mod_remove_const(klass, ID2SYM(id)); + } + nstr = rb_define_class_id_under(klass, id, klass); + } + rb_iv_set(nstr, "__members__", members); + + rb_define_alloc_func(nstr, struct_alloc); + rb_define_singleton_method(nstr, "new", rb_class_new_instance, -1); + rb_define_singleton_method(nstr, "[]", rb_class_new_instance, -1); + rb_define_singleton_method(nstr, "members", rb_struct_s_members_m, 0); + for (i=0; i< RARRAY_LEN(members); i++) { + ID id = SYM2ID(RARRAY_PTR(members)[i]); + if (rb_is_local_id(id) || rb_is_const_id(id)) { + if (i < N_REF_FUNC) { + rb_define_method_id(nstr, id, ref_func[i], 0); + } + else { + rb_define_method_id(nstr, id, rb_struct_ref, 0); + } + rb_define_method_id(nstr, rb_id_attrset(id), rb_struct_set, 1); + } + } + + return nstr; +} + +VALUE +rb_struct_alloc_noinit(VALUE klass) +{ + return struct_alloc(klass); +} + +VALUE +rb_struct_define_without_accessor(const char *class_name, VALUE super, rb_alloc_func_t alloc, ...) +{ + VALUE klass; + va_list ar; + VALUE members; + long i; + char *name; + + members = rb_ary_new2(0); + va_start(ar, alloc); + i = 0; + while ((name = va_arg(ar, char*)) != NULL) { + rb_ary_push(members, ID2SYM(rb_intern(name))); + } + va_end(ar); + OBJ_FREEZE(members); + + if (class_name) { + klass = rb_define_class(class_name, super); + } + else { + klass = rb_class_new(super); + rb_make_metaclass(klass, RBASIC(super)->klass); + rb_class_inherited(super, klass); + } + + rb_iv_set(klass, "__members__", members); + + if (alloc) + rb_define_alloc_func(klass, alloc); + else + rb_define_alloc_func(klass, struct_alloc); + + return klass; +} + +VALUE +rb_struct_define(const char *name, ...) 
+{ + va_list ar; + VALUE nm, ary; + char *mem; + + if (!name) nm = Qnil; + else nm = rb_str_new2(name); + ary = rb_ary_new(); + + va_start(ar, name); + while ((mem = va_arg(ar, char*)) != 0) { + ID slot = rb_intern(mem); + rb_ary_push(ary, ID2SYM(slot)); + } + va_end(ar); + + return make_struct(nm, ary, rb_cStruct); +} + +/* + * call-seq: + * Struct.new( [aString] [, aSym]+> ) => StructClass + * StructClass.new(arg, ...) => obj + * StructClass[arg, ...] => obj + * + * Creates a new class, named by aString, containing accessor + * methods for the given symbols. If the name aString is + * omitted, an anonymous structure class will be created. Otherwise, + * the name of this struct will appear as a constant in class + * Struct, so it must be unique for all + * Structs in the system and should start with a capital + * letter. Assigning a structure class to a constant effectively gives + * the class the name of the constant. + * + * Struct::new returns a new Class object, + * which can then be used to create specific instances of the new + * structure. The number of actual parameters must be + * less than or equal to the number of attributes defined for this + * class; unset parameters default to nil. Passing too many + * parameters will raise an ArgumentError. + * + * The remaining methods listed in this section (class and instance) + * are defined for this generated class. 
+ * + * # Create a structure with a name in Struct + * Struct.new("Customer", :name, :address) #=> Struct::Customer + * Struct::Customer.new("Dave", "123 Main") #=> # + * + * # Create a structure named by its constant + * Customer = Struct.new(:name, :address) #=> Customer + * Customer.new("Dave", "123 Main") #=> # + */ + +static VALUE +rb_struct_s_def(int argc, VALUE *argv, VALUE klass) +{ + VALUE name, rest; + long i; + VALUE st; + ID id; + + rb_scan_args(argc, argv, "1*", &name, &rest); + if (!NIL_P(name) && SYMBOL_P(name)) { + rb_ary_unshift(rest, name); + name = Qnil; + } + for (i=0; i argc) { + rb_mem_clear(RSTRUCT_PTR(self)+argc, n-argc); + } + return Qnil; +} + +VALUE +rb_struct_initialize(VALUE self, VALUE values) +{ + return rb_struct_initialize_m(RARRAY_LEN(values), RARRAY_PTR(values), self); +} + +static VALUE +struct_alloc(VALUE klass) +{ + long n; + NEWOBJ(st, struct RStruct); + OBJSETUP(st, klass, T_STRUCT); + + n = num_members(klass); + + if (0 < n && n <= RSTRUCT_EMBED_LEN_MAX) { + RBASIC(st)->flags &= ~RSTRUCT_EMBED_LEN_MASK; + RBASIC(st)->flags |= n << RSTRUCT_EMBED_LEN_SHIFT; + rb_mem_clear(st->as.ary, n); + } + else { + st->as.heap.ptr = ALLOC_N(VALUE, n); + rb_mem_clear(st->as.heap.ptr, n); + st->as.heap.len = n; + } + + return (VALUE)st; +} + +VALUE +rb_struct_alloc(VALUE klass, VALUE values) +{ + return rb_class_new_instance(RARRAY_LEN(values), RARRAY_PTR(values), klass); +} + +VALUE +rb_struct_new(VALUE klass, ...) +{ + VALUE *mem; + long size, i; + va_list args; + + size = num_members(klass); + mem = ALLOCA_N(VALUE, size); + va_start(args, klass); + for (i=0; i struct + * + * Calls block once for each instance variable, passing the + * value as a parameter. 
+ * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * joe.each {|x| puts(x) } + * + * produces: + * + * Joe Smith + * 123 Maple, Anytown NC + * 12345 + */ + +static VALUE +rb_struct_each(VALUE s) +{ + long i; + + RETURN_ENUMERATOR(s, 0, 0); + for (i=0; i struct + * + * Calls block once for each instance variable, passing the name + * (as a symbol) and the value as parameters. + * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * joe.each_pair {|name, value| puts("#{name} => #{value}") } + * + * produces: + * + * name => Joe Smith + * address => 123 Maple, Anytown NC + * zip => 12345 + */ + +static VALUE +rb_struct_each_pair(VALUE s) +{ + VALUE members; + long i; + + RETURN_ENUMERATOR(s, 0, 0); + members = rb_struct_members(s); + for (i=0; i"); + } + + members = rb_struct_members(s); + for (i=0; i 0) { + rb_str_cat2(str, ", "); + } + else if (first != '#') { + rb_str_cat2(str, " "); + } + slot = RARRAY_PTR(members)[i]; + id = SYM2ID(slot); + if (rb_is_local_id(id) || rb_is_const_id(id)) { + rb_str_append(str, rb_id2str(id)); + } + else { + rb_str_append(str, rb_inspect(slot)); + } + rb_str_cat2(str, "="); + rb_str_append(str, rb_inspect(RSTRUCT_PTR(s)[i])); + } + rb_str_cat2(str, ">"); + OBJ_INFECT(str, s); + + return str; +} + +/* + * call-seq: + * struct.to_s => string + * struct.inspect => string + * + * Describe the contents of this struct in a string. + */ + +static VALUE +rb_struct_inspect(VALUE s) +{ + return rb_exec_recursive(inspect_struct, s, 0); +} + +/* + * call-seq: + * struct.to_a => array + * struct.values => array + * + * Returns the values for this instance as an array. 
+ * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * joe.to_a[1] #=> "123 Maple, Anytown NC" + */ + +static VALUE +rb_struct_to_a(VALUE s) +{ + return rb_ary_new4(RSTRUCT_LEN(s), RSTRUCT_PTR(s)); +} + +/* :nodoc: */ +VALUE +rb_struct_init_copy(VALUE copy, VALUE s) +{ + if (copy == s) return copy; + rb_check_frozen(copy); + if (!rb_obj_is_instance_of(s, rb_obj_class(copy))) { + rb_raise(rb_eTypeError, "wrong argument class"); + } + if (RSTRUCT_LEN(copy) != RSTRUCT_LEN(s)) { + rb_raise(rb_eTypeError, "struct size mismatch"); + } + MEMCPY(RSTRUCT_PTR(copy), RSTRUCT_PTR(s), VALUE, RSTRUCT_LEN(copy)); + + return copy; +} + +static VALUE +rb_struct_aref_id(VALUE s, ID id) +{ + VALUE members; + long i, len; + + members = rb_struct_members(s); + len = RARRAY_LEN(members); + for (i=0; i anObject + * struct[fixnum] => anObject + * + * Attribute Reference---Returns the value of the instance variable + * named by symbol, or indexed (0..length-1) by + * fixnum. Will raise NameError if the named + * variable does not exist, or IndexError if the index is + * out of range. 
+ * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * + * joe["name"] #=> "Joe Smith" + * joe[:name] #=> "Joe Smith" + * joe[0] #=> "Joe Smith" + */ + +VALUE +rb_struct_aref(VALUE s, VALUE idx) +{ + long i; + + if (TYPE(idx) == T_STRING || TYPE(idx) == T_SYMBOL) { + return rb_struct_aref_id(s, rb_to_id(idx)); + } + + i = NUM2LONG(idx); + if (i < 0) i = RSTRUCT_LEN(s) + i; + if (i < 0) + rb_raise(rb_eIndexError, "offset %ld too small for struct(size:%ld)", + i, RSTRUCT_LEN(s)); + if (RSTRUCT_LEN(s) <= i) + rb_raise(rb_eIndexError, "offset %ld too large for struct(size:%ld)", + i, RSTRUCT_LEN(s)); + return RSTRUCT_PTR(s)[i]; +} + +static VALUE +rb_struct_aset_id(VALUE s, ID id, VALUE val) +{ + VALUE members; + long i, len; + + members = rb_struct_members(s); + rb_struct_modify(s); + len = RARRAY_LEN(members); + if (RSTRUCT_LEN(s) != RARRAY_LEN(members)) { + rb_raise(rb_eTypeError, "struct size differs (%ld required %ld given)", + RARRAY_LEN(members), RSTRUCT_LEN(s)); + } + for (i=0; i obj + * struct[fixnum] = obj => obj + * + * Attribute Assignment---Assigns to the instance variable named by + * symbol or fixnum the value obj and + * returns it. Will raise a NameError if the named + * variable does not exist, or an IndexError if the index + * is out of range. 
+ * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * + * joe["name"] = "Luke" + * joe[:zip] = "90210" + * + * joe.name #=> "Luke" + * joe.zip #=> "90210" + */ + +VALUE +rb_struct_aset(VALUE s, VALUE idx, VALUE val) +{ + long i; + + if (TYPE(idx) == T_STRING || TYPE(idx) == T_SYMBOL) { + return rb_struct_aset_id(s, rb_to_id(idx), val); + } + + i = NUM2LONG(idx); + if (i < 0) i = RSTRUCT_LEN(s) + i; + if (i < 0) { + rb_raise(rb_eIndexError, "offset %ld too small for struct(size:%ld)", + i, RSTRUCT_LEN(s)); + } + if (RSTRUCT_LEN(s) <= i) { + rb_raise(rb_eIndexError, "offset %ld too large for struct(size:%ld)", + i, RSTRUCT_LEN(s)); + } + rb_struct_modify(s); + return RSTRUCT_PTR(s)[i] = val; +} + +static VALUE +struct_entry(VALUE s, long n) +{ + return rb_struct_aref(s, LONG2NUM(n)); +} + +/* + * call-seq: + * struct.values_at(selector,... ) => an_array + * + * Returns an array containing the elements in + * _self_ corresponding to the given selector(s). The selectors + * may be either integer indices or ranges. + * See also .select. + * + * a = %w{ a b c d e f } + * a.values_at(1, 3, 5) + * a.values_at(1, 3, 5, 7) + * a.values_at(-1, -3, -5, -7) + * a.values_at(1..3, 2...5) + */ + +static VALUE +rb_struct_values_at(int argc, VALUE *argv, VALUE s) +{ + return rb_get_values_at(s, RSTRUCT_LEN(s), argc, argv, struct_entry); +} + +/* + * call-seq: + * struct.select {|i| block } => array + * + * Invokes the block passing in successive elements from + * struct, returning an array containing those elements + * for which the block returns a true value (equivalent to + * Enumerable#select). + * + * Lots = Struct.new(:a, :b, :c, :d, :e, :f) + * l = Lots.new(11, 22, 33, 44, 55, 66) + * l.select {|v| (v % 2).zero? 
} #=> [22, 44, 66] + */ + +static VALUE +rb_struct_select(int argc, VALUE *argv, VALUE s) +{ + VALUE result; + long i; + + if (argc > 0) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", argc); + } + result = rb_ary_new(); + for (i = 0; i < RSTRUCT_LEN(s); i++) { + if (RTEST(rb_yield(RSTRUCT_PTR(s)[i]))) { + rb_ary_push(result, RSTRUCT_PTR(s)[i]); + } + } + + return result; +} + +static VALUE +recursive_equal(VALUE s, VALUE s2, int recur) +{ + long i; + + if (recur) return Qtrue; /* Subtle! */ + for (i=0; i true or false + * + * Equality---Returns true if other_struct is + * equal to this one: they must be of the same class as generated by + * Struct::new, and the values of all instance variables + * must be equal (according to Object#==). + * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * joejr = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * jane = Customer.new("Jane Doe", "456 Elm, Anytown NC", 12345) + * joe == joejr #=> true + * joe == jane #=> false + */ + +static VALUE +rb_struct_equal(VALUE s, VALUE s2) +{ + if (s == s2) return Qtrue; + if (TYPE(s2) != T_STRUCT) return Qfalse; + if (rb_obj_class(s) != rb_obj_class(s2)) return Qfalse; + if (RSTRUCT_LEN(s) != RSTRUCT_LEN(s2)) { + rb_bug("inconsistent struct"); /* should never happen */ + } + + return rb_exec_recursive_paired(recursive_equal, s, s2, s2); +} + +/* + * call-seq: + * struct.hash => fixnum + * + * Return a hash value based on this struct's contents. + */ + +static VALUE +rb_struct_hash(VALUE s) +{ + long i, h; + VALUE n; + + h = rb_hash(rb_obj_class(s)); + for (i = 0; i < RSTRUCT_LEN(s); i++) { + h = (h << 1) | (h<0 ? 1 : 0); + n = rb_hash(RSTRUCT_PTR(s)[i]); + h ^= NUM2LONG(n); + } + return LONG2FIX(h); +} + +static VALUE +recursive_eql(VALUE s, VALUE s2, int recur) +{ + long i; + + if (recur) return Qtrue; /* Subtle! 
*/ + for (i=0; i true or false + * + * Two structures are equal if they are the same object, or if all their + * fields are equal (using eql?). + */ + +static VALUE +rb_struct_eql(VALUE s, VALUE s2) +{ + if (s == s2) return Qtrue; + if (TYPE(s2) != T_STRUCT) return Qfalse; + if (rb_obj_class(s) != rb_obj_class(s2)) return Qfalse; + if (RSTRUCT_LEN(s) != RSTRUCT_LEN(s2)) { + rb_bug("inconsistent struct"); /* should never happen */ + } + + return rb_exec_recursive_paired(recursive_eql, s, s2, s2); +} + +/* + * call-seq: + * struct.length => fixnum + * struct.size => fixnum + * + * Returns the number of instance variables. + * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * joe.length #=> 3 + */ + +static VALUE +rb_struct_size(VALUE s) +{ + return LONG2FIX(RSTRUCT_LEN(s)); +} + +/* + * A Struct is a convenient way to bundle a number of + * attributes together, using accessor methods, without having to write + * an explicit class. + * + * The Struct class is a generator of specific classes, + * each one of which is defined to hold a set of variables and their + * accessors. In these examples, we'll call the generated class + * ``CustomerClass,'' and we'll show an example instance of that + * class as ``CustomerInst.'' + * + * In the descriptions that follow, the parameter symbol refers + * to a symbol, which is either a quoted string or a + * Symbol (such as :name). 
+ */ +void +Init_Struct(void) +{ + rb_cStruct = rb_define_class("Struct", rb_cObject); + rb_include_module(rb_cStruct, rb_mEnumerable); + + rb_undef_alloc_func(rb_cStruct); + rb_define_singleton_method(rb_cStruct, "new", rb_struct_s_def, -1); + + rb_define_method(rb_cStruct, "initialize", rb_struct_initialize_m, -1); + rb_define_method(rb_cStruct, "initialize_copy", rb_struct_init_copy, 1); + + rb_define_method(rb_cStruct, "==", rb_struct_equal, 1); + rb_define_method(rb_cStruct, "eql?", rb_struct_eql, 1); + rb_define_method(rb_cStruct, "hash", rb_struct_hash, 0); + + rb_define_method(rb_cStruct, "to_s", rb_struct_inspect, 0); + rb_define_method(rb_cStruct, "inspect", rb_struct_inspect, 0); + rb_define_method(rb_cStruct, "to_a", rb_struct_to_a, 0); + rb_define_method(rb_cStruct, "values", rb_struct_to_a, 0); + rb_define_method(rb_cStruct, "size", rb_struct_size, 0); + rb_define_method(rb_cStruct, "length", rb_struct_size, 0); + + rb_define_method(rb_cStruct, "each", rb_struct_each, 0); + rb_define_method(rb_cStruct, "each_pair", rb_struct_each_pair, 0); + rb_define_method(rb_cStruct, "[]", rb_struct_aref, 1); + rb_define_method(rb_cStruct, "[]=", rb_struct_aset, 2); + rb_define_method(rb_cStruct, "select", rb_struct_select, -1); + rb_define_method(rb_cStruct, "values_at", rb_struct_values_at, -1); + + rb_define_method(rb_cStruct, "members", rb_struct_members_m, 0); +} diff --git a/template/encdb.h.tmpl b/template/encdb.h.tmpl new file mode 100644 index 0000000..e65b651 --- /dev/null +++ b/template/encdb.h.tmpl @@ -0,0 +1,79 @@ +<% +# +# OnigEncodingDefine(foo, Foo) = { +# .. +# "Shift_JIS", /* Canonical Name */ +# .. 
+# }; +# ENC_ALIAS("SJIS", "Shift_JIS") +# ENC_REPLICATE("Windows-31J", "Shift_JIS") +# ENC_ALIAS("CP932", "Windows-31J") +# + +def check_duplication(defs, name, fn, line) + if defs[name] + raise ArgumentError, "%s:%d: encoding %s is already registered(%s:%d)" % + [fn, line, name, *defs[name]] + else + defs[name.upcase] = [fn,line] + end +end + +count = 0 +lines = [] +encodings = [] +defs = {} +encdirs = ARGV.dup +encdirs << 'enc' if encdirs.empty? +files = {} +encdirs.each do |encdir| + next unless File.directory?(encdir) + Dir.open(encdir) {|d| d.grep(/.+\.[ch]\z/)}.sort_by {|e| + e.scan(/(\d+)|(\D+)/).map {|n,a| a||[n.size,n.to_i]}.flatten + }.each do |fn| + next if files[fn] + files[fn] = true + open(File.join(encdir,fn)) do |f| + orig = nil + name = nil + f.each_line do |line| + if (/^OnigEncodingDefine/ =~ line)..(/"(.*?)"/ =~ line) + if $1 + check_duplication(defs, $1, fn, $.) + encodings << $1 + count += 1 + end + else + case line + when /^\s*rb_enc_register\(\s*"([^"]+)"/ + count += 1 + line = nil + when /^ENC_REPLICATE\(\s*"([^"]+)"\s*,\s*"([^"]+)"/ + raise ArgumentError, + '%s:%d: ENC_REPLICATE: %s is not defined yet. (replica %s)' % + [fn, $., $2, $1] unless defs[$2.upcase] + count += 1 + when /^ENC_ALIAS\(\s*"([^"]+)"\s*,\s*"([^"]+)"/ + raise ArgumentError, + '%s:%d: ENC_ALIAS: %s is not defined yet. (alias %s)' % + [fn, $., $2, $1] unless defs[$2.upcase] + when /^ENC_DUMMY\(\s*"([^"]+)"/ + count += 1 + else + next + end + check_duplication(defs, $1, fn, $.) 
+ lines << line.sub(/;.*/m, "").chomp + ";" if line + end + end + end + end +end +encodings.each do |e| +%>ENC_DEFINE("<%=e%>"); +% end +% lines.each do |line| +<%=line%> +% end + +#define ENCODING_COUNT <%=count%> diff --git a/template/fake.rb.in b/template/fake.rb.in new file mode 100644 index 0000000..ed9416a --- /dev/null +++ b/template/fake.rb.in @@ -0,0 +1,15 @@ +class Object + CROSS_COMPILING = RUBY_PLATFORM + remove_const :RUBY_PLATFORM + remove_const :RUBY_VERSION + remove_const :RUBY_DESCRIPTION if defined?(RUBY_DESCRIPTION) + RUBY_PLATFORM = "@arch@" + RUBY_VERSION = "@MAJOR@.@MINOR@.@TEENY@" + RUBY_DESCRIPTION = "ruby #{RUBY_VERSION} (#{Time.now.strftime("%Y-%m-%d")}) [#{RUBY_PLATFORM}]" +end +if RUBY_PLATFORM =~ /mswin|bccwin|mingw/ + class File + remove_const :ALT_SEPARATOR + ALT_SEPARATOR = "\\" + end +end diff --git a/template/id.h.tmpl b/template/id.h.tmpl new file mode 100644 index 0000000..b511f9c --- /dev/null +++ b/template/id.h.tmpl @@ -0,0 +1,127 @@ +%# -*- c -*- +/* DO NOT EDIT THIS FILE DIRECTLY */ +/********************************************************************** + + id.h - + + $Author: yugui $ + created at: Sun Oct 19 21:12:51 2008 + + Copyright (C) 2007 Koichi Sasada + +**********************************************************************/ +<% +require 'optparse' +vpath = ["."] +input = nil +opt = OptionParser.new do |o| + o.on('-v', '--vpath=DIR') {|dirs| vpath.concat dirs.split(File::PATH_SEPARATOR)} + input, = o.order!(ARGV) +end or abort opt.opt_s + +tokens = nil +vpath.find do |dir| + begin + if line = File.read(File.join(dir, input))[/^\s*enum\s+yytokentype\s*\{([^{}]*)\s*\};/m, 1] + tokens = line.scan(/\b(t(?:LAST_TOKEN|U(?:PLUS|MINUS)|POW|CMP|EQQ?|[NGL]EQ|(?:AND|OR)OP|N?MATCH|DOT\d|AREF|ASET|[LR]SHFT|LAMBDA)|id\w+)\s*=\s*(\d+),?/m) + end + rescue Errno::ENOENT + nil + else + true + end +end +%> +#ifndef RUBY_ID_H +#define RUBY_ID_H + +#define ID_SCOPE_SHIFT 3 +#define ID_SCOPE_MASK 0x07 +#define ID_LOCAL 0x00 +#define 
ID_INSTANCE 0x01 +#define ID_GLOBAL 0x03 +#define ID_ATTRSET 0x04 +#define ID_CONST 0x05 +#define ID_CLASS 0x06 +#define ID_JUNK 0x07 +#define ID_INTERNAL ID_JUNK + +#ifdef USE_PARSE_H +#include "parse.h" +#endif + +#define symIFUNC ID2SYM(idIFUNC) +#define symCFUNC ID2SYM(idCFUNC) + +#if !defined tLAST_TOKEN && defined YYTOKENTYPE +#define tLAST_TOKEN tLAST_TOKEN +#endif + +enum ruby_method_ids { +#ifndef tLAST_TOKEN +% tokens.each do |token, value| + <%=token%> = <%=value%>, +% end +#endif + idPLUS = '+', + idMINUS = '-', + idMULT = '*', + idDIV = '/', + idMOD = '%', + idLT = '<', + idLTLT = tLSHFT, + idLE = tLEQ, + idGT = '>', + idGE = tGEQ, + idEq = tEQ, + idEqq = tEQQ, + idNeq = tNEQ, + idNot = '!', + idBackquote = '`', + idEqTilde = tMATCH, + idAREF = tAREF, + idASET = tASET, + idLAST_TOKEN = tLAST_TOKEN >> ID_SCOPE_SHIFT, + tIntern, + tMethodMissing, + tLength, + tGets, + tSucc, + tEach, + tLambda, + tSend, + t__send__, + tInitialize, +#if SUPPORT_JOKE + tBitblt, + tAnswer, +#endif + tLAST_ID, +#define TOKEN2ID(n) id##n = ((t##n<, <%=value%>); +% end +}; +#endif + +#endif /* RUBY_ID_H */ diff --git a/template/insns.inc.tmpl b/template/insns.inc.tmpl new file mode 100644 index 0000000..a64b39c --- /dev/null +++ b/template/insns.inc.tmpl @@ -0,0 +1,20 @@ +/** -*-c-*- + This file contains YARV instructions list. + + ---- + This file is auto generated by insns2vm.rb + DO NOT TOUCH! + + If you want to fix something, you must edit 'template/insns.inc.tmpl' + or insns2vm.rb + */ + + +/* BIN : Basic Instruction Name */ +#define BIN(n) YARVINSN_##n + +enum ruby_vminsn_type { +<%= insns %> + VM_INSTRUCTION_SIZE = <%= @insns.size %> +}; + diff --git a/template/insns_info.inc.tmpl b/template/insns_info.inc.tmpl new file mode 100644 index 0000000..a58ac3c --- /dev/null +++ b/template/insns_info.inc.tmpl @@ -0,0 +1,83 @@ +/** -*-c-*- + This file contains instruction information for yarv instruction sequence. 
+ + ---- + This file is auto generated by insns2vm.rb + DO NOT TOUCH! + + If you want to fix something, you must edit 'template/insns_info.inc.tmpl' + or insns2vm.rb + */ + +<%= insn_type_chars %> + +static const char *const insn_name_info[] = { +<%= insn_names %> +}; + +static const char *const insn_operand_info[] = { +<%= operands_info %> +}; + +static const int insn_len_info[] = { +<%= operands_num_info %> +}; + +#ifdef USE_INSN_RET_NUM +static const int insn_stack_push_num_info[] = { +<%= stack_num_info %> +}; +#endif + +#ifdef USE_INSN_STACK_INCREASE +static int +insn_stack_increase(int depth, int insn, VALUE *opes) +{ + switch(insn){ +<%= stack_increase %> + default: + rb_bug("insn_sp_increase: unreachable"); + } + return 0; +} +#endif + +/* some utilities */ + +static int +insn_len(int insn) +{ + return insn_len_info[insn]; +} + +static const char * +insn_name(int insn) +{ + return insn_name_info[insn]; +} + +static const char * +insn_op_types(int insn) +{ + return insn_operand_info[insn]; +} + +static int +insn_op_type(int insn, int pos) +{ + int len = insn_len(insn) - 1; + if(pos < len){ + return insn_operand_info[insn][pos]; + } + else{ + return 0; + } +} + +#ifdef USE_INSN_RET_NUM +static int +insn_ret_num(int insn) +{ + return insn_stack_push_num_info[insn]; +} +#endif diff --git a/template/known_errors.inc.tmpl b/template/known_errors.inc.tmpl new file mode 100644 index 0000000..23bca21 --- /dev/null +++ b/template/known_errors.inc.tmpl @@ -0,0 +1,14 @@ +/** -*-c-*- + * DO NOT EDIT + * This file is automatically generated by tools/generic_erb.rb from + * template/known_errors.inc.tmpl and defs/known_errors.def. 
+ */ + +% error_names = ARGF.read.split(/\s+/) +% error_names.each do |name| +#ifdef <%=name%> + set_syserr(<%=name%>, "<%=name%>"); +#else + set_syserr(0, "<%=name%>"); +#endif +% end diff --git a/template/minsns.inc.tmpl b/template/minsns.inc.tmpl new file mode 100644 index 0000000..f994c9b --- /dev/null +++ b/template/minsns.inc.tmpl @@ -0,0 +1,14 @@ +/** -*-c-*- + This file contains YARV instructions list, to define YARVCore::Instructions. + + ---- + This file is auto generated by insns2vm.rb + DO NOT TOUCH! + + If you want to fix something, you must edit 'template/minsns.inc.tmpl' + or insns2vm.rb + */ + +<%= defs %> + + diff --git a/template/opt_sc.inc.tmpl b/template/opt_sc.inc.tmpl new file mode 100644 index 0000000..f5ad339 --- /dev/null +++ b/template/opt_sc.inc.tmpl @@ -0,0 +1,32 @@ +/* -*-c-*- *********************************************************/ +/*******************************************************************/ +/*******************************************************************/ +/** + This file is for threaded code. + + ---- + This file is auto generated by insns2vm.rb + DO NOT TOUCH! + + If you want to fix something, you must edit 'template/opt_sc.inc.tmpl' + or rb/insns2vm.rb + */ + +#define SC_STATE_SIZE 6 + +#define SCS_XX 1 +#define SCS_AX 2 +#define SCS_BX 3 +#define SCS_AB 4 +#define SCS_BA 5 + +#define SC_ERROR 0xffffffff + +static const VALUE sc_insn_info[][SC_STATE_SIZE] = { +<%= sc_insn_info %> +}; + +static const VALUE sc_insn_next[] = { +<%= sc_insn_next %> +}; + diff --git a/template/optinsn.inc.tmpl b/template/optinsn.inc.tmpl new file mode 100644 index 0000000..c7828fe --- /dev/null +++ b/template/optinsn.inc.tmpl @@ -0,0 +1,30 @@ +/* -*-c-*- *********************************************************/ +/*******************************************************************/ +/*******************************************************************/ +/** + This file is for threaded code. 
+ + ---- + This file is auto generated by insns2vm.rb + DO NOT TOUCH! + + If you want to fix something, you must edit 'template/optinsn.inc.tmpl' + or rb/insns2vm.rb + */ + +static INSN * +insn_operands_unification(INSN *insnobj){ +#ifdef OPT_OPERANDS_UNIFICATION + /* optimize rule */ + switch(insnobj->insn_id){ + +<%= rule %> + + default: + /* do nothing */; + break; + } +#endif + return insnobj; +} + diff --git a/template/optunifs.inc.tmpl b/template/optunifs.inc.tmpl new file mode 100644 index 0000000..9b268f0 --- /dev/null +++ b/template/optunifs.inc.tmpl @@ -0,0 +1,35 @@ +/* -*-c-*- *********************************************************/ +/*******************************************************************/ +/*******************************************************************/ +/** + This file is for threaded code. + + ---- + This file is auto generated by insns2vm.rb + DO NOT TOUCH! + + If you want to fix something, you must edit 'template/optunifs.inc.tmpl' + or rb/insns2vm.rb + */ + +/* + static const int UNIFIED_insn_name_1[] = {id, size, ...}; + static const int UNIFIED_insn_name_2[] = {id, size, ...}; + ... + + static const int *const UNIFIED_insn_name[] = {size, + UNIFIED_insn_name_1, + UNIFIED_insn_name_2, ...}; + ... + + static const int *const *const unified_insns_data[] = { + UNIFIED_insn_nameA, + UNIFIED_insn_nameB, ...}; + */ + +<%= unif_insns_each %> +<%= unif_insns %> +<%= unif_insns_data %> + +#undef GET_INSN_NAME + diff --git a/template/transdb.h.tmpl b/template/transdb.h.tmpl new file mode 100644 index 0000000..d0cf101 --- /dev/null +++ b/template/transdb.h.tmpl @@ -0,0 +1,59 @@ +<% +# +# static const rb_transcoder +# rb_from_US_ASCII = { +# "US-ASCII", "UTF-8", &from_US_ASCII, 1, 0, +# NULL, NULL, +# }; +# + +count = 0 +converters = {} +transdirs = ARGV.dup +transdirs << 'enc/trans' if transdirs.empty? + +transdirs = transdirs.sort_by {|td| + -td.length +}.inject([]) {|tds, td| + next tds unless File.directory?(td) + tds << td if tds.all? 
{|td2| !File.identical?(td2, td) } + tds +} + +files = {} +names_t = [] +converter_list = [] +transdirs.each do |transdir| + names = Dir.entries(transdir) + names_t += names.map {|n| /(?!\A)\.trans\z/ =~ n ? $` : nil }.compact + names_c = names.map {|n| /(?!\A)\.c\z/ =~ n ? $` : nil }.compact + (names_t & names_c).map {|n| + "#{n}.c" + }.sort_by {|e| + e.scan(/(\d+)|(\D+)/).map {|n,a| a||[n.size,n.to_i]}.flatten + }.each do |fn| + next if files[fn] + files[fn] = true + path = File.join(transdir,fn) + open(path) do |f| + f.each_line do |line| + if (/^static const rb_transcoder/ =~ line)..(/"(.*?)"\s*,\s*"(.*?)"/ =~ line) + if $1 && $2 + from_to = "%s to %s" % [$1, $2] + if converters[from_to] + raise ArgumentError, '%s:%d: transcode "%s" is already registered at %s:%d' % + [path, $., from_to, *converters[from_to].values_at(3, 4)] + else + converters[from_to] = [$1, $2, fn[0..-3], path, $.] + converter_list << from_to + end + end + end + end + end + end +end +converter_list.each do |from_to| + from, to, fn = *converters[from_to] +%>rb_declare_transcoder("<%=from%>", "<%=to%>", "<%=fn%>"); +% end diff --git a/template/vm.inc.tmpl b/template/vm.inc.tmpl new file mode 100644 index 0000000..d152665 --- /dev/null +++ b/template/vm.inc.tmpl @@ -0,0 +1,29 @@ +/* -*-c-*- *********************************************************/ +/*******************************************************************/ +/*******************************************************************/ +/** + This file is VM main loop. + + ---- + This file is auto generated by insns2vm.rb + DO NOT TOUCH! 
+ + If you want to fix something, you must edit 'insns.c' + */ + +<%= +line = 15 +vm_body.gsub(/\n|__CURRENT_LINE__|__CURRENT_FILE__/){ + e = $& + case e + when '__CURRENT_LINE__' + line.to_s + when '__CURRENT_FILE__' + "vm.inc" + else + line += 1 + e + end +} +%> + diff --git a/template/vmtc.inc.tmpl b/template/vmtc.inc.tmpl new file mode 100644 index 0000000..f73bbc6 --- /dev/null +++ b/template/vmtc.inc.tmpl @@ -0,0 +1,18 @@ +/* -*-c-*- *********************************************************/ +/*******************************************************************/ +/*******************************************************************/ +/** + This file is for threaded code. + + ---- + This file is auto generated by insns2vm.rb + DO NOT TOUCH! + + If you want to fix something, you must edit 'template/vmtc.inc.tmpl' + or insns2vm.rb + */ + +static const void *const insns_address_table[] = { +<%= insns_table %> +}; + diff --git a/template/yarvarch.en b/template/yarvarch.en new file mode 100644 index 0000000..7a76e25 --- /dev/null +++ b/template/yarvarch.en @@ -0,0 +1,7 @@ +#title YARV: Yet another RubyVM - Software Architecture + +maybe writing. 
+ +* YARV instruction set + +<%= d %> diff --git a/template/yarvarch.ja b/template/yarvarch.ja new file mode 100644 index 0000000..c332c20 --- /dev/null +++ b/template/yarvarch.ja @@ -0,0 +1,454 @@ +#title YARVA[LeN` +#set author { Ruby + + +- 2005-03-03(Thu) 00:31:12 +0900 + +---- + +* H + +[[YARV: Yet Another RubyVM|http://www.atdot.net/yarv]] vB + + +YARV ARuby vO@\B + +- Compiler +- VM Generator +- VM (Virtual Machine) +- Assembler +- Dis-Assembler +- (experimental) JIT Compiler +- (experimental) AOT Compiler + + + YARV Ruby C^v^gCuB +ARuby C^v^Kv@\ip[TAIuWFNgA +gCujpB + +Apb` Ruby C^v^B + +ARuby {C^v^ieval.cjuw +Jp\B + + +* Compiler (compile.h, compile.c) + +RpCARuby C^v^p[T\iRNode +f[^j YARV BYARV q +B + +AXR[vJn[J +sA\HB + + Ruby Array IuWFNg YARV IuWFNgAIy +hi[As`BRpCAR +pCAYARV +ARuby C^v^|y +iK[x[WRN^I +jB + +YARV AqAIyhA 1 word i}V +\RlBC |C^TCYBRuby C^v^p + VALUE TCYj\BAYARV uoCg +R[hvBAYARV uvp +gB + +1 word ApAANZXx +lA{lBIyhR +X^gv[i[ACfbNXIyh\ +AANZX\eoApB + + +* VM Generator (rb/insns2vm.rb, insns.def) + +rb/insns2vm.rb XNvgAinsns.def t@CA +VM Kvt@CBIAs +ARpCKvAKKvAAZ +uAtAZuKvt@CB + + +** Lq + +insns.def AeLqBI +LqB + +- O +- JeSARgipA{j +- IyhO +- sOX^bN|bvl +- sX^bNvbVl +- WbNiC Lqj + +AX^bN self putself Lq +B + +#code +/** + @c put + @e put self. 
+ @j self uB + */ +DEFINE_INSN +putself +() +() +(VALUE val) +{ + val = GET_SELF(); +} +#end + +AIyhAX^bN|bvlB +IAself X^bNgbvuA val A +X^bNvbVlA +X^bNgbvu C vOB + +tH[}bg insns.def `QB +vB + +insnhelper.h t@CAWbNLqKv}N +`BAVM \` vm.h t@C +B + + +* VM (Virtual Machine, vm.h, vm.c) + +VM ARpC YARV sB +A YARV LAI eval.c VM u +lB + + Ruby C^v^sA VM +iiKSAjB + +VM APX^bN}VBXbhX^b +NBX^bNq[vA_ +\B + + +** WX^ + +VM 5 zIWX^B + +- PC (Program Counter) +- SP (Stack Pointer) +- CFP (Control Frame Pointer) +- LFP (Local Frame Pointer) +- DFP (Dynamic Frame Pointer) + +PC suBSP X^bNgbvu +BCFPALFPADFP t[BqB + + +** X^bNt[ + +obsolete (update soon) + + +** t[fUC + +Lisp nAubN[Jt[\ +bh[Jt[pB +t[Aq\A[JANZXqO +HKiAlfp KvjB + +ARuby BA\bh[J +AIubNselficallee recieverjB +t[B + +ARuby2.0 ubN[JiubN[J +cA\jBA\bh[J +ANZXp\zB + +A\bh[JANZXt[iXR[vj +XgfAI\bh[JXR[v +ubNt[AubNt[\bh[Jt[ + lfpWX^eANZXB + + +** \bho + +\bhoAYARV Lq\bhAC Lq +\bhfBXpb`@B + +YARV AqX^bNt[p +B VM AosB + +C Lq\bhAPoiA +obNg[X\bhot +sjB + +AVM pX^bNrpAvO}V +X^bNg\iC -> Ruby -> C -> ... +ojBAdlB + + +** O + +OAJava JVM lOe[upBO +AYt[AOe[uBAO + PC lvGgAGg] +BGgAX^bNT +lXR[vOe[uB + +AbreakAreturniubNjAretry ldgB + +*** Oe[u + +Oe[uGgIi[B + +- PC +- O +- Wvij +- NubN iseq + + +*** rescue + +rescue ubNB$! lB +B + +#code +begin +rescue A +rescue B +rescue C +end +#end + +A Ruby XNvgB + +#code +{|err| + case err + when A === err + when B === err + when C === err + else + raise # yarv throw + end +} +#end + + +*** ensure + +niOjniOj2 +BnAAR[hR +pCBAnubNBK +throw B + + +*** break, returniubNjAretry + +break AubN return Aretry throw RpC +BAbreak tbNOe[uGg +fB + + +** + +OARuby RpCBA +`\B + +ANZXRubyLqB + +#code +Ruby\: +expr::ID::...::ID +#end + +AyarvZbgB + +#code +(expr) +getconstant ID +... 
+getconstant ID +#end + + +*** pX + + expr nil ApX]B + Ruby 2.0 XB + ++ NXAW[IlXgWivOj[gH ++ pW[giObjectjH + +ANXAW[IlXgWB +Athread_object klass_nest_stack pB +AlXgB + +\bh`AlXg\bh`idupj +A\bhsAlXgQ\ +B + +gbvxAB + +NX/W[`sAQ +BANXXR[vANX`Rs[ +iRs[AsjB + +AIlXgIB + + +** K@ + +YARV IAK@p +BAqKsB + + +*** threaded code + +GCC C glxp direct threaded code +B + + +*** Peephole optimization + +PKB + + +*** inline method cache + +\bhB + + +*** inline constant cache + +B + + +*** ubN Proc IuWFNg + +ubNt\bhosubN Proc +IuWFNgBAKv Proc IuWFNg +}B + +Proc \bhAKv_AiXR[ +vmjq[vB + + +*** + +Fixnum mZosARXg +Av~eBus\bhop +pB + + +*** Z + + 1 BZ opt_insn_unif.def Lq +IB + + +*** IyhZ + +IyhBZ opt_operand.def +LqIB + + +*** stack caching + +X^bNgbvzWX^B 2 z +WX^zA5X^bNLbVOsBX^bNLb +VOIB + + +*** JIT Compile + +@B\BIR[hB +vOB + + +*** AOT Compile + +YARV C B\KsA +Brb/aotc.rb RpCB + + +* Assembler (rb/yasm.rb) + +YARV AZupBg rb/yasm.rb Q +iA@T|[g +jB + + +* Dis-Assembler (disasm.c) + +YARV IuWFNg YARVCore::InstructionSequence disasm +\bhBAtAZuB + + +* YARV Zbg + +<%= d %> + +* + +** eXg + +test/test_* eXgP[XBA~XBtA +eXgLqB + + +** x`}[N + +benchmark/bm_* x`}[NvOB + + +** \ + +A +BW eval.c u +B + + +*** Verifier + +YARV A~X\ +BAX^bNpOxt@C +AplB + + +*** Compiled File \z + +Ruby vOZbgVACYf[^\t@C +olBpxRpC +t@CA[hRpCARXg +B + + +**** S\ + +t@C\lAB + +#code +u4 : 4 byte unsigned storage +u2 : 2 byte unsigned storage +u1 : 1 byte unsigned storage + +every storages are little endian :-) + +CompiledFile{ + u4 magic; + + u2 major; + u2 minor; + + u4 character_code; + + u4 constants_pool_count; + ConstantEntry constants_pool[constants_pool_count]; + + u4 block_count; + blockEntry blocks[block_count]; + + u4 method_count; + MethodEntry methods[method_count]; +} +#end + +Java classfile pNB + diff --git a/template/yasmdata.rb.tmpl b/template/yasmdata.rb.tmpl new file mode 100644 index 0000000..95ad092 --- /dev/null +++ b/template/yasmdata.rb.tmpl @@ -0,0 +1,20 @@ +# -*-ruby-*- +# + +class VM + 
class InstructionSequence + class Instruction + InsnID2NO = { +<%= insn_id2no %> + } + + def self.id2insn_no id + if InsnID2NO.has_key? id + InsnID2NO[id] + end + end + end + end +end + + diff --git a/thread.c b/thread.c new file mode 100644 index 0000000..14a0145 --- /dev/null +++ b/thread.c @@ -0,0 +1,4015 @@ +/********************************************************************** + + thread.c - + + $Author: yugui $ + + Copyright (C) 2004-2007 Koichi Sasada + +**********************************************************************/ + +/* + YARV Thread Design + + model 1: Userlevel Thread + Same as traditional ruby thread. + + model 2: Native Thread with Global VM lock + Using pthread (or Windows thread) and Ruby threads run concurrently. + + model 3: Native Thread with fine grain lock + Using pthread and Ruby threads run concurrently or in parallel. + +------------------------------------------------------------------------ + + model 2: + A thread that holds the mutex (GVL: Global VM Lock or Giant VM Lock) can run. + When threads are scheduled, the running thread releases the GVL. If the running thread + tries a blocking operation, this thread must release the GVL and another + thread can continue this flow. After the blocking operation, the thread + must check interrupts (RUBY_VM_CHECK_INTS). + + Every VM can run in parallel. + + Ruby threads are scheduled by the OS thread scheduler. + +------------------------------------------------------------------------ + + model 3: + Every thread runs concurrently or in parallel, and to access a shared object + exclusive access control is needed. For example, to access a String + object or an Array object, a fine grain lock must be locked every time. 
+ */ + + +/* for model 2 */ + +#include "eval_intern.h" +#include "gc.h" + +#ifndef USE_NATIVE_THREAD_PRIORITY +#define USE_NATIVE_THREAD_PRIORITY 0 +#define RUBY_THREAD_PRIORITY_MAX 3 +#define RUBY_THREAD_PRIORITY_MIN -3 +#endif + +#ifndef THREAD_DEBUG +#define THREAD_DEBUG 0 +#endif + +VALUE rb_cMutex; +VALUE rb_cBarrier; + +static void sleep_timeval(rb_thread_t *th, struct timeval time); +static void sleep_wait_for_interrupt(rb_thread_t *th, double sleepsec); +static void sleep_forever(rb_thread_t *th, int nodeadlock); +static double timeofday(void); +struct timeval rb_time_interval(VALUE); +static int rb_thread_dead(rb_thread_t *th); + +static void rb_check_deadlock(rb_vm_t *vm); + +int rb_signal_buff_size(void); +void rb_signal_exec(rb_thread_t *th, int sig); +void rb_disable_interrupt(void); +void rb_thread_stop_timer_thread(void); + +static const VALUE eKillSignal = INT2FIX(0); +static const VALUE eTerminateSignal = INT2FIX(1); +static volatile int system_working = 1; + +inline static void +st_delete_wrap(st_table *table, st_data_t key) +{ + st_delete(table, &key, 0); +} + +/********************************************************************************/ + +#define THREAD_SYSTEM_DEPENDENT_IMPLEMENTATION + +struct rb_blocking_region_buffer { + enum rb_thread_status prev_status; + struct rb_unblock_callback oldubf; +}; + +static void set_unblock_function(rb_thread_t *th, rb_unblock_function_t *func, void *arg, + struct rb_unblock_callback *old); +static void reset_unblock_function(rb_thread_t *th, const struct rb_unblock_callback *old); + +static void inline blocking_region_begin(rb_thread_t *th, struct rb_blocking_region_buffer *region, + rb_unblock_function_t *func, void *arg); +static void inline blocking_region_end(rb_thread_t *th, struct rb_blocking_region_buffer *region); + +#define GVL_UNLOCK_BEGIN() do { \ + rb_thread_t *_th_stored = GET_THREAD(); \ + rb_gc_save_machine_context(_th_stored); \ + native_mutex_unlock(&_th_stored->vm->global_vm_lock) + 
+#define GVL_UNLOCK_END() \ + native_mutex_lock(&_th_stored->vm->global_vm_lock); \ + rb_thread_set_current(_th_stored); \ +} while(0) + +#define BLOCKING_REGION_CORE(exec) do { \ + GVL_UNLOCK_BEGIN(); {\ + exec; \ + } \ + GVL_UNLOCK_END(); \ +} while(0); + +#define BLOCKING_REGION(exec, ubf, ubfarg) do { \ + rb_thread_t *__th = GET_THREAD(); \ + struct rb_blocking_region_buffer __region; \ + blocking_region_begin(__th, &__region, ubf, ubfarg); \ + exec; \ + blocking_region_end(__th, &__region); \ + RUBY_VM_CHECK_INTS(); \ +} while(0) + +#if THREAD_DEBUG +#ifdef HAVE_VA_ARGS_MACRO +void rb_thread_debug(const char *file, int line, const char *fmt, ...); +#define thread_debug(fmt, ...) rb_thread_debug(__FILE__, __LINE__, fmt, ##__VA_ARGS__) +#define POSITION_FORMAT "%s:%d:" +#define POSITION_ARGS ,file, line +#else +void rb_thread_debug(const char *fmt, ...); +#define thread_debug rb_thread_debug +#define POSITION_FORMAT +#define POSITION_ARGS +#endif + +# if THREAD_DEBUG < 0 +static int rb_thread_debug_enabled; + +static VALUE +rb_thread_s_debug(void) +{ + return INT2NUM(rb_thread_debug_enabled); +} + +static VALUE +rb_thread_s_debug_set(VALUE self, VALUE val) +{ + rb_thread_debug_enabled = RTEST(val); + return val; +} +# else +# define rb_thread_debug_enabled THREAD_DEBUG +# endif +#else +#define thread_debug if(0)printf +#endif + +#ifndef __ia64 +#define thread_start_func_2(th, st, rst) thread_start_func_2(th, st) +#endif +NOINLINE(static int thread_start_func_2(rb_thread_t *th, VALUE *stack_start, + VALUE *register_stack_start)); +static void timer_thread_function(void *); + +#if defined(_WIN32) +#include "thread_win32.c" + +#define DEBUG_OUT() \ + WaitForSingleObject(&debug_mutex, INFINITE); \ + printf(POSITION_FORMAT"%p - %s" POSITION_ARGS, GetCurrentThreadId(), buf); \ + fflush(stdout); \ + ReleaseMutex(&debug_mutex); + +#elif defined(HAVE_PTHREAD_H) +#include "thread_pthread.c" + +#define DEBUG_OUT() \ + pthread_mutex_lock(&debug_mutex); \ + 
printf(POSITION_FORMAT"%#"PRIxVALUE" - %s" POSITION_ARGS, (VALUE)pthread_self(), buf); \ + fflush(stdout); \ + pthread_mutex_unlock(&debug_mutex); + +#else +#error "unsupported thread type" +#endif + +#if THREAD_DEBUG +static int debug_mutex_initialized = 1; +static rb_thread_lock_t debug_mutex; + +void +rb_thread_debug( +#ifdef HAVE_VA_ARGS_MACRO + const char *file, int line, +#endif + const char *fmt, ...) +{ + va_list args; + char buf[BUFSIZ]; + + if (!rb_thread_debug_enabled) return; + + if (debug_mutex_initialized == 1) { + debug_mutex_initialized = 0; + native_mutex_initialize(&debug_mutex); + } + + va_start(args, fmt); + vsnprintf(buf, BUFSIZ, fmt, args); + va_end(args); + + DEBUG_OUT(); +} +#endif + + +static void +set_unblock_function(rb_thread_t *th, rb_unblock_function_t *func, void *arg, + struct rb_unblock_callback *old) +{ + check_ints: + RUBY_VM_CHECK_INTS(); /* check signal or so */ + native_mutex_lock(&th->interrupt_lock); + if (th->interrupt_flag) { + native_mutex_unlock(&th->interrupt_lock); + goto check_ints; + } + else { + if (old) *old = th->unblock; + th->unblock.func = func; + th->unblock.arg = arg; + } + native_mutex_unlock(&th->interrupt_lock); +} + +static void +reset_unblock_function(rb_thread_t *th, const struct rb_unblock_callback *old) +{ + native_mutex_lock(&th->interrupt_lock); + th->unblock = *old; + native_mutex_unlock(&th->interrupt_lock); +} + +static void +rb_thread_interrupt(rb_thread_t *th) +{ + native_mutex_lock(&th->interrupt_lock); + RUBY_VM_SET_INTERRUPT(th); + if (th->unblock.func) { + (th->unblock.func)(th->unblock.arg); + } + else { + /* none */ + } + native_mutex_unlock(&th->interrupt_lock); +} + + +static int +terminate_i(st_data_t key, st_data_t val, rb_thread_t *main_thread) +{ + VALUE thval = key; + rb_thread_t *th; + GetThreadPtr(thval, th); + + if (th != main_thread) { + thread_debug("terminate_i: %p\n", (void *)th); + rb_thread_interrupt(th); + th->thrown_errinfo = eTerminateSignal; + th->status = 
THREAD_TO_KILL; + } + else { + thread_debug("terminate_i: main thread (%p)\n", (void *)th); + } + return ST_CONTINUE; +} + +typedef struct rb_mutex_struct +{ + rb_thread_lock_t lock; + rb_thread_cond_t cond; + struct rb_thread_struct volatile *th; + volatile int cond_waiting, cond_notified; + struct rb_mutex_struct *next_mutex; +} mutex_t; + +static void rb_mutex_unlock_all(mutex_t *mutex, rb_thread_t *th); + +void +rb_thread_terminate_all(void) +{ + rb_thread_t *th = GET_THREAD(); /* main thread */ + rb_vm_t *vm = th->vm; + VALUE einfo = vm->main_thread->errinfo; + int state; + + if (vm->main_thread != th) { + rb_bug("rb_thread_terminate_all: called by child thread (%p, %p)", + (void *)vm->main_thread, (void *)th); + } + + /* unlock all locking mutexes */ + if (th->keeping_mutexes) { + rb_mutex_unlock_all(th->keeping_mutexes, GET_THREAD()); + } + + thread_debug("rb_thread_terminate_all (main thread: %p)\n", (void *)th); + st_foreach(vm->living_threads, terminate_i, (st_data_t)th); + + while (!rb_thread_alone()) { + PUSH_TAG(); + if ((state = EXEC_TAG()) == 0) { + rb_thread_schedule(); + } + POP_TAG(); + if (state && einfo != vm->main_thread->errinfo && + RUBY_VM_SET_INTERRUPT(vm->main_thread)) { + break; + } + } + rb_thread_stop_timer_thread(); +} + +static void +thread_cleanup_func_before_exec(void *th_ptr) +{ + rb_thread_t *th = th_ptr; + th->status = THREAD_KILLED; + th->machine_stack_start = th->machine_stack_end = 0; +#ifdef __ia64 + th->machine_register_stack_start = th->machine_register_stack_end = 0; +#endif +} + +static void +thread_cleanup_func(void *th_ptr) +{ + rb_thread_t *th = th_ptr; + + /* unlock all locking mutexes */ + if (th->keeping_mutexes) { + rb_mutex_unlock_all(th->keeping_mutexes, th); + th->keeping_mutexes = NULL; + } + thread_cleanup_func_before_exec(th_ptr); + native_thread_destroy(th); +} + +extern void ruby_error_print(void); +static VALUE rb_thread_raise(int, VALUE *, rb_thread_t *); +void rb_thread_recycle_stack_release(VALUE *); + 
+void +ruby_thread_init_stack(rb_thread_t *th) +{ + native_thread_init_stack(th); +} + +static int +thread_start_func_2(rb_thread_t *th, VALUE *stack_start, VALUE *register_stack_start) +{ + int state; + VALUE args = th->first_args; + rb_proc_t *proc; + rb_thread_t *join_th; + rb_thread_t *main_th; + VALUE errinfo = Qnil; + + ruby_thread_set_native(th); + + th->machine_stack_start = stack_start; +#ifdef __ia64 + th->machine_register_stack_start = register_stack_start; +#endif + thread_debug("thread start: %p\n", (void *)th); + + native_mutex_lock(&th->vm->global_vm_lock); + { + thread_debug("thread start (get lock): %p\n", (void *)th); + rb_thread_set_current(th); + + TH_PUSH_TAG(th); + if ((state = EXEC_TAG()) == 0) { + SAVE_ROOT_JMPBUF(th, { + if (th->first_proc) { + GetProcPtr(th->first_proc, proc); + th->errinfo = Qnil; + th->local_lfp = proc->block.lfp; + th->local_svar = Qnil; + th->value = rb_vm_invoke_proc(th, proc, proc->block.self, + RARRAY_LEN(args), RARRAY_PTR(args), 0); + } + else { + th->value = (*th->first_func)((void *)th->first_args); + } + }); + } + else { + errinfo = th->errinfo; + if (NIL_P(errinfo)) errinfo = rb_errinfo(); + if (state == TAG_FATAL) { + /* fatal error within this thread, need to stop whole script */ + } + else if (rb_obj_is_kind_of(errinfo, rb_eSystemExit)) { + if (th->safe_level >= 4) { + th->errinfo = rb_exc_new3(rb_eSecurityError, + rb_sprintf("Insecure exit at level %d", th->safe_level)); + errinfo = Qnil; + } + } + else if (th->safe_level < 4 && + (th->vm->thread_abort_on_exception || + th->abort_on_exception || RTEST(ruby_debug))) { + /* exit on main_thread */ + } + else { + errinfo = Qnil; + } + th->value = Qnil; + } + + th->status = THREAD_KILLED; + thread_debug("thread end: %p\n", (void *)th); + + main_th = th->vm->main_thread; + if (th != main_th) { + if (TYPE(errinfo) == T_OBJECT) { + /* treat with normal error object */ + rb_thread_raise(1, &errinfo, main_th); + } + } + TH_POP_TAG(); + + /* locking_mutex must be 
Qfalse */ + if (th->locking_mutex != Qfalse) { + rb_bug("thread_start_func_2: locking_mutex must not be set (%p:%"PRIxVALUE")", + (void *)th, th->locking_mutex); + } + + /* remove this thread from living_threads unless it is the main thread */ + if (th != main_th) { + st_delete_wrap(th->vm->living_threads, th->self); + } + + /* wake up joining threads */ + join_th = th->join_list_head; + while (join_th) { + if (join_th == main_th) errinfo = Qnil; + rb_thread_interrupt(join_th); + switch (join_th->status) { + case THREAD_STOPPED: case THREAD_STOPPED_FOREVER: + join_th->status = THREAD_RUNNABLE; + default: break; + } + join_th = join_th->join_list_next; + } + + if (!th->root_fiber) { + rb_thread_recycle_stack_release(th->stack); + th->stack = 0; + } + } + thread_cleanup_func(th); + if (th != main_th) rb_check_deadlock(th->vm); + if (th->vm->main_thread == th) { + ruby_cleanup(state); + } + native_mutex_unlock(&th->vm->global_vm_lock); + + return 0; +} + +static VALUE +thread_create_core(VALUE thval, VALUE args, VALUE (*fn)(ANYARGS)) +{ + rb_thread_t *th; + + if (OBJ_FROZEN(GET_THREAD()->thgroup)) { + rb_raise(rb_eThreadError, + "can't start a new thread (frozen ThreadGroup)"); + } + GetThreadPtr(thval, th); + + /* setup thread environment */ + th->first_func = fn; + th->first_proc = fn ? 
Qfalse : rb_block_proc(); + th->first_args = args; /* GC: shouldn't put before above line */ + + th->priority = GET_THREAD()->priority; + th->thgroup = GET_THREAD()->thgroup; + + native_mutex_initialize(&th->interrupt_lock); + /* kick thread */ + st_insert(th->vm->living_threads, thval, (st_data_t) th->thread_id); + native_thread_create(th); + return thval; +} + +static VALUE +thread_s_new(int argc, VALUE *argv, VALUE klass) +{ + rb_thread_t *th; + VALUE thread = rb_thread_alloc(klass); + rb_obj_call_init(thread, argc, argv); + GetThreadPtr(thread, th); + if (!th->first_args) { + rb_raise(rb_eThreadError, "uninitialized thread - check `%s#initialize'", + rb_class2name(klass)); + } + return thread; +} + +/* + * call-seq: + * Thread.start([args]*) {|args| block } => thread + * Thread.fork([args]*) {|args| block } => thread + * + * Basically the same as Thread::new. However, if class + * Thread is subclassed, then calling start in that + * subclass will not invoke the subclass's initialize method. 
+ */ + +static VALUE +thread_start(VALUE klass, VALUE args) +{ + return thread_create_core(rb_thread_alloc(klass), args, 0); +} + +static VALUE +thread_initialize(VALUE thread, VALUE args) +{ + rb_thread_t *th; + if (!rb_block_given_p()) { + rb_raise(rb_eThreadError, "must be called with a block"); + } + GetThreadPtr(thread, th); + if (th->first_args) { + VALUE rb_proc_location(VALUE self); + VALUE proc = th->first_proc, line, loc; + const char *file; + if (!proc || !RTEST(loc = rb_proc_location(proc))) { + rb_raise(rb_eThreadError, "already initialized thread"); + } + file = RSTRING_PTR(RARRAY_PTR(loc)[0]); + if (NIL_P(line = RARRAY_PTR(loc)[1])) { + rb_raise(rb_eThreadError, "already initialized thread - %s", + file); + } + rb_raise(rb_eThreadError, "already initialized thread - %s:%d", + file, NUM2INT(line)); + } + return thread_create_core(thread, args, 0); +} + +VALUE +rb_thread_create(VALUE (*fn)(ANYARGS), void *arg) +{ + return thread_create_core(rb_thread_alloc(rb_cThread), (VALUE)arg, fn); +} + + +/* +infty, for this purpose */ +#define DELAY_INFTY 1E30 + +struct join_arg { + rb_thread_t *target, *waiting; + double limit; + int forever; +}; + +static VALUE +remove_from_join_list(VALUE arg) +{ + struct join_arg *p = (struct join_arg *)arg; + rb_thread_t *target_th = p->target, *th = p->waiting; + + if (target_th->status != THREAD_KILLED) { + rb_thread_t **pth = &target_th->join_list_head; + + while (*pth) { + if (*pth == th) { + *pth = th->join_list_next; + break; + } + pth = &(*pth)->join_list_next; + } + } + + return Qnil; +} + +static VALUE +thread_join_sleep(VALUE arg) +{ + struct join_arg *p = (struct join_arg *)arg; + rb_thread_t *target_th = p->target, *th = p->waiting; + double now, limit = p->limit; + + while (target_th->status != THREAD_KILLED) { + if (p->forever) { + sleep_forever(th, 1); + } + else { + now = timeofday(); + if (now > limit) { + thread_debug("thread_join: timeout (thid: %p)\n", + (void *)target_th->thread_id); + return Qfalse; + } 
+ sleep_wait_for_interrupt(th, limit - now); + } + thread_debug("thread_join: interrupted (thid: %p)\n", + (void *)target_th->thread_id); + } + return Qtrue; +} + +static VALUE +thread_join(rb_thread_t *target_th, double delay) +{ + rb_thread_t *th = GET_THREAD(); + struct join_arg arg; + + arg.target = target_th; + arg.waiting = th; + arg.limit = timeofday() + delay; + arg.forever = delay == DELAY_INFTY; + + thread_debug("thread_join (thid: %p)\n", (void *)target_th->thread_id); + + if (target_th->status != THREAD_KILLED) { + th->join_list_next = target_th->join_list_head; + target_th->join_list_head = th; + if (!rb_ensure(thread_join_sleep, (VALUE)&arg, + remove_from_join_list, (VALUE)&arg)) { + return Qnil; + } + } + + thread_debug("thread_join: success (thid: %p)\n", + (void *)target_th->thread_id); + + if (target_th->errinfo != Qnil) { + VALUE err = target_th->errinfo; + + if (FIXNUM_P(err)) { + /* */ + } + else if (TYPE(target_th->errinfo) == T_NODE) { + rb_exc_raise(rb_vm_make_jump_tag_but_local_jump( + GET_THROWOBJ_STATE(err), GET_THROWOBJ_VAL(err))); + } + else { + /* normal exception */ + rb_exc_raise(err); + } + } + return target_th->self; +} + +/* + * call-seq: + * thr.join => thr + * thr.join(limit) => thr + * + * The calling thread will suspend execution and run thr. Does not + * return until thr exits or until limit seconds have passed. If + * the time limit expires, nil will be returned, otherwise + * thr is returned. + * + * Any threads not joined will be killed when the main program exits. If + * thr had previously raised an exception and the + * abort_on_exception and $DEBUG flags are not set + * (so the exception has not yet been processed) it will be processed at this + * time. + * + * a = Thread.new { print "a"; sleep(10); print "b"; print "c" } + * x = Thread.new { print "x"; Thread.pass; print "y"; print "z" } + * x.join # Let x thread finish, a will be killed on exit. 
+ * + * produces: + * + * axyz + * + * The following example illustrates the limit parameter. + * + * y = Thread.new { 4.times { sleep 0.1; puts 'tick... ' }} + * puts "Waiting" until y.join(0.15) + * + * produces: + * + * tick... + * Waiting + * tick... + * Waitingtick... + * + * + * tick... + */ + +static VALUE +thread_join_m(int argc, VALUE *argv, VALUE self) +{ + rb_thread_t *target_th; + double delay = DELAY_INFTY; + VALUE limit; + + GetThreadPtr(self, target_th); + + rb_scan_args(argc, argv, "01", &limit); + if (!NIL_P(limit)) { + delay = rb_num2dbl(limit); + } + + return thread_join(target_th, delay); +} + +/* + * call-seq: + * thr.value => obj + * + * Waits for thr to complete (via Thread#join) and returns + * its value. + * + * a = Thread.new { 2 + 2 } + * a.value #=> 4 + */ + +static VALUE +thread_value(VALUE self) +{ + rb_thread_t *th; + GetThreadPtr(self, th); + thread_join(th, DELAY_INFTY); + return th->value; +} + +/* + * Thread Scheduling + */ + +static struct timeval +double2timeval(double d) +{ + struct timeval time; + + time.tv_sec = (int)d; + time.tv_usec = (int)((d - (int)d) * 1e6); + if (time.tv_usec < 0) { + time.tv_usec += (long)1e6; + time.tv_sec -= 1; + } + return time; +} + +static void +sleep_forever(rb_thread_t *th, int deadlockable) +{ + enum rb_thread_status prev_status = th->status; + + th->status = deadlockable ? 
THREAD_STOPPED_FOREVER : THREAD_STOPPED; + do { + if (deadlockable) { + th->vm->sleeper++; + rb_check_deadlock(th->vm); + } + native_sleep(th, 0); + if (deadlockable) { + th->vm->sleeper--; + } + RUBY_VM_CHECK_INTS(); + } while (th->status == THREAD_STOPPED_FOREVER); + th->status = prev_status; +} + +static void +getclockofday(struct timeval *tp) +{ +#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC) + struct timespec ts; + + if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) { + tp->tv_sec = ts.tv_sec; + tp->tv_usec = ts.tv_nsec / 1000; + } else +#endif + { + gettimeofday(tp, NULL); + } +} + +static void +sleep_timeval(rb_thread_t *th, struct timeval tv) +{ + struct timeval to, tvn; + enum rb_thread_status prev_status = th->status; + + getclockofday(&to); + to.tv_sec += tv.tv_sec; + if ((to.tv_usec += tv.tv_usec) >= 1000000) { + to.tv_sec++; + to.tv_usec -= 1000000; + } + + th->status = THREAD_STOPPED; + do { + native_sleep(th, &tv); + RUBY_VM_CHECK_INTS(); + getclockofday(&tvn); + if (to.tv_sec < tvn.tv_sec) break; + if (to.tv_sec == tvn.tv_sec && to.tv_usec <= tvn.tv_usec) break; + thread_debug("sleep_timeval: %ld.%.6ld > %ld.%.6ld\n", + (long)to.tv_sec, (long)to.tv_usec, + (long)tvn.tv_sec, (long)tvn.tv_usec); + tv.tv_sec = to.tv_sec - tvn.tv_sec; + if ((tv.tv_usec = to.tv_usec - tvn.tv_usec) < 0) { + --tv.tv_sec; + tv.tv_usec += 1000000; + } + } while (th->status == THREAD_STOPPED); + th->status = prev_status; +} + +void +rb_thread_sleep_forever() +{ + thread_debug("rb_thread_sleep_forever\n"); + sleep_forever(GET_THREAD(), 0); +} + +static void +rb_thread_sleep_deadly() +{ + thread_debug("rb_thread_sleep_deadly\n"); + sleep_forever(GET_THREAD(), 1); +} + +static double +timeofday(void) +{ +#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC) + struct timespec tp; + + if (clock_gettime(CLOCK_MONOTONIC, &tp) == 0) { + return (double)tp.tv_sec + (double)tp.tv_nsec * 1e-9; + } else +#endif + { + struct timeval tv; + gettimeofday(&tv, NULL); + return 
(double)tv.tv_sec + (double)tv.tv_usec * 1e-6; + } +} + +static void +sleep_wait_for_interrupt(rb_thread_t *th, double sleepsec) +{ + sleep_timeval(th, double2timeval(sleepsec)); +} + +static void +sleep_for_polling(rb_thread_t *th) +{ + struct timeval time; + time.tv_sec = 0; + time.tv_usec = 100 * 1000; /* 0.1 sec */ + sleep_timeval(th, time); +} + +void +rb_thread_wait_for(struct timeval time) +{ + rb_thread_t *th = GET_THREAD(); + sleep_timeval(th, time); +} + +void +rb_thread_polling(void) +{ + RUBY_VM_CHECK_INTS(); + if (!rb_thread_alone()) { + rb_thread_t *th = GET_THREAD(); + sleep_for_polling(th); + } +} + +/* + * CAUTION: This function causes thread switching. + * rb_thread_check_ints() check ruby's interrupts. + * some interrupt needs thread switching/invoke handlers, + * and so on. + */ + +void +rb_thread_check_ints(void) +{ + RUBY_VM_CHECK_INTS(); +} + +/* + * Hidden API for tcl/tk wrapper. + * There is no guarantee to perpetuate it. + */ +int +rb_thread_check_trap_pending(void) +{ + return GET_THREAD()->exec_signal != 0; +} + +/* This function can be called in blocking region. 
*/ +int +rb_thread_interrupted(VALUE thval) +{ + rb_thread_t *th; + GetThreadPtr(thval, th); + return RUBY_VM_INTERRUPTED(th); +} + +struct timeval rb_time_timeval(VALUE); + +void +rb_thread_sleep(int sec) +{ + rb_thread_wait_for(rb_time_timeval(INT2FIX(sec))); +} + +static void rb_thread_execute_interrupts_rec(rb_thread_t *, int); + +static void +rb_thread_schedule_rec(int sched_depth) +{ + thread_debug("rb_thread_schedule\n"); + if (!rb_thread_alone()) { + rb_thread_t *th = GET_THREAD(); + + thread_debug("rb_thread_schedule/switch start\n"); + + rb_gc_save_machine_context(th); + native_mutex_unlock(&th->vm->global_vm_lock); + { + native_thread_yield(); + } + native_mutex_lock(&th->vm->global_vm_lock); + + rb_thread_set_current(th); + thread_debug("rb_thread_schedule/switch done\n"); + + if (!sched_depth && UNLIKELY(GET_THREAD()->interrupt_flag)) { + rb_thread_execute_interrupts_rec(GET_THREAD(), sched_depth+1); + } + } +} + +void +rb_thread_schedule(void) +{ + rb_thread_schedule_rec(0); +} + +/* blocking region */ +static inline void +blocking_region_begin(rb_thread_t *th, struct rb_blocking_region_buffer *region, + rb_unblock_function_t *func, void *arg) +{ + region->prev_status = th->status; + th->blocking_region_buffer = region; + set_unblock_function(th, func, arg, &region->oldubf); + th->status = THREAD_STOPPED; + thread_debug("enter blocking region (%p)\n", (void *)th); + rb_gc_save_machine_context(th); + native_mutex_unlock(&th->vm->global_vm_lock); +} + +static inline void +blocking_region_end(rb_thread_t *th, struct rb_blocking_region_buffer *region) +{ + native_mutex_lock(&th->vm->global_vm_lock); + rb_thread_set_current(th); + thread_debug("leave blocking region (%p)\n", (void *)th); + remove_signal_thread_list(th); + th->blocking_region_buffer = 0; + reset_unblock_function(th, &region->oldubf); + if (th->status == THREAD_STOPPED) { + th->status = region->prev_status; + } +} + +struct rb_blocking_region_buffer * +rb_thread_blocking_region_begin(void) +{ +
rb_thread_t *th = GET_THREAD(); + struct rb_blocking_region_buffer *region = ALLOC(struct rb_blocking_region_buffer); + blocking_region_begin(th, region, ubf_select, th); + return region; +} + +void +rb_thread_blocking_region_end(struct rb_blocking_region_buffer *region) +{ + rb_thread_t *th = GET_THREAD(); + blocking_region_end(th, region); + xfree(region); + RUBY_VM_CHECK_INTS(); +} + +/* + * rb_thread_blocking_region - permit concurrent/parallel execution. + * + * This function does: + * (1) release GVL. + * Other Ruby threads may run in parallel. + * (2) call func with data1. + * (3) acquire GVL. + * Other Ruby threads can not run in parallel any more. + * + * If another thread interrupts this thread (Thread#kill, signal delivery, + * VM-shutdown request, and so on), `ubf()' is called (`ubf()' means + * "un-blocking function"). `ubf()' should interrupt `func()' execution. + * + * There are built-in ubfs and you can specify these ubfs. + * However, we can not guarantee our built-in ubfs interrupt + * your `func()' correctly. Be careful to use rb_thread_blocking_region(). + * + * * RUBY_UBF_IO: ubf for IO operation + * * RUBY_UBF_PROCESS: ubf for process operation + * + * NOTE: You can not execute most of Ruby C API and touch Ruby objects + * in `func()' and `ubf()' because current thread doesn't acquire + * GVL (cause synchronization problem). If you need to do it, + * read source code of C APIs and confirm by yourself. + * + * NOTE: In short, this API is difficult to use safely. I recommend you + * use other ways if you have. We lack experiences to use this API. + * Please report your problem related on it. + * + * Safe C API: + * * rb_thread_interrupted() - check interrupt flag + * * ruby_xalloc(), ruby_xrealloc(), ruby_xfree() - + * if they are called without GVL, acquire GVL automatically. 
+ */ +VALUE +rb_thread_blocking_region( + rb_blocking_function_t *func, void *data1, + rb_unblock_function_t *ubf, void *data2) +{ + VALUE val; + rb_thread_t *th = GET_THREAD(); + + if (ubf == RUBY_UBF_IO || ubf == RUBY_UBF_PROCESS) { + ubf = ubf_select; + data2 = th; + } + + BLOCKING_REGION({ + val = func(data1); + }, ubf, data2); + + return val; +} + +/* alias of rb_thread_blocking_region() */ + +VALUE +rb_thread_call_without_gvl( + rb_blocking_function_t *func, void *data1, + rb_unblock_function_t *ubf, void *data2) +{ + return rb_thread_blocking_region(func, data1, ubf, data2); +} + +/* + * rb_thread_call_with_gvl - re-enter into Ruby world while releasing GVL. + * + *** + *** This API is EXPERIMENTAL! + *** We do not guarantee that this API remains in ruby 1.9.2 or later. + *** + * + * While releasing GVL using rb_thread_blocking_region() or + * rb_thread_call_without_gvl(), you can not access Ruby values or invoke methods. + * If you need to access it, you must use this function rb_thread_call_with_gvl(). + * + * This function rb_thread_call_with_gvl() does: + * (1) acquire GVL. + * (2) call passed function `func'. + * (3) release GVL. + * (4) return a value which is returned at (2). + * + * NOTE: You should not return Ruby object at (2) because such Object + * will not marked. + * + * NOTE: If an exception is raised in `func', this function "DOES NOT" + * protect (catch) the exception. If you have any resources + * which should free before throwing exception, you need use + * rb_protect() in `func' and return a value which represents + * exception is raised. + * + * NOTE: This functions should not be called by a thread which + * is not created as Ruby thread (created by Thread.new or so). + * In other words, this function *DOES NOT* associate + * NON-Ruby thread to Ruby thread. 
+ */ +void * +rb_thread_call_with_gvl(void *(*func)(void *), void *data1) +{ + rb_thread_t *th = ruby_thread_from_native(); + struct rb_blocking_region_buffer *brb; + struct rb_unblock_callback prev_unblock; + void *r; + + if (th == 0) { + /* Error is occurred, but we can't use rb_bug() + * because this thread is not Ruby's thread. + * What should we do? + */ + + fprintf(stderr, "[BUG] rb_thread_call_with_gvl() is called by non-ruby thread\n"); + exit(1); + } + + brb = (struct rb_blocking_region_buffer *)th->blocking_region_buffer; + prev_unblock = th->unblock; + + if (brb == 0) { + rb_bug("rb_thread_call_with_gvl: called by a thread which has GVL."); + } + + blocking_region_end(th, brb); + /* enter to Ruby world: You can access Ruby values, methods and so on. */ + r = (*func)(data1); + /* levae from Ruby world: You can not access Ruby values, etc. */ + blocking_region_begin(th, brb, prev_unblock.func, prev_unblock.arg); + return r; +} + +/* + * ruby_thread_has_gvl_p - check if current native thread has GVL. + * + *** + *** This API is EXPERIMENTAL! + *** We do not guarantee that this API remains in ruby 1.9.2 or later. + *** + */ + +int +ruby_thread_has_gvl_p(void) +{ + rb_thread_t *th = ruby_thread_from_native(); + + if (th && th->blocking_region_buffer == 0) { + return 1; + } + else { + return 0; + } +} + +/* + * call-seq: + * Thread.pass => nil + * + * Invokes the thread scheduler to pass execution to another thread. 
+ * + * a = Thread.new { print "a"; Thread.pass; + * print "b"; Thread.pass; + * print "c" } + * b = Thread.new { print "x"; Thread.pass; + * print "y"; Thread.pass; + * print "z" } + * a.join + * b.join + * + * produces: + * + * axbycz + */ + +static VALUE +thread_s_pass(VALUE klass) +{ + rb_thread_schedule(); + return Qnil; +} + +/* + * + */ + +static void +rb_thread_execute_interrupts_rec(rb_thread_t *th, int sched_depth) +{ + if (GET_VM()->main_thread == th) { + while (rb_signal_buff_size() && !th->exec_signal) native_thread_yield(); + } + + if (th->raised_flag) return; + + while (th->interrupt_flag) { + enum rb_thread_status status = th->status; + int timer_interrupt = th->interrupt_flag & 0x01; + int finalizer_interrupt = th->interrupt_flag & 0x04; + + th->status = THREAD_RUNNABLE; + th->interrupt_flag = 0; + + /* signal handling */ + if (th->exec_signal) { + int sig = th->exec_signal; + th->exec_signal = 0; + rb_signal_exec(th, sig); + } + + /* exception from another thread */ + if (th->thrown_errinfo) { + VALUE err = th->thrown_errinfo; + th->thrown_errinfo = 0; + thread_debug("rb_thread_execute_interrupts: %ld\n", err); + + if (err == eKillSignal || err == eTerminateSignal) { + th->errinfo = INT2FIX(TAG_FATAL); + TH_JUMP_TAG(th, TAG_FATAL); + } + else { + rb_exc_raise(err); + } + } + th->status = status; + + if (finalizer_interrupt) { + rb_gc_finalize_deferred(); + } + + if (!sched_depth && timer_interrupt) { + sched_depth++; + EXEC_EVENT_HOOK(th, RUBY_EVENT_SWITCH, th->cfp->self, 0, 0); + + if (th->slice > 0) { + th->slice--; + } + else { + reschedule: + rb_thread_schedule_rec(sched_depth+1); + if (th->slice < 0) { + th->slice++; + goto reschedule; + } + else { + th->slice = th->priority; + } + } + } + } +} + +void +rb_thread_execute_interrupts(rb_thread_t *th) +{ + rb_thread_execute_interrupts_rec(th, 0); +} + +void +rb_gc_mark_threads(void) +{ + /* TODO: remove */ +} + +/*****************************************************/ + +static void 
+rb_thread_ready(rb_thread_t *th) +{ + rb_thread_interrupt(th); +} + +static VALUE +rb_thread_raise(int argc, VALUE *argv, rb_thread_t *th) +{ + VALUE exc; + + again: + if (rb_thread_dead(th)) { + return Qnil; + } + + if (th->thrown_errinfo != 0 || th->raised_flag) { + rb_thread_schedule(); + goto again; + } + + exc = rb_make_exception(argc, argv); + th->thrown_errinfo = exc; + rb_thread_ready(th); + return Qnil; +} + +void +rb_thread_signal_raise(void *thptr, int sig) +{ + VALUE argv[2]; + rb_thread_t *th = thptr; + + argv[0] = rb_eSignal; + argv[1] = INT2FIX(sig); + rb_thread_raise(2, argv, th->vm->main_thread); +} + +void +rb_thread_signal_exit(void *thptr) +{ + VALUE argv[2]; + rb_thread_t *th = thptr; + + argv[0] = rb_eSystemExit; + argv[1] = rb_str_new2("exit"); + rb_thread_raise(2, argv, th->vm->main_thread); +} + +#if defined(POSIX_SIGNAL) && defined(SIGSEGV) && defined(HAVE_SIGALTSTACK) +#define USE_SIGALTSTACK +#endif + +void +ruby_thread_stack_overflow(rb_thread_t *th) +{ + th->raised_flag = 0; +#ifdef USE_SIGALTSTACK + th->raised_flag = 0; + rb_exc_raise(sysstack_error); +#else + th->errinfo = sysstack_error; + TH_JUMP_TAG(th, TAG_RAISE); +#endif +} + +int +rb_thread_set_raised(rb_thread_t *th) +{ + if (th->raised_flag & RAISED_EXCEPTION) { + return 1; + } + th->raised_flag |= RAISED_EXCEPTION; + return 0; +} + +int +rb_thread_reset_raised(rb_thread_t *th) +{ + if (!(th->raised_flag & RAISED_EXCEPTION)) { + return 0; + } + th->raised_flag &= ~RAISED_EXCEPTION; + return 1; +} + +void +rb_thread_fd_close(int fd) +{ + /* TODO: fix me */ +} + +/* + * call-seq: + * thr.raise(exception) + * + * Raises an exception (see Kernel::raise) from thr. The + * caller does not have to be thr. 
+ * + * Thread.abort_on_exception = true + * a = Thread.new { sleep(200) } + * a.raise("Gotcha") + * + * produces: + * + * prog.rb:3: Gotcha (RuntimeError) + * from prog.rb:2:in `initialize' + * from prog.rb:2:in `new' + * from prog.rb:2 + */ + +static VALUE +thread_raise_m(int argc, VALUE *argv, VALUE self) +{ + rb_thread_t *th; + GetThreadPtr(self, th); + rb_thread_raise(argc, argv, th); + return Qnil; +} + + +/* + * call-seq: + * thr.exit => thr or nil + * thr.kill => thr or nil + * thr.terminate => thr or nil + * + * Terminates thr and schedules another thread to be run. If this thread + * is already marked to be killed, exit returns the + * Thread. If this is the main thread, or the last thread, exits + * the process. + */ + +VALUE +rb_thread_kill(VALUE thread) +{ + rb_thread_t *th; + + GetThreadPtr(thread, th); + + if (th != GET_THREAD() && th->safe_level < 4) { + rb_secure(4); + } + if (th->status == THREAD_TO_KILL || th->status == THREAD_KILLED) { + return thread; + } + if (th == th->vm->main_thread) { + rb_exit(EXIT_SUCCESS); + } + + thread_debug("rb_thread_kill: %p (%p)\n", (void *)th, (void *)th->thread_id); + + rb_thread_interrupt(th); + th->thrown_errinfo = eKillSignal; + th->status = THREAD_TO_KILL; + + return thread; +} + + +/* + * call-seq: + * Thread.kill(thread) => thread + * + * Causes the given thread to exit (see Thread::exit). + * + * count = 0 + * a = Thread.new { loop { count += 1 } } + * sleep(0.1) #=> 0 + * Thread.kill(a) #=> # + * count #=> 93947 + * a.alive? #=> false + */ + +static VALUE +rb_thread_s_kill(VALUE obj, VALUE th) +{ + return rb_thread_kill(th); +} + + +/* + * call-seq: + * Thread.exit => thread + * + * Terminates the currently running thread and schedules another thread to be + * run. If this thread is already marked to be killed, exit + * returns the Thread. If this is the main thread, or the last + * thread, exit the process. 
+ */ + +static VALUE +rb_thread_exit(void) +{ + return rb_thread_kill(GET_THREAD()->self); +} + + +/* + * call-seq: + * thr.wakeup => thr + * + * Marks thr as eligible for scheduling (it may still remain blocked on + * I/O, however). Does not invoke the scheduler (see Thread#run). + * + * c = Thread.new { Thread.stop; puts "hey!" } + * c.wakeup + * + * produces: + * + * hey! + */ + +VALUE +rb_thread_wakeup(VALUE thread) +{ + rb_thread_t *th; + GetThreadPtr(thread, th); + + if (th->status == THREAD_KILLED) { + rb_raise(rb_eThreadError, "killed thread"); + } + rb_thread_ready(th); + if (th->status != THREAD_TO_KILL) { + th->status = THREAD_RUNNABLE; + } + return thread; +} + + +/* + * call-seq: + * thr.run => thr + * + * Wakes up thr, making it eligible for scheduling. + * + * a = Thread.new { puts "a"; Thread.stop; puts "c" } + * Thread.pass + * puts "Got here" + * a.run + * a.join + * + * produces: + * + * a + * Got here + * c + */ + +VALUE +rb_thread_run(VALUE thread) +{ + rb_thread_wakeup(thread); + rb_thread_schedule(); + return thread; +} + + +/* + * call-seq: + * Thread.stop => nil + * + * Stops execution of the current thread, putting it into a ``sleep'' state, + * and schedules execution of another thread. 
+ * + * a = Thread.new { print "a"; Thread.stop; print "c" } + * Thread.pass + * print "b" + * a.run + * a.join + * + * produces: + * + * abc + */ + +VALUE +rb_thread_stop(void) +{ + if (rb_thread_alone()) { + rb_raise(rb_eThreadError, + "stopping only thread\n\tnote: use sleep to stop forever"); + } + rb_thread_sleep_deadly(); + return Qnil; +} + +static int +thread_list_i(st_data_t key, st_data_t val, void *data) +{ + VALUE ary = (VALUE)data; + rb_thread_t *th; + GetThreadPtr((VALUE)key, th); + + switch (th->status) { + case THREAD_RUNNABLE: + case THREAD_STOPPED: + case THREAD_STOPPED_FOREVER: + case THREAD_TO_KILL: + rb_ary_push(ary, th->self); + default: + break; + } + return ST_CONTINUE; +} + +/********************************************************************/ + +/* + * call-seq: + * Thread.list => array + * + * Returns an array of Thread objects for all threads that are + * either runnable or stopped. + * + * Thread.new { sleep(200) } + * Thread.new { 1000000.times {|i| i*i } } + * Thread.new { Thread.stop } + * Thread.list.each {|t| p t} + * + * produces: + * + * # + * # + * # + * # + */ + +VALUE +rb_thread_list(void) +{ + VALUE ary = rb_ary_new(); + st_foreach(GET_THREAD()->vm->living_threads, thread_list_i, ary); + return ary; +} + +VALUE +rb_thread_current(void) +{ + return GET_THREAD()->self; +} + +/* + * call-seq: + * Thread.current => thread + * + * Returns the currently executing thread. + * + * Thread.current #=> # + */ + +static VALUE +thread_s_current(VALUE klass) +{ + return rb_thread_current(); +} + +VALUE +rb_thread_main(void) +{ + return GET_THREAD()->vm->main_thread->self; +} + +static VALUE +rb_thread_s_main(VALUE klass) +{ + return rb_thread_main(); +} + + +/* + * call-seq: + * Thread.abort_on_exception => true or false + * + * Returns the status of the global ``abort on exception'' condition. The + * default is false. 
When set to true, or if the + * global $DEBUG flag is true (perhaps because the + * command line option -d was specified) all threads will abort + * (the process will exit(0)) if an exception is raised in any + * thread. See also Thread::abort_on_exception=. + */ + +static VALUE +rb_thread_s_abort_exc(void) +{ + return GET_THREAD()->vm->thread_abort_on_exception ? Qtrue : Qfalse; +} + + +/* + * call-seq: + * Thread.abort_on_exception= boolean => true or false + * + * When set to true, all threads will abort if an exception is + * raised. Returns the new state. + * + * Thread.abort_on_exception = true + * t1 = Thread.new do + * puts "In new thread" + * raise "Exception from thread" + * end + * sleep(1) + * puts "not reached" + * + * produces: + * + * In new thread + * prog.rb:4: Exception from thread (RuntimeError) + * from prog.rb:2:in `initialize' + * from prog.rb:2:in `new' + * from prog.rb:2 + */ + +static VALUE +rb_thread_s_abort_exc_set(VALUE self, VALUE val) +{ + rb_secure(4); + GET_THREAD()->vm->thread_abort_on_exception = RTEST(val); + return val; +} + + +/* + * call-seq: + * thr.abort_on_exception => true or false + * + * Returns the status of the thread-local ``abort on exception'' condition for + * thr. The default is false. See also + * Thread::abort_on_exception=. + */ + +static VALUE +rb_thread_abort_exc(VALUE thread) +{ + rb_thread_t *th; + GetThreadPtr(thread, th); + return th->abort_on_exception ? Qtrue : Qfalse; +} + + +/* + * call-seq: + * thr.abort_on_exception= boolean => true or false + * + * When set to true, causes all threads (including the main + * program) to abort if an exception is raised in thr. The process will + * effectively exit(0). 
+ */ + +static VALUE +rb_thread_abort_exc_set(VALUE thread, VALUE val) +{ + rb_thread_t *th; + rb_secure(4); + + GetThreadPtr(thread, th); + th->abort_on_exception = RTEST(val); + return val; +} + + +/* + * call-seq: + * thr.group => thgrp or nil + * + * Returns the ThreadGroup which contains thr, or nil if + * the thread is not a member of any group. + * + * Thread.main.group #=> # + */ + +VALUE +rb_thread_group(VALUE thread) +{ + rb_thread_t *th; + VALUE group; + GetThreadPtr(thread, th); + group = th->thgroup; + + if (!group) { + group = Qnil; + } + return group; +} + +static const char * +thread_status_name(enum rb_thread_status status) +{ + switch (status) { + case THREAD_RUNNABLE: + return "run"; + case THREAD_STOPPED: + case THREAD_STOPPED_FOREVER: + return "sleep"; + case THREAD_TO_KILL: + return "aborting"; + case THREAD_KILLED: + return "dead"; + default: + return "unknown"; + } +} + +static int +rb_thread_dead(rb_thread_t *th) +{ + return th->status == THREAD_KILLED; +} + + +/* + * call-seq: + * thr.status => string, false or nil + * + * Returns the status of thr: ``sleep'' if thr is + * sleeping or waiting on I/O, ``run'' if thr is executing, + * ``aborting'' if thr is aborting, false if + * thr terminated normally, and nil if thr + * terminated with an exception. + * + * a = Thread.new { raise("die now") } + * b = Thread.new { Thread.stop } + * c = Thread.new { Thread.exit } + * d = Thread.new { sleep } + * d.kill #=> # + * a.status #=> nil + * b.status #=> "sleep" + * c.status #=> false + * d.status #=> "aborting" + * Thread.current.status #=> "run" + */ + +static VALUE +rb_thread_status(VALUE thread) +{ + rb_thread_t *th; + GetThreadPtr(thread, th); + + if (rb_thread_dead(th)) { + if (!NIL_P(th->errinfo) && !FIXNUM_P(th->errinfo) + /* TODO */ ) { + return Qnil; + } + return Qfalse; + } + return rb_str_new2(thread_status_name(th->status)); +} + + +/* + * call-seq: + * thr.alive? => true or false + * + * Returns true if thr is running or sleeping. 
+ * + * thr = Thread.new { } + * thr.join #=> # + * Thread.current.alive? #=> true + * thr.alive? #=> false + */ + +static VALUE +rb_thread_alive_p(VALUE thread) +{ + rb_thread_t *th; + GetThreadPtr(thread, th); + + if (rb_thread_dead(th)) + return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * thr.stop? => true or false + * + * Returns true if thr is dead or sleeping. + * + * a = Thread.new { Thread.stop } + * b = Thread.current + * a.stop? #=> true + * b.stop? #=> false + */ + +static VALUE +rb_thread_stop_p(VALUE thread) +{ + rb_thread_t *th; + GetThreadPtr(thread, th); + + if (rb_thread_dead(th)) + return Qtrue; + if (th->status == THREAD_STOPPED || th->status == THREAD_STOPPED_FOREVER) + return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * thr.safe_level => integer + * + * Returns the safe level in effect for thr. Setting thread-local safe + * levels can help when implementing sandboxes which run insecure code. + * + * thr = Thread.new { $SAFE = 3; sleep } + * Thread.current.safe_level #=> 0 + * thr.safe_level #=> 3 + */ + +static VALUE +rb_thread_safe_level(VALUE thread) +{ + rb_thread_t *th; + GetThreadPtr(thread, th); + + return INT2NUM(th->safe_level); +} + +/* + * call-seq: + * thr.inspect => string + * + * Dump the name, id, and status of _thr_ to a string. 
+ */ + +static VALUE +rb_thread_inspect(VALUE thread) +{ + const char *cname = rb_obj_classname(thread); + rb_thread_t *th; + const char *status; + VALUE str; + + GetThreadPtr(thread, th); + status = thread_status_name(th->status); + str = rb_sprintf("#<%s:%p %s>", cname, (void *)thread, status); + OBJ_INFECT(str, thread); + + return str; +} + +VALUE +rb_thread_local_aref(VALUE thread, ID id) +{ + rb_thread_t *th; + VALUE val; + + GetThreadPtr(thread, th); + if (rb_safe_level() >= 4 && th != GET_THREAD()) { + rb_raise(rb_eSecurityError, "Insecure: thread locals"); + } + if (!th->local_storage) { + return Qnil; + } + if (st_lookup(th->local_storage, id, &val)) { + return val; + } + return Qnil; +} + +/* + * call-seq: + * thr[sym] => obj or nil + * + * Attribute Reference---Returns the value of a thread-local variable, using + * either a symbol or a string name. If the specified variable does not exist, + * returns nil. + * + * a = Thread.new { Thread.current["name"] = "A"; Thread.stop } + * b = Thread.new { Thread.current[:name] = "B"; Thread.stop } + * c = Thread.new { Thread.current["name"] = "C"; Thread.stop } + * Thread.list.each {|x| puts "#{x.inspect}: #{x[:name]}" } + * + * produces: + * + * #: C + * #: B + * #: A + * #: + */ + +static VALUE +rb_thread_aref(VALUE thread, VALUE id) +{ + return rb_thread_local_aref(thread, rb_to_id(id)); +} + +VALUE +rb_thread_local_aset(VALUE thread, ID id, VALUE val) +{ + rb_thread_t *th; + GetThreadPtr(thread, th); + + if (rb_safe_level() >= 4 && th != GET_THREAD()) { + rb_raise(rb_eSecurityError, "Insecure: can't modify thread locals"); + } + if (OBJ_FROZEN(thread)) { + rb_error_frozen("thread locals"); + } + if (!th->local_storage) { + th->local_storage = st_init_numtable(); + } + if (NIL_P(val)) { + st_delete_wrap(th->local_storage, id); + return Qnil; + } + st_insert(th->local_storage, id, val); + return val; +} + +/* + * call-seq: + * thr[sym] = obj => obj + * + * Attribute Assignment---Sets or creates the value of a 
thread-local variable, + * using either a symbol or a string. See also Thread#[]. + */ + +static VALUE +rb_thread_aset(VALUE self, ID id, VALUE val) +{ + return rb_thread_local_aset(self, rb_to_id(id), val); +} + +/* + * call-seq: + * thr.key?(sym) => true or false + * + * Returns true if the given string (or symbol) exists as a + * thread-local variable. + * + * me = Thread.current + * me[:oliver] = "a" + * me.key?(:oliver) #=> true + * me.key?(:stanley) #=> false + */ + +static VALUE +rb_thread_key_p(VALUE self, VALUE key) +{ + rb_thread_t *th; + ID id = rb_to_id(key); + + GetThreadPtr(self, th); + + if (!th->local_storage) { + return Qfalse; + } + if (st_lookup(th->local_storage, id, 0)) { + return Qtrue; + } + return Qfalse; +} + +static int +thread_keys_i(ID key, VALUE value, VALUE ary) +{ + rb_ary_push(ary, ID2SYM(key)); + return ST_CONTINUE; +} + +static int +vm_living_thread_num(rb_vm_t *vm) +{ + return vm->living_threads->num_entries; +} + +int +rb_thread_alone() +{ + int num = 1; + if (GET_THREAD()->vm->living_threads) { + num = vm_living_thread_num(GET_THREAD()->vm); + thread_debug("rb_thread_alone: %d\n", num); + } + return num == 1; +} + +/* + * call-seq: + * thr.keys => array + * + * Returns an an array of the names of the thread-local variables (as Symbols). + * + * thr = Thread.new do + * Thread.current[:cat] = 'meow' + * Thread.current["dog"] = 'woof' + * end + * thr.join #=> # + * thr.keys #=> [:dog, :cat] + */ + +static VALUE +rb_thread_keys(VALUE self) +{ + rb_thread_t *th; + VALUE ary = rb_ary_new(); + GetThreadPtr(self, th); + + if (th->local_storage) { + st_foreach(th->local_storage, thread_keys_i, ary); + } + return ary; +} + +/* + * call-seq: + * thr.priority => integer + * + * Returns the priority of thr. Default is inherited from the + * current thread which creating the new thread, or zero for the + * initial main thread; higher-priority threads will run before + * lower-priority threads. 
+ * + * Thread.current.priority #=> 0 + */ + +static VALUE +rb_thread_priority(VALUE thread) +{ + rb_thread_t *th; + GetThreadPtr(thread, th); + return INT2NUM(th->priority); +} + + +/* + * call-seq: + * thr.priority= integer => thr + * + * Sets the priority of thr to integer. Higher-priority threads + * will run before lower-priority threads. + * + * count1 = count2 = 0 + * a = Thread.new do + * loop { count1 += 1 } + * end + * a.priority = -1 + * + * b = Thread.new do + * loop { count2 += 1 } + * end + * b.priority = -2 + * sleep 1 #=> 1 + * count1 #=> 622504 + * count2 #=> 5832 + */ + +static VALUE +rb_thread_priority_set(VALUE thread, VALUE prio) +{ + rb_thread_t *th; + int priority; + GetThreadPtr(thread, th); + + rb_secure(4); + +#if USE_NATIVE_THREAD_PRIORITY + th->priority = NUM2INT(prio); + native_thread_apply_priority(th); +#else + priority = NUM2INT(prio); + if (priority > RUBY_THREAD_PRIORITY_MAX) { + priority = RUBY_THREAD_PRIORITY_MAX; + } + else if (priority < RUBY_THREAD_PRIORITY_MIN) { + priority = RUBY_THREAD_PRIORITY_MIN; + } + th->priority = priority; + th->slice = priority; +#endif + return INT2NUM(th->priority); +} + +/* for IO */ + +#if defined(NFDBITS) && defined(HAVE_RB_FD_INIT) + +/* + * several Unix platforms support file descriptors bigger than FD_SETSIZE + * in select(2) system call. + * + * - Linux 2.2.12 (?) + * - NetBSD 1.2 (src/sys/kern/sys_generic.c:1.25) + * select(2) documents how to allocate fd_set dynamically. + * http://netbsd.gw.com/cgi-bin/man-cgi?select++NetBSD-4.0 + * - FreeBSD 2.2 (src/sys/kern/sys_generic.c:1.19) + * - OpenBSD 2.0 (src/sys/kern/sys_generic.c:1.4) + * select(2) documents how to allocate fd_set dynamically. + * http://www.openbsd.org/cgi-bin/man.cgi?query=select&manpath=OpenBSD+4.4 + * - HP-UX documents how to allocate fd_set dynamically. 
+ * http://docs.hp.com/en/B2355-60105/select.2.html + * - Solaris 8 has select_large_fdset + * + * When fd_set is not big enough to hold big file descriptors, + * it should be allocated dynamically. + * Note that this assumes fd_set is structured as bitmap. + * + * rb_fd_init allocates the memory. + * rb_fd_term free the memory. + * rb_fd_set may re-allocates bitmap. + * + * So rb_fd_set doesn't reject file descriptors bigger than FD_SETSIZE. + */ + +void +rb_fd_init(volatile rb_fdset_t *fds) +{ + fds->maxfd = 0; + fds->fdset = ALLOC(fd_set); + FD_ZERO(fds->fdset); +} + +void +rb_fd_term(rb_fdset_t *fds) +{ + if (fds->fdset) xfree(fds->fdset); + fds->maxfd = 0; + fds->fdset = 0; +} + +void +rb_fd_zero(rb_fdset_t *fds) +{ + if (fds->fdset) { + MEMZERO(fds->fdset, fd_mask, howmany(fds->maxfd, NFDBITS)); + FD_ZERO(fds->fdset); + } +} + +static void +rb_fd_resize(int n, rb_fdset_t *fds) +{ + int m = howmany(n + 1, NFDBITS) * sizeof(fd_mask); + int o = howmany(fds->maxfd, NFDBITS) * sizeof(fd_mask); + + if (m < sizeof(fd_set)) m = sizeof(fd_set); + if (o < sizeof(fd_set)) o = sizeof(fd_set); + + if (m > o) { + fds->fdset = realloc(fds->fdset, m); + memset((char *)fds->fdset + o, 0, m - o); + } + if (n >= fds->maxfd) fds->maxfd = n + 1; +} + +void +rb_fd_set(int n, rb_fdset_t *fds) +{ + rb_fd_resize(n, fds); + FD_SET(n, fds->fdset); +} + +void +rb_fd_clr(int n, rb_fdset_t *fds) +{ + if (n >= fds->maxfd) return; + FD_CLR(n, fds->fdset); +} + +int +rb_fd_isset(int n, const rb_fdset_t *fds) +{ + if (n >= fds->maxfd) return 0; + return FD_ISSET(n, fds->fdset) != 0; /* "!= 0" avoids FreeBSD PR 91421 */ +} + +void +rb_fd_copy(rb_fdset_t *dst, const fd_set *src, int max) +{ + int size = howmany(max, NFDBITS) * sizeof(fd_mask); + + if (size < sizeof(fd_set)) size = sizeof(fd_set); + dst->maxfd = max; + dst->fdset = realloc(dst->fdset, size); + memcpy(dst->fdset, src, size); +} + +int +rb_fd_select(int n, rb_fdset_t *readfds, rb_fdset_t *writefds, rb_fdset_t *exceptfds, struct 
timeval *timeout) +{ + fd_set *r = NULL, *w = NULL, *e = NULL; + if (readfds) { + rb_fd_resize(n - 1, readfds); + r = rb_fd_ptr(readfds); + } + if (writefds) { + rb_fd_resize(n - 1, writefds); + w = rb_fd_ptr(writefds); + } + if (exceptfds) { + rb_fd_resize(n - 1, exceptfds); + e = rb_fd_ptr(exceptfds); + } + return select(n, r, w, e, timeout); +} + +#undef FD_ZERO +#undef FD_SET +#undef FD_CLR +#undef FD_ISSET + +#define FD_ZERO(f) rb_fd_zero(f) +#define FD_SET(i, f) rb_fd_set(i, f) +#define FD_CLR(i, f) rb_fd_clr(i, f) +#define FD_ISSET(i, f) rb_fd_isset(i, f) + +#elif defined(_WIN32) + +void +rb_fd_init(volatile rb_fdset_t *set) +{ + set->capa = FD_SETSIZE; + set->fdset = ALLOC(fd_set); + FD_ZERO(set->fdset); +} + +void +rb_fd_term(rb_fdset_t *set) +{ + xfree(set->fdset); + set->fdset = NULL; + set->capa = 0; +} + +void +rb_fd_set(int fd, rb_fdset_t *set) +{ + unsigned int i; + SOCKET s = rb_w32_get_osfhandle(fd); + + for (i = 0; i < set->fdset->fd_count; i++) { + if (set->fdset->fd_array[i] == s) { + return; + } + } + if (set->fdset->fd_count >= set->capa) { + set->capa = (set->fdset->fd_count / FD_SETSIZE + 1) * FD_SETSIZE; + set->fdset = xrealloc(set->fdset, sizeof(unsigned int) + sizeof(SOCKET) * set->capa); + } + set->fdset->fd_array[set->fdset->fd_count++] = s; +} + +#undef FD_ZERO +#undef FD_SET +#undef FD_CLR +#undef FD_ISSET + +#define FD_ZERO(f) rb_fd_zero(f) +#define FD_SET(i, f) rb_fd_set(i, f) +#define FD_CLR(i, f) rb_fd_clr(i, f) +#define FD_ISSET(i, f) rb_fd_isset(i, f) + +#endif + +#if defined(__CYGWIN__) || defined(_WIN32) +static long +cmp_tv(const struct timeval *a, const struct timeval *b) +{ + long d = (a->tv_sec - b->tv_sec); + return (d != 0) ? 
d : (a->tv_usec - b->tv_usec); +} + +static int +subtract_tv(struct timeval *rest, const struct timeval *wait) +{ + while (rest->tv_usec < wait->tv_usec) { + if (rest->tv_sec <= wait->tv_sec) { + return 0; + } + rest->tv_sec -= 1; + rest->tv_usec += 1000 * 1000; + } + rest->tv_sec -= wait->tv_sec; + rest->tv_usec -= wait->tv_usec; + return 1; +} +#endif + +static int +do_select(int n, fd_set *read, fd_set *write, fd_set *except, + struct timeval *timeout) +{ + int result, lerrno; + fd_set orig_read, orig_write, orig_except; + +#ifndef linux + double limit = 0; + struct timeval wait_rest; +# if defined(__CYGWIN__) || defined(_WIN32) + struct timeval start_time; +# endif + + if (timeout) { +# if defined(__CYGWIN__) || defined(_WIN32) + gettimeofday(&start_time, NULL); + limit = (double)start_time.tv_sec + (double)start_time.tv_usec*1e-6; +# else + limit = timeofday(); +# endif + limit += (double)timeout->tv_sec+(double)timeout->tv_usec*1e-6; + wait_rest = *timeout; + timeout = &wait_rest; + } +#endif + + if (read) orig_read = *read; + if (write) orig_write = *write; + if (except) orig_except = *except; + + retry: + lerrno = 0; + +#if defined(__CYGWIN__) || defined(_WIN32) + { + int finish = 0; + /* polling duration: 100ms */ + struct timeval wait_100ms, *wait; + wait_100ms.tv_sec = 0; + wait_100ms.tv_usec = 100 * 1000; /* 100 ms */ + + do { + wait = (timeout == 0 || cmp_tv(&wait_100ms, timeout) > 0) ? 
&wait_100ms : timeout; + BLOCKING_REGION({ + do { + result = select(n, read, write, except, wait); + if (result < 0) lerrno = errno; + if (result != 0) break; + + if (read) *read = orig_read; + if (write) *write = orig_write; + if (except) *except = orig_except; + wait = &wait_100ms; + if (timeout) { + struct timeval elapsed; + gettimeofday(&elapsed, NULL); + subtract_tv(&elapsed, &start_time); + if (!subtract_tv(timeout, &elapsed)) { + finish = 1; + break; + } + if (cmp_tv(&wait_100ms, timeout) < 0) wait = timeout; + } + } while (__th->interrupt_flag == 0); + }, 0, 0); + } while (result == 0 && !finish); + } +#else + BLOCKING_REGION({ + result = select(n, read, write, except, timeout); + if (result < 0) lerrno = errno; + }, ubf_select, GET_THREAD()); +#endif + + errno = lerrno; + + if (result < 0) { + switch (errno) { + case EINTR: +#ifdef ERESTART + case ERESTART: +#endif + if (read) *read = orig_read; + if (write) *write = orig_write; + if (except) *except = orig_except; +#ifndef linux + if (timeout) { + double d = limit - timeofday(); + + wait_rest.tv_sec = (unsigned int)d; + wait_rest.tv_usec = (long)((d-(double)wait_rest.tv_sec)*1e6); + if (wait_rest.tv_sec < 0) wait_rest.tv_sec = 0; + if (wait_rest.tv_usec < 0) wait_rest.tv_usec = 0; + } +#endif + goto retry; + default: + break; + } + } + return result; +} + +static void +rb_thread_wait_fd_rw(int fd, int read) +{ + int result = 0; + thread_debug("rb_thread_wait_fd_rw(%d, %s)\n", fd, read ? "read" : "write"); + + if (fd < 0) { + rb_raise(rb_eIOError, "closed stream"); + } + if (rb_thread_alone()) return; + while (result <= 0) { + rb_fdset_t set; + rb_fd_init(&set); + FD_SET(fd, &set); + + if (read) { + result = do_select(fd + 1, rb_fd_ptr(&set), 0, 0, 0); + } + else { + result = do_select(fd + 1, 0, rb_fd_ptr(&set), 0, 0); + } + + rb_fd_term(&set); + + if (result < 0) { + rb_sys_fail(0); + } + } + + thread_debug("rb_thread_wait_fd_rw(%d, %s): done\n", fd, read ? 
"read" : "write"); +} + +void +rb_thread_wait_fd(int fd) +{ + rb_thread_wait_fd_rw(fd, 1); +} + +int +rb_thread_fd_writable(int fd) +{ + rb_thread_wait_fd_rw(fd, 0); + return Qtrue; +} + +int +rb_thread_select(int max, fd_set * read, fd_set * write, fd_set * except, + struct timeval *timeout) +{ + if (!read && !write && !except) { + if (!timeout) { + rb_thread_sleep_forever(); + return 0; + } + rb_thread_wait_for(*timeout); + return 0; + } + else { + return do_select(max, read, write, except, timeout); + } +} + + +/* + * for GC + */ + +#ifdef USE_CONSERVATIVE_STACK_END +void +rb_gc_set_stack_end(VALUE **stack_end_p) +{ + VALUE stack_end; + *stack_end_p = &stack_end; +} +#endif + +void +rb_gc_save_machine_context(rb_thread_t *th) +{ + SET_MACHINE_STACK_END(&th->machine_stack_end); + FLUSH_REGISTER_WINDOWS; +#ifdef __ia64 + th->machine_register_stack_end = rb_ia64_bsp(); +#endif + setjmp(th->machine_regs); +} + +/* + * + */ + +int rb_get_next_signal(void); + +static void +timer_thread_function(void *arg) +{ + rb_vm_t *vm = GET_VM(); /* TODO: fix me for Multi-VM */ + int sig; + rb_thread_t *mth; + + /* for time slice */ + RUBY_VM_SET_TIMER_INTERRUPT(vm->running_thread); + + /* check signal */ + mth = vm->main_thread; + if (!mth->exec_signal && (sig = rb_get_next_signal()) > 0) { + enum rb_thread_status prev_status = mth->status; + thread_debug("main_thread: %s, sig: %d\n", + thread_status_name(prev_status), sig); + mth->exec_signal = sig; + if (mth->status != THREAD_KILLED) mth->status = THREAD_RUNNABLE; + rb_thread_interrupt(mth); + mth->status = prev_status; + } + +#if 0 + /* prove profiler */ + if (vm->prove_profile.enable) { + rb_thread_t *th = vm->running_thread; + + if (vm->during_gc) { + /* GC prove profiling */ + } + } +#endif +} + +void +rb_thread_stop_timer_thread(void) +{ + if (timer_thread_id && native_stop_timer_thread()) { + native_thread_join(timer_thread_id); + timer_thread_id = 0; + } +} + +void +rb_thread_reset_timer_thread(void) +{ + timer_thread_id 
= 0; +} + +void +rb_thread_start_timer_thread(void) +{ + system_working = 1; + rb_thread_create_timer_thread(); +} + +static int +clear_coverage_i(st_data_t key, st_data_t val, st_data_t dummy) +{ + int i; + VALUE lines = (VALUE)val; + + for (i = 0; i < RARRAY_LEN(lines); i++) { + if (RARRAY_PTR(lines)[i] != Qnil) { + RARRAY_PTR(lines)[i] = INT2FIX(0); + } + } + return ST_CONTINUE; +} + +static void +clear_coverage(void) +{ + extern VALUE rb_get_coverages(void); + VALUE coverages = rb_get_coverages(); + if (RTEST(coverages)) { + st_foreach(RHASH_TBL(coverages), clear_coverage_i, 0); + } +} + +static void +rb_thread_atfork_internal(int (*atfork)(st_data_t, st_data_t, st_data_t)) +{ + rb_thread_t *th = GET_THREAD(); + rb_vm_t *vm = th->vm; + VALUE thval = th->self; + vm->main_thread = th; + + st_foreach(vm->living_threads, atfork, (st_data_t)th); + st_clear(vm->living_threads); + st_insert(vm->living_threads, thval, (st_data_t)th->thread_id); + vm->sleeper = 0; + clear_coverage(); +} + +static int +terminate_atfork_i(st_data_t key, st_data_t val, st_data_t current_th) +{ + VALUE thval = key; + rb_thread_t *th; + GetThreadPtr(thval, th); + + if (th != (rb_thread_t *)current_th) { + thread_cleanup_func(th); + } + return ST_CONTINUE; +} + +void +rb_thread_atfork(void) +{ + rb_thread_atfork_internal(terminate_atfork_i); + rb_reset_random_seed(); +} + +static int +terminate_atfork_before_exec_i(st_data_t key, st_data_t val, st_data_t current_th) +{ + VALUE thval = key; + rb_thread_t *th; + GetThreadPtr(thval, th); + + if (th != (rb_thread_t *)current_th) { + thread_cleanup_func_before_exec(th); + } + return ST_CONTINUE; +} + +void +rb_thread_atfork_before_exec(void) +{ + rb_thread_atfork_internal(terminate_atfork_before_exec_i); +} + +struct thgroup { + int enclosed; + VALUE group; +}; + +/* + * Document-class: ThreadGroup + * + * ThreadGroup provides a means of keeping track of a number of + * threads as a group. 
A Thread can belong to only one
+ * ThreadGroup at a time; adding a thread to a new group will
+ * remove it from any previous group.
+ *
+ * Newly created threads belong to the same group as the thread from which they
+ * were created.
+ */
+
+static VALUE thgroup_s_alloc(VALUE);
+static VALUE
+thgroup_s_alloc(VALUE klass)
+{
+    VALUE group;
+    struct thgroup *data;
+
+    group = Data_Make_Struct(klass, struct thgroup, 0, -1, data);
+    data->enclosed = 0; /* new groups start open */
+    data->group = group;
+
+    return group;
+}
+
+struct thgroup_list_params {
+    VALUE ary;   /* result array being filled */
+    VALUE group; /* group whose members are collected */
+};
+/* st_foreach callback: push the thread onto params->ary when it belongs to params->group. */
+static int
+thgroup_list_i(st_data_t key, st_data_t val, st_data_t data)
+{
+    VALUE thread = (VALUE)key;
+    VALUE ary = ((struct thgroup_list_params *)data)->ary;
+    VALUE group = ((struct thgroup_list_params *)data)->group;
+    rb_thread_t *th;
+    GetThreadPtr(thread, th);
+
+    if (th->thgroup == group) {
+        rb_ary_push(ary, thread);
+    }
+    return ST_CONTINUE;
+}
+
+/*
+ *  call-seq:
+ *     thgrp.list   => array
+ *
+ *  Returns an array of all existing Thread objects that belong to
+ *  this group.
+ *
+ *     ThreadGroup::Default.list   #=> [#<Thread:0x401bdf4c run>]
+ */
+
+static VALUE
+thgroup_list(VALUE group)
+{
+    VALUE ary = rb_ary_new();
+    struct thgroup_list_params param;
+
+    param.ary = ary;
+    param.group = group;
+    st_foreach(GET_THREAD()->vm->living_threads, thgroup_list_i, (st_data_t) & param); /* scans every living thread of the VM */
+    return ary;
+}
+
+
+/*
+ *  call-seq:
+ *     thgrp.enclose   => thgrp
+ *
+ *  Prevents threads from being added to or removed from the receiving
+ *  ThreadGroup. New threads can still be started in an enclosed
+ *  ThreadGroup.
+ *
+ *     ThreadGroup::Default.enclose        #=> #<ThreadGroup:0x4029d914>
+ *     thr = Thread::new { Thread.stop }   #=> #<Thread:0x402a7210 sleep>
+ *     tg = ThreadGroup::new               #=> #<ThreadGroup:0x402752d4>
+ *     tg.add thr
+ *
+ *  produces:
+ *
+ *     ThreadError: can't move from the enclosed thread group
+ */
+
+static VALUE
+thgroup_enclose(VALUE group)
+{
+    struct thgroup *data;
+
+    Data_Get_Struct(group, struct thgroup, data);
+    data->enclosed = 1; /* nothing visible in this section clears the flag again */
+
+    return group;
+}
+
+
+/*
+ *  call-seq:
+ *     thgrp.enclosed?
=> true or false
+ *
+ *  Returns true if thgrp is enclosed. See also
+ *  ThreadGroup#enclose.
+ */
+
+static VALUE
+thgroup_enclosed_p(VALUE group)
+{
+    struct thgroup *data;
+
+    Data_Get_Struct(group, struct thgroup, data);
+    if (data->enclosed)
+        return Qtrue;
+    return Qfalse;
+}
+
+
+/*
+ *  call-seq:
+ *     thgrp.add(thread)   => thgrp
+ *
+ *  Adds the given thread to this group, removing it from any other
+ *  group to which it may have previously belonged.
+ *
+ *     puts "Initial group is #{ThreadGroup::Default.list}"
+ *     tg = ThreadGroup.new
+ *     t1 = Thread.new { sleep }
+ *     t2 = Thread.new { sleep }
+ *     puts "t1 is #{t1}"
+ *     puts "t2 is #{t2}"
+ *     tg.add(t1)
+ *     puts "Initial group now #{ThreadGroup::Default.list}"
+ *     puts "tg group now #{tg.list}"
+ *
+ *  produces:
+ *
+ *     Initial group is #<Thread:0x401bdf4c>
+ *     t1 is #<Thread:0x401b3c90>
+ *     t2 is #<Thread:0x401b3c18>
+ *     Initial group now #<Thread:0x401b3c18>#<Thread:0x401bdf4c>
+ *     tg group now #<Thread:0x401b3c90>
+ */
+
+static VALUE
+thgroup_add(VALUE group, VALUE thread)
+{
+    rb_thread_t *th;
+    struct thgroup *data;
+
+    rb_secure(4);
+    GetThreadPtr(thread, th);
+
+    if (OBJ_FROZEN(group)) { /* destination checks: frozen or enclosed groups accept no threads */
+        rb_raise(rb_eThreadError, "can't move to the frozen thread group");
+    }
+    Data_Get_Struct(group, struct thgroup, data);
+    if (data->enclosed) {
+        rb_raise(rb_eThreadError, "can't move to the enclosed thread group");
+    }
+
+    if (!th->thgroup) { /* thread has no group: nothing is changed and nil is returned */
+        return Qnil;
+    }
+
+    if (OBJ_FROZEN(th->thgroup)) { /* source checks: same rules for the group being left */
+        rb_raise(rb_eThreadError, "can't move from the frozen thread group");
+    }
+    Data_Get_Struct(th->thgroup, struct thgroup, data); /* data now refers to the thread's current group */
+    if (data->enclosed) {
+        rb_raise(rb_eThreadError,
+                 "can't move from the enclosed thread group");
+    }
+
+    th->thgroup = group;
+    return group;
+}
+
+
+/*
+ *  Document-class: Mutex
+ *
+ *  Mutex implements a simple semaphore that can be used to coordinate access to
+ *  shared data from multiple concurrent threads.
+ *
+ *  Example:
+ *
+ *    require 'thread'
+ *    semaphore = Mutex.new
+ *
+ *    a = Thread.new {
+ *      semaphore.synchronize {
+ *        # access shared resource
+ *      }
+ *    }
+ *
+ *    b = Thread.new {
+ *      semaphore.synchronize {
+ *        # access shared resource
+ *      }
+ *    }
+ *
+ */
+
+#define GetMutexPtr(obj, tobj) \
+    Data_Get_Struct(obj, mutex_t, tobj)
+
+static const char *mutex_unlock(mutex_t *mutex, rb_thread_t volatile *th);
+/* Free function for Mutex objects: release a still-held lock, destroy the native primitives, free the struct. */
+static void
+mutex_free(void *ptr)
+{
+    if (ptr) {
+        mutex_t *mutex = ptr;
+        if (mutex->th) {
+            /* rb_warn("free locked mutex"); */
+            const char *err = mutex_unlock(mutex, mutex->th); /* unlock on behalf of the recorded owner */
+            if (err) rb_bug("%s", err);
+        }
+        native_mutex_destroy(&mutex->lock);
+        native_cond_destroy(&mutex->cond);
+    }
+    ruby_xfree(ptr);
+}
+/* Allocate a Mutex object and initialize its native lock and condition variable. */
+static VALUE
+mutex_alloc(VALUE klass)
+{
+    VALUE volatile obj;
+    mutex_t *mutex;
+
+    obj = Data_Make_Struct(klass, mutex_t, NULL, mutex_free, mutex);
+    native_mutex_initialize(&mutex->lock);
+    native_cond_initialize(&mutex->cond);
+    return obj;
+}
+
+/*
+ *  call-seq:
+ *     Mutex.new   => mutex
+ *
+ *  Creates a new Mutex
+ */
+static VALUE
+mutex_initialize(VALUE self)
+{
+    return self; /* all setup already happened in mutex_alloc() */
+}
+
+VALUE
+rb_mutex_new(void)
+{
+    return mutex_alloc(rb_cMutex);
+}
+
+/*
+ * call-seq:
+ *    mutex.locked?  => true or false
+ *
+ * Returns +true+ if this lock is currently held by some thread.
+ */
+VALUE
+rb_mutex_locked_p(VALUE self)
+{
+    mutex_t *mutex;
+    GetMutexPtr(self, mutex);
+    return mutex->th ? Qtrue : Qfalse;
+}
+/* Push the mutex onto the front of th->keeping_mutexes (singly linked through next_mutex). */
+static void
+mutex_locked(rb_thread_t *th, VALUE self)
+{
+    mutex_t *mutex;
+    GetMutexPtr(self, mutex);
+
+    if (th->keeping_mutexes) {
+        mutex->next_mutex = th->keeping_mutexes;
+    }
+    th->keeping_mutexes = mutex;
+}
+
+/*
+ * call-seq:
+ *    mutex.try_lock  => true or false
+ *
+ * Attempts to obtain the lock and returns immediately. Returns +true+ if the
+ * lock was granted.
+ */
+VALUE
+rb_mutex_trylock(VALUE self)
+{
+    mutex_t *mutex;
+    VALUE locked = Qfalse;
+    GetMutexPtr(self, mutex);
+
+    native_mutex_lock(&mutex->lock);
+    if (mutex->th == 0) { /* unowned: the current thread takes ownership */
+        mutex->th = GET_THREAD();
+        locked = Qtrue;
+
+        mutex_locked(GET_THREAD(), self);
+    }
+    native_mutex_unlock(&mutex->lock);
+
+    return locked;
+}
+/* Blocking part of rb_mutex_lock(). Returns 0 = acquired, 1 = woken by an interrupt, 2 = gave up because last_thread was set. */
+static int
+lock_func(rb_thread_t *th, mutex_t *mutex, int last_thread)
+{
+    int interrupted = 0;
+#if 0 /* for debug */
+    native_thread_yield();
+#endif
+
+    native_mutex_lock(&mutex->lock);
+    th->transition_for_lock = 0;
+    while (mutex->th || (mutex->th = th, 0)) { /* when unowned, claim it for th; the comma expression yields 0 and ends the loop */
+        if (last_thread) {
+            interrupted = 2;
+            break;
+        }
+
+        mutex->cond_waiting++;
+        native_cond_wait(&mutex->cond, &mutex->lock);
+        mutex->cond_notified--; /* pairs with the cond_notified updates in mutex_unlock()/lock_interrupt() */
+
+        if (RUBY_VM_INTERRUPTED(th)) {
+            interrupted = 1;
+            break;
+        }
+    }
+    th->transition_for_lock = 1;
+    native_mutex_unlock(&mutex->lock);
+
+    if (interrupted == 2) native_thread_yield();
+#if 0 /* for debug */
+    native_thread_yield();
+#endif
+
+    return interrupted;
+}
+/* Unblock function installed by rb_mutex_lock(): wake every thread waiting on the mutex's condition variable. */
+static void
+lock_interrupt(void *ptr)
+{
+    mutex_t *mutex = (mutex_t *)ptr;
+    native_mutex_lock(&mutex->lock);
+    if (mutex->cond_waiting > 0) {
+        native_cond_broadcast(&mutex->cond);
+        mutex->cond_notified = mutex->cond_waiting; /* all current waiters count as notified */
+        mutex->cond_waiting = 0;
+    }
+    native_mutex_unlock(&mutex->lock);
+}
+
+/*
+ * call-seq:
+ *    mutex.lock  => true or false
+ *
+ * Attempts to grab the lock and waits if it isn't available.
+ * Raises +ThreadError+ if +mutex+ was locked by the current thread.
+ */
+VALUE
+rb_mutex_lock(VALUE self)
+{
+
+    if (rb_mutex_trylock(self) == Qfalse) { /* fast path failed: the mutex is owned */
+        mutex_t *mutex;
+        rb_thread_t *th = GET_THREAD();
+        GetMutexPtr(self, mutex);
+
+        if (mutex->th == GET_THREAD()) {
+            rb_raise(rb_eThreadError, "deadlock; recursive locking");
+        }
+
+        while (mutex->th != th) { /* retry until this thread is recorded as owner */
+            int interrupted;
+            enum rb_thread_status prev_status = th->status;
+            int last_thread = 0;
+            struct rb_unblock_callback oldubf;
+
+            set_unblock_function(th, lock_interrupt, mutex, &oldubf);
+            th->status = THREAD_STOPPED_FOREVER;
+            th->vm->sleeper++;
+            th->locking_mutex = self;
+            if (vm_living_thread_num(th->vm) == th->vm->sleeper) { /* every living thread is now a sleeper */
+                last_thread = 1;
+            }
+
+            th->transition_for_lock = 1;
+            BLOCKING_REGION_CORE({
+                interrupted = lock_func(th, mutex, last_thread);
+            });
+            th->transition_for_lock = 0;
+            remove_signal_thread_list(th);
+            reset_unblock_function(th, &oldubf);
+
+            th->locking_mutex = Qfalse;
+            if (mutex->th && interrupted == 2) { /* lock_func gave up as last thread while the mutex is still owned */
+                rb_check_deadlock(th->vm);
+            }
+            if (th->status == THREAD_STOPPED_FOREVER) {
+                th->status = prev_status;
+            }
+            th->vm->sleeper--;
+
+            if (mutex->th == th) mutex_locked(th, self); /* acquired: add to this thread's keeping_mutexes list */
+
+            if (interrupted) {
+                RUBY_VM_CHECK_INTS();
+            }
+        }
+    }
+    return self;
+}
+/* Release mutex on behalf of th. Returns NULL on success or a static error message; on success also unlinks it from th->keeping_mutexes. */
+static const char *
+mutex_unlock(mutex_t *mutex, rb_thread_t volatile *th)
+{
+    const char *err = NULL;
+    mutex_t *th_mutex;
+
+    native_mutex_lock(&mutex->lock);
+
+    if (mutex->th == 0) {
+        err = "Attempt to unlock a mutex which is not locked";
+    }
+    else if (mutex->th != th) {
+        err = "Attempt to unlock a mutex which is locked by another thread";
+    }
+    else {
+        mutex->th = 0;
+        if (mutex->cond_waiting > 0) {
+            /* waiting thread */
+            native_cond_signal(&mutex->cond);
+            mutex->cond_waiting--;
+            mutex->cond_notified++;
+        }
+    }
+
+    native_mutex_unlock(&mutex->lock);
+
+    if (!err) {
+        th_mutex = th->keeping_mutexes;
+        if (th_mutex == mutex) {
+            th->keeping_mutexes = mutex->next_mutex;
+        }
+        else {
+            while (1) { /* no NULL check: relies on mutex being present in the list */
+                mutex_t *tmp_mutex;
+                tmp_mutex = th_mutex->next_mutex;
+                if (tmp_mutex
== mutex) {
+                    th_mutex->next_mutex = tmp_mutex->next_mutex;
+                    break;
+                }
+                th_mutex = tmp_mutex;
+            }
+        }
+        mutex->next_mutex = NULL;
+    }
+
+    return err;
+}
+
+/*
+ * call-seq:
+ *    mutex.unlock    => self
+ *
+ * Releases the lock.
+ * Raises +ThreadError+ if +mutex+ wasn't locked by the current thread.
+ */
+VALUE
+rb_mutex_unlock(VALUE self)
+{
+    const char *err;
+    mutex_t *mutex;
+    GetMutexPtr(self, mutex);
+
+    err = mutex_unlock(mutex, GET_THREAD());
+    if (err) rb_raise(rb_eThreadError, "%s", err);
+
+    return self;
+}
+/* Unlock every mutex on the given list on behalf of th; any failure is treated as an interpreter bug. */
+static void
+rb_mutex_unlock_all(mutex_t *mutexes, rb_thread_t *th)
+{
+    const char *err;
+    mutex_t *mutex;
+
+    while (mutexes) {
+        mutex = mutexes;
+        /* rb_warn("mutex #<%p> remains to be locked by terminated thread",
+                mutexes); */
+        mutexes = mutex->next_mutex; /* read the link first: mutex_unlock() resets next_mutex to NULL */
+        err = mutex_unlock(mutex, th);
+        if (err) rb_bug("invalid keeping_mutexes: %s", err);
+    }
+}
+/* rb_ensure body used by rb_mutex_sleep() when no timeout is given; the argument is unused. */
+static VALUE
+rb_mutex_sleep_forever(VALUE time)
+{
+    rb_thread_sleep_deadly();
+    return Qnil;
+}
+/* rb_ensure body used by rb_mutex_sleep() with a timeout; time carries a struct timeval pointer. */
+static VALUE
+rb_mutex_wait_for(VALUE time)
+{
+    const struct timeval *t = (struct timeval *)time;
+    rb_thread_wait_for(*t);
+    return Qnil;
+}
+/* Unlock, sleep (forever or for timeout), relock via rb_ensure; returns the elapsed time(0) difference as a Fixnum. */
+VALUE
+rb_mutex_sleep(VALUE self, VALUE timeout)
+{
+    time_t beg, end;
+    struct timeval t; /* only initialized, and only read, when timeout is non-nil */
+
+    if (!NIL_P(timeout)) {
+        t = rb_time_interval(timeout);
+    }
+    rb_mutex_unlock(self);
+    beg = time(0);
+    if (NIL_P(timeout)) {
+        rb_ensure(rb_mutex_sleep_forever, Qnil, rb_mutex_lock, self);
+    }
+    else {
+        rb_ensure(rb_mutex_wait_for, (VALUE)&t, rb_mutex_lock, self);
+    }
+    end = time(0) - beg;
+    return INT2FIX(end);
+}
+
+/*
+ * call-seq:
+ *    mutex.sleep(timeout = nil)    => number
+ *
+ * Releases the lock and sleeps +timeout+ seconds if it is given and
+ * non-nil or forever.  Raises +ThreadError+ if +mutex+ wasn't locked by
+ * the current thread.
+ */
+static VALUE
+mutex_sleep(int argc, VALUE *argv, VALUE self)
+{
+    VALUE timeout;
+
+    rb_scan_args(argc, argv, "01", &timeout); /* zero or one optional argument */
+    return rb_mutex_sleep(self, timeout);
+}
+
+/*
+ * call-seq:
+ *    mutex.synchronize { ... }    => result of the block
+ *
+ * Obtains a lock, runs the block, and releases the lock when the block
+ * completes.  See the example under +Mutex+.
+ */
+
+VALUE
+rb_mutex_synchronize(VALUE mutex, VALUE (*func)(VALUE arg), VALUE arg)
+{
+    rb_mutex_lock(mutex);
+    return rb_ensure(func, arg, rb_mutex_unlock, mutex); /* unlock runs as the ensure clause */
+}
+
+/*
+ * Document-class: Barrier
+ *
+ * A Barrier is a Data object whose data pointer holds a Mutex VALUE
+ * (marked through rb_gc_mark).
+ */
+static VALUE
+barrier_alloc(VALUE klass)
+{
+    return Data_Wrap_Struct(klass, rb_gc_mark, 0, (void *)mutex_alloc(0));
+}
+/* Create a barrier whose inner mutex is already locked by the calling thread. */
+VALUE
+rb_barrier_new(void)
+{
+    VALUE barrier = barrier_alloc(rb_cBarrier);
+    rb_mutex_lock((VALUE)DATA_PTR(barrier));
+    return barrier;
+}
+/* Wait on the barrier. Qfalse when it was destroyed or the caller already owns it; Qtrue when the lock was obtained on a live barrier. */
+VALUE
+rb_barrier_wait(VALUE self)
+{
+    VALUE mutex = (VALUE)DATA_PTR(self);
+    mutex_t *m;
+
+    if (!mutex) return Qfalse;
+    GetMutexPtr(mutex, m);
+    if (m->th == GET_THREAD()) return Qfalse;
+    rb_mutex_lock(mutex);
+    if (DATA_PTR(self)) return Qtrue; /* still live: the caller keeps the lock */
+    rb_mutex_unlock(mutex); /* destroyed while waiting: give the lock back */
+    return Qfalse;
+}
+/* NOTE(review): no check for a destroyed barrier (DATA_PTR == 0) before unlocking -- confirm callers guarantee it. */
+VALUE
+rb_barrier_release(VALUE self)
+{
+    return rb_mutex_unlock((VALUE)DATA_PTR(self));
+}
+/* Detach the mutex from the barrier (DATA_PTR becomes 0), then unlock it. */
+VALUE
+rb_barrier_destroy(VALUE self)
+{
+    VALUE mutex = (VALUE)DATA_PTR(self);
+    DATA_PTR(self) = 0;
+    return rb_mutex_unlock(mutex);
+}
+
+/* variables for recursive traversals */
+static ID recursive_key;
+/* Qtrue when obj (optionally paired with paired_obj) is already recorded for the calling method in hash. */
+static VALUE
+recursive_check(VALUE hash, VALUE obj, VALUE paired_obj)
+{
+    if (NIL_P(hash) || TYPE(hash) != T_HASH) {
+        return Qfalse;
+    }
+    else {
+        VALUE sym = ID2SYM(rb_frame_this_func()); /* entries are keyed by the calling method's name */
+        VALUE list = rb_hash_aref(hash, sym);
+        VALUE pair_list;
+
+        if (NIL_P(list) || TYPE(list) != T_HASH)
+            return Qfalse;
+        pair_list = rb_hash_lookup2(list, obj, Qundef);
+        if (pair_list == Qundef)
+            return Qfalse;
+        if (paired_obj) {
+            if (TYPE(pair_list) != T_HASH) { /* a single partner is stored directly rather than in a hash */
+                if (pair_list != paired_obj)
+                    return Qfalse;
+            }
+            else {
+                if
(NIL_P(rb_hash_lookup(pair_list, paired_obj)))
+                    return Qfalse;
+            }
+        }
+        return Qtrue;
+    }
+}
+/* Record obj (and optionally paired_obj) for the calling method; creates the per-thread hash on demand and returns it. */
+static VALUE
+recursive_push(VALUE hash, VALUE obj, VALUE paired_obj)
+{
+    VALUE list, sym, pair_list;
+
+    sym = ID2SYM(rb_frame_this_func());
+    if (NIL_P(hash) || TYPE(hash) != T_HASH) {
+        hash = rb_hash_new();
+        OBJ_UNTRUST(hash);
+        rb_thread_local_aset(rb_thread_current(), recursive_key, hash); /* stored as a thread-local under recursive_key */
+        list = Qnil;
+    }
+    else {
+        list = rb_hash_aref(hash, sym);
+    }
+    if (NIL_P(list) || TYPE(list) != T_HASH) {
+        list = rb_hash_new();
+        OBJ_UNTRUST(list);
+        rb_hash_aset(hash, sym, list);
+    }
+    if (!paired_obj) {
+        rb_hash_aset(list, obj, Qtrue);
+    }
+    else if ((pair_list = rb_hash_lookup2(list, obj, Qundef)) == Qundef) {
+        rb_hash_aset(list, obj, paired_obj); /* first partner: stored directly */
+    }
+    else {
+        if (TYPE(pair_list) != T_HASH){ /* second partner: promote the single value to a hash of partners */
+            VALUE other_paired_obj = pair_list;
+            pair_list = rb_hash_new();
+            OBJ_UNTRUST(pair_list);
+            rb_hash_aset(pair_list, other_paired_obj, Qtrue);
+            rb_hash_aset(list, obj, pair_list);
+        }
+        rb_hash_aset(pair_list, paired_obj, Qtrue);
+    }
+    return hash;
+}
+/* Undo recursive_push(); raises TypeError when the bookkeeping tables are missing or malformed. */
+static void
+recursive_pop(VALUE hash, VALUE obj, VALUE paired_obj)
+{
+    VALUE list, sym, pair_list, symname, thrname;
+
+    sym = ID2SYM(rb_frame_this_func());
+    if (NIL_P(hash) || TYPE(hash) != T_HASH) {
+        symname = rb_inspect(sym);
+        thrname = rb_inspect(rb_thread_current());
+        rb_raise(rb_eTypeError, "invalid inspect_tbl hash for %s in %s",
+                 StringValuePtr(symname), StringValuePtr(thrname));
+    }
+    list = rb_hash_aref(hash, sym);
+    if (NIL_P(list) || TYPE(list) != T_HASH) {
+        symname = rb_inspect(sym);
+        thrname = rb_inspect(rb_thread_current());
+        rb_raise(rb_eTypeError, "invalid inspect_tbl list for %s in %s",
+                 StringValuePtr(symname), StringValuePtr(thrname));
+    }
+    if (paired_obj) {
+        pair_list = rb_hash_lookup2(list, obj, Qundef);
+        if (pair_list == Qundef) {
+            symname = rb_inspect(sym);
+            thrname = rb_inspect(rb_thread_current());
+            rb_raise(rb_eTypeError, "invalid inspect_tbl pair_list for %s in %s",
StringValuePtr(symname), StringValuePtr(thrname));
+        }
+        if (TYPE(pair_list) == T_HASH) {
+            rb_hash_delete(pair_list, paired_obj);
+            if (!RHASH_EMPTY_P(pair_list)) {
+                return; /* keep hash until is empty */
+            }
+        }
+    }
+    rb_hash_delete(list, obj);
+}
+/* Run func on obj with recursion detection: the third argument passed to func is Qtrue when obj (with pairid) is already being processed by the calling method on this thread. */
+static VALUE
+exec_recursive(VALUE (*func) (VALUE, VALUE, int), VALUE obj, VALUE pairid, VALUE arg)
+{
+    VALUE hash = rb_thread_local_aref(rb_thread_current(), recursive_key);
+    VALUE objid = rb_obj_id(obj);
+
+    if (recursive_check(hash, objid, pairid)) {
+        return (*func) (obj, arg, Qtrue);
+    }
+    else {
+        VALUE result = Qundef;
+        int state;
+
+        hash = recursive_push(hash, objid, pairid);
+        PUSH_TAG();
+        if ((state = EXEC_TAG()) == 0) {
+            result = (*func) (obj, arg, Qfalse);
+        }
+        POP_TAG();
+        recursive_pop(hash, objid, pairid); /* always undo the push, even when func jumped out */
+        if (state)
+            JUMP_TAG(state); /* re-raise after cleanup */
+        return result;
+    }
+}
+
+VALUE
+rb_exec_recursive(VALUE (*func) (VALUE, VALUE, int), VALUE obj, VALUE arg)
+{
+    return exec_recursive(func, obj, 0, arg);
+}
+
+VALUE
+rb_exec_recursive_paired(VALUE (*func) (VALUE, VALUE, int), VALUE obj, VALUE paired_obj, VALUE arg)
+{
+    return exec_recursive(func, obj, rb_obj_id(paired_obj), arg);
+}
+
+/* tracer */
+/* Allocate a hook node; the caller links it into a list through hook->next. */
+static rb_event_hook_t *
+alloc_event_hook(rb_event_hook_func_t func, rb_event_flag_t events, VALUE data)
+{
+    rb_event_hook_t *hook = ALLOC(rb_event_hook_t);
+    hook->func = func;
+    hook->flag = events;
+    hook->data = data;
+    return hook;
+}
+/*
+ * Recompute th->event_flags as the union of the flags of every hook on
+ * th->event_hooks, preserving only the RUBY_EVENT_VM bit of the old value.
+ * rb_threadptr_exec_event_hooks() tests th->event_flags before walking the
+ * per-thread hook list, so the result has to be stored back.
+ */
+static void
+thread_reset_event_flags(rb_thread_t *th)
+{
+    rb_event_hook_t *hook = th->event_hooks;
+    rb_event_flag_t flag = th->event_flags & RUBY_EVENT_VM;
+
+    while (hook) {
+        flag |= hook->flag;
+        hook = hook->next;
+    }
+    th->event_flags = flag; /* previously the computed mask was discarded */
+}
+/* Prepend a hook to th's private hook list and refresh th->event_flags. */
+void
+rb_thread_add_event_hook(rb_thread_t *th,
+                         rb_event_hook_func_t func, rb_event_flag_t events, VALUE data)
+{
+    rb_event_hook_t *hook = alloc_event_hook(func, events, data);
+    hook->next = th->event_hooks;
+    th->event_hooks = hook;
+    thread_reset_event_flags(th);
+}
+
+static int
+set_threads_event_flags_i(st_data_t key, st_data_t val,
st_data_t flag)
+{
+    VALUE thval = key;
+    rb_thread_t *th;
+    GetThreadPtr(thval, th);
+
+    if (flag) { /* non-zero: set RUBY_EVENT_VM on the thread, zero: clear it */
+        th->event_flags |= RUBY_EVENT_VM;
+    }
+    else {
+        th->event_flags &= (~RUBY_EVENT_VM);
+    }
+    return ST_CONTINUE;
+}
+/* Set or clear RUBY_EVENT_VM on every living thread of the current VM. */
+static void
+set_threads_event_flags(int flag)
+{
+    st_foreach(GET_VM()->living_threads, set_threads_event_flags_i, (st_data_t) flag);
+}
+/* Call every hook on the list whose flag mask intersects flag. */
+static inline void
+exec_event_hooks(const rb_event_hook_t *hook, rb_event_flag_t flag, VALUE self, ID id, VALUE klass)
+{
+    for (; hook; hook = hook->next) {
+        if (flag & hook->flag) {
+            (*hook->func)(flag, hook->data, self, id, klass);
+        }
+    }
+}
+/* Dispatch an event to th's own hooks and, when RUBY_EVENT_VM is set, to the VM-wide hooks; th->errinfo is preserved. */
+void
+rb_threadptr_exec_event_hooks(rb_thread_t *th, rb_event_flag_t flag, VALUE self, ID id, VALUE klass)
+{
+    const VALUE errinfo = th->errinfo;
+    const rb_event_flag_t wait_event = th->event_flags;
+
+    if (self == rb_mRubyVMFrozenCore) return;
+    if (wait_event & flag) {
+        exec_event_hooks(th->event_hooks, flag, self, id, klass);
+    }
+    if (wait_event & RUBY_EVENT_VM) {
+        exec_event_hooks(th->vm->event_hooks, flag, self, id, klass);
+    }
+    th->errinfo = errinfo; /* hooks must not leak a changed errinfo */
+}
+/* Prepend a VM-wide hook and flag every living thread with RUBY_EVENT_VM. */
+void
+rb_add_event_hook(rb_event_hook_func_t func, rb_event_flag_t events, VALUE data)
+{
+    rb_event_hook_t *hook = alloc_event_hook(func, events, data);
+    rb_vm_t *vm = GET_VM();
+
+    hook->next = vm->event_hooks;
+    vm->event_hooks = hook;
+
+    set_threads_event_flags(1);
+}
+/* Unlink and free every hook whose func matches; func == 0 removes all hooks. */
+static int
+remove_event_hook(rb_event_hook_t **root, rb_event_hook_func_t func)
+{
+    rb_event_hook_t *prev = NULL, *hook = *root, *next;
+
+    while (hook) {
+        next = hook->next; /* saved first because hook may be freed below */
+        if (func == 0 || hook->func == func) {
+            if (prev) {
+                prev->next = hook->next;
+            }
+            else {
+                *root = hook->next;
+            }
+            xfree(hook);
+        }
+        else {
+            prev = hook;
+        }
+        hook = next;
+    }
+    return -1; /* NOTE(review): -1 is returned whether or not anything was removed -- confirm callers ignore it */
+}
+/* Remove matching hooks from th's private list and refresh th's event flags. */
+int
+rb_thread_remove_event_hook(rb_thread_t *th, rb_event_hook_func_t func)
+{
+    int ret = remove_event_hook(&th->event_hooks, func);
+    thread_reset_event_flags(th);
+    return ret;
+}
+
+int
+rb_remove_event_hook(rb_event_hook_func_t func)
+{
+
rb_vm_t *vm = GET_VM();
+    rb_event_hook_t *hook = vm->event_hooks; /* list head before removal, to detect the non-empty -> empty transition */
+    int ret = remove_event_hook(&vm->event_hooks, func);
+
+    if (hook != NULL && vm->event_hooks == NULL) { /* last VM-wide hook is gone: clear RUBY_EVENT_VM on all threads */
+        set_threads_event_flags(0);
+    }
+
+    return ret;
+}
+/* st_foreach callback: drop every per-thread hook of one thread. */
+static int
+clear_trace_func_i(st_data_t key, st_data_t val, st_data_t flag)
+{
+    rb_thread_t *th;
+    GetThreadPtr((VALUE)key, th);
+    rb_thread_remove_event_hook(th, 0);
+    return ST_CONTINUE;
+}
+/* Remove all per-thread hooks of every living thread, then all VM-wide hooks. */
+void
+rb_clear_trace_func(void)
+{
+    st_foreach(GET_VM()->living_threads, clear_trace_func_i, (st_data_t) 0);
+    rb_remove_event_hook(0);
+}
+
+static void call_trace_func(rb_event_flag_t, VALUE data, VALUE self, ID id, VALUE klass);
+
+/*
+ *  call-seq:
+ *     set_trace_func(proc)    => proc
+ *     set_trace_func(nil)     => nil
+ *
+ *  Establishes _proc_ as the handler for tracing, or disables
+ *  tracing if the parameter is +nil+. _proc_ takes up
+ *  to six parameters: an event name, a filename, a line number, an
+ *  object id, a binding, and the name of a class. _proc_ is
+ *  invoked whenever an event occurs. Events are: c-call
+ *  (call a C-language routine), c-return (return from a
+ *  C-language routine), call (call a Ruby method),
+ *  class (start a class or module definition),
+ *  end (finish a class or module definition),
+ *  line (execute code on a new line), raise
+ *  (raise an exception), and return (return from a Ruby
+ *  method). Tracing is disabled within the context of _proc_.
+ *
+ *      class Test
+ *        def test
+ *          a = 1
+ *          b = 2
+ *        end
+ *      end
+ *
+ *      set_trace_func proc { |event, file, line, id, binding, classname|
+ *         printf "%8s %s:%-2d %10s %8s\n", event, file, line, id, classname
+ *      }
+ *      t = Test.new
+ *      t.test
+ *
+ *          line prog.rb:11               false
+ *        c-call prog.rb:11        new    Class
+ *        c-call prog.rb:11 initialize   Object
+ *      c-return prog.rb:11 initialize   Object
+ *      c-return prog.rb:11        new    Class
+ *          line prog.rb:12               false
+ *          call prog.rb:2        test     Test
+ *          line prog.rb:3        test     Test
+ *          line prog.rb:4        test     Test
+ *        return prog.rb:4        test     Test
+ */
+
+static VALUE
+set_trace_func(VALUE obj, VALUE trace)
+{
+    rb_remove_event_hook(call_trace_func); /* the previous handler is dropped before the argument is validated */
+
+    if (NIL_P(trace)) {
+        return Qnil;
+    }
+
+    if (!rb_obj_is_proc(trace)) {
+        rb_raise(rb_eTypeError, "trace_func needs to be Proc");
+    }
+
+    rb_add_event_hook(call_trace_func, RUBY_EVENT_ALL, trace);
+    return trace;
+}
+/* Map a single event flag to the name passed to the trace proc; anything else yields "unknown". */
+static const char *
+get_event_name(rb_event_flag_t event)
+{
+    switch (event) {
+      case RUBY_EVENT_LINE:
+        return "line";
+      case RUBY_EVENT_CLASS:
+        return "class";
+      case RUBY_EVENT_END:
+        return "end";
+      case RUBY_EVENT_CALL:
+        return "call";
+      case RUBY_EVENT_RETURN:
+        return "return";
+      case RUBY_EVENT_C_CALL:
+        return "c-call";
+      case RUBY_EVENT_C_RETURN:
+        return "c-return";
+      case RUBY_EVENT_RAISE:
+        return "raise";
+      default:
+        return "unknown";
+    }
+}
+
+VALUE ruby_suppress_tracing(VALUE (*func)(VALUE, int), VALUE arg, int always);
+/* Argument bundle handed from call_trace_func() to call_trace_proc() through a VALUE-cast pointer. */
+struct call_trace_func_args {
+    rb_event_flag_t event;
+    VALUE proc;
+    VALUE self;
+    ID id;
+    VALUE klass;
+};
+
+static VALUE
+call_trace_proc(VALUE args, int tracing)
+{
+    struct call_trace_func_args *p = (struct call_trace_func_args *)args;
+    const char *srcfile = rb_sourcefile();
+    VALUE eventname = rb_str_new2(get_event_name(p->event));
+    VALUE filename = srcfile ?
rb_str_new2(srcfile) : Qnil;
+    VALUE argv[6];
+    int line = rb_sourceline();
+    ID id = 0;
+    VALUE klass = 0;
+
+    if (p->event == RUBY_EVENT_C_CALL ||
+        p->event == RUBY_EVENT_C_RETURN) { /* C events carry id/klass in the hook arguments */
+        id = p->id;
+        klass = p->klass;
+    }
+    else {
+        rb_thread_method_id_and_class(GET_THREAD(), &id, &klass); /* other events: ask the current thread */
+    }
+    if (id == ID_ALLOCATOR)
+        return Qnil; /* allocator calls are not reported to the trace proc */
+    if (klass) {
+        if (TYPE(klass) == T_ICLASS) {
+            klass = RBASIC(klass)->klass;
+        }
+        else if (FL_TEST(klass, FL_SINGLETON)) {
+            klass = rb_iv_get(klass, "__attached__");
+        }
+    }
+
+    argv[0] = eventname;
+    argv[1] = filename;
+    argv[2] = INT2FIX(line);
+    argv[3] = id ? ID2SYM(id) : Qnil;
+    argv[4] = (p->self && srcfile) ? rb_binding_new() : Qnil; /* a binding is created only with both a receiver and a source file */
+    argv[5] = klass ? klass : Qnil;
+
+    return rb_proc_call_with_block(p->proc, 6, argv, Qnil);
+}
+/* Event hook registered by set_trace_func(): packs its arguments and runs the proc with tracing suppressed. */
+static void
+call_trace_func(rb_event_flag_t event, VALUE proc, VALUE self, ID id, VALUE klass)
+{
+    struct call_trace_func_args args;
+
+    args.event = event;
+    args.proc = proc;
+    args.self = self;
+    args.id = id;
+    args.klass = klass;
+    ruby_suppress_tracing(call_trace_proc, (VALUE)&args, Qfalse);
+}
+/* Run func with th->tracing set to 1. When tracing is already on and always is zero, func is skipped and Qnil returned. */
+VALUE
+ruby_suppress_tracing(VALUE (*func)(VALUE, int), VALUE arg, int always)
+{
+    rb_thread_t *th = GET_THREAD();
+    int state, tracing;
+    volatile int raised;
+    VALUE result = Qnil;
+
+    if ((tracing = th->tracing) != 0 && !always) {
+        return Qnil;
+    }
+    else {
+        th->tracing = 1;
+    }
+
+    raised = rb_thread_reset_raised(th);
+
+    PUSH_TAG();
+    if ((state = EXEC_TAG()) == 0) {
+        result = (*func)(arg, tracing); /* func receives the previous tracing value */
+    }
+
+    if (raised) {
+        rb_thread_set_raised(th);
+    }
+    POP_TAG();
+
+    th->tracing = tracing; /* restore before propagating any jump */
+    if (state) {
+        JUMP_TAG(state);
+    }
+
+    return result;
+}
+
+/*
+ *  +Thread+ encapsulates the behavior of a thread of
+ *  execution, including the main thread of the Ruby script.
+ *
+ *  In the descriptions of the methods in this class, the parameter _sym_
+ *  refers to a symbol, which is either a quoted string or a
+ *  +Symbol+ (such as :name).
+ */
+
+void
+Init_Thread(void)
+{
+#undef rb_intern
+#define rb_intern(str) rb_intern_const(str)
+
+    VALUE cThGroup;
+
+    rb_define_singleton_method(rb_cThread, "new", thread_s_new, -1);
+    rb_define_singleton_method(rb_cThread, "start", thread_start, -2);
+    rb_define_singleton_method(rb_cThread, "fork", thread_start, -2); /* same implementation as "start" */
+    rb_define_singleton_method(rb_cThread, "main", rb_thread_s_main, 0);
+    rb_define_singleton_method(rb_cThread, "current", thread_s_current, 0);
+    rb_define_singleton_method(rb_cThread, "stop", rb_thread_stop, 0);
+    rb_define_singleton_method(rb_cThread, "kill", rb_thread_s_kill, 1);
+    rb_define_singleton_method(rb_cThread, "exit", rb_thread_exit, 0);
+    rb_define_singleton_method(rb_cThread, "pass", thread_s_pass, 0);
+    rb_define_singleton_method(rb_cThread, "list", rb_thread_list, 0);
+    rb_define_singleton_method(rb_cThread, "abort_on_exception", rb_thread_s_abort_exc, 0);
+    rb_define_singleton_method(rb_cThread, "abort_on_exception=", rb_thread_s_abort_exc_set, 1);
+#if THREAD_DEBUG < 0
+    rb_define_singleton_method(rb_cThread, "DEBUG", rb_thread_s_debug, 0);
+    rb_define_singleton_method(rb_cThread, "DEBUG=", rb_thread_s_debug_set, 1);
+#endif
+
+    rb_define_method(rb_cThread, "initialize", thread_initialize, -2);
+    rb_define_method(rb_cThread, "raise", thread_raise_m, -1);
+    rb_define_method(rb_cThread, "join", thread_join_m, -1);
+    rb_define_method(rb_cThread, "value", thread_value, 0);
+    rb_define_method(rb_cThread, "kill", rb_thread_kill, 0);
+    rb_define_method(rb_cThread, "terminate", rb_thread_kill, 0); /* "terminate" and "exit" share rb_thread_kill with "kill" */
+    rb_define_method(rb_cThread, "exit", rb_thread_kill, 0);
+    rb_define_method(rb_cThread, "run", rb_thread_run, 0);
+    rb_define_method(rb_cThread, "wakeup", rb_thread_wakeup, 0);
+    rb_define_method(rb_cThread, "[]", rb_thread_aref, 1);
+    rb_define_method(rb_cThread, "[]=", rb_thread_aset, 2);
+    rb_define_method(rb_cThread, "key?", rb_thread_key_p, 1);
+    rb_define_method(rb_cThread, "keys", rb_thread_keys, 0);
+    rb_define_method(rb_cThread, "priority", rb_thread_priority, 0);
+    rb_define_method(rb_cThread, "priority=", rb_thread_priority_set, 1);
+    rb_define_method(rb_cThread, "status", rb_thread_status, 0);
+    rb_define_method(rb_cThread, "alive?", rb_thread_alive_p, 0);
+    rb_define_method(rb_cThread, "stop?", rb_thread_stop_p, 0);
+    rb_define_method(rb_cThread, "abort_on_exception", rb_thread_abort_exc, 0);
+    rb_define_method(rb_cThread, "abort_on_exception=", rb_thread_abort_exc_set, 1);
+    rb_define_method(rb_cThread, "safe_level", rb_thread_safe_level, 0);
+    rb_define_method(rb_cThread, "group", rb_thread_group, 0);
+
+    rb_define_method(rb_cThread, "inspect", rb_thread_inspect, 0);
+
+    cThGroup = rb_define_class("ThreadGroup", rb_cObject);
+    rb_define_alloc_func(cThGroup, thgroup_s_alloc);
+    rb_define_method(cThGroup, "list", thgroup_list, 0);
+    rb_define_method(cThGroup, "enclose", thgroup_enclose, 0);
+    rb_define_method(cThGroup, "enclosed?", thgroup_enclosed_p, 0);
+    rb_define_method(cThGroup, "add", thgroup_add, 1);
+
+    {
+        rb_thread_t *th = GET_THREAD();
+        th->thgroup = th->vm->thgroup_default = rb_obj_alloc(cThGroup); /* the current thread joins the default group */
+        rb_define_const(cThGroup, "Default", th->thgroup);
+    }
+
+    rb_cMutex = rb_define_class("Mutex", rb_cObject);
+    rb_define_alloc_func(rb_cMutex, mutex_alloc);
+    rb_define_method(rb_cMutex, "initialize", mutex_initialize, 0);
+    rb_define_method(rb_cMutex, "locked?", rb_mutex_locked_p, 0);
+    rb_define_method(rb_cMutex, "try_lock", rb_mutex_trylock, 0);
+    rb_define_method(rb_cMutex, "lock", rb_mutex_lock, 0);
+    rb_define_method(rb_cMutex, "unlock", rb_mutex_unlock, 0);
+    rb_define_method(rb_cMutex, "sleep", mutex_sleep, -1);
+
+    recursive_key = rb_intern("__recursive_key__");
+    rb_eThreadError = rb_define_class("ThreadError", rb_eStandardError);
+
+    /* trace */
+    rb_define_global_function("set_trace_func", set_trace_func, 1);
+
+    /* init thread core */
+    Init_native_thread();
+    {
+        /* main thread setting */
+        {
+            /* acquire global vm lock */
+            rb_thread_lock_t *lp = &GET_THREAD()->vm->global_vm_lock;
+            native_mutex_initialize(lp);
+            native_mutex_lock(lp);
+            native_mutex_initialize(&GET_THREAD()->interrupt_lock);
+        }
+    }
+
+    rb_thread_create_timer_thread();
+
+    (void)native_mutex_trylock; /* references only; presumably to silence unused-function warnings */
+    (void)ruby_thread_set_native;
+}
+/* Non-zero (Qtrue) when the calling native thread has a Ruby thread attached; note the int return carries a VALUE constant. */
+int
+ruby_native_thread_p(void)
+{
+    rb_thread_t *th = ruby_thread_from_native();
+
+    return th ? Qtrue : Qfalse;
+}
+/* st_foreach callback for rb_check_deadlock(): sets *found and stops as soon as one thread can still make progress. */
+static int
+check_deadlock_i(st_data_t key, st_data_t val, int *found)
+{
+    VALUE thval = key;
+    rb_thread_t *th;
+    GetThreadPtr(thval, th);
+
+    if (th->status != THREAD_STOPPED_FOREVER || RUBY_VM_INTERRUPTED(th) || th->transition_for_lock) {
+        *found = 1;
+    }
+    else if (th->locking_mutex) {
+        mutex_t *mutex;
+        GetMutexPtr(th->locking_mutex, mutex);
+
+        native_mutex_lock(&mutex->lock);
+        if (mutex->th == th || (!mutex->th && mutex->cond_notified)) { /* already owner, or free with a wake-up pending */
+            *found = 1;
+        }
+        native_mutex_unlock(&mutex->lock);
+    }
+
+    return (*found) ? ST_STOP : ST_CONTINUE;
+}
+
+#if 0 /* for debug */
+static int
+debug_i(st_data_t key, st_data_t val, int *found)
+{
+    VALUE thval = key;
+    rb_thread_t *th;
+    GetThreadPtr(thval, th);
+
+    printf("th:%p %d %d %d", th, th->status, th->interrupt_flag, th->transition_for_lock);
+    if (th->locking_mutex) {
+        mutex_t *mutex;
+        GetMutexPtr(th->locking_mutex, mutex);
+
+        native_mutex_lock(&mutex->lock);
+        printf(" %p %d\n", mutex->th, mutex->cond_notified);
+        native_mutex_unlock(&mutex->lock);
+    }
+    else puts("");
+
+    return ST_CONTINUE;
+}
+#endif
+/* Raise a fatal "deadlock detected" error in the main thread when every living thread sleeps forever and none can progress. */
+static void
+rb_check_deadlock(rb_vm_t *vm)
+{
+    int found = 0;
+
+    if (vm_living_thread_num(vm) > vm->sleeper) return; /* at least one thread is not a sleeper */
+    if (vm_living_thread_num(vm) < vm->sleeper) rb_bug("sleeper must not be more than vm_living_thread_num(vm)");
+
+    st_foreach(vm->living_threads, check_deadlock_i, (st_data_t)&found);
+
+    if (!found) {
+        VALUE argv[2];
+        argv[0] = rb_eFatal;
+        argv[1] = rb_str_new2("deadlock detected");
+#if 0 /* for debug */
+        printf("%d %d %p %p\n", vm->living_threads->num_entries, vm->sleeper, GET_THREAD(),
vm->main_thread); + st_foreach(vm->living_threads, debug_i, (st_data_t)0); +#endif + vm->sleeper--; + rb_thread_raise(2, argv, vm->main_thread); + } +} + +static void +update_coverage(rb_event_flag_t event, VALUE proc, VALUE self, ID id, VALUE klass) +{ + VALUE coverage = GET_THREAD()->cfp->iseq->coverage; + if (coverage && RBASIC(coverage)->klass == 0) { + long line = rb_sourceline() - 1; + long count; + if (RARRAY_PTR(coverage)[line] == Qnil) { + rb_bug("bug"); + } + count = FIX2LONG(RARRAY_PTR(coverage)[line]) + 1; + if (POSFIXABLE(count)) { + RARRAY_PTR(coverage)[line] = LONG2FIX(count); + } + } +} + +VALUE +rb_get_coverages(void) +{ + return GET_VM()->coverages; +} + +void +rb_set_coverages(VALUE coverages) +{ + GET_VM()->coverages = coverages; + rb_add_event_hook(update_coverage, RUBY_EVENT_COVERAGE, Qnil); +} + +void +rb_reset_coverages(void) +{ + GET_VM()->coverages = Qfalse; + rb_remove_event_hook(update_coverage); +} diff --git a/thread_pthread.c b/thread_pthread.c new file mode 100644 index 0000000..c187cfd --- /dev/null +++ b/thread_pthread.c @@ -0,0 +1,852 @@ +/* -*-c-*- */ +/********************************************************************** + + thread_pthread.c - + + $Author: yugui $ + + Copyright (C) 2004-2007 Koichi Sasada + +**********************************************************************/ + +#ifdef THREAD_SYSTEM_DEPENDENT_IMPLEMENTATION + +#include "gc.h" + +#ifdef HAVE_SYS_RESOURCE_H +#include +#endif + +static void native_mutex_lock(pthread_mutex_t *lock); +static void native_mutex_unlock(pthread_mutex_t *lock); +static int native_mutex_trylock(pthread_mutex_t *lock); +static void native_mutex_initialize(pthread_mutex_t *lock); +static void native_mutex_destroy(pthread_mutex_t *lock); + +static void native_cond_signal(pthread_cond_t *cond); +static void native_cond_broadcast(pthread_cond_t *cond); +static void native_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex); +static void native_cond_initialize(pthread_cond_t *cond); 
+static void native_cond_destroy(pthread_cond_t *cond); + +static void +native_mutex_lock(pthread_mutex_t *lock) +{ + int r; + if ((r = pthread_mutex_lock(lock)) != 0) { + rb_bug("pthread_mutex_lock: %d", r); + } +} + +static void +native_mutex_unlock(pthread_mutex_t *lock) +{ + int r; + if ((r = pthread_mutex_unlock(lock)) != 0) { + rb_bug("native_mutex_unlock return non-zero: %d", r); + } +} + +static inline int +native_mutex_trylock(pthread_mutex_t *lock) +{ + int r; + if ((r = pthread_mutex_trylock(lock)) != 0) { + if (r == EBUSY) { + return EBUSY; + } + else { + rb_bug("native_mutex_trylock return non-zero: %d", r); + } + } + return 0; +} + +static void +native_mutex_initialize(pthread_mutex_t *lock) +{ + int r = pthread_mutex_init(lock, 0); + if (r != 0) { + rb_bug("native_mutex_initialize return non-zero: %d", r); + } +} + +static void +native_mutex_destroy(pthread_mutex_t *lock) +{ + int r = pthread_mutex_destroy(lock); + if (r != 0) { + rb_bug("native_mutex_destroy return non-zero: %d", r); + } +} + +static void +native_cond_initialize(pthread_cond_t *cond) +{ + int r = pthread_cond_init(cond, 0); + if (r != 0) { + rb_bug("native_cond_initialize return non-zero: %d", r); + } +} + +static void +native_cond_destroy(pthread_cond_t *cond) +{ + int r = pthread_cond_destroy(cond); + if (r != 0) { + rb_bug("native_cond_destroy return non-zero: %d", r); + } +} + +static void +native_cond_signal(pthread_cond_t *cond) +{ + pthread_cond_signal(cond); +} + +static void +native_cond_broadcast(pthread_cond_t *cond) +{ + pthread_cond_broadcast(cond); +} + +static void +native_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) +{ + pthread_cond_wait(cond, mutex); +} + +static int +native_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, struct timespec *ts) +{ + return pthread_cond_timedwait(cond, mutex, ts); +} + + +#define native_cleanup_push pthread_cleanup_push +#define native_cleanup_pop pthread_cleanup_pop +#ifdef HAVE_SCHED_YIELD +#define 
native_thread_yield() (void)sched_yield() +#else +#define native_thread_yield() ((void)0) +#endif + +#ifndef __CYGWIN__ +static void add_signal_thread_list(rb_thread_t *th); +#endif +static void remove_signal_thread_list(rb_thread_t *th); + +static rb_thread_lock_t signal_thread_list_lock; + +static pthread_key_t ruby_native_thread_key; + +static void +null_func(int i) +{ + /* null */ +} + +static rb_thread_t * +ruby_thread_from_native(void) +{ + return pthread_getspecific(ruby_native_thread_key); +} + +static int +ruby_thread_set_native(rb_thread_t *th) +{ + return pthread_setspecific(ruby_native_thread_key, th) == 0; +} + +static void +Init_native_thread(void) +{ + rb_thread_t *th = GET_THREAD(); + + pthread_key_create(&ruby_native_thread_key, NULL); + th->thread_id = pthread_self(); + native_cond_initialize(&th->native_thread_data.sleep_cond); + ruby_thread_set_native(th); + native_mutex_initialize(&signal_thread_list_lock); + posix_signal(SIGVTALRM, null_func); +} + +static void +native_thread_destroy(rb_thread_t *th) +{ + pthread_mutex_destroy(&th->interrupt_lock); + pthread_cond_destroy(&th->native_thread_data.sleep_cond); +} + +#define USE_THREAD_CACHE 0 + +#if STACK_GROW_DIRECTION +#define STACK_GROW_DIR_DETECTION +#define STACK_DIR_UPPER(a,b) STACK_UPPER(0, a, b) +#else +#define STACK_GROW_DIR_DETECTION VALUE stack_grow_dir_detection +#define STACK_DIR_UPPER(a,b) STACK_UPPER(&stack_grow_dir_detection, a, b) +#endif + +#if defined HAVE_PTHREAD_GETATTR_NP || defined HAVE_PTHREAD_ATTR_GET_NP +#define STACKADDR_AVAILABLE 1 +#elif defined HAVE_PTHREAD_GET_STACKADDR_NP && defined HAVE_PTHREAD_GET_STACKSIZE_NP +#define STACKADDR_AVAILABLE 1 +#elif defined HAVE_THR_STKSEGMENT || defined HAVE_PTHREAD_STACKSEG_NP +#define STACKADDR_AVAILABLE 1 +#endif + +#ifdef STACKADDR_AVAILABLE +static int +get_stack(void **addr, size_t *size) +{ +#define CHECK_ERR(expr) \ + {int err = (expr); if (err) return err;} +#if defined HAVE_PTHREAD_GETATTR_NP || defined 
HAVE_PTHREAD_ATTR_GET_NP + pthread_attr_t attr; + size_t guard = 0; + +# ifdef HAVE_PTHREAD_GETATTR_NP + CHECK_ERR(pthread_getattr_np(pthread_self(), &attr)); +# ifdef HAVE_PTHREAD_ATTR_GETSTACK + CHECK_ERR(pthread_attr_getstack(&attr, addr, size)); +# else + CHECK_ERR(pthread_attr_getstackaddr(&attr, addr)); + CHECK_ERR(pthread_attr_getstacksize(&attr, size)); +# endif + if (pthread_attr_getguardsize(&attr, &guard) == 0) { + STACK_GROW_DIR_DETECTION; + STACK_DIR_UPPER((void)0, *addr = (char *)*addr + guard); + *size -= guard; + } +# else + CHECK_ERR(pthread_attr_init(&attr)); + CHECK_ERR(pthread_attr_get_np(pthread_self(), &attr)); + CHECK_ERR(pthread_attr_getstackaddr(&attr, addr)); + CHECK_ERR(pthread_attr_getstacksize(&attr, size)); +# endif + CHECK_ERR(pthread_attr_getguardsize(&attr, &guard)); +# ifndef HAVE_PTHREAD_GETATTR_NP + pthread_attr_destroy(&attr); +# endif + size -= guard; +#elif defined HAVE_PTHREAD_GET_STACKADDR_NP && defined HAVE_PTHREAD_GET_STACKSIZE_NP + pthread_t th = pthread_self(); + *addr = pthread_get_stackaddr_np(th); + *size = pthread_get_stacksize_np(th); +#elif defined HAVE_THR_STKSEGMENT || defined HAVE_PTHREAD_STACKSEG_NP + stack_t stk; +# if defined HAVE_THR_STKSEGMENT + CHECK_ERR(thr_stksegment(&stk)); +# else + CHECK_ERR(pthread_stackseg_np(pthread_self(), &stk)); +# endif + *addr = stk.ss_sp; + *size = stk.ss_size; +#endif + return 0; +#undef CHECK_ERR +} +#endif + +static struct { + rb_thread_id_t id; + size_t stack_maxsize; + VALUE *stack_start; +#ifdef __ia64 + VALUE *register_stack_start; +#endif +} native_main_thread; + +#ifdef STACK_END_ADDRESS +extern void *STACK_END_ADDRESS; +#endif + +#undef ruby_init_stack +void +ruby_init_stack(VALUE *addr +#ifdef __ia64 + , void *bsp +#endif + ) +{ + native_main_thread.id = pthread_self(); +#ifdef STACK_END_ADDRESS + native_main_thread.stack_start = STACK_END_ADDRESS; +#else + if (!native_main_thread.stack_start || + STACK_UPPER((VALUE *)(void *)&addr, + native_main_thread.stack_start 
> addr, + native_main_thread.stack_start < addr)) { + native_main_thread.stack_start = addr; + } +#endif +#ifdef __ia64 + if (!native_main_thread.register_stack_start || + (VALUE*)bsp < native_main_thread.register_stack_start) { + native_main_thread.register_stack_start = (VALUE*)bsp; + } +#endif +#ifdef HAVE_GETRLIMIT + { + struct rlimit rlim; + + if (getrlimit(RLIMIT_STACK, &rlim) == 0) { + size_t space = (size_t)(rlim.rlim_cur/5); + + if (space > 1024*1024) space = 1024*1024; + native_main_thread.stack_maxsize = (size_t)rlim.rlim_cur - space; + } + } +#endif +} + +#define CHECK_ERR(expr) \ + {int err = (expr); if (err) {rb_bug("err: %d - %s", err, #expr);}} + +static int +native_thread_init_stack(rb_thread_t *th) +{ + rb_thread_id_t curr = pthread_self(); + + if (pthread_equal(curr, native_main_thread.id)) { + th->machine_stack_start = native_main_thread.stack_start; + th->machine_stack_maxsize = native_main_thread.stack_maxsize; + } + else { +#ifdef HAVE_PTHREAD_GETATTR_NP + pthread_attr_t attr; + void *start; + CHECK_ERR(pthread_getattr_np(curr, &attr)); +# if defined HAVE_PTHREAD_ATTR_GETSTACK + CHECK_ERR(pthread_attr_getstack(&attr, &start, &th->machine_stack_maxsize)); +# elif defined HAVE_PTHREAD_ATTR_GETSTACKSIZE && defined HAVE_PTHREAD_ATTR_GETSTACKADDR + CHECK_ERR(pthread_attr_getstackaddr(&attr, &start)); + CHECK_ERR(pthread_attr_getstacksize(&attr, &th->machine_stack_maxsize)); +# endif + th->machine_stack_start = start; +#else + rb_raise(rb_eNotImpError, "ruby engine can initialize only in the main thread"); +#endif + } +#ifdef __ia64 + th->machine_register_stack_start = native_main_thread.register_stack_start; + th->machine_stack_maxsize /= 2; + th->machine_register_stack_maxsize = th->machine_stack_maxsize; +#endif + return 0; +} + +static void * +thread_start_func_1(void *th_ptr) +{ +#if USE_THREAD_CACHE + thread_start: +#endif + { + rb_thread_t *th = th_ptr; + VALUE stack_start; + + /* run */ + thread_start_func_2(th, &stack_start, 
rb_ia64_bsp()); + } +#if USE_THREAD_CACHE + if (1) { + /* cache thread */ + rb_thread_t *th; + static rb_thread_t *register_cached_thread_and_wait(void); + if ((th = register_cached_thread_and_wait()) != 0) { + th_ptr = (void *)th; + th->thread_id = pthread_self(); + goto thread_start; + } + } +#endif + return 0; +} + +void rb_thread_create_control_thread(void); + +struct cached_thread_entry { + volatile rb_thread_t **th_area; + pthread_cond_t *cond; + struct cached_thread_entry *next; +}; + + +#if USE_THREAD_CACHE +static pthread_mutex_t thread_cache_lock = PTHREAD_MUTEX_INITIALIZER; +struct cached_thread_entry *cached_thread_root; + +static rb_thread_t * +register_cached_thread_and_wait(void) +{ + pthread_cond_t cond = PTHREAD_COND_INITIALIZER; + volatile rb_thread_t *th_area = 0; + struct cached_thread_entry *entry = + (struct cached_thread_entry *)malloc(sizeof(struct cached_thread_entry)); + + struct timeval tv; + struct timespec ts; + gettimeofday(&tv, 0); + ts.tv_sec = tv.tv_sec + 60; + ts.tv_nsec = tv.tv_usec * 1000; + + pthread_mutex_lock(&thread_cache_lock); + { + entry->th_area = &th_area; + entry->cond = &cond; + entry->next = cached_thread_root; + cached_thread_root = entry; + + pthread_cond_timedwait(&cond, &thread_cache_lock, &ts); + + { + struct cached_thread_entry *e = cached_thread_root; + struct cached_thread_entry *prev = cached_thread_root; + + while (e) { + if (e == entry) { + if (prev == cached_thread_root) { + cached_thread_root = e->next; + } + else { + prev->next = e->next; + } + break; + } + prev = e; + e = e->next; + } + } + + free(entry); /* ok */ + pthread_cond_destroy(&cond); + } + pthread_mutex_unlock(&thread_cache_lock); + + return (rb_thread_t *)th_area; +} +#endif + +static int +use_cached_thread(rb_thread_t *th) +{ + int result = 0; +#if USE_THREAD_CACHE + struct cached_thread_entry *entry; + + if (cached_thread_root) { + pthread_mutex_lock(&thread_cache_lock); + entry = cached_thread_root; + { + if (cached_thread_root) { + 
cached_thread_root = entry->next; + *entry->th_area = th; + result = 1; + } + } + if (result) { + pthread_cond_signal(entry->cond); + } + pthread_mutex_unlock(&thread_cache_lock); + } +#endif + return result; +} + +static int +native_thread_create(rb_thread_t *th) +{ + int err = 0; + + if (use_cached_thread(th)) { + thread_debug("create (use cached thread): %p\n", (void *)th); + } + else { + pthread_attr_t attr; + size_t stack_size = 512 * 1024; /* 512KB */ + size_t space; + +#ifdef PTHREAD_STACK_MIN + if (stack_size < PTHREAD_STACK_MIN) { + stack_size = PTHREAD_STACK_MIN * 2; + } +#endif + space = stack_size/5; + if (space > 1024*1024) space = 1024*1024; + th->machine_stack_maxsize = stack_size - space; +#ifdef __ia64 + th->machine_stack_maxsize /= 2; + th->machine_register_stack_maxsize = th->machine_stack_maxsize; +#endif + + CHECK_ERR(pthread_attr_init(&attr)); + +#ifdef PTHREAD_STACK_MIN + thread_debug("create - stack size: %lu\n", (unsigned long)stack_size); + CHECK_ERR(pthread_attr_setstacksize(&attr, stack_size)); +#endif + +#ifdef HAVE_PTHREAD_ATTR_SETINHERITSCHED + CHECK_ERR(pthread_attr_setinheritsched(&attr, PTHREAD_INHERIT_SCHED)); +#endif + CHECK_ERR(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)); + + err = pthread_create(&th->thread_id, &attr, thread_start_func_1, th); + thread_debug("create: %p (%d)", (void *)th, err); + CHECK_ERR(pthread_attr_destroy(&attr)); + + if (!err) { + pthread_cond_init(&th->native_thread_data.sleep_cond, 0); + } + else { + st_delete_wrap(th->vm->living_threads, th->self); + th->status = THREAD_KILLED; + rb_raise(rb_eThreadError, "can't create Thread (%d)", err); + } + } + return err; +} + +static void +native_thread_join(pthread_t th) +{ + int err = pthread_join(th, 0); + if (err) { + rb_raise(rb_eThreadError, "native_thread_join() failed (%d)", err); + } +} + + +#if USE_NATIVE_THREAD_PRIORITY + +static void +native_thread_apply_priority(rb_thread_t *th) +{ +#if defined(_POSIX_PRIORITY_SCHEDULING) && 
(_POSIX_PRIORITY_SCHEDULING > 0) + struct sched_param sp; + int policy; + int priority = 0 - th->priority; + int max, min; + pthread_getschedparam(th->thread_id, &policy, &sp); + max = sched_get_priority_max(policy); + min = sched_get_priority_min(policy); + + if (min > priority) { + priority = min; + } + else if (max < priority) { + priority = max; + } + + sp.sched_priority = priority; + pthread_setschedparam(th->thread_id, policy, &sp); +#else + /* not touched */ +#endif +} + +#endif /* USE_NATIVE_THREAD_PRIORITY */ + +static void +ubf_pthread_cond_signal(void *ptr) +{ + rb_thread_t *th = (rb_thread_t *)ptr; + thread_debug("ubf_pthread_cond_signal (%p)\n", (void *)th); + pthread_cond_signal(&th->native_thread_data.sleep_cond); +} + +#ifndef __CYGWIN__ +static void +ubf_select_each(rb_thread_t *th) +{ + thread_debug("ubf_select_each (%p)\n", (void *)th->thread_id); + if (th) { + pthread_kill(th->thread_id, SIGVTALRM); + } +} + +static void +ubf_select(void *ptr) +{ + rb_thread_t *th = (rb_thread_t *)ptr; + add_signal_thread_list(th); + ubf_select_each(th); +} +#else +#define ubf_select 0 +#endif + +#define PER_NANO 1000000000 + +static void +native_sleep(rb_thread_t *th, struct timeval *tv) +{ + struct timespec ts; + struct timeval tvn; + + if (tv) { + gettimeofday(&tvn, NULL); + ts.tv_sec = tvn.tv_sec + tv->tv_sec; + ts.tv_nsec = (tvn.tv_usec + tv->tv_usec) * 1000; + if (ts.tv_nsec >= PER_NANO){ + ts.tv_sec += 1; + ts.tv_nsec -= PER_NANO; + } + } + + thread_debug("native_sleep %ld\n", tv ? tv->tv_sec : -1); + GVL_UNLOCK_BEGIN(); + { + pthread_mutex_lock(&th->interrupt_lock); + th->unblock.func = ubf_pthread_cond_signal; + th->unblock.arg = th; + + if (RUBY_VM_INTERRUPTED(th)) { + /* interrupted. 
return immediate */ + thread_debug("native_sleep: interrupted before sleep\n"); + } + else { + if (tv == 0 || ts.tv_sec < tvn.tv_sec /* overflow */ ) { + int r; + thread_debug("native_sleep: pthread_cond_wait start\n"); + r = pthread_cond_wait(&th->native_thread_data.sleep_cond, + &th->interrupt_lock); + if (r) rb_bug("pthread_cond_wait: %d", r); + thread_debug("native_sleep: pthread_cond_wait end\n"); + } + else { + int r; + thread_debug("native_sleep: pthread_cond_timedwait start (%ld, %ld)\n", + (unsigned long)ts.tv_sec, ts.tv_nsec); + r = pthread_cond_timedwait(&th->native_thread_data.sleep_cond, + &th->interrupt_lock, &ts); + if (r && r != ETIMEDOUT) rb_bug("pthread_cond_timedwait: %d", r); + + thread_debug("native_sleep: pthread_cond_timedwait end (%d)\n", r); + } + } + th->unblock.func = 0; + th->unblock.arg = 0; + + pthread_mutex_unlock(&th->interrupt_lock); + } + GVL_UNLOCK_END(); + + thread_debug("native_sleep done\n"); +} + +struct signal_thread_list { + rb_thread_t *th; + struct signal_thread_list *prev; + struct signal_thread_list *next; +}; + +#ifndef __CYGWIN__ +static struct signal_thread_list signal_thread_list_anchor = { + 0, 0, 0, +}; +#endif + +#define FGLOCK(lock, body) do { \ + native_mutex_lock(lock); \ + { \ + body; \ + } \ + native_mutex_unlock(lock); \ +} while (0) + +#if 0 /* for debug */ +static void +print_signal_list(char *str) +{ + struct signal_thread_list *list = + signal_thread_list_anchor.next; + thread_debug("list (%s)> ", str); + while(list){ + thread_debug("%p (%p), ", list->th, list->th->thread_id); + list = list->next; + } + thread_debug("\n"); +} +#endif + +#ifndef __CYGWIN__ +static void +add_signal_thread_list(rb_thread_t *th) +{ + if (!th->native_thread_data.signal_thread_list) { + FGLOCK(&signal_thread_list_lock, { + struct signal_thread_list *list = + malloc(sizeof(struct signal_thread_list)); + + if (list == 0) { + fprintf(stderr, "[FATAL] failed to allocate memory\n"); + exit(1); + } + + list->th = th; + + list->prev 
= &signal_thread_list_anchor; + list->next = signal_thread_list_anchor.next; + if (list->next) { + list->next->prev = list; + } + signal_thread_list_anchor.next = list; + th->native_thread_data.signal_thread_list = list; + }); + } +} +#endif + +static void +remove_signal_thread_list(rb_thread_t *th) +{ + if (th->native_thread_data.signal_thread_list) { + FGLOCK(&signal_thread_list_lock, { + struct signal_thread_list *list = + (struct signal_thread_list *) + th->native_thread_data.signal_thread_list; + + list->prev->next = list->next; + if (list->next) { + list->next->prev = list->prev; + } + th->native_thread_data.signal_thread_list = 0; + list->th = 0; + free(list); /* ok */ + }); + } + else { + /* */ + } +} + +static pthread_t timer_thread_id; +static pthread_cond_t timer_thread_cond = PTHREAD_COND_INITIALIZER; +static pthread_mutex_t timer_thread_lock = PTHREAD_MUTEX_INITIALIZER; + +static struct timespec * +get_ts(struct timespec *ts, unsigned long nsec) +{ + struct timeval tv; + gettimeofday(&tv, 0); + ts->tv_sec = tv.tv_sec; + ts->tv_nsec = tv.tv_usec * 1000 + nsec; + if (ts->tv_nsec >= PER_NANO) { + ts->tv_sec++; + ts->tv_nsec -= PER_NANO; + } + return ts; +} + +static void * +thread_timer(void *dummy) +{ + struct timespec ts; + + native_mutex_lock(&timer_thread_lock); + native_cond_broadcast(&timer_thread_cond); +#define WAIT_FOR_10MS() native_cond_timedwait(&timer_thread_cond, &timer_thread_lock, get_ts(&ts, PER_NANO/100)) + while (system_working > 0) { + int err = WAIT_FOR_10MS(); + if (err == ETIMEDOUT); + else if (err == 0 || err == EINTR) { + if (rb_signal_buff_size() == 0) break; + } + else rb_bug("thread_timer/timedwait: %d", err); + +#ifndef __CYGWIN__ + if (signal_thread_list_anchor.next) { + FGLOCK(&signal_thread_list_lock, { + struct signal_thread_list *list; + list = signal_thread_list_anchor.next; + while (list) { + ubf_select_each(list->th); + list = list->next; + } + }); + } +#endif + timer_thread_function(dummy); + } + 
native_mutex_unlock(&timer_thread_lock); + return NULL; +} + +static void +rb_thread_create_timer_thread(void) +{ + rb_enable_interrupt(); + + if (!timer_thread_id) { + pthread_attr_t attr; + int err; + + pthread_attr_init(&attr); +#ifdef PTHREAD_STACK_MIN + pthread_attr_setstacksize(&attr, + PTHREAD_STACK_MIN + (THREAD_DEBUG ? BUFSIZ : 0)); +#endif + native_mutex_lock(&timer_thread_lock); + err = pthread_create(&timer_thread_id, &attr, thread_timer, 0); + if (err != 0) { + native_mutex_unlock(&timer_thread_lock); + rb_bug("rb_thread_create_timer_thread: return non-zero (%d)", err); + } + native_cond_wait(&timer_thread_cond, &timer_thread_lock); + native_mutex_unlock(&timer_thread_lock); + } + rb_disable_interrupt(); /* only timer thread recieve signal */ +} + +static int +native_stop_timer_thread(void) +{ + int stopped; + native_mutex_lock(&timer_thread_lock); + stopped = --system_working <= 0; + if (stopped) { + native_cond_signal(&timer_thread_cond); + } + native_mutex_unlock(&timer_thread_lock); + return stopped; +} + +#ifdef HAVE_SIGALTSTACK +int +ruby_stack_overflowed_p(const rb_thread_t *th, const void *addr) +{ + void *base; + size_t size; + const size_t water_mark = 1024 * 1024; + STACK_GROW_DIR_DETECTION; + + if (th) { + size = th->machine_stack_maxsize; + base = (char *)th->machine_stack_start - STACK_DIR_UPPER(0, size); + } +#ifdef STACKADDR_AVAILABLE + else if (get_stack(&base, &size) == 0) { + STACK_DIR_UPPER(base = (char *)base + size, (void)0); + } +#endif + else { + return 0; + } + size /= 5; + if (size > water_mark) size = water_mark; + if (STACK_DIR_UPPER(1, 0)) { + if (size > ~(size_t)base+1) size = ~(size_t)base+1; + if (addr > base && addr <= (void *)((char *)base + size)) return 1; + } + else { + if (size > (size_t)base) size = (size_t)base; + if (addr > (void *)((char *)base - size) && addr <= base) return 1; + } + return 0; +} +#endif + +#endif /* THREAD_SYSTEM_DEPENDENT_IMPLEMENTATION */ diff --git a/thread_pthread.h b/thread_pthread.h new 
file mode 100644 index 0000000..0851398 --- /dev/null +++ b/thread_pthread.h @@ -0,0 +1,24 @@ +/********************************************************************** + + thread_pthread.h - + + $Author: nobu $ + + Copyright (C) 2004-2007 Koichi Sasada + +**********************************************************************/ + +#ifndef RUBY_THREAD_PTHREAD_H +#define RUBY_THREAD_PTHREAD_H + +#include +typedef pthread_t rb_thread_id_t; +typedef pthread_mutex_t rb_thread_lock_t; +typedef pthread_cond_t rb_thread_cond_t; + +typedef struct native_thread_data_struct { + void *signal_thread_list; + pthread_cond_t sleep_cond; +} native_thread_data_t; + +#endif /* RUBY_THREAD_PTHREAD_H */ diff --git a/thread_win32.c b/thread_win32.c new file mode 100644 index 0000000..7d53557 --- /dev/null +++ b/thread_win32.c @@ -0,0 +1,573 @@ +/* -*-c-*- */ +/********************************************************************** + + thread_win32.c - + + $Author: yugui $ + + Copyright (C) 2004-2007 Koichi Sasada + +**********************************************************************/ + +#ifdef THREAD_SYSTEM_DEPENDENT_IMPLEMENTATION + +#include + +#define WIN32_WAIT_TIMEOUT 10 /* 10 ms */ +#undef Sleep + +#define native_thread_yield() Sleep(0) +#define remove_signal_thread_list(th) + +static volatile DWORD ruby_native_thread_key = TLS_OUT_OF_INDEXES; + +static int native_mutex_lock(rb_thread_lock_t *); +static int native_mutex_unlock(rb_thread_lock_t *); +static int native_mutex_trylock(rb_thread_lock_t *); +static void native_mutex_initialize(rb_thread_lock_t *); + +static void native_cond_signal(rb_thread_cond_t *cond); +static void native_cond_broadcast(rb_thread_cond_t *cond); +static void native_cond_wait(rb_thread_cond_t *cond, rb_thread_lock_t *mutex); +static void native_cond_initialize(rb_thread_cond_t *cond); +static void native_cond_destroy(rb_thread_cond_t *cond); + +static rb_thread_t * +ruby_thread_from_native(void) +{ + return TlsGetValue(ruby_native_thread_key); +} + 
+static int +ruby_thread_set_native(rb_thread_t *th) +{ + return TlsSetValue(ruby_native_thread_key, th); +} + +static void +Init_native_thread(void) +{ + rb_thread_t *th = GET_THREAD(); + + ruby_native_thread_key = TlsAlloc(); + ruby_thread_set_native(th); + DuplicateHandle(GetCurrentProcess(), + GetCurrentThread(), + GetCurrentProcess(), + &th->thread_id, 0, FALSE, DUPLICATE_SAME_ACCESS); + + th->native_thread_data.interrupt_event = CreateEvent(0, TRUE, FALSE, 0); + + thread_debug("initial thread (th: %p, thid: %p, event: %p)\n", + th, GET_THREAD()->thread_id, + th->native_thread_data.interrupt_event); +} + +static void +w32_error(void) +{ + LPVOID lpMsgBuf; + FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + GetLastError(), + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPTSTR) & lpMsgBuf, 0, NULL); + rb_bug("%s", (char*)lpMsgBuf); +} + +static void +w32_set_event(HANDLE handle) +{ + if (SetEvent(handle) == 0) { + w32_error(); + } +} + +static void +w32_reset_event(HANDLE handle) +{ + if (ResetEvent(handle) == 0) { + w32_error(); + } +} + +static int +w32_wait_events(HANDLE *events, int count, DWORD timeout, rb_thread_t *th) +{ + HANDLE *targets = events; + HANDLE intr; + DWORD ret; + + thread_debug(" w32_wait_events events:%p, count:%d, timeout:%ld, th:%p\n", + events, count, timeout, th); + if (th && (intr = th->native_thread_data.interrupt_event)) { + w32_reset_event(intr); + if (RUBY_VM_INTERRUPTED(th)) { + w32_set_event(intr); + } + + targets = ALLOCA_N(HANDLE, count + 1); + memcpy(targets, events, sizeof(HANDLE) * count); + + targets[count++] = intr; + thread_debug(" * handle: %p (count: %d, intr)\n", intr, count); + } + + thread_debug(" WaitForMultipleObjects start (count: %d)\n", count); + ret = WaitForMultipleObjects(count, targets, FALSE, timeout); + thread_debug(" WaitForMultipleObjects end (ret: %lu)\n", ret); + + if (ret == WAIT_OBJECT_0 + count - 1 && th) { + errno = EINTR; + } 
+ if (ret == -1 && THREAD_DEBUG) { + int i; + DWORD dmy; + for (i = 0; i < count; i++) { + thread_debug(" * error handle %d - %s\n", i, + GetHandleInformation(targets[i], &dmy) ? "OK" : "NG"); + } + } + return ret; +} + +static void ubf_handle(void *ptr); +#define ubf_select ubf_handle + +int +rb_w32_wait_events_blocking(HANDLE *events, int num, DWORD timeout) +{ + return w32_wait_events(events, num, timeout, GET_THREAD()); +} + +int +rb_w32_wait_events(HANDLE *events, int num, DWORD timeout) +{ + int ret; + + BLOCKING_REGION(ret = rb_w32_wait_events_blocking(events, num, timeout), + ubf_handle, GET_THREAD()); + return ret; +} + +static void +w32_close_handle(HANDLE handle) +{ + if (CloseHandle(handle) == 0) { + w32_error(); + } +} + +static void +w32_resume_thread(HANDLE handle) +{ + if (ResumeThread(handle) == -1) { + w32_error(); + } +} + +#ifdef _MSC_VER +#define HAVE__BEGINTHREADEX 1 +#else +#undef HAVE__BEGINTHREADEX +#endif + +#ifdef HAVE__BEGINTHREADEX +#define start_thread (HANDLE)_beginthreadex +typedef unsigned long (_stdcall *w32_thread_start_func)(void*); +#else +#define start_thread CreateThread +typedef LPTHREAD_START_ROUTINE w32_thread_start_func; +#endif + +static HANDLE +w32_create_thread(DWORD stack_size, w32_thread_start_func func, void *val) +{ + return start_thread(0, stack_size, func, val, CREATE_SUSPENDED, 0); +} + +int +rb_w32_sleep(unsigned long msec) +{ + return w32_wait_events(0, 0, msec, GET_THREAD()); +} + +int WINAPI +rb_w32_Sleep(unsigned long msec) +{ + int ret; + + BLOCKING_REGION(ret = rb_w32_sleep(msec), + ubf_handle, GET_THREAD()); + return ret; +} + +static void +native_sleep(rb_thread_t *th, struct timeval *tv) +{ + DWORD msec; + + if (tv) { + msec = tv->tv_sec * 1000 + tv->tv_usec / 1000; + } + else { + msec = INFINITE; + } + + GVL_UNLOCK_BEGIN(); + { + DWORD ret; + + native_mutex_lock(&th->interrupt_lock); + th->unblock.func = ubf_handle; + th->unblock.arg = th; + native_mutex_unlock(&th->interrupt_lock); + + if 
(RUBY_VM_INTERRUPTED(th)) { + /* interrupted. return immediate */ + } + else { + thread_debug("native_sleep start (%lu)\n", msec); + ret = w32_wait_events(0, 0, msec, th); + thread_debug("native_sleep done (%lu)\n", ret); + } + + native_mutex_lock(&th->interrupt_lock); + th->unblock.func = 0; + th->unblock.arg = 0; + native_mutex_unlock(&th->interrupt_lock); + } + GVL_UNLOCK_END(); +} + +static int +native_mutex_lock(rb_thread_lock_t *lock) +{ +#if USE_WIN32_MUTEX + DWORD result; + while (1) { + thread_debug("native_mutex_lock: %p\n", *lock); + result = w32_wait_events(&*lock, 1, INFINITE, 0); + switch (result) { + case WAIT_OBJECT_0: + /* get mutex object */ + thread_debug("acquire mutex: %p\n", *lock); + return 0; + case WAIT_OBJECT_0 + 1: + /* interrupt */ + errno = EINTR; + thread_debug("acquire mutex interrupted: %p\n", *lock); + return 0; + case WAIT_TIMEOUT: + thread_debug("timeout mutex: %p\n", *lock); + break; + case WAIT_ABANDONED: + rb_bug("win32_mutex_lock: WAIT_ABANDONED"); + break; + default: + rb_bug("win32_mutex_lock: unknown result (%d)", result); + break; + } + } + return 0; +#else + EnterCriticalSection(lock); + return 0; +#endif +} + +static int +native_mutex_unlock(rb_thread_lock_t *lock) +{ +#if USE_WIN32_MUTEX + thread_debug("release mutex: %p\n", *lock); + return ReleaseMutex(*lock); +#else + LeaveCriticalSection(lock); + return 0; +#endif +} + +static int +native_mutex_trylock(rb_thread_lock_t *lock) +{ +#if USE_WIN32_MUTEX + int result; + thread_debug("native_mutex_trylock: %p\n", *lock); + result = w32_wait_events(&*lock, 1, 1, 0); + thread_debug("native_mutex_trylock result: %d\n", result); + switch (result) { + case WAIT_OBJECT_0: + return 0; + case WAIT_TIMEOUT: + return EBUSY; + } + return EINVAL; +#else + return TryEnterCriticalSection(lock) == 0; +#endif +} + +static void +native_mutex_initialize(rb_thread_lock_t *lock) +{ +#if USE_WIN32_MUTEX + *lock = CreateMutex(NULL, FALSE, NULL); + if (*lock == NULL) { + w32_error(); + } + /* 
thread_debug("initialize mutex: %p\n", *lock); */ +#else + InitializeCriticalSection(lock); +#endif +} + +static void +native_mutex_destroy(rb_thread_lock_t *lock) +{ +#if USE_WIN32_MUTEX + w32_close_handle(lock); +#else + DeleteCriticalSection(lock); +#endif +} + +struct cond_event_entry { + struct cond_event_entry* next; + HANDLE event; +}; + +struct rb_thread_cond_struct { + struct cond_event_entry *next; + struct cond_event_entry *last; +}; + +static void +native_cond_signal(rb_thread_cond_t *cond) +{ + /* cond is guarded by mutex */ + struct cond_event_entry *e = cond->next; + + if (e) { + cond->next = e->next; + SetEvent(e->event); + } + else { + rb_bug("native_cond_signal: no pending threads"); + } +} + +static void +native_cond_broadcast(rb_thread_cond_t *cond) +{ + /* cond is guarded by mutex */ + struct cond_event_entry *e = cond->next; + cond->next = 0; + + while (e) { + SetEvent(e->event); + e = e->next; + } +} + +static void +native_cond_wait(rb_thread_cond_t *cond, rb_thread_lock_t *mutex) +{ + DWORD r; + struct cond_event_entry entry; + + entry.next = 0; + entry.event = CreateEvent(0, FALSE, FALSE, 0); + + /* cond is guarded by mutex */ + if (cond->next) { + cond->last->next = &entry; + cond->last = &entry; + } + else { + cond->next = &entry; + cond->last = &entry; + } + + native_mutex_unlock(mutex); + { + r = WaitForSingleObject(entry.event, INFINITE); + if (r != WAIT_OBJECT_0) { + rb_bug("native_cond_wait: WaitForSingleObject returns %lu", r); + } + } + native_mutex_lock(mutex); + + w32_close_handle(entry.event); +} + +static void +native_cond_initialize(rb_thread_cond_t *cond) +{ + cond->next = 0; + cond->last = 0; +} + +static void +native_cond_destroy(rb_thread_cond_t *cond) +{ + /* */ +} + +void +ruby_init_stack(VALUE *addr) +{ +} + +#define CHECK_ERR(expr) \ + {if (!(expr)) {rb_bug("err: %lu - %s", GetLastError(), #expr);}} + +static void +native_thread_init_stack(rb_thread_t *th) +{ + MEMORY_BASIC_INFORMATION mi; + char *base, *end; + DWORD 
size, space; + + CHECK_ERR(VirtualQuery(&mi, &mi, sizeof(mi))); + base = mi.AllocationBase; + end = mi.BaseAddress; + end += mi.RegionSize; + size = end - base; + space = size / 5; + if (space > 1024*1024) space = 1024*1024; + th->machine_stack_start = (VALUE *)end - 1; + th->machine_stack_maxsize = size - space; +} + +static void +native_thread_destroy(rb_thread_t *th) +{ + HANDLE intr = th->native_thread_data.interrupt_event; + native_mutex_destroy(&th->interrupt_lock); + thread_debug("close handle - intr: %p, thid: %p\n", intr, th->thread_id); + th->native_thread_data.interrupt_event = 0; + w32_close_handle(intr); +} + +static unsigned long _stdcall +thread_start_func_1(void *th_ptr) +{ + rb_thread_t *th = th_ptr; + volatile HANDLE thread_id = th->thread_id; + + native_thread_init_stack(th); + th->native_thread_data.interrupt_event = CreateEvent(0, TRUE, FALSE, 0); + + /* run */ + thread_debug("thread created (th: %p, thid: %p, event: %p)\n", th, + th->thread_id, th->native_thread_data.interrupt_event); + + thread_start_func_2(th, th->machine_stack_start, rb_ia64_bsp()); + + w32_close_handle(thread_id); + thread_debug("thread deleted (th: %p)\n", th); + return 0; +} + +static int +native_thread_create(rb_thread_t *th) +{ + size_t stack_size = 4 * 1024; /* 4KB */ + th->thread_id = w32_create_thread(stack_size, thread_start_func_1, th); + + if ((th->thread_id) == 0) { + st_delete_wrap(th->vm->living_threads, th->self); + rb_raise(rb_eThreadError, "can't create Thread (%d)", errno); + } + + w32_resume_thread(th->thread_id); + + if (THREAD_DEBUG) { + Sleep(0); + thread_debug("create: (th: %p, thid: %p, intr: %p), stack size: %d\n", + th, th->thread_id, + th->native_thread_data.interrupt_event, stack_size); + } + return 0; +} + +static void +native_thread_join(HANDLE th) +{ + w32_wait_events(&th, 1, 0, 0); +} + +#if USE_NATIVE_THREAD_PRIORITY + +static void +native_thread_apply_priority(rb_thread_t *th) +{ + int priority = th->priority; + if (th->priority > 0) { + 
priority = THREAD_PRIORITY_ABOVE_NORMAL; + } + else if (th->priority < 0) { + priority = THREAD_PRIORITY_BELOW_NORMAL; + } + else { + priority = THREAD_PRIORITY_NORMAL; + } + + SetThreadPriority(th->thread_id, priority); +} + +#endif /* USE_NATIVE_THREAD_PRIORITY */ + +static void +ubf_handle(void *ptr) +{ + typedef BOOL (WINAPI *cancel_io_func_t)(HANDLE); + rb_thread_t *th = (rb_thread_t *)ptr; + thread_debug("ubf_handle: %p\n", th); + + w32_set_event(th->native_thread_data.interrupt_event); +} + +static HANDLE timer_thread_id = 0; +static HANDLE timer_thread_lock; + +static unsigned long _stdcall +timer_thread_func(void *dummy) +{ + thread_debug("timer_thread\n"); + while (WaitForSingleObject(timer_thread_lock, WIN32_WAIT_TIMEOUT) == + WAIT_TIMEOUT) { + timer_thread_function(dummy); + } + thread_debug("timer killed\n"); + return 0; +} + +static void +rb_thread_create_timer_thread(void) +{ + if (timer_thread_id == 0) { + if (!timer_thread_lock) { + timer_thread_lock = CreateEvent(0, TRUE, FALSE, 0); + } + timer_thread_id = w32_create_thread(1024 + (THREAD_DEBUG ? 
BUFSIZ : 0), + timer_thread_func, 0); + w32_resume_thread(timer_thread_id); + } +} + +static int +native_stop_timer_thread(void) +{ + int stopped = --system_working <= 0; + if (stopped) { + CloseHandle(timer_thread_lock); + timer_thread_lock = 0; + } + return stopped; +} + +#endif /* THREAD_SYSTEM_DEPENDENT_IMPLEMENTATION */ diff --git a/thread_win32.h b/thread_win32.h new file mode 100644 index 0000000..c056eb7 --- /dev/null +++ b/thread_win32.h @@ -0,0 +1,33 @@ +/********************************************************************** + + thread_win32.h - + + $Author: usa $ + + Copyright (C) 2004-2007 Koichi Sasada + +**********************************************************************/ + +/* interface */ +#ifndef RUBY_THREAD_WIN32_H +#define RUBY_THREAD_WIN32_H + +#include + +# ifdef __CYGWIN__ +# undef _WIN32 +# endif + +WINBASEAPI BOOL WINAPI +TryEnterCriticalSection(IN OUT LPCRITICAL_SECTION lpCriticalSection); + +typedef HANDLE rb_thread_id_t; +typedef CRITICAL_SECTION rb_thread_lock_t; +typedef struct rb_thread_cond_struct rb_thread_cond_t; + +typedef struct native_thread_data_struct { + HANDLE interrupt_event; +} native_thread_data_t; + +#endif /* RUBY_THREAD_WIN32_H */ + diff --git a/time.c b/time.c new file mode 100644 index 0000000..d484fac --- /dev/null +++ b/time.c @@ -0,0 +1,2469 @@ +/********************************************************************** + + time.c - + + $Author: yugui $ + created at: Tue Dec 28 14:31:59 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby/ruby.h" +#include +#include +#include +#include "ruby/encoding.h" + +#ifdef HAVE_UNISTD_H +#include +#endif + +#include + +VALUE rb_cTime; +static VALUE time_utc_offset _((VALUE)); + +static ID id_divmod, id_mul, id_submicro; + +struct time_object { + struct timespec ts; + struct tm tm; + int gmt; + int tm_got; +}; + +#define GetTimeval(obj, tobj) \ + Data_Get_Struct(obj, struct 
time_object, tobj) + +static void +time_free(void *tobj) +{ + if (tobj) xfree(tobj); +} + +static VALUE +time_s_alloc(VALUE klass) +{ + VALUE obj; + struct time_object *tobj; + + obj = Data_Make_Struct(klass, struct time_object, 0, time_free, tobj); + tobj->tm_got=0; + tobj->ts.tv_sec = 0; + tobj->ts.tv_nsec = 0; + + return obj; +} + +static void +time_modify(VALUE time) +{ + rb_check_frozen(time); + if (!OBJ_UNTRUSTED(time) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't modify Time"); +} + +/* + * Document-method: now + * + * Synonym for Time.new. Returns a +Time+ object + * initialized to the current system time. + */ + +/* + * call-seq: + * Time.new -> time + * + * Returns a Time object initialized to the current system + * time. Note: The object created will be created using the + * resolution available on your system clock, and so may include + * fractional seconds. + * + * a = Time.new #=> 2007-11-19 07:50:02 -0600 + * b = Time.new #=> 2007-11-19 07:50:02 -0600 + * a == b #=> false + * "%.6f" % a.to_f #=> "1195480202.282373" + * "%.6f" % b.to_f #=> "1195480202.283415" + * + */ + +static VALUE +time_init(VALUE time) +{ + struct time_object *tobj; + + time_modify(time); + GetTimeval(time, tobj); + tobj->tm_got=0; + tobj->ts.tv_sec = 0; + tobj->ts.tv_nsec = 0; +#ifdef HAVE_CLOCK_GETTIME + if (clock_gettime(CLOCK_REALTIME, &tobj->ts) == -1) { + rb_sys_fail("clock_gettime"); + } +#else + { + struct timeval tv; + if (gettimeofday(&tv, 0) < 0) { + rb_sys_fail("gettimeofday"); + } + tobj->ts.tv_sec = tv.tv_sec; + tobj->ts.tv_nsec = tv.tv_usec * 1000; + } +#endif + + return time; +} + +#define NDIV(x,y) (-(-((x)+1)/(y))-1) +#define NMOD(x,y) ((y)-(-((x)+1)%(y))-1) + +static void +time_overflow_p(time_t *secp, long *nsecp) +{ + time_t tmp, sec = *secp; + long nsec = *nsecp; + + if (nsec >= 1000000000) { /* nsec positive overflow */ + tmp = sec + nsec / 1000000000; + nsec %= 1000000000; + if (sec > 0 && tmp < 0) { + rb_raise(rb_eRangeError, "out 
of Time range"); + } + sec = tmp; + } + if (nsec < 0) { /* nsec negative overflow */ + tmp = sec + NDIV(nsec,1000000000); /* negative div */ + nsec = NMOD(nsec,1000000000); /* negative mod */ + if (sec < 0 && tmp > 0) { + rb_raise(rb_eRangeError, "out of Time range"); + } + sec = tmp; + } +#ifndef NEGATIVE_TIME_T + if (sec < 0) + rb_raise(rb_eArgError, "time must be positive"); +#endif + *secp = sec; + *nsecp = nsec; +} + +static VALUE +time_new_internal(VALUE klass, time_t sec, long nsec) +{ + VALUE time = time_s_alloc(klass); + struct time_object *tobj; + + GetTimeval(time, tobj); + time_overflow_p(&sec, &nsec); + tobj->ts.tv_sec = sec; + tobj->ts.tv_nsec = nsec; + + return time; +} + +VALUE +rb_time_new(time_t sec, long usec) +{ + return time_new_internal(rb_cTime, sec, usec * 1000); +} + +VALUE +rb_time_nano_new(time_t sec, long nsec) +{ + return time_new_internal(rb_cTime, sec, nsec); +} + +static struct timespec +time_timespec(VALUE num, int interval) +{ + struct timespec t; + const char *tstr = interval ? 
"time interval" : "time"; + VALUE i, f, ary; + +#ifndef NEGATIVE_TIME_T + interval = 1; +#endif + + switch (TYPE(num)) { + case T_FIXNUM: + t.tv_sec = FIX2LONG(num); + if (interval && t.tv_sec < 0) + rb_raise(rb_eArgError, "%s must be positive", tstr); + t.tv_nsec = 0; + break; + + case T_FLOAT: + if (interval && RFLOAT_VALUE(num) < 0.0) + rb_raise(rb_eArgError, "%s must be positive", tstr); + else { + double f, d; + + d = modf(RFLOAT_VALUE(num), &f); + if (d < 0) { + d += 1; + f -= 1; + } + t.tv_sec = (time_t)f; + if (f != t.tv_sec) { + rb_raise(rb_eRangeError, "%f out of Time range", RFLOAT_VALUE(num)); + } + t.tv_nsec = (long)(d*1e9+0.5); + } + break; + + case T_BIGNUM: + t.tv_sec = NUM2LONG(num); + if (interval && t.tv_sec < 0) + rb_raise(rb_eArgError, "%s must be positive", tstr); + t.tv_nsec = 0; + break; + + default: + if (rb_respond_to(num, id_divmod)) { + ary = rb_check_array_type(rb_funcall(num, id_divmod, 1, INT2FIX(1))); + if (NIL_P(ary)) { + goto typeerror; + } + i = rb_ary_entry(ary, 0); + f = rb_ary_entry(ary, 1); + t.tv_sec = NUM2LONG(i); + if (interval && t.tv_sec < 0) + rb_raise(rb_eArgError, "%s must be positive", tstr); + f = rb_funcall(f, id_mul, 1, INT2FIX(1000000000)); + t.tv_nsec = NUM2LONG(f); + } + else { +typeerror: + rb_raise(rb_eTypeError, "can't convert %s into %s", + rb_obj_classname(num), tstr); + } + break; + } + return t; +} + +static struct timeval +time_timeval(VALUE num, int interval) +{ + struct timespec ts; + struct timeval tv; + + ts = time_timespec(num, interval); + tv.tv_sec = ts.tv_sec; + tv.tv_usec = ts.tv_nsec / 1000; + + return tv; +} + +struct timeval +rb_time_interval(VALUE num) +{ + return time_timeval(num, Qtrue); +} + +struct timeval +rb_time_timeval(VALUE time) +{ + struct time_object *tobj; + struct timeval t; + + if (TYPE(time) == T_DATA && RDATA(time)->dfree == time_free) { + GetTimeval(time, tobj); + t.tv_sec = tobj->ts.tv_sec; + t.tv_usec = tobj->ts.tv_nsec / 1000; + return t; + } + return time_timeval(time, 
Qfalse); +} + +struct timespec +rb_time_timespec(VALUE time) +{ + struct time_object *tobj; + struct timespec t; + + if (TYPE(time) == T_DATA && RDATA(time)->dfree == time_free) { + GetTimeval(time, tobj); + t = tobj->ts; + return t; + } + return time_timespec(time, Qfalse); +} + +/* + * call-seq: + * Time.at(time) => time + * Time.at(seconds_with_frac) => time + * Time.at(seconds, microseconds_with_frac) => time + * + * Creates a new time object with the value given by time, + * the given number of seconds_with_frac, or + * seconds and microseconds_with_frac from the Epoch. + * seconds_with_frac and microseconds_with_frac + * can be Integer, Float, Rational, or other Numeric. + * non-portable feature allows the offset to be negative on some systems. + * + * Time.at(0) #=> 1969-12-31 18:00:00 -0600 + * Time.at(Time.at(0)) #=> 1969-12-31 18:00:00 -0600 + * Time.at(946702800) #=> 1999-12-31 23:00:00 -0600 + * Time.at(-284061600) #=> 1960-12-31 00:00:00 -0600 + * Time.at(946684800.2).usec #=> 200000 + * Time.at(946684800, 123456.789).nsec #=> 123456789 + */ + +static VALUE +time_s_at(int argc, VALUE *argv, VALUE klass) +{ + struct timespec ts; + VALUE time, t; + + if (rb_scan_args(argc, argv, "11", &time, &t) == 2) { + ts.tv_sec = NUM2LONG(time); + ts.tv_nsec = NUM2LONG(rb_funcall(t, id_mul, 1, INT2FIX(1000))); + } + else { + ts = rb_time_timespec(time); + } + t = time_new_internal(klass, ts.tv_sec, ts.tv_nsec); + if (TYPE(time) == T_DATA && RDATA(time)->dfree == time_free) { + struct time_object *tobj, *tobj2; + + GetTimeval(time, tobj); + GetTimeval(t, tobj2); + tobj2->gmt = tobj->gmt; + } + return t; +} + +static const char months[][4] = { + "jan", "feb", "mar", "apr", "may", "jun", + "jul", "aug", "sep", "oct", "nov", "dec", +}; + +static long +obj2long(VALUE obj) +{ + if (TYPE(obj) == T_STRING) { + obj = rb_str_to_inum(obj, 10, Qfalse); + } + + return NUM2LONG(obj); +} + +static long +obj2nsec(VALUE obj, long *nsec) +{ + struct timespec ts; + + if (TYPE(obj) == 
T_STRING) { + obj = rb_str_to_inum(obj, 10, Qfalse); + *nsec = 0; + return NUM2LONG(obj); + } + + ts = time_timespec(obj, 1); + *nsec = ts.tv_nsec; + return ts.tv_sec; +} + +static long +obj2long1000(VALUE obj) +{ + if (TYPE(obj) == T_STRING) { + obj = rb_str_to_inum(obj, 10, Qfalse); + return NUM2LONG(obj) * 1000; + } + + return NUM2LONG(rb_funcall(obj, id_mul, 1, INT2FIX(1000))); +} + +static void +time_arg(int argc, VALUE *argv, struct tm *tm, long *nsec) +{ + VALUE v[8]; + int i; + long year; + + MEMZERO(tm, struct tm, 1); + *nsec = 0; + if (argc == 10) { + v[0] = argv[5]; + v[1] = argv[4]; + v[2] = argv[3]; + v[3] = argv[2]; + v[4] = argv[1]; + v[5] = argv[0]; + v[6] = Qnil; + tm->tm_isdst = RTEST(argv[8]) ? 1 : 0; + } + else { + rb_scan_args(argc, argv, "17", &v[0],&v[1],&v[2],&v[3],&v[4],&v[5],&v[6],&v[7]); + /* v[6] may be usec or zone (parsedate) */ + /* v[7] is wday (parsedate; ignored) */ + tm->tm_wday = -1; + tm->tm_isdst = -1; + } + + year = obj2long(v[0]); + + if (0 <= year && year < 39) { + rb_warning("2 digits year is used: %ld", year); + year += 100; + } + else if (69 <= year && year < 139) { + rb_warning("2 or 3 digits year is used: %ld", year); + } + else { + year -= 1900; + } + + tm->tm_year = year; + + if (NIL_P(v[1])) { + tm->tm_mon = 0; + } + else { + VALUE s = rb_check_string_type(v[1]); + if (!NIL_P(s)) { + tm->tm_mon = -1; + for (i=0; i<12; i++) { + if (RSTRING_LEN(s) == 3 && + STRCASECMP(months[i], RSTRING_PTR(s)) == 0) { + tm->tm_mon = i; + break; + } + } + if (tm->tm_mon == -1) { + char c = RSTRING_PTR(s)[0]; + + if ('0' <= c && c <= '9') { + tm->tm_mon = obj2long(s)-1; + } + } + } + else { + tm->tm_mon = obj2long(v[1])-1; + } + } + if (NIL_P(v[2])) { + tm->tm_mday = 1; + } + else { + tm->tm_mday = obj2long(v[2]); + } + tm->tm_hour = NIL_P(v[3])?0:obj2long(v[3]); + tm->tm_min = NIL_P(v[4])?0:obj2long(v[4]); + if (!NIL_P(v[6]) && argc == 7) { + tm->tm_sec = NIL_P(v[5])?0:obj2long(v[5]); + *nsec = obj2long1000(v[6]); + } + else { + /* 
when argc == 8, v[6] is timezone, but ignored */ + tm->tm_sec = NIL_P(v[5])?0:obj2nsec(v[5], nsec); + } + + /* value validation */ + if ( + tm->tm_year != year || +#ifndef NEGATIVE_TIME_T + tm->tm_year < 69 || +#endif + tm->tm_mon < 0 || tm->tm_mon > 11 + || tm->tm_mday < 1 || tm->tm_mday > 31 + || tm->tm_hour < 0 || tm->tm_hour > 24 + || (tm->tm_hour == 24 && (tm->tm_min > 0 || tm->tm_sec > 0)) + || tm->tm_min < 0 || tm->tm_min > 59 + || tm->tm_sec < 0 || tm->tm_sec > 60) + rb_raise(rb_eArgError, "argument out of range"); +} + +static VALUE time_gmtime(VALUE); +static VALUE time_localtime(VALUE); +static VALUE time_get_tm(VALUE, int); + +#ifdef HAVE_GMTIME_R +#define IF_HAVE_GMTIME_R(x) x +#define ASCTIME(tm, buf) asctime_r(tm, buf) +#define GMTIME(tm, result) gmtime_r(tm, &result) +#define LOCALTIME(tm, result) (tzset(),localtime_r(tm, &result)) +#else +#define IF_HAVE_GMTIME_R(x) /* nothing */ +#define ASCTIME(tm, buf) asctime(tm) +#define GMTIME(tm, result) gmtime(tm) +#define LOCALTIME(tm, result) localtime(tm) +#endif + +static int +leap_year_p(long y) +{ + return ((y % 4 == 0) && (y % 100 != 0)) || (y % 400 == 0); +} + +#define DIV(n,d) ((n)<0 ? 
NDIV((n),(d)) : (n)/(d)) + +static time_t +timegm_noleapsecond(struct tm *tm) +{ + static const int common_year_yday_offset[] = { + -1, + -1 + 31, + -1 + 31 + 28, + -1 + 31 + 28 + 31, + -1 + 31 + 28 + 31 + 30, + -1 + 31 + 28 + 31 + 30 + 31, + -1 + 31 + 28 + 31 + 30 + 31 + 30, + -1 + 31 + 28 + 31 + 30 + 31 + 30 + 31, + -1 + 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31, + -1 + 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30, + -1 + 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31, + -1 + 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30 + /* 1 2 3 4 5 6 7 8 9 10 11 */ + }; + static const int leap_year_yday_offset[] = { + -1, + -1 + 31, + -1 + 31 + 29, + -1 + 31 + 29 + 31, + -1 + 31 + 29 + 31 + 30, + -1 + 31 + 29 + 31 + 30 + 31, + -1 + 31 + 29 + 31 + 30 + 31 + 30, + -1 + 31 + 29 + 31 + 30 + 31 + 30 + 31, + -1 + 31 + 29 + 31 + 30 + 31 + 30 + 31 + 31, + -1 + 31 + 29 + 31 + 30 + 31 + 30 + 31 + 31 + 30, + -1 + 31 + 29 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31, + -1 + 31 + 29 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30 + /* 1 2 3 4 5 6 7 8 9 10 11 */ + }; + + long tm_year = tm->tm_year; + int tm_yday = tm->tm_mday; + if (leap_year_p(tm_year + 1900)) + tm_yday += leap_year_yday_offset[tm->tm_mon]; + else + tm_yday += common_year_yday_offset[tm->tm_mon]; + + /* + * `Seconds Since the Epoch' in SUSv3: + * tm_sec + tm_min*60 + tm_hour*3600 + tm_yday*86400 + + * (tm_year-70)*31536000 + ((tm_year-69)/4)*86400 - + * ((tm_year-1)/100)*86400 + ((tm_year+299)/400)*86400 + */ + return tm->tm_sec + tm->tm_min*60 + tm->tm_hour*3600 + + (time_t)(tm_yday + + (tm_year-70)*365 + + DIV(tm_year-69,4) - + DIV(tm_year-1,100) + + DIV(tm_year+299,400))*86400; +} + +static int +tmcmp(struct tm *a, struct tm *b) +{ + if (a->tm_year != b->tm_year) + return a->tm_year < b->tm_year ? -1 : 1; + else if (a->tm_mon != b->tm_mon) + return a->tm_mon < b->tm_mon ? -1 : 1; + else if (a->tm_mday != b->tm_mday) + return a->tm_mday < b->tm_mday ? 
-1 : 1; + else if (a->tm_hour != b->tm_hour) + return a->tm_hour < b->tm_hour ? -1 : 1; + else if (a->tm_min != b->tm_min) + return a->tm_min < b->tm_min ? -1 : 1; + else if (a->tm_sec != b->tm_sec) + return a->tm_sec < b->tm_sec ? -1 : 1; + else + return 0; +} + +#if SIZEOF_TIME_T == SIZEOF_LONG +typedef unsigned long unsigned_time_t; +#elif SIZEOF_TIME_T == SIZEOF_INT +typedef unsigned int unsigned_time_t; +#elif SIZEOF_TIME_T == SIZEOF_LONG_LONG +typedef unsigned LONG_LONG unsigned_time_t; +#else +# error cannot find integer type which size is same as time_t. +#endif + +static time_t +search_time_t(struct tm *tptr, int utc_p) +{ + time_t guess, guess_lo, guess_hi; + struct tm *tm, tm_lo, tm_hi; + int d, have_guess; + int find_dst; + IF_HAVE_GMTIME_R(struct tm result); +#define GUESS(p) (utc_p ? GMTIME(p, result) : LOCALTIME(p, result)) + + find_dst = 0 < tptr->tm_isdst; + +#ifdef NEGATIVE_TIME_T + guess_lo = (time_t)~((unsigned_time_t)~(time_t)0 >> 1); +#else + guess_lo = 0; +#endif + guess_hi = ((time_t)-1) < ((time_t)0) ? + (time_t)((unsigned_time_t)~(time_t)0 >> 1) : + ~(time_t)0; + + guess = timegm_noleapsecond(tptr); + tm = GUESS(&guess); + if (tm) { + d = tmcmp(tptr, tm); + if (d == 0) return guess; + if (d < 0) { + guess_hi = guess; + guess -= 24 * 60 * 60; + } + else { + guess_lo = guess; + guess += 24 * 60 * 60; + } + if (guess_lo < guess && guess < guess_hi && (tm = GUESS(&guess)) != NULL) { + d = tmcmp(tptr, tm); + if (d == 0) return guess; + if (d < 0) + guess_hi = guess; + else + guess_lo = guess; + } + } + + tm = GUESS(&guess_lo); + if (!tm) goto error; + d = tmcmp(tptr, tm); + if (d < 0) goto out_of_range; + if (d == 0) return guess_lo; + tm_lo = *tm; + + tm = GUESS(&guess_hi); + if (!tm) goto error; + d = tmcmp(tptr, tm); + if (d > 0) goto out_of_range; + if (d == 0) return guess_hi; + tm_hi = *tm; + + have_guess = 0; + + while (guess_lo + 1 < guess_hi) { + /* there is a gap between guess_lo and guess_hi. 
*/ + unsigned long range = 0; + if (!have_guess) { + int a, b; + /* + Try precious guess by a linear interpolation at first. + `a' and `b' is a coefficient of guess_lo and guess_hi as: + + guess = (guess_lo * a + guess_hi * b) / (a + b) + + However this causes overflow in most cases, following assignment + is used instead: + + guess = guess_lo / d * a + (guess_lo % d) * a / d + + guess_hi / d * b + (guess_hi % d) * b / d + where d = a + b + + To avoid overflow in this assignment, `d' is restricted to less than + sqrt(2**31). By this restriction and other reasons, the guess is + not accurate and some error is expected. `range' approximates + the maximum error. + + When these parameters are not suitable, i.e. guess is not within + guess_lo and guess_hi, simple guess by binary search is used. + */ + range = 366 * 24 * 60 * 60; + a = (tm_hi.tm_year - tptr->tm_year); + b = (tptr->tm_year - tm_lo.tm_year); + /* 46000 is selected as `some big number less than sqrt(2**31)'. */ + if (a + b <= 46000 / 12) { + range = 31 * 24 * 60 * 60; + a *= 12; + b *= 12; + a += tm_hi.tm_mon - tptr->tm_mon; + b += tptr->tm_mon - tm_lo.tm_mon; + if (a + b <= 46000 / 31) { + range = 24 * 60 * 60; + a *= 31; + b *= 31; + a += tm_hi.tm_mday - tptr->tm_mday; + b += tptr->tm_mday - tm_lo.tm_mday; + if (a + b <= 46000 / 24) { + range = 60 * 60; + a *= 24; + b *= 24; + a += tm_hi.tm_hour - tptr->tm_hour; + b += tptr->tm_hour - tm_lo.tm_hour; + if (a + b <= 46000 / 60) { + range = 60; + a *= 60; + b *= 60; + a += tm_hi.tm_min - tptr->tm_min; + b += tptr->tm_min - tm_lo.tm_min; + if (a + b <= 46000 / 60) { + range = 1; + a *= 60; + b *= 60; + a += tm_hi.tm_sec - tptr->tm_sec; + b += tptr->tm_sec - tm_lo.tm_sec; + } + } + } + } + } + if (a <= 0) a = 1; + if (b <= 0) b = 1; + d = a + b; + /* + Although `/' and `%' may produce unexpected result with negative + argument, it doesn't cause serious problem because there is a + fail safe. 
+ */ + guess = guess_lo / d * a + (guess_lo % d) * a / d + + guess_hi / d * b + (guess_hi % d) * b / d; + have_guess = 1; + } + + if (guess <= guess_lo || guess_hi <= guess) { + /* Precious guess is invalid. try binary search. */ + guess = guess_lo / 2 + guess_hi / 2; + if (guess <= guess_lo) + guess = guess_lo + 1; + else if (guess >= guess_hi) + guess = guess_hi - 1; + range = 0; + } + + tm = GUESS(&guess); + if (!tm) goto error; + have_guess = 0; + + d = tmcmp(tptr, tm); + if (d < 0) { + guess_hi = guess; + tm_hi = *tm; + if (range) { + guess = guess - range; + range = 0; + if (guess_lo < guess && guess < guess_hi) + have_guess = 1; + } + } + else if (d > 0) { + guess_lo = guess; + tm_lo = *tm; + if (range) { + guess = guess + range; + range = 0; + if (guess_lo < guess && guess < guess_hi) + have_guess = 1; + } + } + else { + if (!utc_p) { + /* If localtime is nonmonotonic, another result may exist. */ + time_t guess2; + if (find_dst) { + guess2 = guess - 2 * 60 * 60; + tm = LOCALTIME(&guess2, result); + if (tm) { + if (tptr->tm_hour != (tm->tm_hour + 2) % 24 || + tptr->tm_min != tm->tm_min || + tptr->tm_sec != tm->tm_sec + ) { + guess2 -= (tm->tm_hour - tptr->tm_hour) * 60 * 60 + + (tm->tm_min - tptr->tm_min) * 60 + + (tm->tm_sec - tptr->tm_sec); + if (tptr->tm_mday != tm->tm_mday) + guess2 += 24 * 60 * 60; + if (guess != guess2) { + tm = LOCALTIME(&guess2, result); + if (tmcmp(tptr, tm) == 0) { + if (guess < guess2) + return guess; + else + return guess2; + } + } + } + } + } + else { + guess2 = guess + 2 * 60 * 60; + tm = LOCALTIME(&guess2, result); + if (tm) { + if ((tptr->tm_hour + 2) % 24 != tm->tm_hour || + tptr->tm_min != tm->tm_min || + tptr->tm_sec != tm->tm_sec + ) { + guess2 -= (tm->tm_hour - tptr->tm_hour) * 60 * 60 + + (tm->tm_min - tptr->tm_min) * 60 + + (tm->tm_sec - tptr->tm_sec); + if (tptr->tm_mday != tm->tm_mday) + guess2 -= 24 * 60 * 60; + if (guess != guess2) { + tm = LOCALTIME(&guess2, result); + if (tmcmp(tptr, tm) == 0) { + if (guess < 
guess2) + return guess2; + else + return guess; + } + } + } + } + } + } + return guess; + } + } + /* Given argument has no corresponding time_t. Let's outerpolation. */ + if (tm_lo.tm_year == tptr->tm_year && tm_lo.tm_mon == tptr->tm_mon) { + return guess_lo + + (tptr->tm_mday - tm_lo.tm_mday) * 24 * 60 * 60 + + (tptr->tm_hour - tm_lo.tm_hour) * 60 * 60 + + (tptr->tm_min - tm_lo.tm_min) * 60 + + (tptr->tm_sec - tm_lo.tm_sec); + } + else if (tm_hi.tm_year == tptr->tm_year && tm_hi.tm_mon == tptr->tm_mon) { + return guess_hi + + (tptr->tm_mday - tm_hi.tm_mday) * 24 * 60 * 60 + + (tptr->tm_hour - tm_hi.tm_hour) * 60 * 60 + + (tptr->tm_min - tm_hi.tm_min) * 60 + + (tptr->tm_sec - tm_hi.tm_sec); + } + + out_of_range: + rb_raise(rb_eArgError, "time out of range"); + + error: + rb_raise(rb_eArgError, "gmtime/localtime error"); + return 0; /* not reached */ +} + +static time_t +make_time_t(struct tm *tptr, int utc_p) +{ + time_t t; +#ifdef NEGATIVE_TIME_T + struct tm *tmp; +#endif + struct tm buf; + IF_HAVE_GMTIME_R(struct tm result); + + buf = *tptr; + if (utc_p) { +#if defined(HAVE_TIMEGM) + if ((t = timegm(&buf)) != -1) + return t; +#ifdef NEGATIVE_TIME_T + if ((tmp = GMTIME(&t, result)) && + tptr->tm_year == tmp->tm_year && + tptr->tm_mon == tmp->tm_mon && + tptr->tm_mday == tmp->tm_mday && + tptr->tm_hour == tmp->tm_hour && + tptr->tm_min == tmp->tm_min && + tptr->tm_sec == tmp->tm_sec + ) + return t; +#endif +#endif + return search_time_t(&buf, utc_p); + } + else { +#if defined(HAVE_MKTIME) + if ((t = mktime(&buf)) != -1) + return t; +#ifdef NEGATIVE_TIME_T + if ((tmp = LOCALTIME(&t, result)) && + tptr->tm_year == tmp->tm_year && + tptr->tm_mon == tmp->tm_mon && + tptr->tm_mday == tmp->tm_mday && + tptr->tm_hour == tmp->tm_hour && + tptr->tm_min == tmp->tm_min && + tptr->tm_sec == tmp->tm_sec + ) + return t; +#endif +#endif + return search_time_t(&buf, utc_p); + } +} + +static VALUE +time_utc_or_local(int argc, VALUE *argv, int utc_p, VALUE klass) +{ + struct tm tm; 
+ VALUE time; + long nsec; + + time_arg(argc, argv, &tm, &nsec); + time = time_new_internal(klass, make_time_t(&tm, utc_p), nsec); + if (utc_p) return time_gmtime(time); + return time_localtime(time); +} + +/* + * call-seq: + * Time.utc(year) => time + * Time.utc(year, month) => time + * Time.utc(year, month, day) => time + * Time.utc(year, month, day, hour) => time + * Time.utc(year, month, day, hour, min) => time + * Time.utc(year, month, day, hour, min, sec_with_frac) => time + * Time.utc(year, month, day, hour, min, sec, usec_with_frac) => time + * Time.utc(sec, min, hour, day, month, year, wday, yday, isdst, tz) => time + * Time.gm(year) => time + * Time.gm(year, month) => time + * Time.gm(year, month, day) => time + * Time.gm(year, month, day, hour) => time + * Time.gm(year, month, day, hour, min) => time + * Time.gm(year, month, day, hour, min, sec_with_frac) => time + * Time.gm(year, month, day, hour, min, sec, usec_with_frac) => time + * Time.gm(sec, min, hour, day, month, year, wday, yday, isdst, tz) => time + * + * Creates a time based on given values, interpreted as UTC (GMT). The + * year must be specified. Other values default to the minimum value + * for that field (and may be nil or omitted). Months may + * be specified by numbers from 1 to 12, or by the three-letter English + * month names. Hours are specified on a 24-hour clock (0..23). Raises + * an ArgumentError if any values are out of range. Will + * also accept ten arguments in the order output by + * Time#to_a. + * sec_with_frac and usec_with_frac can have a fractional part. 
+ * + * Time.utc(2000,"jan",1,20,15,1) #=> 2000-01-01 20:15:01 UTC + * Time.gm(2000,"jan",1,20,15,1) #=> 2000-01-01 20:15:01 UTC + */ +static VALUE +time_s_mkutc(int argc, VALUE *argv, VALUE klass) +{ + return time_utc_or_local(argc, argv, Qtrue, klass); +} + +/* + * call-seq: + * Time.local(year) => time + * Time.local(year, month) => time + * Time.local(year, month, day) => time + * Time.local(year, month, day, hour) => time + * Time.local(year, month, day, hour, min) => time + * Time.local(year, month, day, hour, min, sec_with_frac) => time + * Time.local(year, month, day, hour, min, sec, usec_with_frac) => time + * Time.local(sec, min, hour, day, month, year, wday, yday, isdst, tz) => time + * Time.mktime(year) => time + * Time.mktime(year, month) => time + * Time.mktime(year, month, day) => time + * Time.mktime(year, month, day, hour) => time + * Time.mktime(year, month, day, hour, min) => time + * Time.mktime(year, month, day, hour, min, sec_with_frac) => time + * Time.mktime(year, month, day, hour, min, sec, usec_with_frac) => time + * Time.mktime(sec, min, hour, day, month, year, wday, yday, isdst, tz) => time + * + * Same as Time::gm, but interprets the values in the + * local time zone. + * + * Time.local(2000,"jan",1,20,15,1) #=> 2000-01-01 20:15:01 -0600 + */ + +static VALUE +time_s_mktime(int argc, VALUE *argv, VALUE klass) +{ + return time_utc_or_local(argc, argv, Qfalse, klass); +} + +/* + * call-seq: + * time.to_i => int + * time.tv_sec => int + * + * Returns the value of time as an integer number of seconds + * since the Epoch. + * + * t = Time.now + * "%10.5f" % t.to_f #=> "1049896564.17839" + * t.to_i #=> 1049896564 + */ + +static VALUE +time_to_i(VALUE time) +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + return LONG2NUM(tobj->ts.tv_sec); +} + +/* + * call-seq: + * time.to_f => float + * + * Returns the value of time as a floating point number of + * seconds since the Epoch. 
+ * + * t = Time.now + * "%10.5f" % t.to_f #=> "1049896564.13654" + * t.to_i #=> 1049896564 + * + * Note that IEEE 754 double is not accurate enough to represent + * nanoseconds from the Epoch. + */ + +static VALUE +time_to_f(VALUE time) +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + return DBL2NUM((double)tobj->ts.tv_sec+(double)tobj->ts.tv_nsec/1e9); +} + +/* + * call-seq: + * time.usec => int + * time.tv_usec => int + * + * Returns just the number of microseconds for time. + * + * t = Time.now #=> 2007-11-19 08:03:26 -0600 + * "%10.6f" % t.to_f #=> "1195481006.775195" + * t.usec #=> 775195 + */ + +static VALUE +time_usec(VALUE time) +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + return LONG2NUM(tobj->ts.tv_nsec/1000); +} + +/* + * call-seq: + * time.nsec => int + * time.tv_nsec => int + * + * Returns just the number of nanoseconds for time. + * + * t = Time.now #=> 2007-11-17 15:18:03 +0900 + * "%10.9f" % t.to_f #=> "1195280283.536151409" + * t.nsec #=> 536151406 + * + * The lowest digit of to_f and nsec is different because + * IEEE 754 double is not accurate enough to represent + * nanoseconds from the Epoch. + * The accurate value is returned by nsec. + */ + +static VALUE +time_nsec(VALUE time) +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + return LONG2NUM(tobj->ts.tv_nsec); +} + +/* + * call-seq: + * time <=> other_time => -1, 0, +1 + * + * Comparison---Compares time with other_time. 
+ * + * t = Time.now #=> 2007-11-19 08:12:12 -0600 + * t2 = t + 2592000 #=> 2007-12-19 08:12:12 -0600 + * t <=> t2 #=> -1 + * t2 <=> t #=> 1 + * + * t = Time.now #=> 2007-11-19 08:13:38 -0600 + * t2 = t + 0.1 #=> 2007-11-19 08:13:38 -0600 + * t.nsec #=> 98222999 + * t2.nsec #=> 198222999 + * t <=> t2 #=> -1 + * t2 <=> t #=> 1 + * t <=> t #=> 0 + */ + +static VALUE +time_cmp(VALUE time1, VALUE time2) +{ + struct time_object *tobj1, *tobj2; + + GetTimeval(time1, tobj1); + if (TYPE(time2) == T_DATA && RDATA(time2)->dfree == time_free) { + GetTimeval(time2, tobj2); + if (tobj1->ts.tv_sec == tobj2->ts.tv_sec) { + if (tobj1->ts.tv_nsec == tobj2->ts.tv_nsec) return INT2FIX(0); + if (tobj1->ts.tv_nsec > tobj2->ts.tv_nsec) return INT2FIX(1); + return INT2FIX(-1); + } + if (tobj1->ts.tv_sec > tobj2->ts.tv_sec) return INT2FIX(1); + return INT2FIX(-1); + } + else { + VALUE cmp; + int n; + + cmp = rb_funcall(time2, rb_intern("<=>"), 1, time1); + if (NIL_P(cmp)) return Qnil; + + n = -rb_cmpint(cmp, time1, time2); + if (n == 0) return INT2FIX(0); + if (n > 0) return INT2FIX(1); + return INT2FIX(-1); + } +} + +/* + * call-seq: + * time.eql?(other_time) + * + * Return true if time and other_time are + * both Time objects with the same seconds and fractional + * seconds. + */ + +static VALUE +time_eql(VALUE time1, VALUE time2) +{ + struct time_object *tobj1, *tobj2; + + GetTimeval(time1, tobj1); + if (TYPE(time2) == T_DATA && RDATA(time2)->dfree == time_free) { + GetTimeval(time2, tobj2); + if (tobj1->ts.tv_sec == tobj2->ts.tv_sec) { + if (tobj1->ts.tv_nsec == tobj2->ts.tv_nsec) return Qtrue; + } + } + return Qfalse; +} + +/* + * call-seq: + * time.utc? => true or false + * time.gmt? => true or false + * + * Returns true if time represents a time in UTC + * (GMT). + * + * t = Time.now #=> 2007-11-19 08:15:23 -0600 + * t.utc? #=> false + * t = Time.gm(2000,"jan",1,20,15,1) #=> 2000-01-01 20:15:01 UTC + * t.utc? #=> true + * + * t = Time.now #=> 2007-11-19 08:16:03 -0600 + * t.gmt? 
#=> false + * t = Time.gm(2000,1,1,20,15,1) #=> 2000-01-01 20:15:01 UTC + * t.gmt? #=> true + */ + +static VALUE +time_utc_p(VALUE time) +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->gmt) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * time.hash => fixnum + * + * Return a hash code for this time object. + */ + +static VALUE +time_hash(VALUE time) +{ + struct time_object *tobj; + long hash; + + GetTimeval(time, tobj); + hash = tobj->ts.tv_sec ^ tobj->ts.tv_nsec; + return LONG2FIX(hash); +} + +/* :nodoc: */ +static VALUE +time_init_copy(VALUE copy, VALUE time) +{ + struct time_object *tobj, *tcopy; + + if (copy == time) return copy; + time_modify(copy); + if (TYPE(time) != T_DATA || RDATA(time)->dfree != time_free) { + rb_raise(rb_eTypeError, "wrong argument type"); + } + GetTimeval(time, tobj); + GetTimeval(copy, tcopy); + MEMCPY(tcopy, tobj, struct time_object, 1); + + return copy; +} + +static VALUE +time_dup(VALUE time) +{ + VALUE dup = time_s_alloc(CLASS_OF(time)); + time_init_copy(dup, time); + return dup; +} + +/* + * call-seq: + * time.localtime => time + * + * Converts time to local time (using the local time zone in + * effect for this process) modifying the receiver. + * + * t = Time.gm(2000, "jan", 1, 20, 15, 1) #=> 2000-01-01 20:15:01 UTC + * t.gmt? #=> true + * t.localtime #=> 2000-01-01 14:15:01 -0600 + * t.gmt? #=> false + */ + +static VALUE +time_localtime(VALUE time) +{ + struct time_object *tobj; + struct tm *tm_tmp; + time_t t; + IF_HAVE_GMTIME_R(struct tm result); + + GetTimeval(time, tobj); + if (!tobj->gmt) { + if (tobj->tm_got) + return time; + } + else { + time_modify(time); + } + t = tobj->ts.tv_sec; + tm_tmp = LOCALTIME(&t, result); + if (!tm_tmp) + rb_raise(rb_eArgError, "localtime error"); + tobj->tm = *tm_tmp; + tobj->tm_got = 1; + tobj->gmt = 0; + return time; +} + +/* + * call-seq: + * time.gmtime => time + * time.utc => time + * + * Converts time to UTC (GMT), modifying the receiver. 
+ * + * t = Time.now #=> 2007-11-19 08:18:31 -0600 + * t.gmt? #=> false + * t.gmtime #=> 2007-11-19 14:18:31 UTC + * t.gmt? #=> true + * + * t = Time.now #=> 2007-11-19 08:18:51 -0600 + * t.utc? #=> false + * t.utc #=> 2007-11-19 14:18:51 UTC + * t.utc? #=> true + */ + +static VALUE +time_gmtime(VALUE time) +{ + struct time_object *tobj; + struct tm *tm_tmp; + time_t t; + IF_HAVE_GMTIME_R(struct tm result); + + GetTimeval(time, tobj); + if (tobj->gmt) { + if (tobj->tm_got) + return time; + } + else { + time_modify(time); + } + t = tobj->ts.tv_sec; + tm_tmp = GMTIME(&t, result); + if (!tm_tmp) + rb_raise(rb_eArgError, "gmtime error"); + tobj->tm = *tm_tmp; + tobj->tm_got = 1; + tobj->gmt = 1; + return time; +} + +/* + * call-seq: + * time.getlocal => new_time + * + * Returns a new new_time object representing time in + * local time (using the local time zone in effect for this process). + * + * t = Time.gm(2000,1,1,20,15,1) #=> 2000-01-01 20:15:01 UTC + * t.gmt? #=> true + * l = t.getlocal #=> 2000-01-01 14:15:01 -0600 + * l.gmt? #=> false + * t == l #=> true + */ + +static VALUE +time_getlocaltime(VALUE time) +{ + return time_localtime(time_dup(time)); +} + +/* + * call-seq: + * time.getgm => new_time + * time.getutc => new_time + * + * Returns a new new_time object representing time in + * UTC. + * + * t = Time.local(2000,1,1,20,15,1) #=> 2000-01-01 20:15:01 -0600 + * t.gmt? #=> false + * y = t.getgm #=> 2000-01-02 02:15:01 UTC + * y.gmt? #=> true + * t == y #=> true + */ + +static VALUE +time_getgmtime(VALUE time) +{ + return time_gmtime(time_dup(time)); +} + +static VALUE +time_get_tm(VALUE time, int gmt) +{ + if (gmt) return time_gmtime(time); + return time_localtime(time); +} + +/* + * call-seq: + * time.asctime => string + * time.ctime => string + * + * Returns a canonical string representation of time. 
+ * + * Time.now.asctime #=> "Wed Apr 9 08:56:03 2003" + */ + +static VALUE +time_asctime(VALUE time) +{ + struct time_object *tobj; + char *s; + IF_HAVE_GMTIME_R(char buf[32]); + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + s = ASCTIME(&tobj->tm, buf); + if (s[24] == '\n') s[24] = '\0'; +#if (defined(_MSC_VER) && defined(_DLL)) || defined(__MSVCRT__) + /* workaround for MSVCRT's bug */ + if (s[8] == '0') s[8] = ' '; +#endif + + return rb_str_new2(s); +} + +size_t +rb_strftime(char *s, size_t maxsize, const char *format, + const struct tm *timeptr, const struct timespec *ts, int gmt); + +/* + * call-seq: + * time.inspect => string + * time.to_s => string + * + * Returns a string representing time. Equivalent to calling + * Time#strftime with a format string of + * ``%Y-%m-%d %H:%M:%S %z'' + * for a local time and + * ``%Y-%m-%d %H:%M:%S UTC'' + * for a UTC time. + * + * Time.now.to_s #=> "2007-10-05 16:09:51 +0900" + * Time.now.utc.to_s #=> "2007-10-05 07:09:51 UTC" + */ + +static VALUE +time_to_s(VALUE time) +{ + struct time_object *tobj; + char buf[128]; + int len; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + if (tobj->gmt == 1) { + len = rb_strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S UTC", + &tobj->tm, &tobj->ts, tobj->gmt); + } + else { + len = rb_strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S %z", + &tobj->tm, &tobj->ts, tobj->gmt); + } + return rb_str_new(buf, len); +} + +static VALUE +time_add(struct time_object *tobj, VALUE offset, int sign) +{ + double v = NUM2DBL(offset); + double f, d; + unsigned_time_t sec_off; + time_t sec; + long nsec_off, nsec; + VALUE result; + + if (v < 0) { + v = -v; + sign = -sign; + } + d = modf(v, &f); + sec_off = (unsigned_time_t)f; + if (f != (double)sec_off) + rb_raise(rb_eRangeError, "time %s %f out of Time range", + sign < 0 ? 
"-" : "+", v); + nsec_off = (long)(d*1e9+0.5); + + if (sign < 0) { + sec = tobj->ts.tv_sec - sec_off; + nsec = tobj->ts.tv_nsec - nsec_off; + if (sec > tobj->ts.tv_sec) + rb_raise(rb_eRangeError, "time - %f out of Time range", v); + } + else { + sec = tobj->ts.tv_sec + sec_off; + nsec = tobj->ts.tv_nsec + nsec_off; + if (sec < tobj->ts.tv_sec) + rb_raise(rb_eRangeError, "time + %f out of Time range", v); + } + result = rb_time_nano_new(sec, nsec); + if (tobj->gmt) { + GetTimeval(result, tobj); + tobj->gmt = 1; + } + return result; +} + +/* + * call-seq: + * time + numeric => time + * + * Addition---Adds some number of seconds (possibly fractional) to + * time and returns that value as a new time. + * + * t = Time.now #=> 2007-11-19 08:22:21 -0600 + * t + (60 * 60 * 24) #=> 2007-11-20 08:22:21 -0600 + */ + +static VALUE +time_plus(VALUE time1, VALUE time2) +{ + struct time_object *tobj; + GetTimeval(time1, tobj); + + if (TYPE(time2) == T_DATA && RDATA(time2)->dfree == time_free) { + rb_raise(rb_eTypeError, "time + time?"); + } + return time_add(tobj, time2, 1); +} + +/* + * call-seq: + * time - other_time => float + * time - numeric => time + * + * Difference---Returns a new time that represents the difference + * between two times, or subtracts the given number of seconds in + * numeric from time. 
+ * + * t = Time.now #=> 2007-11-19 08:23:10 -0600 + * t2 = t + 2592000 #=> 2007-12-19 08:23:10 -0600 + * t2 - t #=> 2592000.0 + * t2 - 2592000 #=> 2007-11-19 08:23:10 -0600 + */ + +static VALUE +time_minus(VALUE time1, VALUE time2) +{ + struct time_object *tobj; + + GetTimeval(time1, tobj); + if (TYPE(time2) == T_DATA && RDATA(time2)->dfree == time_free) { + struct time_object *tobj2; + double f; + + GetTimeval(time2, tobj2); + if (tobj->ts.tv_sec < tobj2->ts.tv_sec) + f = -(double)(unsigned_time_t)(tobj2->ts.tv_sec - tobj->ts.tv_sec); + else + f = (double)(unsigned_time_t)(tobj->ts.tv_sec - tobj2->ts.tv_sec); + f += ((double)tobj->ts.tv_nsec - (double)tobj2->ts.tv_nsec)*1e-9; + + return DBL2NUM(f); + } + return time_add(tobj, time2, -1); +} + +/* + * call-seq: + * time.succ => new_time + * + * Return a new time object, one second later than time. + * + * t = Time.now #=> 2007-11-19 08:23:57 -0600 + * t.succ #=> 2007-11-19 08:23:58 -0600 + */ + +static VALUE +time_succ(VALUE time) +{ + struct time_object *tobj; + int gmt; + + GetTimeval(time, tobj); + gmt = tobj->gmt; + time = rb_time_nano_new(tobj->ts.tv_sec + 1, tobj->ts.tv_nsec); + GetTimeval(time, tobj); + tobj->gmt = gmt; + return time; +} + +VALUE +rb_time_succ(VALUE time) +{ + return time_succ(time); +} + +/* + * call-seq: + * time.sec => fixnum + * + * Returns the second of the minute (0..60)[Yes, seconds really can + * range from zero to 60. This allows the system to inject leap seconds + * every now and then to correct for the fact that years are not really + * a convenient number of hours long.] for time. + * + * t = Time.now #=> 2007-11-19 08:25:02 -0600 + * t.sec #=> 2 + */ + +static VALUE +time_sec(VALUE time) +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return INT2FIX(tobj->tm.tm_sec); +} + +/* + * call-seq: + * time.min => fixnum + * + * Returns the minute of the hour (0..59) for time. 
+ * + * t = Time.now #=> 2007-11-19 08:25:51 -0600 + * t.min #=> 25 + */ + +static VALUE +time_min(VALUE time) +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return INT2FIX(tobj->tm.tm_min); +} + +/* + * call-seq: + * time.hour => fixnum + * + * Returns the hour of the day (0..23) for time. + * + * t = Time.now #=> 2007-11-19 08:26:20 -0600 + * t.hour #=> 8 + */ + +static VALUE +time_hour(VALUE time) +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return INT2FIX(tobj->tm.tm_hour); +} + +/* + * call-seq: + * time.day => fixnum + * time.mday => fixnum + * + * Returns the day of the month (1..n) for time. + * + * t = Time.now #=> 2007-11-19 08:27:03 -0600 + * t.day #=> 19 + * t.mday #=> 19 + */ + +static VALUE +time_mday(VALUE time) +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return INT2FIX(tobj->tm.tm_mday); +} + +/* + * call-seq: + * time.mon => fixnum + * time.month => fixnum + * + * Returns the month of the year (1..12) for time. + * + * t = Time.now #=> 2007-11-19 08:27:30 -0600 + * t.mon #=> 11 + * t.month #=> 11 + */ + +static VALUE +time_mon(VALUE time) +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return INT2FIX(tobj->tm.tm_mon+1); +} + +/* + * call-seq: + * time.year => fixnum + * + * Returns the year for time (including the century). + * + * t = Time.now #=> 2007-11-19 08:27:51 -0600 + * t.year #=> 2007 + */ + +static VALUE +time_year(VALUE time) +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return LONG2NUM((long)tobj->tm.tm_year+1900); +} + +/* + * call-seq: + * time.wday => fixnum + * + * Returns an integer representing the day of the week, 0..6, with + * Sunday == 0. 
+ * + * t = Time.now #=> 2007-11-20 02:35:35 -0600 + * t.wday #=> 2 + * t.sunday? #=> false + * t.monday? #=> false + * t.tuesday? #=> true + * t.wednesday? #=> false + * t.thursday? #=> false + * t.friday? #=> false + * t.saturday? #=> false + */ + +static VALUE +time_wday(VALUE time) +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return INT2FIX(tobj->tm.tm_wday); +} + +#define wday_p(n) {\ + struct time_object *tobj;\ + GetTimeval(time, tobj);\ + if (tobj->tm_got == 0) {\ + time_get_tm(time, tobj->gmt);\ + }\ + return (tobj->tm.tm_wday == (n)) ? Qtrue : Qfalse;\ +} + +/* + * call-seq: + * time.sunday? => true or false + * + * Returns true if time represents Sunday. + * + * t = Time.local(1990, 4, 1) #=> 1990-04-01 00:00:00 -0600 + * t.sunday? #=> true + */ + +static VALUE +time_sunday(VALUE time) +{ + wday_p(0); +} + +/* + * call-seq: + * time.monday? => true or false + * + * Returns true if time represents Monday. + * + * t = Time.local(2003, 8, 4) #=> 2003-08-04 00:00:00 -0500 + * p t.monday? #=> true + */ + +static VALUE +time_monday(VALUE time) +{ + wday_p(1); +} + +/* + * call-seq: + * time.tuesday? => true or false + * + * Returns true if time represents Tuesday. + * + * t = Time.local(1991, 2, 19) #=> 1991-02-19 00:00:00 -0600 + * p t.tuesday? #=> true + */ + +static VALUE +time_tuesday(VALUE time) +{ + wday_p(2); +} + +/* + * call-seq: + * time.wednesday? => true or false + * + * Returns true if time represents Wednesday. + * + * t = Time.local(1993, 2, 24) #=> 1993-02-24 00:00:00 -0600 + * p t.wednesday? #=> true + */ + +static VALUE +time_wednesday(VALUE time) +{ + wday_p(3); +} + +/* + * call-seq: + * time.thursday? => true or false + * + * Returns true if time represents Thursday. + * + * t = Time.local(1995, 12, 21) #=> 1995-12-21 00:00:00 -0600 + * p t.thursday? #=> true + */ + +static VALUE +time_thursday(VALUE time) +{ + wday_p(4); +} + +/* + * call-seq: + * time.friday? 
=> true or false + * + * Returns true if time represents Friday. + * + * t = Time.local(1987, 12, 18) #=> 1987-12-18 00:00:00 -0600 + * t.friday? #=> true + */ + +static VALUE +time_friday(VALUE time) +{ + wday_p(5); +} + +/* + * call-seq: + * time.saturday? => true or false + * + * Returns true if time represents Saturday. + * + * t = Time.local(2006, 6, 10) #=> 2006-06-10 00:00:00 -0500 + * t.saturday? #=> true + */ + +static VALUE +time_saturday(VALUE time) +{ + wday_p(6); +} + +/* + * call-seq: + * time.yday => fixnum + * + * Returns an integer representing the day of the year, 1..366. + * + * t = Time.now #=> 2007-11-19 08:32:31 -0600 + * t.yday #=> 323 + */ + +static VALUE +time_yday(VALUE time) +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return INT2FIX(tobj->tm.tm_yday+1); +} + +/* + * call-seq: + * time.isdst => true or false + * time.dst? => true or false + * + * Returns true if time occurs during Daylight + * Saving Time in its time zone. + * + * # CST6CDT: + * Time.local(2000, 1, 1).zone #=> "CST" + * Time.local(2000, 1, 1).isdst #=> false + * Time.local(2000, 1, 1).dst? #=> false + * Time.local(2000, 7, 1).zone #=> "CDT" + * Time.local(2000, 7, 1).isdst #=> true + * Time.local(2000, 7, 1).dst? #=> true + * + * # Asia/Tokyo: + * Time.local(2000, 1, 1).zone #=> "JST" + * Time.local(2000, 1, 1).isdst #=> false + * Time.local(2000, 1, 1).dst? #=> false + * Time.local(2000, 7, 1).zone #=> "JST" + * Time.local(2000, 7, 1).isdst #=> false + * Time.local(2000, 7, 1).dst? #=> false + */ + +static VALUE +time_isdst(VALUE time) +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return tobj->tm.tm_isdst?Qtrue:Qfalse; +} + +/* + * call-seq: + * time.zone => string + * + * Returns the name of the time zone used for time. As of Ruby + * 1.8, returns ``UTC'' rather than ``GMT'' for UTC times. 
+ * + * t = Time.gm(2000, "jan", 1, 20, 15, 1) + * t.zone #=> "UTC" + * t = Time.local(2000, "jan", 1, 20, 15, 1) + * t.zone #=> "CST" + */ + +static VALUE +time_zone(VALUE time) +{ + struct time_object *tobj; +#if !defined(HAVE_TM_ZONE) && (!defined(HAVE_TZNAME) || !defined(HAVE_DAYLIGHT)) + char buf[64]; + int len; +#endif + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + + if (tobj->gmt == 1) { + return rb_str_new2("UTC"); + } +#if defined(HAVE_TM_ZONE) + return rb_str_new2(tobj->tm.tm_zone); +#elif defined(HAVE_TZNAME) && defined(HAVE_DAYLIGHT) + return rb_str_new2(tzname[daylight && tobj->tm.tm_isdst]); +#else + len = rb_strftime(buf, sizeof(buf), "%Z", + &tobj->tm, &tobj->ts, tobj->gmt); + return rb_str_new(buf, len); +#endif +} + +/* + * call-seq: + * time.gmt_offset => fixnum + * time.gmtoff => fixnum + * time.utc_offset => fixnum + * + * Returns the offset in seconds between the timezone of time + * and UTC. + * + * t = Time.gm(2000,1,1,20,15,1) #=> 2000-01-01 20:15:01 UTC + * t.gmt_offset #=> 0 + * l = t.getlocal #=> 2000-01-01 14:15:01 -0600 + * l.gmt_offset #=> -21600 + */ + +static VALUE +time_utc_offset(VALUE time) +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + + if (tobj->gmt == 1) { + return INT2FIX(0); + } + else { +#if defined(HAVE_STRUCT_TM_TM_GMTOFF) + return INT2NUM(tobj->tm.tm_gmtoff); +#else + struct tm *u, *l; + time_t t; + long off; + IF_HAVE_GMTIME_R(struct tm result); + l = &tobj->tm; + t = tobj->ts.tv_sec; + u = GMTIME(&t, result); + if (!u) + rb_raise(rb_eArgError, "gmtime error"); + if (l->tm_year != u->tm_year) + off = l->tm_year < u->tm_year ? -1 : 1; + else if (l->tm_mon != u->tm_mon) + off = l->tm_mon < u->tm_mon ? -1 : 1; + else if (l->tm_mday != u->tm_mday) + off = l->tm_mday < u->tm_mday ? 
-1 : 1; + else + off = 0; + off = off * 24 + l->tm_hour - u->tm_hour; + off = off * 60 + l->tm_min - u->tm_min; + off = off * 60 + l->tm_sec - u->tm_sec; + return LONG2FIX(off); +#endif + } +} + +/* + * call-seq: + * time.to_a => array + * + * Returns a ten-element array of values for time: + * {[ sec, min, hour, day, month, year, wday, yday, isdst, zone + * ]}. See the individual methods for an explanation of the + * valid ranges of each value. The ten elements can be passed directly + * to Time::utc or Time::local to create a + * new Time. + * + * t = Time.now #=> 2007-11-19 08:36:01 -0600 + * now = t.to_a #=> [1, 36, 8, 19, 11, 2007, 1, 323, false, "CST"] + */ + +static VALUE +time_to_a(VALUE time) +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return rb_ary_new3(10, + INT2FIX(tobj->tm.tm_sec), + INT2FIX(tobj->tm.tm_min), + INT2FIX(tobj->tm.tm_hour), + INT2FIX(tobj->tm.tm_mday), + INT2FIX(tobj->tm.tm_mon+1), + LONG2NUM((long)tobj->tm.tm_year+1900), + INT2FIX(tobj->tm.tm_wday), + INT2FIX(tobj->tm.tm_yday+1), + tobj->tm.tm_isdst?Qtrue:Qfalse, + time_zone(time)); +} + +#define SMALLBUF 100 +static int +rb_strftime_alloc(char **buf, const char *format, + struct tm *time, struct timespec *ts, int gmt) +{ + int size, len, flen; + + (*buf)[0] = '\0'; + flen = strlen(format); + if (flen == 0) { + return 0; + } + errno = 0; + len = rb_strftime(*buf, SMALLBUF, format, time, ts, gmt); + if (len != 0 || (**buf == '\0' && errno != ERANGE)) return len; + for (size=1024; ; size*=2) { + *buf = xmalloc(size); + (*buf)[0] = '\0'; + len = rb_strftime(*buf, size, format, time, ts, gmt); + /* + * buflen can be zero EITHER because there's not enough + * room in the string, or because the control command + * goes to the empty string. Make a reasonable guess that + * if the buffer is 1024 times bigger than the length of the + * format string, it's not failing for lack of room. 
+ */ + if (len > 0 || size >= 1024 * flen) return len; + xfree(*buf); + } + /* not reached */ +} + +/* + * call-seq: + * time.strftime( string ) => string + * + * Formats time according to the directives in the given format + * string. Any text not listed as a directive will be passed through + * to the output string. + * + * Format meaning: + * %a - The abbreviated weekday name (``Sun'') + * %A - The full weekday name (``Sunday'') + * %b - The abbreviated month name (``Jan'') + * %B - The full month name (``January'') + * %c - The preferred local date and time representation + * %d - Day of the month (01..31) + * %F - Equivalent to %Y-%m-%d (the ISO 8601 date format) + * %H - Hour of the day, 24-hour clock (00..23) + * %I - Hour of the day, 12-hour clock (01..12) + * %j - Day of the year (001..366) + * %L - Millisecond of the second (000..999) + * %m - Month of the year (01..12) + * %M - Minute of the hour (00..59) + * %N - Fractional seconds digits, default is 9 digits (nanosecond) + * %3N millisecond (3 digits) + * %6N microsecond (6 digits) + * %9N nanosecond (9 digits) + * %p - Meridian indicator (``AM'' or ``PM'') + * %P - Meridian indicator (``am'' or ``pm'') + * %s - Number of seconds since 1970-01-01 00:00:00 UTC. 
+ * %S - Second of the minute (00..60) + * %U - Week number of the current year, + * starting with the first Sunday as the first + * day of the first week (00..53) + * %W - Week number of the current year, + * starting with the first Monday as the first + * day of the first week (00..53) + * %w - Day of the week (Sunday is 0, 0..6) + * %x - Preferred representation for the date alone, no time + * %X - Preferred representation for the time alone, no date + * %y - Year without a century (00..99) + * %Y - Year with century + * %Z - Time zone name + * %% - Literal ``%'' character + * + * t = Time.now #=> 2007-11-19 08:37:48 -0600 + * t.strftime("Printed on %m/%d/%Y") #=> "Printed on 11/19/2007" + * t.strftime("at %I:%M%p") #=> "at 08:37AM" + */ + +static VALUE +time_strftime(VALUE time, VALUE format) +{ + void rb_enc_copy(VALUE, VALUE); + struct time_object *tobj; + char buffer[SMALLBUF], *buf = buffer; + const char *fmt; + long len; + VALUE str; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + StringValue(format); + if (!rb_enc_str_asciicompat_p(format)) { + rb_raise(rb_eArgError, "format should have ASCII compatible encoding"); + } + format = rb_str_new4(format); + fmt = RSTRING_PTR(format); + len = RSTRING_LEN(format); + if (len == 0) { + rb_warning("strftime called with empty format string"); + } + else if (strlen(fmt) < len) { + /* Ruby string may contain \0's. 
*/ + const char *p = fmt, *pe = fmt + len; + + str = rb_str_new(0, 0); + while (p < pe) { + len = rb_strftime_alloc(&buf, p, &tobj->tm, &tobj->ts, tobj->gmt); + rb_str_cat(str, buf, len); + p += strlen(p); + if (buf != buffer) { + xfree(buf); + buf = buffer; + } + for (fmt = p; p < pe && !*p; ++p); + if (p > fmt) rb_str_cat(str, fmt, p - fmt); + } + return str; + } + else { + len = rb_strftime_alloc(&buf, RSTRING_PTR(format), + &tobj->tm, &tobj->ts, tobj->gmt); + } + str = rb_str_new(buf, len); + if (buf != buffer) xfree(buf); + rb_enc_copy(str, format); + return str; +} + +/* + * undocumented + */ + +static VALUE +time_mdump(VALUE time) +{ + struct time_object *tobj; + struct tm *tm; + unsigned long p, s; + char buf[8]; + time_t t; + int nsec; + int i; + VALUE str; + IF_HAVE_GMTIME_R(struct tm result); + + GetTimeval(time, tobj); + + t = tobj->ts.tv_sec; + tm = GMTIME(&t, result); + + if ((tm->tm_year & 0xffff) != tm->tm_year) + rb_raise(rb_eArgError, "year too big to marshal: %ld", (long)tm->tm_year); + + p = 0x1UL << 31 | /* 1 */ + tobj->gmt << 30 | /* 1 */ + tm->tm_year << 14 | /* 16 */ + tm->tm_mon << 10 | /* 4 */ + tm->tm_mday << 5 | /* 5 */ + tm->tm_hour; /* 5 */ + s = tm->tm_min << 26 | /* 6 */ + tm->tm_sec << 20 | /* 6 */ + tobj->ts.tv_nsec / 1000; /* 20 */ + nsec = tobj->ts.tv_nsec % 1000; + + for (i=0; i<4; i++) { + buf[i] = p & 0xff; + p = RSHIFT(p, 8); + } + for (i=4; i<8; i++) { + buf[i] = s & 0xff; + s = RSHIFT(s, 8); + } + + str = rb_str_new(buf, 8); + rb_copy_generic_ivar(str, time); + if (nsec) { + /* + * submicro is formatted in fixed-point packed BCD (without sign). + * It represent digits under microsecond. + * For nanosecond resolution, 3 digits (2 bytes) are used. + * However it can be longer. + * Extra digits are ignored for loading. 
+ */ + unsigned char buf[2]; + int len = sizeof(buf); + buf[1] = (nsec % 10) << 4; + nsec /= 10; + buf[0] = nsec % 10; + nsec /= 10; + buf[0] |= (nsec % 10) << 4; + if (buf[1] == 0) + len = 1; + rb_ivar_set(str, id_submicro, rb_str_new((char *)buf, len)); + } + return str; +} + +/* + * call-seq: + * time._dump => string + * + * Dump _time_ for marshaling. + */ + +static VALUE +time_dump(int argc, VALUE *argv, VALUE time) +{ + VALUE str; + + rb_scan_args(argc, argv, "01", 0); + str = time_mdump(time); + + return str; +} + +/* + * undocumented + */ + +static VALUE +time_mload(VALUE time, VALUE str) +{ + struct time_object *tobj; + unsigned long p, s; + time_t sec; + long usec; + unsigned char *buf; + struct tm tm; + int i, gmt; + long nsec; + VALUE submicro; + + time_modify(time); + + submicro = rb_attr_get(str, id_submicro); + if (submicro != Qnil) { + st_delete(rb_generic_ivar_table(str), (st_data_t*)&id_submicro, 0); + } + rb_copy_generic_ivar(time, str); + + StringValue(str); + buf = (unsigned char *)RSTRING_PTR(str); + if (RSTRING_LEN(str) != 8) { + rb_raise(rb_eTypeError, "marshaled time format differ"); + } + + p = s = 0; + for (i=0; i<4; i++) { + p |= buf[i]<<(8*i); + } + for (i=4; i<8; i++) { + s |= buf[i]<<(8*(i-4)); + } + + if ((p & (1UL<<31)) == 0) { + gmt = 0; + sec = p; + usec = s; + nsec = usec * 1000; + } + else { + p &= ~(1UL<<31); + gmt = (p >> 30) & 0x1; + tm.tm_year = (p >> 14) & 0xffff; + tm.tm_mon = (p >> 10) & 0xf; + tm.tm_mday = (p >> 5) & 0x1f; + tm.tm_hour = p & 0x1f; + tm.tm_min = (s >> 26) & 0x3f; + tm.tm_sec = (s >> 20) & 0x3f; + tm.tm_isdst = 0; + + sec = make_time_t(&tm, Qtrue); + usec = (long)(s & 0xfffff); + nsec = usec * 1000; + + if (submicro != Qnil) { + unsigned char *ptr; + long len; + int digit; + ptr = (unsigned char*)StringValuePtr(submicro); + len = RSTRING_LEN(submicro); + if (0 < len) { + if (10 <= (digit = ptr[0] >> 4)) goto end_submicro; + nsec += digit * 100; + if (10 <= (digit = ptr[0] & 0xf)) goto end_submicro; + nsec 
+= digit * 10; + } + if (1 < len) { + if (10 <= (digit = ptr[1] >> 4)) goto end_submicro; + nsec += digit; + } +end_submicro: ; + } + } + time_overflow_p(&sec, &nsec); + + GetTimeval(time, tobj); + tobj->tm_got = 0; + tobj->gmt = gmt; + tobj->ts.tv_sec = sec; + tobj->ts.tv_nsec = nsec; + + return time; +} + +/* + * call-seq: + * Time._load(string) => time + * + * Unmarshal a dumped +Time+ object. + */ + +static VALUE +time_load(VALUE klass, VALUE str) +{ + VALUE time = time_s_alloc(klass); + + time_mload(time, str); + return time; +} + +/* + * Time is an abstraction of dates and times. Time is + * stored internally as the number of seconds and nanoseconds since + * the Epoch, January 1, 1970 00:00 UTC. On some operating + * systems, this offset is allowed to be negative. Also see the + * library modules Date. The + * Time class treats GMT (Greenwich Mean Time) and UTC + * (Coordinated Universal Time)[Yes, UTC really does stand for + * Coordinated Universal Time. There was a committee involved.] + * as equivalent. GMT is the older way of referring to these + * baseline times but persists in the names of calls on POSIX + * systems. + * + * All times are stored with some number of nanoseconds. Be aware of + * this fact when comparing times with each other---times that are + * apparently equal when displayed may be different when compared. 
+ */ + +void +Init_Time(void) +{ +#undef rb_intern +#define rb_intern(str) rb_intern_const(str) + + id_divmod = rb_intern("divmod"); + id_mul = rb_intern("*"); + id_submicro = rb_intern("submicro"); + + rb_cTime = rb_define_class("Time", rb_cObject); + rb_include_module(rb_cTime, rb_mComparable); + + rb_define_alloc_func(rb_cTime, time_s_alloc); + rb_define_singleton_method(rb_cTime, "now", rb_class_new_instance, -1); + rb_define_singleton_method(rb_cTime, "at", time_s_at, -1); + rb_define_singleton_method(rb_cTime, "utc", time_s_mkutc, -1); + rb_define_singleton_method(rb_cTime, "gm", time_s_mkutc, -1); + rb_define_singleton_method(rb_cTime, "local", time_s_mktime, -1); + rb_define_singleton_method(rb_cTime, "mktime", time_s_mktime, -1); + + rb_define_method(rb_cTime, "to_i", time_to_i, 0); + rb_define_method(rb_cTime, "to_f", time_to_f, 0); + rb_define_method(rb_cTime, "<=>", time_cmp, 1); + rb_define_method(rb_cTime, "eql?", time_eql, 1); + rb_define_method(rb_cTime, "hash", time_hash, 0); + rb_define_method(rb_cTime, "initialize", time_init, 0); + rb_define_method(rb_cTime, "initialize_copy", time_init_copy, 1); + + rb_define_method(rb_cTime, "localtime", time_localtime, 0); + rb_define_method(rb_cTime, "gmtime", time_gmtime, 0); + rb_define_method(rb_cTime, "utc", time_gmtime, 0); + rb_define_method(rb_cTime, "getlocal", time_getlocaltime, 0); + rb_define_method(rb_cTime, "getgm", time_getgmtime, 0); + rb_define_method(rb_cTime, "getutc", time_getgmtime, 0); + + rb_define_method(rb_cTime, "ctime", time_asctime, 0); + rb_define_method(rb_cTime, "asctime", time_asctime, 0); + rb_define_method(rb_cTime, "to_s", time_to_s, 0); + rb_define_method(rb_cTime, "inspect", time_to_s, 0); + rb_define_method(rb_cTime, "to_a", time_to_a, 0); + + rb_define_method(rb_cTime, "+", time_plus, 1); + rb_define_method(rb_cTime, "-", time_minus, 1); + + rb_define_method(rb_cTime, "succ", time_succ, 0); + rb_define_method(rb_cTime, "sec", time_sec, 0); + rb_define_method(rb_cTime, 
"min", time_min, 0); + rb_define_method(rb_cTime, "hour", time_hour, 0); + rb_define_method(rb_cTime, "mday", time_mday, 0); + rb_define_method(rb_cTime, "day", time_mday, 0); + rb_define_method(rb_cTime, "mon", time_mon, 0); + rb_define_method(rb_cTime, "month", time_mon, 0); + rb_define_method(rb_cTime, "year", time_year, 0); + rb_define_method(rb_cTime, "wday", time_wday, 0); + rb_define_method(rb_cTime, "yday", time_yday, 0); + rb_define_method(rb_cTime, "isdst", time_isdst, 0); + rb_define_method(rb_cTime, "dst?", time_isdst, 0); + rb_define_method(rb_cTime, "zone", time_zone, 0); + rb_define_method(rb_cTime, "gmtoff", time_utc_offset, 0); + rb_define_method(rb_cTime, "gmt_offset", time_utc_offset, 0); + rb_define_method(rb_cTime, "utc_offset", time_utc_offset, 0); + + rb_define_method(rb_cTime, "utc?", time_utc_p, 0); + rb_define_method(rb_cTime, "gmt?", time_utc_p, 0); + + rb_define_method(rb_cTime, "sunday?", time_sunday, 0); + rb_define_method(rb_cTime, "monday?", time_monday, 0); + rb_define_method(rb_cTime, "tuesday?", time_tuesday, 0); + rb_define_method(rb_cTime, "wednesday?", time_wednesday, 0); + rb_define_method(rb_cTime, "thursday?", time_thursday, 0); + rb_define_method(rb_cTime, "friday?", time_friday, 0); + rb_define_method(rb_cTime, "saturday?", time_saturday, 0); + + rb_define_method(rb_cTime, "tv_sec", time_to_i, 0); + rb_define_method(rb_cTime, "tv_usec", time_usec, 0); + rb_define_method(rb_cTime, "usec", time_usec, 0); + rb_define_method(rb_cTime, "tv_nsec", time_nsec, 0); + rb_define_method(rb_cTime, "nsec", time_nsec, 0); + + rb_define_method(rb_cTime, "strftime", time_strftime, 1); + + /* methods for marshaling */ + rb_define_method(rb_cTime, "_dump", time_dump, -1); + rb_define_singleton_method(rb_cTime, "_load", time_load, 1); +#if 0 + /* Time will support marshal_dump and marshal_load in the future (1.9 maybe) */ + rb_define_method(rb_cTime, "marshal_dump", time_mdump, 0); + rb_define_method(rb_cTime, "marshal_load", time_mload, 1); 
+#endif +} diff --git a/tool/asm_parse.rb b/tool/asm_parse.rb new file mode 100644 index 0000000..f3d7f73 --- /dev/null +++ b/tool/asm_parse.rb @@ -0,0 +1,51 @@ +stat = {} + +while line = ARGF.gets + if /\[start\] (\w+)/ =~ line + name = $1 + puts '--------------------------------------------------------------' + puts line + size = 0 + len = 0 + + while line = ARGF.gets + if /\[start\] (\w+)/ =~ line + puts "\t; # length: #{len}, size: #{size}" + puts "\t; # !!" + stat[name] = [len, size] + # + name = $1 + puts '--------------------------------------------------------------' + puts line + size = 0 + len = 0 + next + end + + unless /(\ALM)|(\ALB)|(\A\.)|(\A\/)/ =~ line + puts line + if /\[length = (\d+)\]/ =~ line + len += $1.to_i + size += 1 + end + end + + + if /__NEXT_INSN__/ !~ line && /\[end \] (\w+)/ =~ line + ename = $1 + if name != ename + puts "!! start with #{name}, but end with #{ename}" + end + stat[ename] = [len, size] + puts "\t; # length: #{len}, size: #{size}" + break + end + end + end +end + +stat.sort_by{|a, b| -b[0] * 1000 - a[0]}.each{|a, b| + puts "#{a}\t#{b.join("\t")}" +} +puts "total length :\t#{stat.inject(0){|r, e| r+e[1][0]}}" +puts "total size :\t#{stat.inject(0){|r, e| r+e[1][1]}}" diff --git a/tool/build-transcode b/tool/build-transcode new file mode 100755 index 0000000..fa71155 --- /dev/null +++ b/tool/build-transcode @@ -0,0 +1,16 @@ +#!/bin/sh + +[ "$1" -a -d "$1" ] && { cd "$1" || exit $?; } && shift +[ "$#" = 0 ] && set enc/trans/*.trans +for src; do + case "$src" in + *.trans) + c="`dirname $src`/`basename $src .trans`.c" + ${BASERUBY-ruby} tool/transcode-tblgen.rb -vo "$c" "$src" + ;; + *) + echo "$0: don't know how to deal with $src" + continue + ;; + esac +done diff --git a/tool/compile_prelude.rb b/tool/compile_prelude.rb new file mode 100644 index 0000000..e0a011c --- /dev/null +++ b/tool/compile_prelude.rb @@ -0,0 +1,96 @@ +# This file is interpreted by $(BASERUBY) and miniruby. +# $(BASERUBY) is used for miniprelude.c. 
+# miniruby is used for prelude.c. +# Since $(BASERUBY) may be older than Ruby 1.9, +# Ruby 1.9 feature should not be used. + +$:.unshift(File.expand_path("../..", __FILE__)) + +preludes = ARGV.dup +outfile = preludes.pop +init_name = outfile[/\w+(?=_prelude.c\b)/] || 'prelude' + +C_ESC = { + "\\" => "\\\\", + '"' => '\"', + "\n" => '\n', +} + +0x00.upto(0x1f) {|ch| C_ESC[[ch].pack("C")] ||= "\\%03o" % ch } +0x7f.upto(0xff) {|ch| C_ESC[[ch].pack("C")] = "\\%03o" % ch } +C_ESC_PAT = Regexp.union(*C_ESC.keys) + +def c_esc(str) + '"' + str.gsub(C_ESC_PAT) { C_ESC[$&] } + '"' +end + +mkconf = nil +setup_ruby_prefix = nil +teardown_ruby_prefix = nil +lines_list = preludes.map {|filename| + lines = [] + need_ruby_prefix = false + File.readlines(filename).each {|line| + line.gsub!(/RbConfig::CONFIG\["(\w+)"\]/) { + key = $1 + unless mkconf + require 'rbconfig' + mkconf = RbConfig::MAKEFILE_CONFIG.merge('prefix'=>'#{TMP_RUBY_PREFIX}') + setup_ruby_prefix = "TMP_RUBY_PREFIX = $:.reverse.find{|e|e!=\".\"}.sub(%r{(.*)/lib/.*}m, \"\\\\1\")\n" + teardown_ruby_prefix = 'Object.class_eval { remove_const "TMP_RUBY_PREFIX" }' + end + if RbConfig::MAKEFILE_CONFIG.has_key? 
key + val = RbConfig.expand("$(#{key})", mkconf) + need_ruby_prefix = true if /\A\#\{TMP_RUBY_PREFIX\}/ =~ val + c_esc(val) + else + "nil" + end + } + lines << c_esc(line) + } + setup_lines = [] + if need_ruby_prefix + setup_lines << c_esc(setup_ruby_prefix) + lines << c_esc(teardown_ruby_prefix) + end + [setup_lines, lines] +} + +require 'erb' + +tmp = ERB.new(<<'EOS', nil, '%').result(binding) +#include "ruby/ruby.h" +#include "vm_core.h" + +% preludes.zip(lines_list).each_with_index {|(prelude, (setup_lines, lines)), i| +static const char prelude_name<%=i%>[] = <%=c_esc("")%>; +static const char prelude_code<%=i%>[] = +% (setup_lines+lines).each {|line| +<%=line%> +% } +; +% } + +void +Init_<%=init_name%>(void) +{ +% lines_list.each_with_index {|(setup_lines, lines), i| + rb_iseq_eval(rb_iseq_compile( + rb_usascii_str_new(prelude_code<%=i%>, sizeof(prelude_code<%=i%>) - 1), + rb_usascii_str_new(prelude_name<%=i%>, sizeof(prelude_name<%=i%>) - 1), + INT2FIX(<%=1-setup_lines.length%>))); + +% } +#if 0 +% preludes.length.times {|i| + puts(prelude_code<%=i%>); +% } +#endif +} +EOS + +open(outfile, 'w'){|f| + f << tmp +} + diff --git a/tool/eval.rb b/tool/eval.rb new file mode 100644 index 0000000..906ba9c --- /dev/null +++ b/tool/eval.rb @@ -0,0 +1,161 @@ + +require 'rbconfig' +require 'fileutils' +require 'pp' + +Ruby = ENV['RUBY'] || + File.join(Config::CONFIG["bindir"], + Config::CONFIG["ruby_install_name"] + Config::CONFIG["EXEEXT"]) +# + +OPTIONS = %w{ + opt-direct-threaded-code + opt-basic-operations + opt-operands-unification + opt-instructions-unification + opt-inline-method-cache + opt-stack-caching +}.map{|opt| + '--disable-' + opt +} + +opts = OPTIONS.dup +Configs = OPTIONS.map{|opt| + o = opts.dup + opts.delete(opt) + o +} + [[]] + +pp Configs if $DEBUG + + +def exec_cmd(cmd) + puts cmd + unless system(cmd) + p cmd + raise "error" + end +end + +def dirname idx + "ev-#{idx}" +end + +def build + Configs.each_with_index{|config, idx| + dir = dirname(idx) + 
FileUtils.rm_rf(dir) if FileTest.exist?(dir) + Dir.mkdir(dir) + FileUtils.cd(dir){ + exec_cmd("#{Ruby} ../extconf.rb " + config.join(" ")) + exec_cmd("make clean test-all") + } + } +end + +def check + Configs.each_with_index{|c, idx| + puts "= #{idx}" + system("#{Ruby} -r ev-#{idx}/yarvcore -e 'puts YARVCore::OPTS'") + } +end + +def bench_each idx + puts "= #{idx}" + 5.times{|count| + print count + FileUtils.cd(dirname(idx)){ + exec_cmd("make benchmark OPT=-y ITEMS=#{ENV['ITEMS']} > ../b#{idx}-#{count}") + } + } + puts +end + +def bench + # return bench_each(6) + Configs.each_with_index{|c, idx| + bench_each idx + } +end + +def parse_result data + flag = false + stat = [] + data.each{|line| + if flag + if /(\w+)\t([\d\.]+)/ =~ line + stat << [$1, $2.to_f] + else + raise "not a data" + end + + end + if /benchmark summary/ =~ line + flag = true + end + } + stat +end + +def calc_each data + data.sort! + data.pop # remove max + data.shift # remove min + + data.inject(0.0){|res, e| + res += e + } / data.size +end + +def calc_stat stats + stat = [] + stats[0].each_with_index{|e, idx| + bm = e[0] + vals = stats.map{|st| + st[idx][1] + } + [bm, calc_each(vals)] + } +end + +def stat + total = [] + Configs.each_with_index{|c, idx| + stats = [] + 5.times{|count| + file = "b#{idx}-#{count}" + # p file + open(file){|f| + stats << parse_result(f.read) + } + } + # merge stats + total << calc_stat(stats) + total + } + # pp total + total[0].each_with_index{|e, idx| + bm = e[0] + # print "#{bm}\t" + total.each{|st| + print st[idx][1], "\t" + } + puts + } +end + +ARGV.each{|cmd| + case cmd + when 'build' + build + when 'check' + check + when 'bench' + bench + when 'stat' + stat + else + raise + end +} + diff --git a/tool/file2lastrev.rb b/tool/file2lastrev.rb new file mode 100644 index 0000000..ddc83e8 --- /dev/null +++ b/tool/file2lastrev.rb @@ -0,0 +1,90 @@ +#!/usr/bin/env ruby + +ENV.delete('PWD') + +require 'optparse' +require 'pathname' + +SRCDIR = 
Pathname(File.dirname($0)).parent.freeze +class VCSNotFoundError < RuntimeError; end + +def detect_vcs(path) + path = SRCDIR + return :svn, path.relative_path_from(SRCDIR) if File.directory?("#{path}/.svn") + return :git_svn, path.relative_path_from(SRCDIR) if File.directory?("#{path}/.git/svn") + return :git, path.relative_path_from(SRCDIR) if File.directory?("#{path}/.git") + raise VCSNotFoundError, "does not seem to be under a vcs" +end + +# return a pair of strings, the last revision and the last revision in which +# +path+ was modified. +def get_revisions(path) + vcs, path = detect_vcs(path) + + info = case vcs + when :svn + info_xml = `cd "#{SRCDIR}" && svn info --xml "#{path}"` + _, last, _, changed, _ = info_xml.split(/revision="(\d+)"/) + return last, changed + when :git_svn + `cd "#{SRCDIR}" && git svn info "#{path}"` + when :git + git_log = `cd "#{SRCDIR}" && git log HEAD~1..HEAD "#{path}"` + git_log =~ /git-svn-id: .*?@(\d+)/ + return $1, $1 + end + + if /^Revision: (\d+)/ =~ info + last = $1 + else + raise "last revision not found" + end + if /^Last Changed Rev: (\d+)/ =~ info + changed = $1 + else + raise "changed revision not found" + end + + return last, changed +end + +def raise_if_conflict + raise "you can specify only one of --changed, --revision.h and --doxygen" if $output +end + +parser = OptionParser.new {|opts| + opts.on("--changed", "changed rev") do + raise_if_conflict + $output = :changed + end + opts.on("--revision.h") do + raise_if_conflict + $output = :revision_h + end + opts.on("--doxygen") do + raise_if_conflict + $output = :doxygen + end + opts.on("-q", "--suppress_not_found") do + $suppress_not_found = true + end +} +parser.parse! 
+
+
+begin
+  last, changed = get_revisions(ARGV.shift)
+rescue VCSNotFoundError
+  raise unless $suppress_not_found
+end
+
+case $output
+when :changed, nil
+  puts changed
+when :revision_h
+  puts "#define RUBY_REVISION #{changed.to_i}"
+when :doxygen
+  puts "r#{changed}/r#{last}"
+else
+  raise "unknown output format `#{$output}'"
+end
diff --git a/tool/generic_erb.rb b/tool/generic_erb.rb
new file mode 100644
index 0000000..3ddfd7f
--- /dev/null
+++ b/tool/generic_erb.rb
@@ -0,0 +1,34 @@
+# Expand an ERB template (first non-option argument) and print the result,
+# or write it to the file given with -o.
+require 'erb'
+require 'optparse'
+require 'fileutils'
+
+timestamp = nil
+output = nil
+ifchange = nil
+opt = OptionParser.new do |o|
+  o.on('-t', '--timestamp[=PATH]') {|v| timestamp = v || true}
+  o.on('-o', '--output=PATH') {|v| output = v}
+  o.on('-c', '--[no-]if-change') {|v| ifchange = v}
+  o.order!(ARGV)
+end
+# OptionParser.new always returns the parser, so the usage check belongs on
+# the template argument: without one, print the option summary and exit.
+template = ARGV.shift or abort opt.to_s
+erb = ERB.new(File.read(template), nil, '%')
+erb.filename = template
+result = erb.result
+if output
+  # With --if-change, leave the file alone when its content is already
+  # identical; an unreadable file counts as different.
+  if ifchange and (IO.read(output) rescue nil) == result
+    puts "#{output} unchanged"
+  else
+    open(output, "wb") {|f| f.print result}
+    puts "#{output} updated"
+  end
+  if timestamp
+    # A bare -t touches ".time.<basename>" next to the output file.
+    if timestamp == true
+      dir, base = File.split(output)
+      timestamp = File.join(dir, ".time." + base)
+    end
+    FileUtils.touch(timestamp)
+  end
+else
+  print result
+end
diff --git a/tool/ifchange b/tool/ifchange
new file mode 100755
index 0000000..8bfca26
--- /dev/null
+++ b/tool/ifchange
@@ -0,0 +1,25 @@
+#!/bin/sh
+# usage: ifchange target temporary
+
+unset timestamp
+if [ "$1" = --timestamp ]; then
+    timestamp=yes
+    shift
+fi
+target="$1"
+temp="$2"
+if [ "$temp" = - ]; then
+    temp="tmpdata$$.tmp~"
+    cat > "$temp" || exit $?
+ trap 'rm -f "$temp"' 0 +fi +if cmp "$target" "$temp" >/dev/null 2>&1; then + echo "$target unchanged" + rm -f "$temp" +else + echo "$target updated" + mv -f "$temp" "$target" +fi +if [ $timestamp ]; then + touch `dirname "$target"`/.time.`basename "$target"` +fi diff --git a/tool/insns2vm.rb b/tool/insns2vm.rb new file mode 100755 index 0000000..f518707 --- /dev/null +++ b/tool/insns2vm.rb @@ -0,0 +1,15 @@ +#!ruby + +require 'optparse' + +Version = %w$Revision: 11626 $[1..-1] + +require "#{File.join(File.dirname(__FILE__), 'instruction')}" + +if $0 == __FILE__ + opts = ARGV.options + maker = RubyVM::SourceCodeGenerator.def_options(opts) + files = opts.parse! + generator = maker.call + generator.generate(files) +end diff --git a/tool/instruction.rb b/tool/instruction.rb new file mode 100644 index 0000000..30377f3 --- /dev/null +++ b/tool/instruction.rb @@ -0,0 +1,1385 @@ +# +# +# + +require 'erb' + +class RubyVM + class Instruction + def initialize name, opes, pops, rets, comm, body, tvars, sp_inc, + orig = self, defopes = [], type = nil, + nsc = [], psc = [[], []] + + @name = name + @opes = opes # [[type, name], ...] + @pops = pops # [[type, name], ...] + @rets = rets # [[type, name], ...] + @comm = comm # {:c => category, :e => en desc, :j => ja desc} + @body = body # '...' 
+ + @orig = orig + @defopes = defopes + @type = type + @tvars = tvars + + @nextsc = nsc + @pushsc = psc + @sc = [] + @unifs = [] + @optimized = [] + @is_sc = false + @sp_inc = sp_inc + end + + def add_sc sci + @sc << sci + sci.set_sc + end + + attr_reader :name, :opes, :pops, :rets + attr_reader :body, :comm + attr_reader :nextsc, :pushsc + attr_reader :orig, :defopes, :type + attr_reader :sc + attr_reader :unifs, :optimized + attr_reader :is_sc + attr_reader :tvars + attr_reader :sp_inc + + def set_sc + @is_sc = true + end + + def add_unif insns + @unifs << insns + end + + def add_optimized insn + @optimized << insn + end + + def sp_increase_c_expr + if(pops.any?{|t, v| v == '...'} || + rets.any?{|t, v| v == '...'}) + # user definision + raise "no sp increase definition" if @sp_inc.nil? + ret = "int inc = 0;\n" + + @opes.each_with_index{|(t, v), i| + if t == 'rb_num_t' && ((re = /\b#{v}\b/n) =~ @sp_inc || + @defopes.any?{|t, val| re =~ val}) + ret << " #{t} #{v} = FIX2INT(opes[#{i}]);\n" + end + } + @defopes.each_with_index{|((t, var), val), i| + if t == 'rb_num_t' && val != '*' && /\b#{var}\b/ =~ @sp_inc + ret << " #{t} #{var} = #{val};\n" + end + } + + ret << " #{@sp_inc};\n" + ret << " return depth + inc;" + ret + else + "return depth + #{rets.size - pops.size};" + end + end + + def inspect + "#" + end + end + + class InstructionsLoader + def initialize opts = {} + @insns = [] + @insn_map = {} + + @vpath = opts[:VPATH] || File + @use_const = opts[:use_const] + @verbose = opts[:verbose] + @destdir = opts[:destdir] + + (@vm_opts = load_vm_opts).each {|k, v| + @vm_opts[k] = opts[k] if opts.key?(k) + } + + load_insns_def opts[:"insns.def"] || 'insns.def' + + load_opt_operand_def opts[:"opope.def"] || 'defs/opt_operand.def' + load_insn_unification_def opts[:"unif.def"] || 'defs/opt_insn_unif.def' + make_stackcaching_insns if vm_opt?('STACK_CACHING') + end + + attr_reader :vpath + attr_reader :destdir + + %w[use_const verbose].each do |attr| + attr_reader attr + 
alias_method "#{attr}?", attr + remove_method attr + end + + def [](s) + @insn_map[s.to_s] + end + + def each + @insns.each{|insn| + yield insn + } + end + + def size + @insns.size + end + + ### + private + + def vm_opt? name + @vm_opts[name] + end + + def load_vm_opts file = nil + file ||= 'vm_opts.h' + opts = {} + vpath.open(file) do |f| + f.grep(/^\#define\s+OPT_([A-Z_]+)\s+(\d+)/) do + opts[$1] = !$2.to_i.zero? + end + end + opts + end + + SKIP_COMMENT_PATTERN = Regexp.compile(Regexp.escape('/** ##skip')) + + include Enumerable + + def add_insn insn + @insns << insn + @insn_map[insn.name] = insn + end + + def make_insn name, opes, pops, rets, comm, body, sp_inc + add_insn Instruction.new(name, opes, pops, rets, comm, body, [], sp_inc) + end + + # str -> [[type, var], ...] + def parse_vars line + raise unless /\((.*?)\)/ =~ line + vars = $1.split(',') + vars.map!{|v| + if /\s*(\S+)\s+(\S+)\s*/ =~ v + type = $1 + var = $2 + elsif /\s*\.\.\.\s*/ =~ v + type = var = '...' + else + raise + end + [type, var] + } + vars + end + + def parse_comment comm + c = 'others' + j = '' + e = '' + comm.each_line{|line| + case line + when /@c (.+)/ + c = $1 + when /@e (.+)/ + e = $1 + when /@e\s*$/ + e = '' + when /@j (.+)$/ + j = $1 + when /@j\s*$/ + j = '' + end + } + { :c => c, + :e => e, + :j => j, + } + end + + def load_insns_def file + body = insn = opes = pops = rets = nil + comment = '' + + vpath.open(file) {|f| + f.instance_variable_set(:@line_no, 0) + class << f + def line_no + @line_no + end + def gets + @line_no += 1 + super + end + end + + while line = f.gets + line.chomp! 
+ case line + + when SKIP_COMMENT_PATTERN + while line = f.gets.chomp + if /\s+\*\/$/ =~ line + break + end + end + + # collect instruction comment + when /^\/\*\*$/ + while line = f.gets + if /\s+\*\/\s*$/ =~ line + break + else + comment << line + end + end + + # start instruction body + when /^DEFINE_INSN$/ + insn = f.gets.chomp + opes = parse_vars(f.gets.chomp) + pops = parse_vars(f.gets.chomp).reverse + rets_str = f.gets.chomp + rets = parse_vars(rets_str).reverse + comment = parse_comment(comment) + insn_in = true + body = '' + + sp_inc = rets_str[%r"//\s*(.+)", 1] + + raise unless /^\{$/ =~ f.gets.chomp + line_no = f.line_no + + # end instruction body + when /^\}/ + if insn_in + body.instance_variable_set(:@line_no, line_no) + body.instance_variable_set(:@file, f.path) + insn = make_insn(insn, opes, pops, rets, comment, body, sp_inc) + insn_in = false + comment = '' + end + + else + if insn_in + body << line + "\n" + end + end + end + } + end + + ## opt op + def load_opt_operand_def file + vpath.foreach(file) {|line| + line = line.gsub(/\#.*/, '').strip + next if line.length == 0 + break if /__END__/ =~ line + /(\S+)\s+(.+)/ =~ line + insn = $1 + opts = $2 + add_opt_operand insn, opts.split(/,/).map{|e| e.strip} + } if file + end + + def label_escape label + label.gsub(/\(/, '_O_'). + gsub(/\)/, '_C_'). 
+ gsub(/\*/, '_WC_') + end + + def add_opt_operand insn_name, opts + insn = @insn_map[insn_name] + opes = insn.opes + + if opes.size != opts.size + raise "operand size mismatcvh for #{insn.name} (opes: #{opes.size}, opts: #{opts.size})" + end + + ninsn = insn.name + '_OP_' + opts.map{|e| label_escape(e)}.join('_') + nopes = [] + defv = [] + + opts.each_with_index{|e, i| + if e == '*' + nopes << opes[i] + end + defv << [opes[i], e] + } + + make_insn_operand_optimized(insn, ninsn, nopes, defv) + end + + def make_insn_operand_optimized orig_insn, name, opes, defopes + comm = orig_insn.comm.dup + comm[:c] = 'optimize' + add_insn insn = Instruction.new( + name, opes, orig_insn.pops, orig_insn.rets, comm, + orig_insn.body, orig_insn.tvars, orig_insn.sp_inc, + orig_insn, defopes) + orig_insn.add_optimized insn + end + + ## insn unif + def load_insn_unification_def file + vpath.foreach(file) {|line| + line = line.gsub(/\#.*/, '').strip + next if line.length == 0 + break if /__END__/ =~ line + make_unified_insns line.split.map{|e| + raise "unknown insn: #{e}" unless @insn_map[e] + @insn_map[e] + } + } if file + end + + def all_combination sets + ret = sets.shift.map{|e| [e]} + + sets.each{|set| + prev = ret + ret = [] + prev.each{|ary| + set.each{|e| + eary = ary.dup + eary << e + ret << eary + } + } + } + ret + end + + def make_unified_insns insns + if vm_opt?('UNIFY_ALL_COMBINATION') + insn_sets = insns.map{|insn| + [insn] + insn.optimized + } + + all_combination(insn_sets).each{|insns_set| + make_unified_insn_each insns_set + } + else + make_unified_insn_each insns + end + end + + def mk_private_val vals, i, redef + vals.dup.map{|v| + # v[0] : type + # v[1] : var name + + v = v.dup + if v[0] != '...' + redef[v[1]] = v[0] + v[1] = "#{v[1]}_#{i}" + end + v + } + end + + def mk_private_val2 vals, i, redef + vals.dup.map{|v| + # v[0][0] : type + # v[0][1] : var name + # v[1] : default val + + pv = v.dup + v = pv[0] = pv[0].dup + if v[0] != '...' 
+ redef[v[1]] = v[0] + v[1] = "#{v[1]}_#{i}" + end + pv + } + end + + def make_unified_insn_each insns + names = [] + opes = [] + pops = [] + rets = [] + comm = { + :c => 'optimize', + :e => 'unified insn', + :j => 'unified insn', + } + body = '' + passed = [] + tvars = [] + defopes = [] + sp_inc = '' + + insns.each_with_index{|insn, i| + names << insn.name + redef_vars = {} + + e_opes = mk_private_val(insn.opes, i, redef_vars) + e_pops = mk_private_val(insn.pops, i, redef_vars) + e_rets = mk_private_val(insn.rets, i, redef_vars) + # ToDo: fix it + e_defs = mk_private_val2(insn.defopes, i, redef_vars) + + passed_vars = [] + while pvar = e_pops.pop + rvar = rets.pop + if rvar + raise "unsupported unif insn: #{insns.inspect}" if rvar[0] == '...' + passed_vars << [pvar, rvar] + tvars << rvar + else + e_pops.push pvar + break + end + end + + opes.concat e_opes + pops.concat e_pops + rets.concat e_rets + defopes.concat e_defs + sp_inc += "#{insn.sp_inc}" + + body += "{ /* unif: #{i} */\n" + + passed_vars.map{|rpvars| + pv = rpvars[0] + rv = rpvars[1] + "#define #{pv[1]} #{rv[1]}" + }.join("\n") + + "\n" + + redef_vars.map{|v, type| + "#define #{v} #{v}_#{i}" + }.join("\n") + "\n" + + insn.body + + passed_vars.map{|rpvars| + "#undef #{rpvars[0][1]}" + }.join("\n") + + "\n" + + redef_vars.keys.map{|v| + "#undef #{v}" + }.join("\n") + + "\n}\n" + } + + tvars_ary = [] + tvars.each{|tvar| + unless opes.any?{|var| + var[1] == tvar[1] + } || defopes.any?{|pvar| + pvar[0][1] == tvar[1] + } + tvars_ary << tvar + end + } + add_insn insn = Instruction.new("UNIFIED_" + names.join('_'), + opes, pops, rets.reverse, comm, body, + tvars_ary, sp_inc) + insn.defopes.replace defopes + insns[0].add_unif [insn, insns] + end + + ## sc + SPECIAL_INSN_FOR_SC_AFTER = { + /\Asend/ => [:a], + /\Aend/ => [:a], + /\Ayield/ => [:a], + /\Aclassdef/ => [:a], + /\Amoduledef/ => [:a], + } + FROM_SC = [[], [:a], [:b], [:a, :b], [:b, :a]] + + def make_stackcaching_insns + pops = rets = nil + + 
@insns.dup.each{|insn| + opops = insn.pops + orets = insn.rets + oopes = insn.opes + ocomm = insn.comm + + after = nil + SPECIAL_INSN_FOR_SC_AFTER.any?{|k, v| + if k =~ insn.name + after = v + break + end + } + + insns = [] + FROM_SC.each{|from| + name, pops, rets, pushs1, pushs2, nextsc = + *calc_stack(insn, from, after, opops, orets) + + make_insn_sc(insn, name, oopes, pops, rets, [pushs1, pushs2], nextsc) + } + } + end + + def make_insn_sc orig_insn, name, opes, pops, rets, pushs, nextsc + comm = orig_insn.comm.dup + comm[:c] = 'optimize(sc)' + + scinsn = Instruction.new( + name, opes, pops, rets, comm, + orig_insn.body, orig_insn.tvars, orig_insn.sp_inc, + orig_insn, orig_insn.defopes, :sc, nextsc, pushs) + + add_insn scinsn + orig_insn.add_sc scinsn + end + + def self.complement_name st + "#{st[0] ? st[0] : 'x'}#{st[1] ? st[1] : 'x'}" + end + + def add_stack_value st + len = st.length + if len == 0 + st[0] = :a + [nil, :a] + elsif len == 1 + if st[0] == :a + st[1] = :b + else + st[1] = :a + end + [nil, st[1]] + else + st[0], st[1] = st[1], st[0] + [st[1], st[1]] + end + end + + def calc_stack insn, ofrom, oafter, opops, orets + from = ofrom.dup + pops = opops.dup + rets = orets.dup + rest_scr = ofrom.dup + + pushs_before = [] + pushs= [] + + pops.each_with_index{|e, i| + if e[0] == '...' + pushs_before = from + from = [] + end + r = from.pop + break unless r + pops[i] = pops[i].dup << r + } + + if oafter + from = oafter + from.each_with_index{|r, i| + rets[i] = rets[i].dup << r if rets[i] + } + else + rets = rets.reverse + rets.each_with_index{|e, i| + break if e[0] == '...' 
+          pushed, r = add_stack_value from
+          rets[i] = rets[i].dup << r
+          if pushed
+            if rest_scr.pop
+              pushs << pushed
+            end
+
+            if i - 2 >= 0
+              rets[i-2].pop
+            end
+          end
+        }
+      end
+
+      if false #|| insn.name =~ /test3/
+        p ofrom
+        p pops
+        p rets
+        p pushs_before
+        p pushs
+        p from
+        exit
+      end
+
+      # complement_name is defined as a class method (self.complement_name),
+      # so it has to be called on the class from this instance method.
+      ["#{insn.name}_SC_#{InstructionsLoader.complement_name(ofrom)}_#{InstructionsLoader.complement_name(from)}",
+        pops, rets, pushs_before, pushs, from]
+    end
+  end
+
+  # Base class of the generators: wraps the loaded instruction set and
+  # provides the line buffer (build_string / commit / comment) they share.
+  class SourceCodeGenerator
+    def initialize insns
+      @insns = insns
+    end
+
+    attr_reader :insns
+
+    # Placeholder; always raises.
+    def generate
+      raise "should not reach here"
+    end
+
+    def vpath
+      @insns.vpath
+    end
+
+    def verbose?
+      @insns.verbose?
+    end
+
+    def use_const?
+      @insns.use_const?
+    end
+
+    # Run the block with a fresh line buffer and return the buffered lines
+    # joined with newlines.
+    def build_string
+      @lines = []
+      yield
+      @lines.join("\n")
+    end
+
+    EMPTY_STRING = ''.freeze
+
+    # Append +str+ (an empty line by default) to the current buffer.
+    def commit str = EMPTY_STRING
+      @lines << str
+    end
+
+    # Append +str+ only in verbose mode.
+    def comment str
+      @lines << str if verbose?
+    end
+
+    # Prefix +fn+ with the destination directory when one was configured.
+    def output_path(fn)
+      d = @insns.destdir
+      fn = File.join(d, fn) if d
+      fn
+    end
+  end
+
+  ###################################################################
+  # vm.inc
+  class VmBodyGenerator < SourceCodeGenerator
+    # vm.inc
+    def generate
+      vm_body = ''
+      @insns.each{|insn|
+        vm_body << "\n"
+        vm_body << make_insn_def(insn)
+      }
+      src = vpath.read('template/vm.inc.tmpl')
+      ERB.new(src).result(binding)
+    end
+
+    def generate_from_insnname insnname
+      make_insn_def @insns[insnname.to_s]
+    end
+
+    #######
+    private
+
+    # Emit a PUSH for every register listed in insn.pushsc; refuses an
+    # instruction that has entries in both of its two lists.
+    def make_header_prepare_stack insn
+      comment " /* prepare stack status */"
+
+      push_ba = insn.pushsc
+      raise "unsupport" if push_ba[0].size > 0 && push_ba[1].size > 0
+
+      push_ba.each{|pushs|
+        pushs.each{|r|
+          commit " PUSH(SCREG(#{r}));"
+        }
+      }
+    end
+
+    def make_header_operands insn
+      comment " /* declare and get from iseq */"
+
+      vars = insn.opes
+      n = 0
+      ops = []
+
+      vars.each_with_index{|(type, var), i|
+        if type == '...'
+ break + end + + re = /\b#{var}\b/n + if re =~ insn.body or re =~ insn.sp_inc or insn.rets.any?{|t, v| re =~ v} + ops << " #{type} #{var} = (#{type})GET_OPERAND(#{i+1});" + end + n += 1 + } + @opn = n + + # reverse or not? + # ops.join + commit ops.reverse + end + + def make_header_default_operands insn + vars = insn.defopes + + vars.each{|e| + next if e[1] == '*' + if use_const? + commit " const #{e[0][0]} #{e[0][1]} = #{e[1]};" + else + commit " #define #{e[0][1]} #{e[1]}" + end + } + end + + def make_footer_default_operands insn + comment " /* declare and initialize default opes */" + if use_const? + commit + else + vars = insn.defopes + + vars.each{|e| + next if e[1] == '*' + commit "#undef #{e[0][1]}" + } + end + end + + def make_header_stack_pops insn + comment " /* declare and pop from stack */" + + n = 0 + pops = [] + vars = insn.pops + vars.each_with_index{|iter, i| + type, var, r = *iter + if type == '...' + break + end + if r + pops << " #{type} #{var} = SCREG(#{r});" + else + pops << " #{type} #{var} = TOPN(#{n});" + n += 1 + end + } + @popn = n + + # reverse or not? + commit pops.reverse + end + + def make_header_temporary_vars insn + comment " /* declare temporary vars */" + + insn.tvars.each{|var| + commit " #{var[0]} #{var[1]};" + } + end + + def make_header_stack_val insn + comment "/* declare stack push val */" + + vars = insn.opes + insn.pops + insn.defopes.map{|e| e[0]} + + insn.rets.each{|var| + if vars.all?{|e| e[1] != var[1]} && var[1] != '...' 
+ commit " #{var[0]} #{var[1]};" + end + } + end + + def make_header_analysis insn + commit " USAGE_ANALYSIS_INSN(BIN(#{insn.name}));" + insn.opes.each_with_index{|op, i| + commit " USAGE_ANALYSIS_OPERAND(BIN(#{insn.name}), #{i}, #{op[1]});" + } + end + + def make_header_pc insn + commit " ADD_PC(1+#{@opn});" + commit " PREFETCH(GET_PC());" + end + + def make_header_popn insn + comment " /* management */" + commit " POPN(#{@popn});" if @popn > 0 + end + + def make_hader_debug insn + comment " /* for debug */" + commit " DEBUG_ENTER_INSN(\"#{insn.name}\");" + end + + def make_header_defines insn + commit " #define CURRENT_INSN_#{insn.name} 1" + commit " #define INSN_IS_SC() #{insn.sc ? 0 : 1}" + commit " #define INSN_LABEL(lab) LABEL_#{insn.name}_##lab" + commit " #define LABEL_IS_SC(lab) LABEL_##lab##_###{insn.sc.size == 0 ? 't' : 'f'}" + end + + def make_footer_stack_val insn + comment " /* push stack val */" + + insn.rets.reverse_each{|v| + if v[1] == '...' + break + end + if v[2] + commit " SCREG(#{v[2]}) = #{v[1]};" + else + commit " PUSH(#{v[1]});" + end + } + end + + def make_footer_undefs insn + commit "#undef CURRENT_INSN_#{insn.name}" + commit "#undef INSN_IS_SC" + commit "#undef INSN_LABEL" + commit "#undef LABEL_IS_SC" + end + + def make_header insn + commit "INSN_ENTRY(#{insn.name}){" + make_header_prepare_stack insn + commit "{" + make_header_stack_val insn + make_header_default_operands insn + make_header_operands insn + make_header_stack_pops insn + make_header_temporary_vars insn + # + make_hader_debug insn + make_header_pc insn + make_header_popn insn + make_header_defines insn + make_header_analysis insn + commit "{" + end + + def make_footer insn + make_footer_stack_val insn + make_footer_default_operands insn + make_footer_undefs insn + commit " END_INSN(#{insn.name});}}}" + end + + def make_insn_def insn + build_string do + make_header insn + if line = insn.body.instance_variable_get(:@line_no) + file = insn.body.instance_variable_get(:@file) + 
commit "#line #{line+1} \"#{file}\"" + commit insn.body + commit '#line __CURRENT_LINE__ "__CURRENT_FILE__"' + else + insn.body + end + make_footer(insn) + end + end + end + + ################################################################### + # vmtc.inc + class VmTCIncGenerator < SourceCodeGenerator + def generate + + insns_table = build_string do + @insns.each{|insn| + commit " LABEL_PTR(#{insn.name})," + } + end + + insn_end_table = build_string do + @insns.each{|insn| + commit " ELABEL_PTR(#{insn.name}),\n" + } + end + + ERB.new(vpath.read('template/vmtc.inc.tmpl')).result(binding) + end + end + + ################################################################### + # insns_info.inc + class InsnsInfoIncGenerator < SourceCodeGenerator + def generate + insns_info_inc + end + + ### + private + + def op2typesig op + case op + when /^OFFSET/ + "TS_OFFSET" + when /^rb_num_t/ + "TS_NUM" + when /^lindex_t/ + "TS_LINDEX" + when /^dindex_t/ + "TS_DINDEX" + when /^VALUE/ + "TS_VALUE" + when /^ID/ + "TS_ID" + when /GENTRY/ + "TS_GENTRY" + when /^IC/ + "TS_IC" + when /^\.\.\./ + "TS_VARIABLE" + when /^CDHASH/ + "TS_CDHASH" + when /^ISEQ/ + "TS_ISEQ" + when /rb_insn_func_t/ + "TS_FUNCPTR" + else + raise "unknown op type: #{op}" + end + end + + TYPE_CHARS = { + 'TS_OFFSET' => 'O', + 'TS_NUM' => 'N', + 'TS_LINDEX' => 'L', + 'TS_DINDEX' => 'D', + 'TS_VALUE' => 'V', + 'TS_ID' => 'I', + 'TS_GENTRY' => 'G', + 'TS_IC' => 'C', + 'TS_CDHASH' => 'H', + 'TS_ISEQ' => 'S', + 'TS_VARIABLE' => '.', + 'TS_FUNCPTR' => 'F', + } + + # insns_info.inc + def insns_info_inc + # insn_type_chars + insn_type_chars = TYPE_CHARS.map{|t, c| + "#define #{t} '#{c}'" + }.join("\n") + + # insn_names + insn_names = '' + @insns.each{|insn| + insn_names << " \"#{insn.name}\",\n" + } + + # operands info + operands_info = '' + operands_num_info = '' + @insns.each{|insn| + opes = insn.opes + operands_info << ' ' + ot = opes.map{|type, var| + TYPE_CHARS.fetch(op2typesig(type)) + } + operands_info << 
"\"#{ot.join}\"" << ", \n" + + num = opes.size + 1 + operands_num_info << " #{num},\n" + } + + # stack num + stack_num_info = '' + @insns.each{|insn| + num = insn.rets.size + stack_num_info << " #{num},\n" + } + + # stack increase + stack_increase = '' + @insns.each{|insn| + stack_increase << <<-EOS + case BIN(#{insn.name}):{ + #{insn.sp_increase_c_expr} + } + EOS + } + ERB.new(vpath.read('template/insns_info.inc.tmpl')).result(binding) + end + end + + ################################################################### + # insns.inc + class InsnsIncGenerator < SourceCodeGenerator + def generate + i=0 + insns = build_string do + @insns.each{|insn| + commit " %-30s = %d,\n" % ["BIN(#{insn.name})", i] + i+=1 + } + end + + ERB.new(vpath.read('template/insns.inc.tmpl')).result(binding) + end + end + + ################################################################### + # minsns.inc + class MInsnsIncGenerator < SourceCodeGenerator + def generate + i=0 + defs = build_string do + @insns.each{|insn| + commit " rb_define_const(mYarvInsns, %-30s, INT2FIX(%d));\n" % + ["\"I#{insn.name}\"", i] + i+=1 + } + end + ERB.new(vpath.read('template/minsns.inc.tmpl')).result(binding) + end + end + + ################################################################### + # optinsn.inc + class OptInsnIncGenerator < SourceCodeGenerator + def generate + optinsn_inc + end + + ### + private + + def val_as_type op + type = op[0][0] + val = op[1] + + case type + when /^long/, /^rb_num_t/, /^lindex_t/, /^dindex_t/ + "INT2FIX(#{val})" + when /^VALUE/ + val + when /^ID/ + "INT2FIX(#{val})" + when /^ISEQ/, /^rb_insn_func_t/ + val + when /GENTRY/ + raise + when /^\.\.\./ + raise + else + raise "type: #{type}" + end + end + + # optinsn.inc + def optinsn_inc + rule = '' + opt_insns_map = Hash.new{|h, k| h[k] = []} + + @insns.each{|insn| + next if insn.defopes.size == 0 + next if insn.type == :sc + next if /^UNIFIED/ =~ insn.name.to_s + + originsn = insn.orig + opt_insns_map[originsn] << insn + } + + 
rule = build_string do + opt_insns_map.each{|originsn, optinsns| + commit "case BIN(#{originsn.name}):" + + optinsns.sort_by{|opti| + opti.defopes.find_all{|e| e[1] == '*'}.size + }.each{|opti| + commit " if(" + i = 0 + commit " " + opti.defopes.map{|opinfo| + i += 1 + next if opinfo[1] == '*' + "insnobj->operands[#{i-1}] == #{val_as_type(opinfo)}" + }.compact.join('&& ') + commit " ){" + idx = 0 + n = 0 + opti.defopes.each{|opinfo| + if opinfo[1] == '*' + if idx != n + commit " insnobj->operands[#{idx}] = insnobj->operands[#{n}];" + end + idx += 1 + else + # skip + end + n += 1 + } + commit " insnobj->insn_id = BIN(#{opti.name});" + commit " insnobj->operand_size = #{idx};" + commit " break;\n }\n" + } + commit " break;"; + } + end + + ERB.new(vpath.read('template/optinsn.inc.tmpl')).result(binding) + end + end + + ################################################################### + # optunifs.inc + class OptUnifsIncGenerator < SourceCodeGenerator + def generate + unif_insns_each = '' + unif_insns = '' + unif_insns_data = [] + + insns = @insns.find_all{|insn| !insn.is_sc} + insns.each{|insn| + size = insn.unifs.size + if size > 0 + insn.unifs.sort_by{|unif| -unif[1].size}.each_with_index{|unif, i| + + uni_insn, uni_insns = *unif + uni_insns = uni_insns[1..-1] + unif_insns_each << "static const int UNIFIED_#{insn.name}_#{i}[] = {" + + " BIN(#{uni_insn.name}), #{uni_insns.size + 2}, \n " + + uni_insns.map{|e| "BIN(#{e.name})"}.join(", ") + "};\n" + } + else + + end + if size > 0 + unif_insns << "static const int *const UNIFIED_#{insn.name}[] = {(int *)#{size+1}, \n" + unif_insns << (0...size).map{|e| " UNIFIED_#{insn.name}_#{e}"}.join(",\n") + "};\n" + unif_insns_data << " UNIFIED_#{insn.name}" + else + unif_insns_data << " 0" + end + } + unif_insns_data = "static const int *const *const unified_insns_data[] = {\n" + + unif_insns_data.join(",\n") + "};\n" + ERB.new(vpath.read('template/optunifs.inc.tmpl')).result(binding) + end + end + + 
################################################################### + # opt_sc.inc + class OptSCIncGenerator < SourceCodeGenerator + def generate + sc_insn_info = [] + @insns.each{|insn| + insns = insn.sc + if insns.size > 0 + insns = ['SC_ERROR'] + insns.map{|e| " BIN(#{e.name})"} + else + insns = Array.new(6){'SC_ERROR'} + end + sc_insn_info << " {\n#{insns.join(",\n")}}" + } + sc_insn_info = sc_insn_info.join(",\n") + + sc_insn_next = @insns.map{|insn| + " SCS_#{InstructionsLoader.complement_name(insn.nextsc).upcase}" + + (verbose? ? " /* #{insn.name} */" : '') + }.join(",\n") + ERB.new(vpath.read('template/opt_sc.inc.tmpl')).result(binding) + end + end + + ################################################################### + # yasmdata.rb + class YASMDataRbGenerator < SourceCodeGenerator + def generate + insn_id2no = '' + @insns.each_with_index{|insn, i| + insn_id2no << " :#{insn.name} => #{i},\n" + } + ERB.new(vpath.read('template/yasmdata.rb.tmpl')).result(binding) + end + end + + ################################################################### + # yarvarch.* + class YARVDocGenerator < SourceCodeGenerator + def generate + + end + + def desc lang + d = '' + i = 0 + cat = nil + @insns.each{|insn| + seq = insn.opes.map{|t,v| v}.join(' ') + before = insn.pops.reverse.map{|t,v| v}.join(' ') + after = insn.rets.reverse.map{|t,v| v}.join(' ') + + if cat != insn.comm[:c] + d << "** #{insn.comm[:c]}\n\n" + cat = insn.comm[:c] + end + + d << "*** #{insn.name}\n" + d << "\n" + d << insn.comm[lang] + "\n\n" + d << ":instruction sequence: 0x%02x #{seq}\n" % i + d << ":stack: #{before} => #{after}\n\n" + i+=1 + } + d + end + + def desc_ja + d = desc :j + ERB.new(vpath.read('template/yarvarch.ja')).result(binding) + end + + def desc_en + d = desc :e + ERB.new(vpath.read('template/yarvarch.en')).result(binding) + end + end + + module VPATH + def search(meth, base, *rest) + begin + meth.call(base, *rest) + rescue Errno::ENOENT => error + each do |dir| + return 
meth.call(File.join(dir, base), *rest) rescue nil + end + raise error + end + end + + def process(*args, &block) + search(File.method(__callee__), *args, &block) + end + + alias stat process + alias lstat process + + def open(*args) + f = search(File.method(:open), *args) + if block_given? + begin + yield f + ensure + f.close unless f.closed? + end + else + f + end + end + + def read(*args) + open(*args) {|f| f.read} + end + + def foreach(file, *args, &block) + open(file) {|f| f.each(*args, &block)} + end + + def self.def_options(opt) + vpath = [] + path_sep = ':' + + opt.on("-I", "--srcdir=DIR", "add a directory to search path") {|dir| + vpath |= [dir] + } + opt.on("-L", "--vpath=PATH LIST", "add directories to search path") {|dirs| + vpath |= dirs.split(path_sep) + } + opt.on("--path-separator=SEP", /\A\W\z/, "separator for vpath") {|sep| + path_sep = sep + } + + proc { + vpath.extend(self) unless vpath.empty? + } + end + end + + class SourceCodeGenerator + Files = { # codes + 'vm.inc' => VmBodyGenerator, + 'vmtc.inc' => VmTCIncGenerator, + 'insns.inc' => InsnsIncGenerator, + 'insns_info.inc' => InsnsInfoIncGenerator, + # 'minsns.inc' => MInsnsIncGenerator, + 'optinsn.inc' => OptInsnIncGenerator, + 'optunifs.inc' => OptUnifsIncGenerator, + 'opt_sc.inc' => OptSCIncGenerator, + 'yasmdata.rb' => YASMDataRbGenerator, + } + + def generate args = [] + args = Files.keys if args.empty? 
+      args.each{|fn|
+        s = Files[fn].new(@insns).generate
+        open(output_path(fn), 'w') {|f| f.puts(s)}
+      }
+    end
+
+    # Register the generator's command-line switches on +opt+ and return a
+    # proc that, once the arguments have been parsed, builds the generator
+    # from the collected options.
+    def self.def_options(opt)
+      opts = {
+        :"insns.def" => 'insns.def',
+        :"opope.def" => 'defs/opt_operand.def',
+        :"unif.def" => 'defs/opt_insn_unif.def',
+      }
+
+      opt.on("-Dname", /\AOPT_(\w+)\z/, "enable VM option") {|s, v|
+        opts[v] = true
+      }
+      opt.on("--enable=name[,name...]", Array,
+        "enable VM options (without OPT_ prefix)") {|*a|
+        a.each {|v| opts[v] = true}
+      }
+      opt.on("-Uname", /\AOPT_(\w+)\z/, "disable VM option") {|s, v|
+        opts[v] = false
+      }
+      opt.on("--disable=name[,name...]", Array,
+        "disable VM options (without OPT_ prefix)") {|*a|
+        a.each {|v| opts[v] = false}
+      }
+      # The three file switches must store under the dotted keys declared
+      # above: those are the keys InstructionsLoader#initialize looks up.
+      opt.on("-i", "--insnsdef=FILE", "--instructions-def",
+        "instructions definition file") {|n|
+        opts[:"insns.def"] = n
+      }
+      opt.on("-o", "--opt-operanddef=FILE", "--opt-operand-def",
+        "vm option: operand definition file") {|n|
+        opts[:"opope.def"] = n
+      }
+      opt.on("-u", "--opt-insnunifdef=FILE", "--opt-insn-unif-def",
+        "vm option: instruction unification file") {|n|
+        opts[:"unif.def"] = n
+      }
+      opt.on("-C", "--[no-]use-const",
+        "use consts for default operands instead of macros") {|v|
+        opts[:use_const] = v
+      }
+      opt.on("-d", "--destdir", "--output-directory=DIR",
+        "make output file underneath DIR") {|v|
+        opts[:destdir] = v
+      }
+      opt.on("-V", "--[no-]verbose") {|v|
+        opts[:verbose] = v
+      }
+
+      vpath = VPATH.def_options(opt)
+
+      proc {
+        opts[:VPATH] = vpath.call
+        build opts
+      }
+    end
+
+    # Create a generator over the instructions loaded with +opts+; +vpath+
+    # is used as the search path only when opts[:VPATH] is not set.
+    def self.build opts, vpath = ['./']
+      opts[:VPATH] = vpath.extend(VPATH) unless opts[:VPATH]
+      self.new InstructionsLoader.new(opts)
+    end
+  end
+end
+
diff --git a/tool/make-snapshot b/tool/make-snapshot
new file mode 100755
index 0000000..45b6c51
--- /dev/null
+++ b/tool/make-snapshot
@@ -0,0 +1,218 @@
+#!/usr/bin/ruby -s
+# -*- coding: us-ascii -*-
+require 'uri'
+require 'digest/md5'
+require 'digest/sha2'
+require 'fileutils'
+require 'tmpdir'
+STDOUT.sync = true
+
+$exported = nil if 
$exported == "" +$archname = nil if $archname == "" + +ENV["LC_ALL"] = ENV["LANG"] = "C" +SVNURL = URI.parse("http://svn.ruby-lang.org/repos/ruby/") +RUBY_VERSION_PATTERN = /^\#define\s+RUBY_VERSION\s+"([\d.]+)"/ + +ENV["VPATH"] ||= "include/ruby" +YACC = ENV["YACC"] ||= "bison" +ENV["BASERUBY"] ||= "ruby" +ENV["RUBY"] ||= "ruby" +ENV["MV"] ||= "mv" +ENV["RM"] ||= "rm -f" +ENV["MINIRUBY"] ||= "ruby" +ENV["PROGRAM"] ||= "ruby" + +class String + # for older ruby + alias bytesize size unless method_defined?(:bytesize) +end + +$patch_file &&= File.expand_path($patch_file) +path = ENV["PATH"].split(File::PATH_SEPARATOR) +%w[YACC BASERUBY RUBY MV MINIRUBY].each do |var| + cmd = ENV[var] + unless path.any? {|dir| + file = File.expand_path(cmd, dir) + File.file?(file) and File.executable?(file) + } + abort "#{File.basename $0}: #{var} command not found - #{cmd}" + end +end + +unless destdir = ARGV.shift + abort "usage: #{File.basename $0} new-directory-to-save [version ...]" +end +revisions = ARGV.empty? ? 
["trunk"] : ARGV +unless tmp = $exported + FileUtils.mkpath(destdir) + destdir = File.expand_path(destdir) + tmp = Dir.mktmpdir("ruby-snapshot") + FileUtils.mkpath(tmp) + at_exit { + Dir.chdir "/" + FileUtils.rm_rf(tmp) + } unless $keep_temp +end +Dir.chdir tmp + +def package(rev, destdir) + patchlevel = false + case rev + when /\Atrunk\z/, /\Abranches\//, /\Atags\// + url = SVNURL + rev + when /\Astable\z/ + url = SVNURL + "branches/" + url = url + `svn ls #{url}`[/.*^(ruby_\d+_\d+)\//m, 1] + when /\A(.*)\.(.*)\.(.*)-((?!preview)p)?(.*)/ + patchlevel = !!$4 + tag = "#{$4}#{$5}" + url = SVNURL + "tags/v#{$1}_#{$2}_#{$3}_#{$5}" + when /\./ + url = SVNURL + "branches/ruby_#{rev.tr('.', '_')}" + else + warn "#{$0}: unknown version - #{rev}" + return + end + revision = `svn info #{url} 2>&1`[/Last Changed Rev: (\d+)/, 1] + version = nil + unless revision + url = SVNURL + "trunk" + version = `svn cat #{url + "version.h"}`[RUBY_VERSION_PATTERN, 1] + unless rev == version + warn "#{$0}: #{rev} not found" + return + end + revision = `svn info #{url}`[/Last Changed Rev: (\d+)/, 1] + end + v = nil + if $exported + if String === $exported + v = $exported + end + else + v = "ruby" + puts "Exporting #{rev}@#{revision}" + IO.popen("svn export #{url} #{v}") do |pipe| + pipe.each {|line| /^A/ =~ line or print line} + end + unless $?.success? + warn("Export failed") + return + end + end + + if !File.directory?(v) + v = Dir.glob("ruby-*").select(&File.method(:directory?)) + v.size == 1 or abort "not exported" + v = v[0] + end + open("#{v}/revision.h", "wb") {|f| f.puts "#define RUBY_REVISION #{revision}"} + open("#{v}/.revision.time", "wb") {} + version ||= (versionhdr = IO.read("#{v}/version.h"))[RUBY_VERSION_PATTERN, 1] + version or return + if patchlevel + versionhdr ||= IO.read("#{v}/version.h") + patchlevel = versionhdr[/^\#define\s+RUBY_PATCHLEVEL\s+(\d+)/, 1] + tag = (patchlevel ? 
"p#{patchlevel}" : "r#{revision}") + else + tag ||= "r#{revision}" + end + unless v == $exported + n = "ruby-#{version}-#{tag}" + File.directory?(n) or File.rename v, n + v = n + end + system("patch -d #{v} -p0 -i #{$patch_file}") if $patch_file + def (clean = []).add(n) push(n); n end + Dir.chdir(v) do + File.open(clean.add("cross.rb"), "w") {|f| f.puts "CROSS_COMPILING=true"} + unless File.exist?("configure") + print "creating configure..." + unless system("autoconf") + puts " failed" + return + end + puts " done" + end + clean.add("autom4te.cache") + print "creating prerequisites..." + if File.file?("common.mk") && /^prereq/ =~ commonmk = IO.read("common.mk") + puts + extout = clean.add('tmp') + File.open(clean.add("config.status"), "w") {|f| + f.puts "s,@configure_args@,|#_!!_#|,g" + f.puts "s,@EXTOUT@,|#_!!_#|#{extout},g" + f.puts "s,@bindir@,|#_!!_#|,g" + f.puts "s,@ruby_install_name@,|#_!!_#|,g" + f.puts "s,@ARCH_FLAG@,|#_!!_#|,g" + f.puts "s,@CFLAGS@,|#_!!_#|,g" + f.puts "s,@CPPFLAGS@,|#_!!_#|,g" + f.puts "s,@LDFLAGS@,|#_!!_#|,g" + f.puts "s,@DLDFLAGS@,|#_!!_#|,g" + f.puts "s,@LIBEXT@,|#_!!_#|a,g" + f.puts "s,@OBJEXT@,|#_!!_#|o,g" + f.puts "s,@LIBRUBY@,|#_!!_#|liburyb.a,g" + f.puts "s,@LIBRUBY_A@,|#_!!_#|liburyb.a,g" + f.puts "s,@RM@,|#_!!_#|rm -f,g" + f.puts "s,@CP@,|#_!!_#|cp,g" + } + FileUtils.mkpath(hdrdir = "#{extout}/include/ruby") + File.open("#{hdrdir}/config.h", "w") {} + miniruby = ENV['MINIRUBY'] + " -rcross" + IO.popen("make -f - prereq srcdir=. CHDIR=cd IFCHANGE=tool/ifchange 'MINIRUBY=#{miniruby}' 'RUBY=#{ENV["RUBY"]}'", "w") do |f| + f.puts(IO.read("Makefile.in").gsub(/^@.*\n/, '').gsub(/@([A-Za-z_]\w*)@/) {ENV[$1]}) + f.puts(commonmk.gsub(/\{[^{}]*\}/, "")) + end + clean.push("rbconfig.rb", ".rbconfig.time", "enc.mk") + print "prerequisites" + else + system("#{YACC} -o parse.c parse.y") + end + FileUtils.rm_rf(clean) + unless $?.success? + puts " failed" + return + end + puts " done" + end + + if v == "." 
+ v = File.basename(Dir.pwd) + Dir.chdir ".." + else + Dir.chdir(File.dirname(v)) + v = File.basename(v) + end + + return [["bzip tarball", ".tar.bz2", %w"tar cjf"], + ["gzip tarball", ".tar.gz", %w"tar czf"], + ["zip archive", ".zip", %w"zip -qr"] + ].collect do |mesg, ext, cmd| + file = "#{destdir}/#{v||$archname}#{ext}" + print "creating #{mesg}... #{file}" + if system(*(cmd + [file, v])) + puts " done" + file + else + puts " failed" + nil + end + end.compact +ensure + FileUtils.rm_rf(v) if v and !$exported and !$keep_temp +end + +revisions.collect {|rev| package(rev, destdir)}.flatten.each do |name| + name or next + str = open(name, "rb") {|f| f.read} + md5 = Digest::MD5.hexdigest str + sha = Digest::SHA256.hexdigest str + puts "* #{name}" + puts " SIZE: #{str.bytesize} bytes" + puts " MD5: #{md5}" + puts " SHA256: #{sha}" + puts +end + +# vim:fileencoding=US-ASCII sw=2 ts=4 noexpandtab ff=unix diff --git a/tool/mdoc2man.rb b/tool/mdoc2man.rb new file mode 100755 index 0000000..043f642 --- /dev/null +++ b/tool/mdoc2man.rb @@ -0,0 +1,465 @@ +#!/usr/bin/env ruby +### +### mdoc2man - mdoc to man converter +### +### Quick usage: mdoc2man.rb < mdoc_manpage.8 > man_manpage.8 +### +### Ported from Perl by Akinori MUSHA. +### +### Copyright (c) 2001 University of Illinois Board of Trustees +### Copyright (c) 2001 Mark D. Roth +### Copyright (c) 2002, 2003 Akinori MUSHA +### All rights reserved. +### +### Redistribution and use in source and binary forms, with or without +### modification, are permitted provided that the following conditions +### are met: +### 1. Redistributions of source code must retain the above copyright +### notice, this list of conditions and the following disclaimer. +### 2. Redistributions in binary form must reproduce the above copyright +### notice, this list of conditions and the following disclaimer in the +### documentation and/or other materials provided with the distribution. +### 3. 
All advertising materials mentioning features or use of this software +### must display the following acknowledgement: +### This product includes software developed by the University of +### Illinois at Urbana, and their contributors. +### 4. The University nor the names of their +### contributors may be used to endorse or promote products derived from +### this software without specific prior written permission. +### +### THIS SOFTWARE IS PROVIDED BY THE TRUSTEES AND CONTRIBUTORS ``AS IS'' AND +### ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +### IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +### ARE DISCLAIMED. IN NO EVENT SHALL THE TRUSTEES OR CONTRIBUTORS BE LIABLE +### FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +### DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +### OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +### HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +### LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +### OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +### SUCH DAMAGE. 
+### +### $Id: mdoc2man.rb 19939 2008-10-25 17:20:05Z yugui $ +### + +class Mdoc2Man + ANGLE = 1 + OPTION = 2 + PAREN = 3 + + RE_PUNCT = /^[!"'),\.\/:;>\?\]`]$/ + + def initialize + @name = @date = @id = nil + @refauthors = @reftitle = @refissue = @refdate = @refopt = nil + + @optlist = 0 ### 1 = bullet, 2 = enum, 3 = tag, 4 = item + @oldoptlist = 0 + @nospace = 0 ### 0, 1, 2 + @enum = 0 + @synopsis = true + @reference = false + @ext = false + @extopt = false + @literal = false + end + + def mdoc2man(i, o) + i.each { |line| + if /^\./ !~ line + o.print line + o.print ".br\n" if @literal + next + end + + line.slice!(0, 1) + + next if /\\"/ =~ line + + line = parse_macro(line) and o.print line + } + + initialize + end + + def parse_macro(line) + words = line.split + retval = '' + + quote = [] + dl = false + + while word = words.shift + case word + when RE_PUNCT + while q = quote.pop + case q + when OPTION + retval << ']' + when PAREN + retval << ')' + when ANGLE + retval << '>' + end + end + retval << word + next + when 'Li', 'Pf' + @nospace = 1 + next + when 'Xo' + @ext = true + retval << ' ' unless retval.empty? || /[\n ]\z/ =~ retval + next + when 'Xc' + @ext = false + retval << "\n" unless @extopt + break + when 'Bd' + @literal = true if words[0] == '-literal' + retval << "\n" + break + when 'Ed' + @literal = false + break + when 'Ns' + @nospace = 1 if @nospace == 0 + retval.chomp!(' ') + next + when 'No' + retval.chomp!(' ') + retval << words.shift + next + when 'Dq' + retval << '``' + begin + retval << words.shift << ' ' + end until words.empty? 
|| RE_PUNCT =~ words[0] + retval.chomp!(' ') + retval << '\'\'' + @nospace = 1 if @nospace == 0 && RE_PUNCT =~ words[0] + next + when 'Sq', 'Ql' + retval << '`' << words.shift << '\'' + @nospace = 1 if @nospace == 0 && RE_PUNCT =~ words[0] + next + # when 'Ic' + # retval << '\\fB' << words.shift << '\\fP' + # next + when 'Oo' + #retval << "[\\c\n" + @extopt = true + @nospace = 1 if @nospace == 0 + retval << '[' + next + when 'Oc' + @extopt = false + retval << ']' + next + when 'Ao' + @nospace = 1 if @nospace == 0 + retval << '<' + next + when 'Ac' + retval << '>' + next + end + + retval << ' ' if @nospace == 0 && !(retval.empty? || /[\n ]\z/ =~ retval) + @nospace = 0 if @nospace == 1 + + case word + when 'Dd' + @date = words.join(' ') + return nil + when 'Dt' + if words.size >= 2 && words[1] == '""' && + /^(.*)\(([0-9])\)$/ =~ words[0] + words[0] = $1 + words[1] = $2 + end + @id = words.join(' ') + return nil + when 'Os' + retval << '.TH ' << @id << ' "' << @date << '" "' << + words.join(' ') << '"' + break + when 'Sh' + retval << '.SH' + @synopsis = (words[0] == 'SYNOPSIS') + next + when 'Xr' + retval << '\\fB' << words.shift << + '\\fP(' << words.shift << ')' << (words.shift||'') + break + when 'Rs' + @refauthors = [] + @reftitle = '' + @refissue = '' + @refdate = '' + @refopt = '' + @reference = true + break + when 'Re' + retval << "\n" + + # authors + while @refauthors.size > 1 + retval << @refauthors.shift << ', ' + end + retval << 'and ' unless retval.empty? + retval << @refauthors.shift + + # title + retval << ', \\fI' << @reftitle << '\\fP' + + # issue + retval << ', ' << @refissue unless @refissue.empty? + + # date + retval << ', ' << @refdate unless @refdate.empty? + + # optional info + retval << ', ' << @refopt unless @refopt.empty? 
+ + retval << ".\n" + + @reference = false + break + when 'An' + next + when 'Dl' + retval << ".nf\n" << '\\& ' + dl = true + next + when 'Ux' + retval << "UNIX" + next + end + + if @reference + case word + when '%A' + @refauthors.unshift(words.join(' ')) + break + when '%T' + @reftitle = words.join(' ') + @reftitle.sub!(/^"/, '') + @reftitle.sub!(/"$/, '') + break + when '%N' + @refissue = words.join(' ') + break + when '%D' + @refdate = words.join(' ') + break + when '%O' + @refopt = words.join(' ') + break + end + end + + case word + when 'Nm' + name = words.empty? ? @name : words.shift + @name ||= name + retval << ".br\n" if @synopsis + retval << "\\fB" << name << "\\fP" + @nospace = 1 if @nospace == 0 && RE_PUNCT =~ words[0] + next + when 'Nd' + retval << '\\-' + next + when 'Fl' + retval << '\\fB\\-' << words.shift << '\\fP' + @nospace = 1 if @nospace == 0 && RE_PUNCT =~ words[0] + next + when 'Ar' + retval << '\\fI' + if words.empty? + retval << 'file ...\\fP' + else + retval << words.shift << '\\fP' + while words[0] == '|' + retval << ' ' << words.shift << ' \\fI' << words.shift << '\\fP' + end + @nospace = 1 if @nospace == 0 && RE_PUNCT =~ words[0] + next + end + when 'Cm' + retval << '\\fB' << words.shift << '\\fP' + while RE_PUNCT =~ words[0] + retval << words.shift + end + next + when 'Op' + quote << OPTION + @nospace = 1 if @nospace == 0 + retval << '[' + # words.push(words.pop + ']') + next + when 'Aq' + quote << ANGLE + @nospace = 1 if @nospace == 0 + retval << '<' + # words.push(words.pop + '>') + next + when 'Pp' + retval << "\n" + next + when 'Ss' + retval << '.SS' + next + end + + if word == 'Pa' && !quote.include?(OPTION) + retval << '\\fI' + retval << '\\&' if /^\./ =~ words[0] + retval << words.shift << '\\fP' + while RE_PUNCT =~ words[0] + retval << words.shift + end + # @nospace = 1 if @nospace == 0 && RE_PUNCT =~ words[0] + next + end + + case word + when 'Dv' + retval << '.BR' + next + when 'Em', 'Ev' + retval << '.IR' + next + when 'Pq' + 
retval << '(' + @nospace = 1 + quote << PAREN + next + when 'Sx', 'Sy' + retval << '.B ' << words.join(' ') + break + when 'Ic' + retval << '\\fB' + until words.empty? || RE_PUNCT =~ words[0] + case words[0] + when 'Op' + words.shift + retval << '[' + words.push(words.pop + ']') + next + when 'Aq' + words.shift + retval << '<' + words.push(words.pop + '>') + next + when 'Ar' + words.shift + retval << '\\fI' << words.shift << '\\fP' + else + retval << words.shift + end + + retval << ' ' if @nospace == 0 + end + + retval.chomp!(' ') + retval << '\\fP' + retval << words.shift unless words.empty? + break + when 'Bl' + @oldoptlist = @optlist + + case words[0] + when '-bullet' + @optlist = 1 + when '-enum' + @optlist = 2 + @enum = 0 + when '-tag' + @optlist = 3 + when '-item' + @optlist = 4 + end + + break + when 'El' + @optlist = @oldoptlist + next + end + + if @optlist != 0 && word == 'It' + case @optlist + when 1 + # bullets + retval << '.IP \\(bu' + when 2 + # enum + @enum += 1 + retval << '.IP ' << @enum << '.' + when 3 + # tags + retval << ".TP\n" + case words[0] + when 'Pa', 'Ev' + words.shift + retval << '.B' + end + when 4 + # item + retval << ".IP\n" + end + + next + end + + case word + when 'Sm' + case words[0] + when 'off' + @nospace = 2 + when 'on' + # retval << "\n" + @nospace = 0 + end + words.shift + next + end + + retval << word + end + + return nil if retval == '.' + + retval.sub!(/\A\.([^a-zA-Z])/, "\\1") + # retval.chomp!(' ') + + while q = quote.pop + case q + when OPTION + retval << ']' + when PAREN + retval << ')' + when ANGLE + retval << '>' + end + end + + # retval << ' ' unless @nospace == 0 || retval.empty? || /\n\z/ =~ retval + + retval << ' ' unless !@ext || @extopt || / $/ =~ retval + + retval << "\n" unless @ext || @extopt || retval.empty? 
|| /\n\z/ =~ retval + + retval << ".fi\n" if dl + + return retval + end + + def self.mdoc2man(i, o) + new.mdoc2man(i, o) + end +end + +if $0 == __FILE__ + Mdoc2Man.mdoc2man(ARGF, STDOUT) +end diff --git a/tool/node_name.rb b/tool/node_name.rb new file mode 100755 index 0000000..5d39e9f --- /dev/null +++ b/tool/node_name.rb @@ -0,0 +1,4 @@ +#! ./miniruby -n +if ~/enum node_type \{/..~/^\};/ + ~/(NODE_.+),/ and puts(" case #{$1}:\n\treturn \"#{$1}\";") +end diff --git a/tool/parse.rb b/tool/parse.rb new file mode 100644 index 0000000..6243d7a --- /dev/null +++ b/tool/parse.rb @@ -0,0 +1,13 @@ +$file = ARGV[0] +$str = ARGF.read.sub(/^__END__.*\z/m, '') +puts '# ' + '-' * 70 +puts "# target program: " +puts '# ' + '-' * 70 +puts $str +puts '# ' + '-' * 70 + +$parsed = RubyVM::InstructionSequence.compile_file($file) +puts "# disasm result: " +puts '# ' + '-' * 70 +puts $parsed.disasm +puts '# ' + '-' * 70 diff --git a/tool/rmdirs b/tool/rmdirs new file mode 100755 index 0000000..b116898 --- /dev/null +++ b/tool/rmdirs @@ -0,0 +1,11 @@ +#!/bin/sh +for dir; do + while rmdir "$dir" >/dev/null 2>&1 && + parent=`expr "$dir" : '\(.*\)/[^/][^/]*'`; do + case "$parent" in + . | .. 
| "$dir") break;; + *) dir="$parent";; + esac + done +done +true diff --git a/tool/transcode-tblgen.rb b/tool/transcode-tblgen.rb new file mode 100644 index 0000000..266b5a8 --- /dev/null +++ b/tool/transcode-tblgen.rb @@ -0,0 +1,849 @@ +require 'optparse' +require 'erb' +require 'fileutils' + +NUM_ELEM_BYTELOOKUP = 2 + +C_ESC = { + "\\" => "\\\\", + '"' => '\"', + "\n" => '\n', +} + +0x00.upto(0x1f) {|ch| C_ESC[[ch].pack("C")] ||= "\\%03o" % ch } +0x7f.upto(0xff) {|ch| C_ESC[[ch].pack("C")] = "\\%03o" % ch } +C_ESC_PAT = Regexp.union(*C_ESC.keys) + +def c_esc(str) + '"' + str.gsub(C_ESC_PAT) { C_ESC[$&] } + '"' +end + +class StrSet + def self.parse(pattern) + if /\A\s*(([0-9a-f][0-9a-f]|\{([0-9a-f][0-9a-f]|[0-9a-f][0-9a-f]-[0-9a-f][0-9a-f])(,([0-9a-f][0-9a-f]|[0-9a-f][0-9a-f]-[0-9a-f][0-9a-f]))*\})+(\s+|\z))*\z/i !~ pattern + raise ArgumentError, "invalid pattern: #{pattern.inspect}" + end + result = [] + pattern.scan(/\S+/) {|seq| + seq_result = [] + while !seq.empty? + if /\A([0-9a-f][0-9a-f])/i =~ seq + byte = $1.to_i(16) + seq_result << [byte..byte] + seq = $' + elsif /\A\{([^\}]+)\}/ =~ seq + set = $1 + seq = $' + set_result = [] + set.scan(/[^,]+/) {|range| + if /\A([0-9a-f][0-9a-f])-([0-9a-f][0-9a-f])\z/i =~ range + b = $1.to_i(16) + e = $2.to_i(16) + set_result << (b..e) + elsif /\A([0-9a-f][0-9a-f])\z/i =~ range + byte = $1.to_i(16) + set_result << (byte..byte) + else + raise "invalid range: #{range.inspect}" + end + } + seq_result << set_result + else + raise "invalid sequence: #{seq.inspect}" + end + end + result << seq_result + } + self.new(result) + end + + def initialize(pat) + @pat = pat + end + + def hash + return @hash if defined? @hash + @hash = @pat.hash + end + + def eql?(other) + self.class == other.class && + @pat == other.instance_eval { @pat } + end + + alias == eql? + + def to_s + if @pat.empty? + "(empset)" + else + @pat.map {|seq| + if seq.empty? 
+ "(empstr)" + else + seq.map {|byteset| + if byteset.length == 1 && byteset[0].begin == byteset[0].end + "%02x" % byteset[0].begin + else + "{" + + byteset.map {|range| + if range.begin == range.end + "%02x" % range.begin + else + "%02x-%02x" % [range.begin, range.end] + end + }.join(',') + + "}" + end + }.join('') + end + }.join(' ') + end + end + + def inspect + "\#<#{self.class}: #{self.to_s}>" + end + + def min_length + if @pat.empty? + nil + else + @pat.map {|seq| seq.length }.min + end + end + + def max_length + if @pat.empty? + nil + else + @pat.map {|seq| seq.length }.max + end + end + + def emptyable? + @pat.any? {|seq| + seq.empty? + } + end + + def first_bytes + result = {} + @pat.each {|seq| + next if seq.empty? + seq.first.each {|range| + range.each {|byte| + result[byte] = true + } + } + } + result.keys.sort + end + + def each_firstbyte + h = {} + @pat.each {|seq| + next if seq.empty? + seq.first.each {|range| + range.each {|byte| + (h[byte] ||= []) << seq[1..-1] + } + } + } + h.keys.sort.each {|byte| + yield byte, StrSet.new(h[byte]) + } + end +end + +class ArrayCode + def initialize(type, name) + @type = type + @name = name + @len = 0; + @content = '' + end + + def length + @len + end + + def insert_at_last(num, str) + newnum = self.length + num + @content << str + @len += num + end + + def to_s + <<"End" +static const #{@type} +#{@name}[#{@len}] = { +#{@content}}; +End + end +end + +class ActionMap + def self.parse(hash) + h = {} + hash.each {|pat, action| + h[StrSet.parse(pat)] = action + } + self.new(h) + end + + def initialize(h) + @map = h + end + + def hash + return @hash if defined? @hash + hash = 0 + @map.each {|k,v| + hash ^= k.hash ^ v.hash + } + @hash = hash + end + + def eql?(other) + self.class == other.class && + @map == other.instance_eval { @map } + end + + alias == eql? 
+ + def inspect + "\#<#{self.class}:" + + @map.map {|k, v| " [" + k.to_s + "]=>" + v.inspect }.join('') + + ">" + end + + def max_input_length + @map.keys.map {|k| k.max_length }.max + end + + def empty_action + @map.each {|ss, action| + return action if ss.emptyable? + } + nil + end + + def each_firstbyte(valid_encoding=nil) + h = {} + @map.each {|ss, action| + if ss.emptyable? + raise "emptyable pattern" + else + ss.each_firstbyte {|byte, rest| + h[byte] ||= {} + if h[byte][rest] + raise "ambiguous %s or %s (%02X/%s)" % [h[byte][rest], action, byte, rest] + end + h[byte][rest] = action + } + end + } + if valid_encoding + valid_encoding.each_firstbyte {|byte, rest| + if h[byte] + am = ActionMap.new(h[byte]) + yield byte, am, rest + else + am = ActionMap.new(rest => :undef) + yield byte, am, nil + end + } + else + h.keys.sort.each {|byte| + am = ActionMap.new(h[byte]) + yield byte, am, nil + } + end + end + + OffsetsMemo = {} + InfosMemo = {} + + def format_offsets(min, max, offsets) + offsets = offsets[min..max] + code = "%d, %d,\n" % [min, max] + 0.step(offsets.length-1,16) {|i| + code << " " + code << offsets[i,8].map {|off| "%3d," % off.to_s }.join('') + if i+8 < offsets.length + code << " " + code << offsets[i+8,8].map {|off| "%3d," % off.to_s }.join('') + end + code << "\n" + } + code + end + + UsedName = {} + + StrMemo = {} + + def str_name(bytes) + size = @bytes_code.length + rawbytes = [bytes].pack("H*") + + n = nil + if !n && !(suf = rawbytes.gsub(/[^A-Za-z0-9_]/, '')).empty? 
&& !UsedName[nn = "str1_" + suf] then n = nn end + if !n && !UsedName[nn = "str1_" + bytes] then n = nn end + n ||= "str1s_#{size}" + + StrMemo[bytes] = n + UsedName[n] = true + n + end + + def gen_str(bytes) + if n = StrMemo[bytes] + n + else + len = bytes.length/2 + size = @bytes_code.length + n = str_name(bytes) + @bytes_code.insert_at_last(1 + len, + "\#define #{n} makeSTR1(#{size})\n" + + " makeSTR1LEN(#{len})," + bytes.gsub(/../, ' 0x\&,') + "\n\n") + n + end + end + + def generate_info(info) + case info + when :nomap + "NOMAP" + when :undef + "UNDEF" + when :invalid + "INVALID" + when :func_ii + "FUNii" + when :func_si + "FUNsi" + when :func_io + "FUNio" + when :func_so + "FUNso" + when /\A([0-9a-f][0-9a-f])\z/i + "o1(0x#$1)" + when /\A([0-9a-f][0-9a-f])([0-9a-f][0-9a-f])\z/i + "o2(0x#$1,0x#$2)" + when /\A([0-9a-f][0-9a-f])([0-9a-f][0-9a-f])([0-9a-f][0-9a-f])\z/i + "o3(0x#$1,0x#$2,0x#$3)" + when /\A([0-9a-f][0-9a-f])(3[0-9])([0-9a-f][0-9a-f])(3[0-9])\z/i + "g4(0x#$1,0x#$2,0x#$3,0x#$4)" + when /\A(f[0-7])([0-9a-f][0-9a-f])([0-9a-f][0-9a-f])([0-9a-f][0-9a-f])\z/i + "o4(0x#$1,0x#$2,0x#$3,0x#$4)" + when /\A([0-9a-f][0-9a-f]){4,259}\z/i + gen_str(info.upcase) + when /\A\/\*BYTE_LOOKUP\*\// # pointer to BYTE_LOOKUP structure + $'.to_s + else + raise "unexpected action: #{info.inspect}" + end + end + + def format_infos(infos) + infos = infos.map {|info| generate_info(info) } + maxlen = infos.map {|info| info.length }.max + columns = maxlen <= 16 ? 
4 : 2 + code = "" + 0.step(infos.length-1, columns) {|i| + code << " " + is = infos[i,columns] + is.each {|info| + code << sprintf(" %#{maxlen}s,", info) + } + code << "\n" + } + code + end + + def generate_lookup_node(bytes_code, words_code, name, table) + offsets = [] + infos = [] + infomap = {} + min = max = nil + table.each_with_index {|action, byte| + action ||= :invalid + if action != :invalid + min = byte if !min + max = byte + end + unless o = infomap[action] + infomap[action] = o = infos.length + infos[o] = action + end + offsets[byte] = o + } + if !min + min = max = 0 + end + + offsets_key = [min, max, offsets[min..max]] + if n = OffsetsMemo[offsets_key] + offsets_name = n + else + offsets_name = "#{name}_offsets" + OffsetsMemo[offsets_key] = offsets_name + size = bytes_code.length + bytes_code.insert_at_last(2+max-min+1, + "\#define #{offsets_name} #{size}\n" + + format_offsets(min,max,offsets) + "\n") + end + + if n = InfosMemo[infos] + infos_name = n + else + infos_name = "#{name}_infos" + InfosMemo[infos] = infos_name + + size = words_code.length + words_code.insert_at_last(infos.length, + "\#define #{infos_name} WORDINDEX2INFO(#{size})\n" + + format_infos(infos) + "\n") + end + + size = words_code.length + words_code.insert_at_last(NUM_ELEM_BYTELOOKUP, + "\#define #{name} WORDINDEX2INFO(#{size})\n" + + <<"End" + "\n") + #{offsets_name}, + #{infos_name}, +End + end + + PreMemo = {} + PostMemo = {} + NextName = "a" + + def generate_node(bytes_code, words_code, name_hint=nil, valid_encoding=nil) + if n = PreMemo[[self,valid_encoding]] + return n + end + + table = Array.new(0x100, :invalid) + each_firstbyte(valid_encoding) {|byte, rest, rest_valid_encoding| + if a = rest.empty_action + table[byte] = a + else + name_hint2 = nil + name_hint2 = "#{name_hint}_#{'%02X' % byte}" if name_hint + table[byte] = "/*BYTE_LOOKUP*/" + rest.gennode(bytes_code, words_code, name_hint2, rest_valid_encoding) + end + } + + if n = PostMemo[table] + return n + end + + if 
!name_hint + name_hint = "fun_" + NextName.dup + NextName.succ! + end + + PreMemo[[self,valid_encoding]] = PostMemo[table] = name_hint + + generate_lookup_node(bytes_code, words_code, name_hint, table) + name_hint + end + + def gennode(bytes_code, words_code, name_hint=nil, valid_encoding=nil) + @bytes_code = bytes_code + @words_code = words_code + name = generate_node(bytes_code, words_code, name_hint, valid_encoding) + @bytes_code = nil + @words_code = nil + return name + end +end + +def citrus_mskanji_cstomb(csid, index) + case csid + when 0 + index + when 1 + index + 0x80 + when 2, 3 + row = index >> 8 + raise "invalid byte sequence" if row < 0x21 + if csid == 3 + if row <= 0x2F + offset = (row == 0x22 || row >= 0x26) ? 0xED : 0xF0 + elsif row >= 0x4D && row <= 0x7E + offset = 0xCE + else + raise "invalid byte sequence" + end + else + raise "invalid byte sequence" if row > 0x97 + offset = (row < 0x5F) ? 0x81 : 0xC1 + end + col = index & 0xFF + raise "invalid byte sequence" if (col < 0x21 || col > 0x7E) + + row -= 0x21 + col -= 0x21 + if (row & 1) == 0 + col += 0x40 + col += 1 if (col >= 0x7F) + else + col += 0x9F; + end + row = row / 2 + offset + (row << 8) | col + end.to_s(16) +end + +def citrus_euc_cstomb(csid, index) + case csid + when 0x0000 + index + when 0x8080 + index | 0x8080 + when 0x0080 + index | 0x8E80 + when 0x8000 + index | 0x8F8080 + end.to_s(16) +end + +def citrus_cstomb(ces, csid, index) + case ces + when 'mskanji' + citrus_mskanji_cstomb(csid, index) + when 'euc' + citrus_euc_cstomb(csid, index) + end +end + +SUBDIR = %w/APPLE AST BIG5 CNS CP EBCDIC GB GEORGIAN ISO646 ISO-8859 JIS KAZAKH KOI KS MISC TCVN/ + + +def citrus_decode_mapsrc(ces, csid, mapsrcs) + table = [] + mapsrcs.split(',').each do |mapsrc| + path = [$srcdir] + mode = nil + if mapsrc.rindex('UCS', 0) + mode = :from_ucs + from = mapsrc[4..-1] + path << SUBDIR.find{|x| from.rindex(x, 0) } + else + mode = :to_ucs + path << SUBDIR.find{|x| mapsrc.rindex(x, 0) } + end + path << 
mapsrc.gsub(':', '@') + path = File.join(*path) + path << ".src" + path[path.rindex('/')] = '%' + STDERR.puts 'load mapsrc %s' % path if VERBOSE_MODE + open(path) do |f| + f.each_line do |l| + break if /^BEGIN_MAP/ =~ l + end + f.each_line do |l| + next if /^\s*(?:#|$)/ =~ l + break if /^END_MAP/ =~ l + case mode + when :from_ucs + case l + when /0x(\w+)\s*-\s*0x(\w+)\s*=\s*INVALID/ + # Citrus OOB_MODE + when /(0x\w+)\s*=\s*(0x\w+)/ + table.push << [$1.hex, citrus_cstomb(ces, csid, $2.hex)] + else + raise "unknown notation '%s'"% l + end + when :to_ucs + case l + when /(0x\w+)\s*=\s*(0x\w+)/ + table.push << [citrus_cstomb(ces, csid, $1.hex), $2.hex] + else + raise "unknown notation '%s'"% l + end + end + end + end + end + return table +end + +def encode_utf8(map) + r = [] + map.each {|k, v| + # integer means UTF-8 encoded sequence. + k = [k].pack("U").unpack("H*")[0].upcase if Integer === k + v = [v].pack("U").unpack("H*")[0].upcase if Integer === v + r << [k,v] + } + r +end + +def transcode_compile_tree(name, from, map) + map = encode_utf8(map) + h = {} + map.each {|k, v| + h[k] = v unless h[k] # use first mapping + } + am = ActionMap.parse(h) + + max_input = am.max_input_length + + if ValidEncoding[from] + valid_encoding = StrSet.parse(ValidEncoding[from]) + else + valid_encoding = nil + end + + defined_name = am.gennode(TRANSCODE_GENERATED_BYTES_CODE, TRANSCODE_GENERATED_WORDS_CODE, name, valid_encoding) + return defined_name, max_input +end + +TRANSCODERS = [] +TRANSCODE_GENERATED_TRANSCODER_CODE = '' + +def transcode_tbl_only(from, to, map) + if VERBOSE_MODE + if from.empty? || to.empty? + STDERR.puts "converter for #{from.empty? ? 
to : from}" + else + STDERR.puts "converter from #{from} to #{to}" + end + end + id_from = from.tr('^0-9A-Za-z', '_') + id_to = to.tr('^0-9A-Za-z', '_') + if from == "UTF-8" + tree_name = "to_#{id_to}" + elsif to == "UTF-8" + tree_name = "from_#{id_from}" + else + tree_name = "from_#{id_from}_to_#{id_to}" + end + map = encode_utf8(map) + real_tree_name, max_input = transcode_compile_tree(tree_name, from, map) + return map, tree_name, real_tree_name, max_input +end + +def transcode_tblgen(from, to, map) + map, tree_name, real_tree_name, max_input = transcode_tbl_only(from, to, map) + transcoder_name = "rb_#{tree_name}" + TRANSCODERS << transcoder_name + input_unit_length = UnitLength[from] + max_output = map.map {|k,v| String === v ? v.length/2 : 1 }.max + transcoder_code = <<"End" +static const rb_transcoder +#{transcoder_name} = { + #{c_esc from}, #{c_esc to}, #{real_tree_name}, + TRANSCODE_TABLE_INFO, + #{input_unit_length}, /* input_unit_length */ + #{max_input}, /* max_input */ + #{max_output}, /* max_output */ + asciicompat_converter, /* asciicompat_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ + NULL, NULL, NULL, NULL, + NULL, NULL, NULL +}; +End + TRANSCODE_GENERATED_TRANSCODER_CODE << transcoder_code + '' +end + +def transcode_generate_node(am, name_hint=nil) + STDERR.puts "converter for #{name_hint}" if VERBOSE_MODE + name = am.gennode(TRANSCODE_GENERATED_BYTES_CODE, TRANSCODE_GENERATED_WORDS_CODE, name_hint) + '' +end + +def transcode_generated_code + TRANSCODE_GENERATED_BYTES_CODE.to_s + + TRANSCODE_GENERATED_WORDS_CODE.to_s + + "\#define TRANSCODE_TABLE_INFO " + + "#{OUTPUT_PREFIX}byte_array, #{TRANSCODE_GENERATED_BYTES_CODE.length}, " + + "#{OUTPUT_PREFIX}word_array, #{TRANSCODE_GENERATED_WORDS_CODE.length}, " + + "sizeof(unsigned int)\n" + + TRANSCODE_GENERATED_TRANSCODER_CODE +end + +def transcode_register_code + code = '' + TRANSCODERS.each {|transcoder_name| + code << " rb_register_transcoder(&#{transcoder_name});\n" + } + code 
+end
+# Unit length per encoding name; encodings not listed fall back to the default of 1 set below.
+UnitLength = {
+  'UTF-16BE' => 2,
+  'UTF-16LE' => 2,
+  'UTF-32BE' => 4,
+  'UTF-32LE' => 4,
+}
+UnitLength.default = 1
+# Valid byte patterns, keyed by encoding name or by a shared label ('1byte', '2byte', '4byte').
+ValidEncoding = {
+  '1byte' => '{00-ff}',
+  '2byte' => '{00-ff}{00-ff}',
+  '4byte' => '{00-ff}{00-ff}{00-ff}{00-ff}',
+  'US-ASCII' => '{00-7f}',
+  'UTF-8' => '{00-7f}
+    {c2-df}{80-bf}
+    e0{a0-bf}{80-bf}
+    {e1-ec}{80-bf}{80-bf}
+    ed{80-9f}{80-bf}
+    {ee-ef}{80-bf}{80-bf}
+    f0{90-bf}{80-bf}{80-bf}
+    {f1-f3}{80-bf}{80-bf}{80-bf}
+    f4{80-8f}{80-bf}{80-bf}',
+  'UTF-16BE' => '{00-d7,e0-ff}{00-ff}
+    {d8-db}{00-ff}{dc-df}{00-ff}',
+  'UTF-16LE' => '{00-ff}{00-d7,e0-ff}
+    {00-ff}{d8-db}{00-ff}{dc-df}',
+  'UTF-32BE' => '0000{00-d7,e0-ff}{00-ff}
+    00{01-10}{00-ff}{00-ff}',
+  'UTF-32LE' => '{00-ff}{00-d7,e0-ff}0000
+    {00-ff}{00-ff}{01-10}00',
+  'EUC-JP' => '{00-7f}
+    {a1-fe}{a1-fe}
+    8e{a1-fe}
+    8f{a1-fe}{a1-fe}',
+  'CP51932' => '{00-7f}
+    {a1-fe}{a1-fe}
+    8e{a1-fe}',
+  'Shift_JIS' => '{00-7f}
+    {81-9f,e0-fc}{40-7e,80-fc}
+    {a1-df}',
+  'EUC-KR' => '{00-7f}
+    {a1-fe}{a1-fe}',
+  'CP949' => '{00-7f}
+    {81-fe}{41-5a,61-7a,81-fe}',
+  'Big5' => '{00-7f}
+    {81-fe}{40-7e,a1-fe}',
+  'EUC-TW' => '{00-7f}
+    {a1-fe}{a1-fe}
+    8e{a1-b0}{a1-fe}{a1-fe}',
+  'GBK' => '{00-80}
+    {81-fe}{40-7e,80-fe}',
+  'GB18030' => '{00-7f}
+    {81-fe}{40-7e,80-fe}
+    {81-fe}{30-39}{81-fe}{30-39}',
+}
+# Registers the byte pattern for +encoding+; +pattern_or_label+ is either a ValidEncoding key or a literal pattern. Raises ArgumentError when a different pattern is already registered.
+def set_valid_byte_pattern (encoding, pattern_or_label)
+  pattern =
+    if ValidEncoding[pattern_or_label]
+      ValidEncoding[pattern_or_label]
+    else
+      pattern_or_label
+    end
+  if ValidEncoding[encoding] and ValidEncoding[encoding]!=pattern
+    raise ArgumentError, "trying to change valid byte pattern for encoding #{encoding} from #{ValidEncoding[encoding]} to #{pattern}"
+  end
+  ValidEncoding[encoding] = pattern
+end
+
+# the following may be used in different places, so keep them here for the moment
+set_valid_byte_pattern 'ASCII-8BIT', '1byte'
+set_valid_byte_pattern 'Windows-31J', 'Shift_JIS'
+set_valid_byte_pattern 'eucJP-ms', 'EUC-JP'
+# Builds the "src=..., len=..., checksum=..." text that is embedded in the generated file's header comments.
+def make_signature(filename, src)
+  "src=#{filename.dump}, len=#{src.length}, checksum=#{src.sum}"
+end
+
+output_filename = nil
+verbose_mode = false
+force_mode = false
+
+op = OptionParser.new
+op.def_option("--help", "show help message") { puts op; exit 0 }
+op.def_option("--verbose", "verbose mode") { verbose_mode = true }
+op.def_option("--force", "force table generation") { force_mode = true }
+op.def_option("--output=FILE", "specify output file") {|arg| output_filename = arg }
+op.parse!
+
+VERBOSE_MODE = verbose_mode
+
+OUTPUT_FILENAME = output_filename
+OUTPUT_PREFIX = output_filename ? File.basename(output_filename)[/\A[A-Za-z0-9_]*/] : ""
+OUTPUT_PREFIX.sub!(/\A_+/, '')
+OUTPUT_PREFIX.sub!(/_*\z/, '_')
+
+TRANSCODE_GENERATED_BYTES_CODE = ArrayCode.new("unsigned char", "#{OUTPUT_PREFIX}byte_array")
+TRANSCODE_GENERATED_WORDS_CODE = ArrayCode.new("unsigned int", "#{OUTPUT_PREFIX}word_array")
+# The first remaining argument is the template file; its directory is added to the load path.
+arg = ARGV.shift
+$srcdir = File.dirname(arg)
+$:.unshift $srcdir unless $:.include? $srcdir
+src = File.read(arg)
+src.force_encoding("ascii-8bit") if src.respond_to? :force_encoding
+this_script = File.read(__FILE__)
+this_script.force_encoding("ascii-8bit") if this_script.respond_to? :force_encoding
+
+base_signature = "/* autogenerated. */\n"
+base_signature << "/* #{make_signature(File.basename(__FILE__), this_script)} */\n"
+base_signature << "/* #{make_signature(File.basename(arg), src)} */\n"
+# Unless --force is given: recompute the signatures named in the existing output's first paragraph and, when they match, only touch the file and exit.
+if !force_mode && output_filename && File.readable?(output_filename)
+  old_signature = File.open(output_filename) {|f| f.gets("").chomp }
+  chk_signature = base_signature.dup
+  old_signature.each_line {|line|
+    if %r{/\* src="([0-9a-z_.-]+)",} =~ line
+      name = $1
+      next if name == File.basename(arg) || name == File.basename(__FILE__)
+      path = File.join($srcdir, name)
+      if File.readable? path
+        chk_signature << "/* #{make_signature(name, File.read(path))} */\n"
+      end
+    end
+  }
+  if old_signature == chk_signature
+    now = Time.now
+    File.utime(now, now, output_filename)
+    STDERR.puts "already up-to-date: #{output_filename}" if VERBOSE_MODE
+    exit
+  end
+end
+
+if VERBOSE_MODE
+  if output_filename
+    STDERR.puts "generating #{output_filename} ..."
+  end
+end
+# Render the ERB template; the entries added to $" while rendering are the files it required.
+libs1 = $".dup
+erb = ERB.new(src, nil, '%')
+erb.filename = arg
+erb_result = erb.result(binding)
+libs2 = $".dup
+
+libs = libs2 - libs1
+lib_sigs = ''
+libs.each {|lib|
+  lib = File.basename(lib)
+  path = File.join($srcdir, lib)
+  if File.readable? path
+    lib_sigs << "/* #{make_signature(lib, File.read(path))} */\n"
+  end
+}
+
+result = ''
+result << base_signature
+result << lib_sigs
+result << "\n"
+result << erb_result
+result << "\n"
+# With --output: write to "<output>.new" and rename it over the target; otherwise print the result to stdout.
+if output_filename
+  new_filename = output_filename + ".new"
+  FileUtils.mkdir_p(File.dirname(output_filename))
+  File.open(new_filename, "wb") {|f| f << result }
+  File.rename(new_filename, output_filename)
+  STDERR.puts "done." if VERBOSE_MODE
+else
+  print result
+end
diff --git a/tool/vtlh.rb b/tool/vtlh.rb
new file mode 100644
index 0000000..fcd3630
--- /dev/null
+++ b/tool/vtlh.rb
@@ -0,0 +1,15 @@
+# ARGF = open('ha')
+cd = `pwd`.chomp + '/'
+ARGF.each{|line|
+  if /^0x([a-z0-9]+),/ =~ line
+    stat = line.split(',')
+    addr = stat[0].hex + 0x00400000
+    retired = stat[2].to_i
+    ticks = stat[3].to_i
+    # Resolve the address with addr2line and strip the current directory prefix from the reported path.
+    src = `addr2line -e miniruby.exe #{addr.to_s(16)}`.chomp
+    src.sub!(cd, '')
+    puts '%-40s 0x%08x %8d %8d' % [src, addr, retired, ticks]
+  end
+}
+
diff --git a/tool/ytab.sed b/tool/ytab.sed
new file mode 100755
index 0000000..17a57fe
--- /dev/null
+++ b/tool/ytab.sed
@@ -0,0 +1,30 @@
+#!/bin/sed -f
+/^int yydebug;/{
+i\
+#ifndef yydebug
+a\
+#endif
+}
+/^yydestruct.*yymsg/,/#endif/{
+  /^yydestruct/{
+    /parser/!{
+      h
+      s/^/ruby_parser_&/
+      s/)$/, parser)/
+      /\*/s/parser)$/struct parser_params *&/
+    }
+  }
+  /^#endif/{
+    x
+    /^./{
+      i\
+      struct parser_params *parser;
+      a\
+#define yydestruct(m, t, v) ruby_parser_yydestruct(m, t, v, parser)
+    }
+    x
+  }
+}
+s/^\([ ]*\)\(yyerror[ ]*([ ]*parser,\)/\1parser_\2/
+s!^ *extern char \*getenv();!/* & */!
+s/^\(#.*\)".*\.tab\.c"/\1"parse.c"/ diff --git a/transcode.c b/transcode.c new file mode 100644 index 0000000..a940165 --- /dev/null +++ b/transcode.c @@ -0,0 +1,4133 @@ +/********************************************************************** + + transcode.c - + + $Author: yugui $ + created at: Tue Oct 30 16:10:22 JST 2007 + + Copyright (C) 2007 Martin Duerst + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/encoding.h" +#include "transcode_data.h" +#include + +/* VALUE rb_cEncoding = rb_define_class("Encoding", rb_cObject); */ +VALUE rb_eUndefinedConversionError; +VALUE rb_eInvalidByteSequenceError; +VALUE rb_eConverterNotFoundError; + +VALUE rb_cEncodingConverter; + +static VALUE sym_invalid, sym_undef, sym_replace; +static VALUE sym_xml, sym_text, sym_attr; +static VALUE sym_universal_newline; +static VALUE sym_crlf_newline; +static VALUE sym_cr_newline; +static VALUE sym_partial_input; + +static VALUE sym_invalid_byte_sequence; +static VALUE sym_undefined_conversion; +static VALUE sym_destination_buffer_full; +static VALUE sym_source_buffer_empty; +static VALUE sym_finished; +static VALUE sym_after_output; +static VALUE sym_incomplete_input; + +static unsigned char * +allocate_converted_string(const char *sname, const char *dname, + const unsigned char *str, size_t len, + unsigned char *caller_dst_buf, size_t caller_dst_bufsize, + size_t *dst_len_ptr); + +/* dynamic structure, one per conversion (similar to iconv_t) */ +/* may carry conversion state (e.g. 
for iso-2022-jp) */
+typedef struct rb_transcoding {
+    const rb_transcoder *transcoder;
+
+    int flags;
+
+    int resume_position; /* SUSPEND point at which transcode_restartable0 resumes; 0 starts from the top */
+    unsigned int next_table; /* lookup table currently being consulted */
+    VALUE next_info; /* action word selected for next_byte */
+    unsigned char next_byte; /* input byte currently being looked up */
+    unsigned int output_index; /* progress counter while a STR1 string is being emitted */
+
+    int recognized_len; /* already interpreted */
+    int readagain_len; /* not yet interpreted */
+    union {
+        unsigned char ary[8]; /* max_input <= sizeof(ary) */
+        unsigned char *ptr; /* length: max_input */
+    } readbuf; /* recognized_len + readagain_len used */
+
+    int writebuf_off; /* number of writebuf bytes already copied to the output */
+    int writebuf_len; /* number of bytes stored in writebuf */
+    union {
+        unsigned char ary[8]; /* max_output <= sizeof(ary) */
+        unsigned char *ptr; /* length: max_output */
+    } writebuf;
+
+    union rb_transcoding_state_t { /* opaque data for stateful encoding */
+        void *ptr;
+        double dummy_for_alignment;
+    } state;
+} rb_transcoding; /* the accessor macros below select the embedded member or the heap pointer from the sizes declared by the transcoder */
+#define TRANSCODING_READBUF(tc) \
+    ((tc)->transcoder->max_input <= sizeof((tc)->readbuf.ary) ? \
+     (tc)->readbuf.ary : \
+     (tc)->readbuf.ptr)
+#define TRANSCODING_WRITEBUF(tc) \
+    ((tc)->transcoder->max_output <= sizeof((tc)->writebuf.ary) ? \
+     (tc)->writebuf.ary : \
+     (tc)->writebuf.ptr)
+#define TRANSCODING_WRITEBUF_SIZE(tc) \
+    ((tc)->transcoder->max_output <= sizeof((tc)->writebuf.ary) ? \
+     sizeof((tc)->writebuf.ary) : \
+     (tc)->transcoder->max_output)
+#define TRANSCODING_STATE_EMBED_MAX sizeof(union rb_transcoding_state_t)
+#define TRANSCODING_STATE(tc) \
+    ((tc)->transcoder->state_size <= sizeof((tc)->state) ? \
+     (void *)&(tc)->state : \
+     (tc)->state.ptr)
+/*
+ * One stage of a converter: a transcoding plus the buffer that receives
+ * its output.  out_data_start..out_data_end is the pending data inside
+ * out_buf_start..out_buf_end; last_result is what the stage returned on
+ * its most recent run.
+ */
+typedef struct {
+    struct rb_transcoding *tc;
+    unsigned char *out_buf_start;
+    unsigned char *out_data_start;
+    unsigned char *out_data_end;
+    unsigned char *out_buf_end;
+    rb_econv_result_t last_result;
+} rb_econv_elem_t;
+/*
+ * A converter: num_trans stages stored in elems[], which has room for
+ * num_allocated entries.
+ */
+struct rb_econv_t {
+    int flags;
+    const char *source_encoding_name;
+    const char *destination_encoding_name;
+
+    int started;
+
+    const unsigned char *replacement_str;
+    size_t replacement_len;
+    const char *replacement_enc;
+    int replacement_allocated;
+
+    unsigned char *in_buf_start;
+    unsigned char *in_data_start;
+    unsigned char *in_data_end;
+    unsigned char *in_buf_end;
+    rb_econv_elem_t *elems;
+    int num_allocated;
+    int num_trans;
+    int num_finished; /* set to i+1 when stage i reports econv_finished */
+    struct rb_transcoding *last_tc; /* last stage that is not a decorator */
+
+    /* last error */
+    struct {
+        rb_econv_result_t result;
+        struct rb_transcoding *error_tc;
+        const char *source_encoding;
+        const char *destination_encoding;
+        const unsigned char *error_bytes_start;
+        size_t error_bytes_len;
+        size_t readagain_len;
+    } last_error;
+
+    /* The following fields are only for Encoding::Converter.
+     * rb_econv_open set them NULL.
*/
+    rb_encoding *source_encoding;
+    rb_encoding *destination_encoding;
+};
+
+/*
+ *  Dispatch data and logic
+ */
+/* A transcoder whose source encoding name is empty is a decorator. */
+#define DECORATOR_P(sname, dname) (*(sname) == '\0')
+/* One registered or declared conversion from sname to dname. */
+typedef struct {
+    const char *sname;
+    const char *dname;
+    const char *lib; /* null means no need to load a library */
+    const rb_transcoder *transcoder;
+} transcoder_entry_t;
+/* Two-level table: source name -> (destination name -> transcoder_entry_t *). */
+static st_table *transcoder_table;
+/*
+ * Returns the entry for sname -> dname, creating the inner table and a
+ * blank entry (lib and transcoder NULL) when they do not exist yet.
+ * The name pointers are stored as given, not copied.
+ */
+static transcoder_entry_t *
+make_transcoder_entry(const char *sname, const char *dname)
+{
+    st_data_t val;
+    st_table *table2;
+
+    if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) {
+        val = (st_data_t)st_init_strcasetable();
+        st_add_direct(transcoder_table, (st_data_t)sname, val);
+    }
+    table2 = (st_table *)val;
+    if (!st_lookup(table2, (st_data_t)dname, &val)) {
+        transcoder_entry_t *entry = ALLOC(transcoder_entry_t);
+        entry->sname = sname;
+        entry->dname = dname;
+        entry->lib = NULL;
+        entry->transcoder = NULL;
+        val = (st_data_t)entry;
+        st_add_direct(table2, (st_data_t)dname, val);
+    }
+    return (transcoder_entry_t *)val;
+}
+/* Looks up the entry for sname -> dname; returns NULL when there is none. */
+static transcoder_entry_t *
+get_transcoder_entry(const char *sname, const char *dname)
+{
+    st_data_t val;
+    st_table *table2;
+
+    if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) {
+        return NULL;
+    }
+    table2 = (st_table *)val;
+    if (!st_lookup(table2, (st_data_t)dname, &val)) {
+        return NULL;
+    }
+    return (transcoder_entry_t *)val;
+}
+/*
+ * Attaches tr to the entry for its source/destination pair.
+ * Raises ArgumentError when that pair already has a transcoder.
+ */
+void
+rb_register_transcoder(const rb_transcoder *tr)
+{
+    const char *const sname = tr->src_encoding;
+    const char *const dname = tr->dst_encoding;
+
+    transcoder_entry_t *entry;
+
+    entry = make_transcoder_entry(sname, dname);
+    if (entry->transcoder) {
+        rb_raise(rb_eArgError, "transcoder from %s to %s has been already registered",
+                 sname, dname);
+    }
+
+    entry->transcoder = tr;
+}
+/* Records lib as the library to load for sname -> dname (no validation here). */
+static void
+declare_transcoder(const char *sname, const char *dname, const char *lib)
+{
+    transcoder_entry_t *entry;
+
+    entry = make_transcoder_entry(sname, dname);
+    entry->lib = lib;
+}
+
+#define MAX_TRANSCODER_LIBNAME_LEN 64
+static const char transcoder_lib_prefix[] = "enc/trans/";
+/*
+ * Public form of declare_transcoder: raises ArgumentError when lib is
+ * NULL or longer than MAX_TRANSCODER_LIBNAME_LEN.
+ */
+void
+rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
+{
+    if (!lib || strlen(lib) > MAX_TRANSCODER_LIBNAME_LEN) {
+        rb_raise(rb_eArgError, "invalid library name - %s",
+                 lib ? lib : "(null)");
+    }
+    declare_transcoder(enc1, enc2, lib);
+}
+
+#define encoding_equal(enc1, enc2) (STRCASECMP(enc1, enc2) == 0)
+/* Singly linked queue node holding an encoding name still to be expanded. */
+typedef struct search_path_queue_tag {
+    struct search_path_queue_tag *next;
+    const char *enc;
+} search_path_queue_t;
+/*
+ * Search state: visited maps each reached encoding to the encoding it
+ * was reached from; queue_last_ptr points at the link where the next
+ * node is appended; base_enc is the encoding being expanded.
+ */
+typedef struct {
+    st_table *visited;
+    search_path_queue_t *queue;
+    search_path_queue_t **queue_last_ptr;
+    const char *base_enc;
+} search_path_bfs_t;
+/*
+ * st_foreach callback: enqueues every not yet visited destination of
+ * bfs->base_enc and records base_enc as its predecessor.
+ */
+static int
+transcode_search_path_i(st_data_t key, st_data_t val, st_data_t arg)
+{
+    const char *dname = (const char *)key;
+    search_path_bfs_t *bfs = (search_path_bfs_t *)arg;
+    search_path_queue_t *q;
+
+    if (st_lookup(bfs->visited, (st_data_t)dname, &val)) {
+        return ST_CONTINUE;
+    }
+
+    q = ALLOC(search_path_queue_t);
+    q->enc = dname;
+    q->next = NULL;
+    *bfs->queue_last_ptr = q;
+    bfs->queue_last_ptr = &q->next;
+
+    st_add_direct(bfs->visited, (st_data_t)dname, (st_data_t)bfs->base_enc);
+    return ST_CONTINUE;
+}
+/*
+ * Searches transcoder_table for a chain of conversions from sname to
+ * dname, expanding encodings in queue (first-in, first-out) order.
+ *
+ * On success callback is invoked once per hop as
+ * callback(from, to, depth, arg), starting with the hop that ends at
+ * dname (depth == pathlen-1) and finishing with the hop that starts at
+ * sname (depth == 0).
+ *
+ * Returns the number of hops, or -1 when no chain exists or when the
+ * two names are equal ignoring case.
+ */
+static int
+transcode_search_path(const char *sname, const char *dname,
+    void (*callback)(const char *sname, const char *dname, int depth, void *arg),
+    void *arg)
+{
+    search_path_bfs_t bfs;
+    search_path_queue_t *q;
+    st_data_t val;
+    st_table *table2;
+    int found;
+    int pathlen = -1;
+
+    if (encoding_equal(sname, dname))
+        return -1;
+
+    q = ALLOC(search_path_queue_t);
+    q->enc = sname;
+    q->next = NULL;
+    bfs.queue_last_ptr = &q->next;
+    bfs.queue = q;
+
+    bfs.visited = st_init_strcasetable();
+    st_add_direct(bfs.visited, (st_data_t)sname, (st_data_t)NULL); /* NULL predecessor marks the start of the chain */
+
+    while (bfs.queue) {
+        q = bfs.queue;
+        bfs.queue = q->next;
+        if (!bfs.queue)
+            bfs.queue_last_ptr = &bfs.queue;
+
+        if (!st_lookup(transcoder_table, (st_data_t)q->enc, &val)) {
+            xfree(q);
+            continue;
+        }
+        table2 = (st_table *)val;
+
+        if (st_lookup(table2, (st_data_t)dname, &val)) {
+            st_add_direct(bfs.visited, (st_data_t)dname, (st_data_t)q->enc);
+            xfree(q);
+            found = 1;
+            goto cleanup;
+        }
+
+        bfs.base_enc = q->enc;
+        st_foreach(table2, transcode_search_path_i, (st_data_t)&bfs);
+        bfs.base_enc = NULL;
+
+        xfree(q);
+    }
+    found = 0;
+
+  cleanup:
+    while (bfs.queue) {
+        q = bfs.queue;
+        bfs.queue = q->next;
+        xfree(q);
+    }
+
+    if (found) {
+        const char *enc = dname;
+        int depth;
+        pathlen = 0;
+        while (1) { /* first walk: count the hops back to sname */
+            st_lookup(bfs.visited, (st_data_t)enc, &val);
+            if (!val)
+                break;
+            pathlen++;
+            enc = (const char *)val;
+        }
+        depth = pathlen;
+        enc = dname;
+        while (1) { /* second walk: report each hop, last hop first */
+            st_lookup(bfs.visited, (st_data_t)enc, &val);
+            if (!val)
+                break;
+            callback((const char *)val, enc, --depth, arg);
+            enc = (const char *)val;
+        }
+    }
+
+    st_free_table(bfs.visited);
+
+    return pathlen; /* is -1 if not found */
+}
+/*
+ * Returns the transcoder of entry.  When none is attached yet and a
+ * library was declared, requires "enc/trans/<lib>" first; entry->lib is
+ * cleared before the attempt, so the load is tried only once.
+ * Returns NULL when the name is too long, rb_require() returns false,
+ * or no transcoder is attached afterwards.
+ * NOTE(review): presumably the loaded library calls
+ * rb_register_transcoder() to attach it - confirm.
+ */
+static const rb_transcoder *
+load_transcoder_entry(transcoder_entry_t *entry)
+{
+    if (entry->transcoder)
+        return entry->transcoder;
+
+    if (entry->lib) {
+        const char *lib = entry->lib;
+        int len = strlen(lib);
+        char path[sizeof(transcoder_lib_prefix) + MAX_TRANSCODER_LIBNAME_LEN];
+
+        entry->lib = NULL;
+
+        if (len > MAX_TRANSCODER_LIBNAME_LEN)
+            return NULL;
+        memcpy(path, transcoder_lib_prefix, sizeof(transcoder_lib_prefix) - 1);
+        memcpy(path + sizeof(transcoder_lib_prefix) - 1, lib, len + 1); /* len + 1 also copies the terminating NUL */
+        if (!rb_require(path))
+            return NULL;
+    }
+
+    if (entry->transcoder)
+        return entry->transcoder;
+
+    return NULL;
+}
+/*
+ * Returns the default replacement bytes for encname and stores their
+ * length and the name of the encoding they are written in:
+ * EF BF BD (U+FFFD in UTF-8) for UTF-8, "?" in US-ASCII otherwise.
+ */
+static const char*
+get_replacement_character(const char *encname, size_t *len_ret, const char **repl_encname_ptr)
+{
+    if (encoding_equal(encname, "UTF-8")) {
+        *len_ret = 3;
+        *repl_encname_ptr = "UTF-8";
+        return "\xEF\xBF\xBD";
+    }
+    else {
+        *len_ret = 1;
+        *repl_encname_ptr = "US-ASCII";
+        return "?";
+    }
+}
+
+/*
+ *  Transcoding engine logic
+ */
+
+static const unsigned char *
+transcode_char_start(rb_transcoding *tc,
+
const unsigned char *in_start,
+                     const unsigned char *inchar_start,
+                     const unsigned char *in_p,
+                     size_t *char_len_ptr)
+{
+    const unsigned char *ptr;
+    if (inchar_start - in_start < tc->recognized_len) { /* fewer bytes precede inchar_start in this input than were already recognized: append the new bytes to readbuf and use it */
+        MEMCPY(TRANSCODING_READBUF(tc) + tc->recognized_len,
+               inchar_start, unsigned char, in_p - inchar_start);
+        ptr = TRANSCODING_READBUF(tc);
+    }
+    else { /* the whole character is contiguous in the input buffer */
+        ptr = inchar_start - tc->recognized_len;
+    }
+    *char_len_ptr = tc->recognized_len + (in_p - inchar_start);
+    return ptr;
+}
+/*
+ * Core conversion loop, written so that it can return in the middle and
+ * continue from the same place on the next call.
+ *
+ * Reads from *in_pos up to in_stop, writes from *out_pos up to out_stop,
+ * and advances both positions.  Each SUSPEND(ret, num) stores num in
+ * tc->resume_position and returns ret; the switch at the top of the next
+ * call jumps to the matching resume_label<num>.  Values that must
+ * survive a suspension live in *tc (see the #define aliases below).
+ *
+ * Returns one of the econv_* result codes passed to SUSPEND.
+ */
+static rb_econv_result_t
+transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
+                      const unsigned char *in_stop, unsigned char *out_stop,
+                      rb_transcoding *tc,
+                      const int opt)
+
+{
+    const rb_transcoder *tr = tc->transcoder;
+    int unitlen = tr->input_unit_length;
+    int readagain_len = 0;
+
+    const unsigned char *inchar_start;
+    const unsigned char *in_p;
+
+    unsigned char *out_p;
+
+    in_p = inchar_start = *in_pos;
+
+    out_p = *out_pos;
+/*
+ * SUSPEND: save the consumed bytes of the current character into
+ * readbuf, publish the positions, account for bytes to be read again,
+ * return, and define the label at which execution resumes.
+ * SUSPEND_OBUF: suspend with destination_buffer_full until at least one
+ * output byte is available.
+ */
+#define SUSPEND(ret, num) \
+    do { \
+        tc->resume_position = (num); \
+        if (0 < in_p - inchar_start) \
+            MEMMOVE(TRANSCODING_READBUF(tc)+tc->recognized_len, \
+                   inchar_start, unsigned char, in_p - inchar_start); \
+        *in_pos = in_p; \
+        *out_pos = out_p; \
+        tc->recognized_len += in_p - inchar_start; \
+        if (readagain_len) { \
+            tc->recognized_len -= readagain_len; \
+            tc->readagain_len = readagain_len; \
+        } \
+        return ret; \
+        resume_label ## num:; \
+    } while (0)
+#define SUSPEND_OBUF(num) \
+    do { \
+        while (out_stop - out_p < 1) { SUSPEND(econv_destination_buffer_full, num); } \
+    } while (0)
+/* SUSPEND_AFTER_OUTPUT: with ECONV_AFTER_OUTPUT set, suspend once any output has been produced in this call. */
+#define SUSPEND_AFTER_OUTPUT(num) \
+    if ((opt & ECONV_AFTER_OUTPUT) && *out_pos != out_p) { \
+        SUSPEND(econv_after_output, num); \
+    }
+/* Aliases for the fields of *tc that hold state across suspensions. */
+#define next_table (tc->next_table)
+#define next_info (tc->next_info)
+#define next_byte (tc->next_byte)
+#define writebuf_len (tc->writebuf_len)
+#define writebuf_off (tc->writebuf_off)
+
+    switch (tc->resume_position) {
+      case 0: break;
+      case 1: goto resume_label1;
+      case 2: goto resume_label2;
+      case 3: goto resume_label3;
+      case 4: goto resume_label4;
+      case 5: goto resume_label5;
+      case 6: goto resume_label6;
+      case 7: goto resume_label7;
+      case 8: goto resume_label8;
+      case 9: goto resume_label9;
+      case 10: goto resume_label10;
+      case 11: goto resume_label11;
+      case 12: goto resume_label12;
+      case 13: goto resume_label13;
+      case 14: goto resume_label14;
+      case 15: goto resume_label15;
+      case 16: goto resume_label16;
+      case 17: goto resume_label17;
+      case 18: goto resume_label18;
+      case 19: goto resume_label19;
+      case 20: goto resume_label20;
+      case 21: goto resume_label21;
+      case 22: goto resume_label22;
+      case 23: goto resume_label23;
+      case 24: goto resume_label24;
+      case 25: goto resume_label25;
+      case 26: goto resume_label26;
+      case 27: goto resume_label27;
+      case 28: goto resume_label28;
+      case 29: goto resume_label29;
+      case 30: goto resume_label30;
+      case 31: goto resume_label31;
+      case 32: goto resume_label32;
+    }
+
+    while (1) { /* one iteration per input character */
+        inchar_start = in_p;
+        tc->recognized_len = 0;
+        next_table = tr->conv_tree_start;
+
+        SUSPEND_AFTER_OUTPUT(24);
+
+        if (in_stop <= in_p) {
+            if (!(opt & ECONV_PARTIAL_INPUT))
+                break;
+            SUSPEND(econv_source_buffer_empty, 7);
+            continue;
+        }
+
+#define BYTE_ADDR(index) (tr->byte_array + (index))
+#define WORD_ADDR(index) (tr->word_array + INFO2WORDINDEX(index))
+#define BL_BASE BYTE_ADDR(BYTE_LOOKUP_BASE(WORD_ADDR(next_table)))
+#define BL_INFO WORD_ADDR(BYTE_LOOKUP_INFO(WORD_ADDR(next_table)))
+#define BL_MIN_BYTE     (BL_BASE[0])
+#define BL_MAX_BYTE     (BL_BASE[1])
+#define BL_OFFSET(byte) (BL_BASE[2+(byte)-BL_MIN_BYTE])
+#define BL_ACTION(byte) (BL_INFO[BL_OFFSET((byte))])
+
+        next_byte = (unsigned char)*in_p++;
+      follow_byte:
+        if (next_byte < BL_MIN_BYTE || BL_MAX_BYTE < next_byte)
+            next_info = INVALID;
+        else {
+            next_info = (VALUE)BL_ACTION(next_byte);
+        }
+      follow_info:
+        switch (next_info & 0x1F) {
+          case NOMAP: /* xxx: copy last byte only? */
+            SUSPEND_OBUF(3); *out_p++ = next_byte;
+            continue;
+          case 0x00: case 0x04: case 0x08: case 0x0C:
+          case 0x10: case 0x14: case 0x18: case 0x1C: /* next_info names another table: fetch one more input byte and look it up there */
+            SUSPEND_AFTER_OUTPUT(25);
+            while (in_p >= in_stop) {
+                if (!(opt & ECONV_PARTIAL_INPUT))
+                    goto incomplete;
+                SUSPEND(econv_source_buffer_empty, 5);
+            }
+            next_byte = (unsigned char)*in_p++;
+            next_table = next_info;
+            goto follow_byte;
+          case ZERObt: /* drop input */
+            continue;
+          case ONEbt:
+            SUSPEND_OBUF(9); *out_p++ = getBT1(next_info);
+            continue;
+          case TWObt:
+            SUSPEND_OBUF(10); *out_p++ = getBT1(next_info);
+            SUSPEND_OBUF(21); *out_p++ = getBT2(next_info);
+            continue;
+          case THREEbt:
+            SUSPEND_OBUF(11); *out_p++ = getBT1(next_info);
+            SUSPEND_OBUF(15); *out_p++ = getBT2(next_info);
+            SUSPEND_OBUF(16); *out_p++ = getBT3(next_info);
+            continue;
+          case FOURbt:
+            SUSPEND_OBUF(12); *out_p++ = getBT0(next_info);
+            SUSPEND_OBUF(17); *out_p++ = getBT1(next_info);
+            SUSPEND_OBUF(18); *out_p++ = getBT2(next_info);
+            SUSPEND_OBUF(19); *out_p++ = getBT3(next_info);
+            continue;
+          case GB4bt:
+            SUSPEND_OBUF(29); *out_p++ = getGB4bt0(next_info);
+            SUSPEND_OBUF(30); *out_p++ = getGB4bt1(next_info);
+            SUSPEND_OBUF(31); *out_p++ = getGB4bt2(next_info);
+            SUSPEND_OBUF(32); *out_p++ = getGB4bt3(next_info);
+            continue;
+          case STR1:
+            tc->output_index = 0;
+            while (tc->output_index < STR1_LENGTH(BYTE_ADDR(STR1_BYTEINDEX(next_info)))) {
+                SUSPEND_OBUF(28); *out_p++ = BYTE_ADDR(STR1_BYTEINDEX(next_info))[1+tc->output_index];
+                tc->output_index++;
+            }
+            continue;
+          case FUNii:
+            next_info = (VALUE)(*tr->func_ii)(TRANSCODING_STATE(tc), next_info);
+            goto follow_info;
+          case FUNsi:
+            {
+                const unsigned char *char_start;
+                size_t char_len;
+                char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
+                next_info = (VALUE)(*tr->func_si)(TRANSCODING_STATE(tc), char_start, (size_t)char_len);
+                goto follow_info;
+            }
+          case FUNio:
+            SUSPEND_OBUF(13);
+            if (tr->max_output <= out_stop - out_p) /* enough room: let the function write straight into the output */
+                out_p += tr->func_io(TRANSCODING_STATE(tc),
+                    next_info, out_p, out_stop - out_p);
+            else { /* otherwise write into writebuf and copy out byte by byte, suspending when the output is full */
+                writebuf_len = tr->func_io(TRANSCODING_STATE(tc),
+                    next_info,
+                    TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
+                writebuf_off = 0;
+                while (writebuf_off < writebuf_len) {
+                    SUSPEND_OBUF(20);
+                    *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
+                }
+            }
+            break;
+          case FUNso:
+            {
+                const unsigned char *char_start;
+                size_t char_len;
+                SUSPEND_OBUF(14);
+                if (tr->max_output <= out_stop - out_p) {
+                    char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
+                    out_p += tr->func_so(TRANSCODING_STATE(tc),
+                        char_start, (size_t)char_len,
+                        out_p, out_stop - out_p);
+                }
+                else {
+                    char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
+                    writebuf_len = tr->func_so(TRANSCODING_STATE(tc),
+                        char_start, (size_t)char_len,
+                        TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
+                    writebuf_off = 0;
+                    while (writebuf_off < writebuf_len) {
+                        SUSPEND_OBUF(22);
+                        *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
+                    }
+                }
+                break;
+            }
+          case INVALID:
+            if (tc->recognized_len + (in_p - inchar_start) <= unitlen) { /* at most one input unit seen: extend the invalid span up to unitlen bytes, or to in_stop if less is available */
+                if (tc->recognized_len + (in_p - inchar_start) < unitlen)
+                    SUSPEND_AFTER_OUTPUT(26);
+                while ((opt & ECONV_PARTIAL_INPUT) && tc->recognized_len + (in_stop - inchar_start) < unitlen) {
+                    in_p = in_stop;
+                    SUSPEND(econv_source_buffer_empty, 8);
+                }
+                if (tc->recognized_len + (in_stop - inchar_start) <= unitlen) {
+                    in_p = in_stop;
+                }
+                else {
+                    in_p = inchar_start + (unitlen - tc->recognized_len);
+                }
+            }
+            else { /* more than one unit seen: discard whole units and schedule the remaining bytes to be read again */
+                int invalid_len; /* including the last byte which causes invalid */
+                int discard_len;
+                invalid_len = tc->recognized_len + (in_p - inchar_start);
+                discard_len = ((invalid_len - 1) / unitlen) * unitlen;
+                readagain_len = invalid_len - discard_len;
+            }
+            goto invalid;
+          case UNDEF:
+            goto undef;
+        }
+        continue;
+
+      invalid:
+        SUSPEND(econv_invalid_byte_sequence, 1);
+        continue;
+
+      incomplete:
+        SUSPEND(econv_incomplete_input, 27);
+        continue;
+
+      undef:
+        SUSPEND(econv_undefined_conversion, 2);
+        continue;
+    }
+
+    /* cleanup */
+    if (tr->finish_func) {
+        SUSPEND_OBUF(4);
+        if (tr->max_output <= out_stop - out_p) {
+            out_p += tr->finish_func(TRANSCODING_STATE(tc),
+                out_p, out_stop - out_p);
+        }
+        else {
+            writebuf_len = tr->finish_func(TRANSCODING_STATE(tc),
+                TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
+            writebuf_off = 0;
+            while (writebuf_off < writebuf_len) {
+                SUSPEND_OBUF(23);
+                *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
+            }
+        }
+    }
+    while (1) /* every later call keeps reporting econv_finished */
+        SUSPEND(econv_finished, 6);
+#undef SUSPEND
+#undef next_table
+#undef next_info
+#undef next_byte
+#undef writebuf_len
+#undef writebuf_off
+}
+/*
+ * Wrapper around transcode_restartable0.  When a previous call left
+ * bytes to be read again (tc->readagain_len), they are copied out of
+ * readbuf and converted first, with ECONV_PARTIAL_INPUT forced.  If that
+ * pass ends with anything other than source_buffer_empty, the bytes it
+ * did not consume are stored back and its result is returned; otherwise
+ * conversion continues with the caller's input.
+ */
+static rb_econv_result_t
+transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
+                      const unsigned char *in_stop, unsigned char *out_stop,
+                      rb_transcoding *tc,
+                      const int opt)
+{
+    if (tc->readagain_len) {
+        unsigned char *readagain_buf = ALLOCA_N(unsigned char, tc->readagain_len);
+        const unsigned char *readagain_pos = readagain_buf;
+        const unsigned char *readagain_stop = readagain_buf + tc->readagain_len;
+        rb_econv_result_t res;
+
+        MEMCPY(readagain_buf, TRANSCODING_READBUF(tc) + tc->recognized_len,
+               unsigned char, tc->readagain_len);
+        tc->readagain_len = 0;
+        res = transcode_restartable0(&readagain_pos, out_pos, readagain_stop, out_stop, tc, opt|ECONV_PARTIAL_INPUT);
+        if (res != econv_source_buffer_empty) {
+            MEMCPY(TRANSCODING_READBUF(tc) + tc->recognized_len + tc->readagain_len,
+                   readagain_pos, unsigned char, readagain_stop - readagain_pos);
+            tc->readagain_len += readagain_stop - readagain_pos;
+            return res;
+        }
+    }
+    return transcode_restartable0(in_pos, out_pos, in_stop, out_stop, tc, opt);
+}
+/*
+ * Allocates and initializes a transcoding for tr.  State, read buffer
+ * and write buffer are heap-allocated only when the sizes declared by
+ * tr exceed the embedded storage; rb_transcoding_close frees them under
+ * the same conditions.
+ */
+static rb_transcoding *
+rb_transcoding_open_by_transcoder(const rb_transcoder *tr, int flags)
+{
+    rb_transcoding *tc;
+
+    tc = ALLOC(rb_transcoding);
+    tc->transcoder = tr;
+    tc->flags = flags;
+    if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
+        tc->state.ptr = xmalloc(tr->state_size);
+    if (tr->state_init_func) {
+        (tr->state_init_func)(TRANSCODING_STATE(tc)); /* xxx: check return value */
+    }
+    tc->resume_position = 0;
+    tc->recognized_len = 0;
+    tc->readagain_len = 0;
+    tc->writebuf_len = 0;
+    tc->writebuf_off = 0;
+    if (sizeof(tc->readbuf.ary) < tr->max_input) {
+        tc->readbuf.ptr = xmalloc(tr->max_input);
+    }
+    if (sizeof(tc->writebuf.ary) < tr->max_output) {
+        tc->writebuf.ptr = xmalloc(tr->max_output);
+    }
+    return tc;
+}
+/* Thin wrapper: forwards to transcode_restartable with the arguments reordered. */
+static rb_econv_result_t
+rb_transcoding_convert(rb_transcoding *tc,
+  const unsigned char **input_ptr, const unsigned char *input_stop,
+  unsigned char **output_ptr, unsigned char *output_stop,
+  int flags)
+{
+    return transcode_restartable(
+                input_ptr, output_ptr,
+                input_stop, output_stop,
+                tc, flags);
+}
+
+static void
+rb_transcoding_close(rb_transcoding *tc)
+{
+    const rb_transcoder *tr = tc->transcoder;
+    if (tr->state_fini_func) {
+        (tr->state_fini_func)(TRANSCODING_STATE(tc)); /* check return value?
*/
+    }
+    if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
+        xfree(tc->state.ptr);
+    if (sizeof(tc->readbuf.ary) < tr->max_input)
+        xfree(tc->readbuf.ptr);
+    if (sizeof(tc->writebuf.ary) < tr->max_output)
+        xfree(tc->writebuf.ptr);
+    xfree(tc);
+}
+/*
+ * Allocates an empty converter with room for n_hint stages (at least
+ * one) and every other field cleared; last_error.result starts as
+ * econv_source_buffer_empty.
+ */
+static rb_econv_t *
+rb_econv_alloc(int n_hint)
+{
+    rb_econv_t *ec;
+
+    if (n_hint <= 0)
+        n_hint = 1;
+
+    ec = ALLOC(rb_econv_t);
+    ec->flags = 0;
+    ec->source_encoding_name = NULL;
+    ec->destination_encoding_name = NULL;
+    ec->started = 0;
+    ec->replacement_str = NULL;
+    ec->replacement_len = 0;
+    ec->replacement_enc = NULL;
+    ec->replacement_allocated = 0;
+    ec->in_buf_start = NULL;
+    ec->in_data_start = NULL;
+    ec->in_data_end = NULL;
+    ec->in_buf_end = NULL;
+    ec->num_allocated = n_hint;
+    ec->num_trans = 0;
+    ec->elems = ALLOC_N(rb_econv_elem_t, ec->num_allocated);
+    ec->num_finished = 0;
+    ec->last_tc = NULL;
+    ec->last_error.result = econv_source_buffer_empty;
+    ec->last_error.error_tc = NULL;
+    ec->last_error.source_encoding = NULL;
+    ec->last_error.destination_encoding = NULL;
+    ec->last_error.error_bytes_start = NULL;
+    ec->last_error.error_bytes_len = 0;
+    ec->last_error.readagain_len = 0;
+    ec->source_encoding = NULL;
+    ec->destination_encoding = NULL;
+    return ec;
+}
+/*
+ * Inserts a new stage for tr at index i, shifting later stages up.
+ * elems[] doubles in capacity when full, and the stage gets a 4096-byte
+ * output buffer.  When tr is not a decorator, last_tc is updated to the
+ * highest-indexed non-decorator stage at or after i.
+ * Always returns 0 in the code shown.
+ */
+static int
+rb_econv_add_transcoder_at(rb_econv_t *ec, const rb_transcoder *tr, int i)
+{
+    int n, j;
+    int bufsize = 4096;
+    unsigned char *p;
+
+    if (ec->num_trans == ec->num_allocated) {
+        n = ec->num_allocated * 2;
+        REALLOC_N(ec->elems, rb_econv_elem_t, n);
+        ec->num_allocated = n;
+    }
+
+    p = xmalloc(bufsize);
+
+    MEMMOVE(ec->elems+i+1, ec->elems+i, rb_econv_elem_t, ec->num_trans-i);
+
+    ec->elems[i].tc = rb_transcoding_open_by_transcoder(tr, 0);
+    ec->elems[i].out_buf_start = p;
+    ec->elems[i].out_buf_end = p + bufsize;
+    ec->elems[i].out_data_start = p;
+    ec->elems[i].out_data_end = p;
+    ec->elems[i].last_result = econv_source_buffer_empty;
+
+    ec->num_trans++;
+
+    if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding))
+        for (j = ec->num_trans-1; i <= j; j--) {
+            rb_transcoding *tc = ec->elems[j].tc;
+            const rb_transcoder *tr2 = tc->transcoder;
+            if (!DECORATOR_P(tr2->src_encoding, tr2->dst_encoding)) {
+                ec->last_tc = tc;
+                break;
+            }
+        }
+
+    return 0;
+}
+/*
+ * Builds a converter from n entries, in order.  All entries are loaded
+ * up front so that NULL can be returned before anything is allocated
+ * when one of them has no transcoder.
+ */
+static rb_econv_t *
+rb_econv_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
+{
+    rb_econv_t *ec;
+    int i, ret;
+
+    for (i = 0; i < n; i++) {
+        const rb_transcoder *tr;
+        tr = load_transcoder_entry(entries[i]);
+        if (!tr)
+            return NULL;
+    }
+
+    ec = rb_econv_alloc(n);
+
+    for (i = 0; i < n; i++) {
+        const rb_transcoder *tr = load_transcoder_entry(entries[i]);
+        ret = rb_econv_add_transcoder_at(ec, tr, ec->num_trans);
+        if (ret == -1) {
+            rb_econv_close(ec);
+            return NULL;
+        }
+    }
+
+    return ec;
+}
+/* Argument block for trans_open_i: the entry array being filled and the number of extra slots to reserve. */
+struct trans_open_t {
+    transcoder_entry_t **entries;
+    int num_additional;
+};
+/*
+ * transcode_search_path callback: stores the entry for one hop at index
+ * depth.  The array is allocated on the first call with
+ * depth+1+num_additional slots, so the first call must carry the largest
+ * depth (transcode_search_path reports the last hop first).
+ */
+static void
+trans_open_i(const char *sname, const char *dname, int depth, void *arg)
+{
+    struct trans_open_t *toarg = arg;
+
+    if (!toarg->entries) {
+        toarg->entries = ALLOC_N(transcoder_entry_t *, depth+1+toarg->num_additional);
+    }
+    toarg->entries[depth] = get_transcoder_entry(sname, dname);
+}
+/*
+ * Opens a converter from sname to dname without decorators.  Two empty
+ * names give a converter with zero stages.  Returns NULL when no
+ * conversion path exists or a transcoder cannot be loaded.
+ * The name pointers are stored in the converter, not copied.
+ * senc and denc are looked up but not used in the code shown.
+ */
+static rb_econv_t *
+rb_econv_open0(const char *sname, const char *dname, int ecflags)
+{
+    transcoder_entry_t **entries = NULL;
+    int num_trans;
+    rb_econv_t *ec;
+
+    rb_encoding *senc, *denc;
+    int sidx, didx;
+
+    senc = NULL;
+    if (*sname) {
+        sidx = rb_enc_find_index(sname);
+        if (0 <= sidx) {
+            senc = rb_enc_from_index(sidx);
+        }
+    }
+
+    denc = NULL;
+    if (*dname) {
+        didx = rb_enc_find_index(dname);
+        if (0 <= didx) {
+            denc = rb_enc_from_index(didx);
+        }
+    }
+
+    if (*sname == '\0' && *dname == '\0') {
+        num_trans = 0;
+        entries = NULL;
+    }
+    else {
+        struct trans_open_t toarg;
+        toarg.entries = NULL;
+        toarg.num_additional = 0;
+        num_trans = transcode_search_path(sname, dname, trans_open_i, (void *)&toarg);
+        entries = toarg.entries;
+        if (num_trans < 0) {
+            xfree(entries);
+            return NULL;
+        }
+    }
+
+    ec = rb_econv_open_by_transcoder_entries(num_trans, entries);
+    xfree(entries);
+    if (!ec)
+        return NULL;
+
+    ec->flags = ecflags;
+    ec->source_encoding_name = sname;
+    ec->destination_encoding_name = dname;
+
+    return ec;
+}
+
+#define MAX_ECFLAGS_DECORATORS 32
+/*
+ * Translates the decorator bits of ecflags into decorator names, written
+ * to decorators_ret in application order.  Returns the number of names,
+ * or -1 for a conflicting combination (CRLF with CR, either of those
+ * with universal newline, XML text with XML attribute content).
+ */
+static int
+decorator_names(int ecflags, const char **decorators_ret)
+{
+    int num_decorators;
+
+    if ((ecflags & ECONV_CRLF_NEWLINE_DECORATOR) &&
+        (ecflags & ECONV_CR_NEWLINE_DECORATOR))
+        return -1;
+
+    if ((ecflags & (ECONV_CRLF_NEWLINE_DECORATOR|ECONV_CR_NEWLINE_DECORATOR)) &&
+        (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR))
+        return -1;
+
+    if ((ecflags & ECONV_XML_TEXT_DECORATOR) &&
+        (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR))
+        return -1;
+
+    num_decorators = 0;
+
+    if (ecflags & ECONV_XML_TEXT_DECORATOR)
+        decorators_ret[num_decorators++] = "xml_text_escape";
+    if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR)
+        decorators_ret[num_decorators++] = "xml_attr_content_escape";
+    if (ecflags & ECONV_XML_ATTR_QUOTE_DECORATOR)
+        decorators_ret[num_decorators++] = "xml_attr_quote";
+
+    if (ecflags & ECONV_CRLF_NEWLINE_DECORATOR)
+        decorators_ret[num_decorators++] = "crlf_newline";
+    if (ecflags & ECONV_CR_NEWLINE_DECORATOR)
+        decorators_ret[num_decorators++] = "cr_newline";
+    if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR)
+        decorators_ret[num_decorators++] = "universal_newline";
+
+    return num_decorators;
+}
+/*
+ * Opens a converter from sname to dname and appends the decorators
+ * requested in ecflags.  The base converter is opened with only the
+ * error-handler bits; the remaining bits are OR-ed in once every
+ * decorator has been added.  Returns NULL on conflicting flags, when no
+ * conversion path exists, or when a decorator cannot be added (the
+ * partly built converter is closed in that case).
+ */
+rb_econv_t *
+rb_econv_open(const char *sname, const char *dname, int ecflags)
+{
+    rb_econv_t *ec;
+    int num_decorators;
+    const char *decorators[MAX_ECFLAGS_DECORATORS];
+    int i;
+
+    num_decorators = decorator_names(ecflags, decorators);
+    if (num_decorators == -1)
+        return NULL;
+
+    ec = rb_econv_open0(sname, dname, ecflags & ECONV_ERROR_HANDLER_MASK);
+    if (!ec)
+        return NULL;
+
+    for (i = 0; i < num_decorators; i++)
+        if (rb_econv_decorate_at_last(ec, decorators[i]) == -1) {
+            rb_econv_close(ec);
+            return NULL;
+        }
+
+    ec->flags |= ecflags & ~ECONV_ERROR_HANDLER_MASK;
+
+    return ec;
+}
+
+static int /* trans_sweep: runs stages start..num_trans-1 repeatedly, feeding each stage from the previous stage's output buffer, until a full pass makes no progress. Returns the index of the first stage that reports an error or after_output, or -1 when the passes end without one. */
+trans_sweep(rb_econv_t *ec,
+    const unsigned char **input_ptr, const unsigned char *input_stop,
+    unsigned char **output_ptr, unsigned char *output_stop,
+    int flags,
+    int start)
+{
+    int try;
+    int i, f;
+
+    const unsigned char **ipp, *is, *iold;
+    unsigned char **opp, *os, *oold;
+    rb_econv_result_t res;
+
+    try = 1;
+    while (try) {
+        try = 0;
+        for (i = start; i < ec->num_trans; i++) {
+            rb_econv_elem_t *te = &ec->elems[i];
+
+            if (i == 0) { /* first stage reads the caller's input */
+                ipp = input_ptr;
+                is = input_stop;
+            }
+            else { /* later stages read the pending output of the previous stage */
+                rb_econv_elem_t *prev_te = &ec->elems[i-1];
+                ipp = (const unsigned char **)&prev_te->out_data_start;
+                is = prev_te->out_data_end;
+            }
+
+            if (i == ec->num_trans-1) { /* last stage writes to the caller's output */
+                opp = output_ptr;
+                os = output_stop;
+            }
+            else {
+                if (te->out_buf_start != te->out_data_start) { /* move pending data to the front of the buffer to make room */
+                    int len = te->out_data_end - te->out_data_start;
+                    int off = te->out_data_start - te->out_buf_start;
+                    MEMMOVE(te->out_buf_start, te->out_data_start, unsigned char, len);
+                    te->out_data_start = te->out_buf_start;
+                    te->out_data_end -= off;
+                }
+                opp = &te->out_data_end;
+                os = te->out_buf_end;
+            }
+
+            f = flags;
+            if (ec->num_finished != i) /* an earlier stage has not finished, so more input may still arrive */
+                f |= ECONV_PARTIAL_INPUT;
+            if (i == 0 && (flags & ECONV_AFTER_OUTPUT)) {
+                start = 1;
+                flags &= ~ECONV_AFTER_OUTPUT;
+            }
+            if (i != 0)
+                f &= ~ECONV_AFTER_OUTPUT;
+            iold = *ipp;
+            oold = *opp;
+            te->last_result = res = rb_transcoding_convert(te->tc, ipp, is, opp, os, f);
+            if (iold != *ipp || oold != *opp) /* progress was made: schedule another pass */
+                try = 1;
+
+            switch (res) {
+              case econv_invalid_byte_sequence:
+              case econv_incomplete_input:
+              case econv_undefined_conversion:
+              case econv_after_output:
+                return i;
+
+              case econv_destination_buffer_full:
+              case econv_source_buffer_empty:
+                break;
+
+              case econv_finished:
+                ec->num_finished = i+1;
+                break;
+            }
+        }
+    }
+    return -1;
+}
+/*
+ * Drives the whole pipeline once.  NULL input_ptr / output_ptr stand
+ * for empty input / no output space.  A stage whose last result still
+ * has to be acted on (error, after_output, finished) makes the sweep
+ * start just after it.  The result returned is that of the last stage
+ * whose result is not source_buffer_empty, and its index is stored in
+ * *result_position_ptr (-1 when every stage wants more input).
+ */
+static rb_econv_result_t
+rb_trans_conv(rb_econv_t *ec,
+    const unsigned char **input_ptr, const unsigned char *input_stop,
+    unsigned char **output_ptr, unsigned char *output_stop,
+    int flags,
+    int *result_position_ptr)
+{
+    int i;
+    int needreport_index;
+    int sweep_start;
+
+    unsigned char empty_buf;
+    unsigned char *empty_ptr = &empty_buf;
+
+    if (!input_ptr) {
+        input_ptr = (const unsigned char **)&empty_ptr;
+        input_stop = empty_ptr;
+    }
+
+    if (!output_ptr) {
+        output_ptr = &empty_ptr;
+        output_stop = empty_ptr;
+    }
+
+    if (ec->elems[0].last_result == econv_after_output)
+        ec->elems[0].last_result = econv_source_buffer_empty;
+
+    needreport_index = -1;
+    for (i = ec->num_trans-1; 0 <= i; i--) {
+        switch (ec->elems[i].last_result) {
+          case econv_invalid_byte_sequence:
+          case econv_incomplete_input:
+          case econv_undefined_conversion:
+          case econv_after_output:
+          case econv_finished:
+            sweep_start = i+1;
+            needreport_index = i;
+            goto found_needreport;
+
+          case econv_destination_buffer_full:
+          case econv_source_buffer_empty:
+            break;
+
+          default:
+            rb_bug("unexpected transcode last result");
+        }
+    }
+
+    /* /^[sd]+$/ is confirmed.  but actually /^s*d*$/. */
+
+    if (ec->elems[ec->num_trans-1].last_result == econv_destination_buffer_full &&
+        (flags & ECONV_AFTER_OUTPUT)) {
+        rb_econv_result_t res;
+
+        res = rb_trans_conv(ec, NULL, NULL, output_ptr, output_stop,
+                (flags & ~ECONV_AFTER_OUTPUT)|ECONV_PARTIAL_INPUT,
+                result_position_ptr);
+
+        if (res == econv_source_buffer_empty)
+            return econv_after_output;
+        return res;
+    }
+
+    sweep_start = 0;
+
+  found_needreport:
+
+    do {
+        needreport_index = trans_sweep(ec, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start);
+        sweep_start = needreport_index + 1;
+    } while (needreport_index != -1 && needreport_index != ec->num_trans-1);
+
+    for (i = ec->num_trans-1; 0 <= i; i--) {
+        if (ec->elems[i].last_result != econv_source_buffer_empty) {
+            rb_econv_result_t res = ec->elems[i].last_result;
+            if (res == econv_invalid_byte_sequence ||
+                res == econv_incomplete_input ||
+                res == econv_undefined_conversion ||
+                res == econv_after_output) { /* reported once: reset so the next call does not see it again */
+                ec->elems[i].last_result = econv_source_buffer_empty;
+            }
+            if (result_position_ptr)
+                *result_position_ptr = i;
+            return res;
+        }
+    }
+    if (result_position_ptr)
+        *result_position_ptr = -1;
+    return econv_source_buffer_empty;
+}
+
+static rb_econv_result_t
+rb_econv_convert0(rb_econv_t *ec,
+    const unsigned char **input_ptr, const unsigned char *input_stop,
+    unsigned char **output_ptr, unsigned char *output_stop,
+    int flags)
+{
+    rb_econv_result_t res;
+    int result_position;
+    int has_output = 0;
+
+    memset(&ec->last_error, 0, sizeof(ec->last_error));
+
+    if (ec->num_trans == 0) {
+        size_t len;
+        if (ec->in_buf_start && ec->in_data_start != ec->in_data_end) {
+            if (output_stop - *output_ptr < ec->in_data_end - ec->in_data_start) {
+                len = output_stop - *output_ptr;
+                memcpy(*output_ptr, ec->in_data_start, len);
+                *output_ptr = output_stop;
+                ec->in_data_start += len;
+                res = econv_destination_buffer_full;
+                goto gotresult;
+            }
+            len = ec->in_data_end - ec->in_data_start;
+            memcpy(*output_ptr, ec->in_data_start, len);
+            *output_ptr += len;
+            ec->in_data_start = ec->in_data_end = ec->in_buf_start;
+            if (flags & ECONV_AFTER_OUTPUT) {
+                res = econv_after_output;
+                goto gotresult;
+            }
+        }
+        if (output_stop - *output_ptr < input_stop - *input_ptr) {
+            len = output_stop - *output_ptr;
+        }
+        else {
+            len = input_stop - *input_ptr;
+        }
+        if (0 < len && (flags & ECONV_AFTER_OUTPUT)) {
+            *(*output_ptr)++ = *(*input_ptr)++;
+            res = econv_after_output;
+            goto gotresult;
+        }
+        memcpy(*output_ptr, *input_ptr, len);
+        *output_ptr += len;
+        *input_ptr += len;
+        if (*input_ptr != input_stop)
+            res = econv_destination_buffer_full;
+        else if (flags & ECONV_PARTIAL_INPUT)
+            res = econv_source_buffer_empty;
+        else
+            res = econv_finished;
+        goto gotresult;
+    }
+
+    if (ec->elems[ec->num_trans-1].out_data_start) {
+        unsigned char *data_start = ec->elems[ec->num_trans-1].out_data_start;
+        unsigned char *data_end = ec->elems[ec->num_trans-1].out_data_end;
+        if (data_start != data_end) {
+            size_t len;
+            if (output_stop - *output_ptr < data_end - data_start) {
+                len = output_stop - *output_ptr;
+                memcpy(*output_ptr, data_start, len);
+                *output_ptr = output_stop;
+                ec->elems[ec->num_trans-1].out_data_start += len;
+                res = econv_destination_buffer_full;
+                goto gotresult;
+            }
+            len = data_end - data_start;
+            memcpy(*output_ptr, data_start, len);
+            *output_ptr += len;
+            ec->elems[ec->num_trans-1].out_data_start =
+                ec->elems[ec->num_trans-1].out_data_end =
+                ec->elems[ec->num_trans-1].out_buf_start;
+            has_output = 1;
+        }
+    }
+
+    if (ec->in_buf_start &&
+        ec->in_data_start != ec->in_data_end) {
+        res = rb_trans_conv(ec, (const unsigned char **)&ec->in_data_start, ec->in_data_end, output_ptr, output_stop,
+                (flags&~ECONV_AFTER_OUTPUT)|ECONV_PARTIAL_INPUT, &result_position);
+        if (res != econv_source_buffer_empty)
+            goto gotresult;
+    }
+
+    if (has_output &&
+        (flags & ECONV_AFTER_OUTPUT) &&
+        *input_ptr != input_stop) {
+        input_stop = *input_ptr;
+        res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
+        if (res == econv_source_buffer_empty)
+            res = econv_after_output;
+    }
+    else if ((flags & ECONV_AFTER_OUTPUT) ||
+        ec->num_trans == 1) {
+        res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
+    }
+    else {
+        flags |= ECONV_AFTER_OUTPUT;
+        do {
+            res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
+        } while (res == econv_after_output);
+    }
+
+  gotresult:
+    ec->last_error.result = res;
+    if (res == econv_invalid_byte_sequence ||
+        res == econv_incomplete_input ||
+        res == econv_undefined_conversion) {
+        rb_transcoding *error_tc = ec->elems[result_position].tc;
+        ec->last_error.error_tc = error_tc;
+        ec->last_error.source_encoding = error_tc->transcoder->src_encoding;
+        ec->last_error.destination_encoding = error_tc->transcoder->dst_encoding;
+        ec->last_error.error_bytes_start = TRANSCODING_READBUF(error_tc);
+        ec->last_error.error_bytes_len = error_tc->recognized_len;
+
ec->last_error.readagain_len = error_tc->readagain_len; + } + + return res; +} + +static int output_replacement_character(rb_econv_t *ec); + +static int +output_hex_charref(rb_econv_t *ec) +{ + int ret; + unsigned char utfbuf[1024]; + const unsigned char *utf; + size_t utf_len; + int utf_allocated = 0; + char charef_buf[16]; + const unsigned char *p; + + if (encoding_equal(ec->last_error.source_encoding, "UTF-32BE")) { + utf = ec->last_error.error_bytes_start; + utf_len = ec->last_error.error_bytes_len; + } + else { + utf = allocate_converted_string(ec->last_error.source_encoding, "UTF-32BE", + ec->last_error.error_bytes_start, ec->last_error.error_bytes_len, + utfbuf, sizeof(utfbuf), + &utf_len); + if (!utf) + return -1; + if (utf != utfbuf && utf != ec->last_error.error_bytes_start) + utf_allocated = 1; + } + + if (utf_len % 4 != 0) + goto fail; + + p = utf; + while (4 <= utf_len) { + unsigned int u = 0; + u += p[0] << 24; + u += p[1] << 16; + u += p[2] << 8; + u += p[3]; + snprintf(charef_buf, sizeof(charef_buf), "&#x%X;", u); + + ret = rb_econv_insert_output(ec, (unsigned char *)charef_buf, strlen(charef_buf), "US-ASCII"); + if (ret == -1) + goto fail; + + p += 4; + utf_len -= 4; + } + + if (utf_allocated) + xfree((void *)utf); + return 0; + + fail: + if (utf_allocated) + xfree((void *)utf); + return -1; +} + +rb_econv_result_t +rb_econv_convert(rb_econv_t *ec, + const unsigned char **input_ptr, const unsigned char *input_stop, + unsigned char **output_ptr, unsigned char *output_stop, + int flags) +{ + rb_econv_result_t ret; + + unsigned char empty_buf; + unsigned char *empty_ptr = &empty_buf; + + ec->started = 1; + + if (!input_ptr) { + input_ptr = (const unsigned char **)&empty_ptr; + input_stop = empty_ptr; + } + + if (!output_ptr) { + output_ptr = &empty_ptr; + output_stop = empty_ptr; + } + + resume: + ret = rb_econv_convert0(ec, input_ptr, input_stop, output_ptr, output_stop, flags); + + if (ret == econv_invalid_byte_sequence || + ret == 
econv_incomplete_input) { + /* deal with invalid byte sequence */ + /* todo: add more alternative behaviors */ + switch (ec->flags & ECONV_INVALID_MASK) { + case ECONV_INVALID_REPLACE: + if (output_replacement_character(ec) == 0) + goto resume; + } + } + + if (ret == econv_undefined_conversion) { + /* valid character in source encoding + * but no related character(s) in destination encoding */ + /* todo: add more alternative behaviors */ + switch (ec->flags & ECONV_UNDEF_MASK) { + case ECONV_UNDEF_REPLACE: + if (output_replacement_character(ec) == 0) + goto resume; + break; + + case ECONV_UNDEF_HEX_CHARREF: + if (output_hex_charref(ec) == 0) + goto resume; + break; + } + } + + return ret; +} + +/* + * Return the encoding name that rb_econv_insert_output compares its + * str_encoding argument against. + * + * The result is "" when the converter has no last transcoding + * (ec->last_tc is NULL), the source encoding of the last transcoder when + * that transcoder is an asciicompat encoder, and its destination + * encoding otherwise. + */ +const char * +rb_econv_encoding_to_insert_output(rb_econv_t *ec) +{ + rb_transcoding *tc = ec->last_tc; + const rb_transcoder *tr; + + if (tc == NULL) + return ""; + + tr = tc->transcoder; + + if (tr->asciicompat_type == asciicompat_encoder) + return tr->src_encoding; + return tr->dst_encoding; +} + +static unsigned char * +allocate_converted_string(const char *sname, const char *dname, + const unsigned char *str, size_t len, + unsigned char *caller_dst_buf, size_t caller_dst_bufsize, + size_t *dst_len_ptr) +{ + unsigned char *dst_str; + size_t dst_len; + size_t dst_bufsize; + + rb_econv_t *ec; + rb_econv_result_t res; + + const unsigned char *sp; + unsigned char *dp; + + if (caller_dst_buf) + dst_bufsize = caller_dst_bufsize; + else if (len == 0) + dst_bufsize = 1; + else + dst_bufsize = len; + + ec = rb_econv_open(sname, dname, 0); + if (ec == NULL) + return NULL; + if (caller_dst_buf) + dst_str = caller_dst_buf; + else + dst_str = xmalloc(dst_bufsize); + dst_len = 0; + sp = str; + dp = dst_str+dst_len; + res = rb_econv_convert(ec, &sp, str+len, &dp, dst_str+dst_bufsize, 0); + dst_len = dp - dst_str; + while (res == econv_destination_buffer_full) { + if (SIZE_MAX/2 < dst_bufsize) { + goto fail; + } + dst_bufsize *= 2; + if (dst_str == caller_dst_buf) { + unsigned char
*tmp; + tmp = xmalloc(dst_bufsize); + memcpy(tmp, dst_str, dst_bufsize/2); + dst_str = tmp; + } + else { + dst_str = xrealloc(dst_str, dst_bufsize); + } + dp = dst_str+dst_len; + res = rb_econv_convert(ec, &sp, str+len, &dp, dst_str+dst_bufsize, 0); + dst_len = dp - dst_str; + } + if (res != econv_finished) { + goto fail; + } + rb_econv_close(ec); + *dst_len_ptr = dst_len; + return dst_str; + + fail: + if (dst_str != caller_dst_buf) + xfree(dst_str); + rb_econv_close(ec); + return NULL; +} + +/* result: 0:success -1:failure */ +int +rb_econv_insert_output(rb_econv_t *ec, + const unsigned char *str, size_t len, const char *str_encoding) +{ + const char *insert_encoding = rb_econv_encoding_to_insert_output(ec); + unsigned char insert_buf[4096]; + const unsigned char *insert_str = NULL; + size_t insert_len; + + int last_trans_index; + rb_transcoding *tc; + + unsigned char **buf_start_p; + unsigned char **data_start_p; + unsigned char **data_end_p; + unsigned char **buf_end_p; + + size_t need; + + ec->started = 1; + + if (len == 0) + return 0; + + if (encoding_equal(insert_encoding, str_encoding)) { + insert_str = str; + insert_len = len; + } + else { + insert_str = allocate_converted_string(str_encoding, insert_encoding, + str, len, insert_buf, sizeof(insert_buf), &insert_len); + if (insert_str == NULL) + return -1; + } + + need = insert_len; + + last_trans_index = ec->num_trans-1; + if (ec->num_trans == 0) { + tc = NULL; + buf_start_p = &ec->in_buf_start; + data_start_p = &ec->in_data_start; + data_end_p = &ec->in_data_end; + buf_end_p = &ec->in_buf_end; + } + else if (ec->elems[last_trans_index].tc->transcoder->asciicompat_type == asciicompat_encoder) { + tc = ec->elems[last_trans_index].tc; + need += tc->readagain_len; + if (need < insert_len) + goto fail; + if (last_trans_index == 0) { + buf_start_p = &ec->in_buf_start; + data_start_p = &ec->in_data_start; + data_end_p = &ec->in_data_end; + buf_end_p = &ec->in_buf_end; + } + else { + rb_econv_elem_t *ee = 
&ec->elems[last_trans_index-1]; + buf_start_p = &ee->out_buf_start; + data_start_p = &ee->out_data_start; + data_end_p = &ee->out_data_end; + buf_end_p = &ee->out_buf_end; + } + } + else { + rb_econv_elem_t *ee = &ec->elems[last_trans_index]; + buf_start_p = &ee->out_buf_start; + data_start_p = &ee->out_data_start; + data_end_p = &ee->out_data_end; + buf_end_p = &ee->out_buf_end; + tc = ec->elems[last_trans_index].tc; + } + + if (*buf_start_p == NULL) { + unsigned char *buf = xmalloc(need); + *buf_start_p = buf; + *data_start_p = buf; + *data_end_p = buf; + *buf_end_p = buf+need; + } + else if (*buf_end_p - *data_end_p < need) { + MEMMOVE(*buf_start_p, *data_start_p, unsigned char, *data_end_p - *data_start_p); + *data_end_p = *buf_start_p + (*data_end_p - *data_start_p); + *data_start_p = *buf_start_p; + if (*buf_end_p - *data_end_p < need) { + unsigned char *buf; + size_t s = (*data_end_p - *buf_start_p) + need; + if (s < need) + goto fail; + buf = xrealloc(*buf_start_p, s); + *data_start_p = buf; + *data_end_p = buf + (*data_end_p - *buf_start_p); + *buf_start_p = buf; + *buf_end_p = buf + s; + } + } + + memcpy(*data_end_p, insert_str, insert_len); + *data_end_p += insert_len; + if (tc && tc->transcoder->asciicompat_type == asciicompat_encoder) { + memcpy(*data_end_p, TRANSCODING_READBUF(tc)+tc->recognized_len, tc->readagain_len); + *data_end_p += tc->readagain_len; + tc->readagain_len = 0; + } + + if (insert_str != str && insert_str != insert_buf) + xfree((void*)insert_str); + return 0; + + fail: + if (insert_str != str && insert_str != insert_buf) + xfree((void*)insert_str); + return -1; +} + +/* + * Destroy a converter and everything it owns: the replacement string + * (only when replacement_allocated is set), every transcoding in the + * chain together with its output buffer, the converter-level input + * buffer, the element array, and finally ec itself. + * ec must not be used after this call. + */ +void +rb_econv_close(rb_econv_t *ec) +{ + int i; + + if (ec->replacement_allocated) { + xfree((void *)ec->replacement_str); + } + for (i = 0; i < ec->num_trans; i++) { + rb_transcoding_close(ec->elems[i].tc); + if (ec->elems[i].out_buf_start) + xfree(ec->elems[i].out_buf_start); + } + xfree(ec->in_buf_start); + xfree(ec->elems); + xfree(ec); +} + +int
+rb_econv_putbackable(rb_econv_t *ec) +{ + if (ec->num_trans == 0) + return 0; + return ec->elems[0].tc->readagain_len; +} + +void +rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n) +{ + rb_transcoding *tc; + if (ec->num_trans == 0 || n == 0) + return; + tc = ec->elems[0].tc; + memcpy(p, TRANSCODING_READBUF(tc) + tc->recognized_len + tc->readagain_len - n, n); + tc->readagain_len -= n; +} + +struct asciicompat_encoding_t { + const char *ascii_compat_name; + const char *ascii_incompat_name; +}; + +static int +asciicompat_encoding_i(st_data_t key, st_data_t val, st_data_t arg) +{ + struct asciicompat_encoding_t *data = (struct asciicompat_encoding_t *)arg; + transcoder_entry_t *entry = (transcoder_entry_t *)val; + const rb_transcoder *tr; + + if (DECORATOR_P(entry->sname, entry->dname)) + return ST_CONTINUE; + tr = load_transcoder_entry(entry); + if (tr && tr->asciicompat_type == asciicompat_decoder) { + data->ascii_compat_name = tr->dst_encoding; + return ST_STOP; + } + return ST_CONTINUE; +} + +const char * +rb_econv_asciicompat_encoding(const char *ascii_incompat_name) +{ + st_data_t v; + st_table *table2; + struct asciicompat_encoding_t data; + + if (!st_lookup(transcoder_table, (st_data_t)ascii_incompat_name, &v)) + return NULL; + table2 = (st_table *)v; + + /* + * Assumption: + * There is at most one transcoder for + * converting from ASCII incompatible encoding. + * + * For ISO-2022-JP, there is ISO-2022-JP -> stateless-ISO-2022-JP and no others. 
+ */ + if (table2->num_entries != 1) + return NULL; + + data.ascii_incompat_name = ascii_incompat_name; + data.ascii_compat_name = NULL; + st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data); + return data.ascii_compat_name; +} + +VALUE +rb_econv_substr_append(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags) +{ + unsigned const char *ss, *sp, *se; + unsigned char *ds, *dp, *de; + rb_econv_result_t res; + int max_output; + + if (NIL_P(dst)) { + dst = rb_str_buf_new(len); + if (ec->destination_encoding) + rb_enc_associate(dst, ec->destination_encoding); + } + + if (ec->last_tc) + max_output = ec->last_tc->transcoder->max_output; + else + max_output = 1; + + res = econv_destination_buffer_full; + while (res == econv_destination_buffer_full) { + long dlen = RSTRING_LEN(dst); + if (rb_str_capacity(dst) - dlen < (size_t)len + max_output) { + unsigned long new_capa = (unsigned long)dlen + len + max_output; + if (LONG_MAX < new_capa) + rb_raise(rb_eArgError, "too long string"); + rb_str_resize(dst, new_capa); + rb_str_set_len(dst, dlen); + } + ss = sp = (const unsigned char *)RSTRING_PTR(src) + off; + se = ss + len; + ds = (unsigned char *)RSTRING_PTR(dst); + de = ds + rb_str_capacity(dst); + dp = ds += dlen; + res = rb_econv_convert(ec, &sp, se, &dp, de, flags); + off += sp - ss; + len -= sp - ss; + rb_str_set_len(dst, dlen + (dp - ds)); + rb_econv_check_error(ec); + } + + return dst; +} + +VALUE +rb_econv_str_append(rb_econv_t *ec, VALUE src, VALUE dst, int flags) +{ + return rb_econv_substr_append(ec, src, 0, RSTRING_LEN(src), dst, flags); +} + +VALUE +rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags) +{ + return rb_econv_substr_append(ec, src, byteoff, bytesize, Qnil, flags); +} + +VALUE +rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags) +{ + return rb_econv_substr_append(ec, src, 0, RSTRING_LEN(src), Qnil, flags); +} + +static int +rb_econv_add_converter(rb_econv_t *ec, const char *sname, 
const char *dname, int n) +{ + transcoder_entry_t *entry; + const rb_transcoder *tr; + + if (ec->started != 0) + return -1; + + entry = get_transcoder_entry(sname, dname); + if (!entry) + return -1; + + tr = load_transcoder_entry(entry); + + return rb_econv_add_transcoder_at(ec, tr, n); +} + +static int +rb_econv_decorate_at(rb_econv_t *ec, const char *decorator_name, int n) +{ + return rb_econv_add_converter(ec, "", decorator_name, n); +} + +int +rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name) +{ + const rb_transcoder *tr; + + if (ec->num_trans == 0) + return rb_econv_decorate_at(ec, decorator_name, 0); + + tr = ec->elems[0].tc->transcoder; + + if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) && + tr->asciicompat_type == asciicompat_decoder) + return rb_econv_decorate_at(ec, decorator_name, 1); + + return rb_econv_decorate_at(ec, decorator_name, 0); +} + +int +rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name) +{ + const rb_transcoder *tr; + + if (ec->num_trans == 0) + return rb_econv_decorate_at(ec, decorator_name, 0); + + tr = ec->elems[ec->num_trans-1].tc->transcoder; + + if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) && + tr->asciicompat_type == asciicompat_encoder) + return rb_econv_decorate_at(ec, decorator_name, ec->num_trans-1); + + return rb_econv_decorate_at(ec, decorator_name, ec->num_trans); +} + +void +rb_econv_binmode(rb_econv_t *ec) +{ + const rb_transcoder *trs[3]; + int n, i, j; + transcoder_entry_t *entry; + int num_trans; + + n = 0; + if (ec->flags & ECONV_UNIVERSAL_NEWLINE_DECORATOR) { + entry = get_transcoder_entry("", "universal_newline"); + if (entry->transcoder) + trs[n++] = entry->transcoder; + } + if (ec->flags & ECONV_CRLF_NEWLINE_DECORATOR) { + entry = get_transcoder_entry("", "crlf_newline"); + if (entry->transcoder) + trs[n++] = entry->transcoder; + } + if (ec->flags & ECONV_CR_NEWLINE_DECORATOR) { + entry = get_transcoder_entry("", "cr_newline"); + if (entry->transcoder) + trs[n++] 
= entry->transcoder; + } + + num_trans = ec->num_trans; + j = 0; + for (i = 0; i < num_trans; i++) { + int k; + for (k = 0; k < n; k++) + if (trs[k] == ec->elems[i].tc->transcoder) + break; + if (k == n) { + ec->elems[j] = ec->elems[i]; + j++; + } + else { + rb_transcoding_close(ec->elems[i].tc); + xfree(ec->elems[i].out_buf_start); + ec->num_trans--; + } + } + + ec->flags &= ~(ECONV_UNIVERSAL_NEWLINE_DECORATOR|ECONV_CRLF_NEWLINE_DECORATOR|ECONV_CR_NEWLINE_DECORATOR); + +} + +static VALUE +econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg) +{ + int has_description = 0; + + if (NIL_P(mesg)) + mesg = rb_str_new(NULL, 0); + + if (*sname != '\0' || *dname != '\0') { + if (*sname == '\0') + rb_str_cat2(mesg, dname); + else if (*dname == '\0') + rb_str_cat2(mesg, sname); + else + rb_str_catf(mesg, "%s to %s", sname, dname); + has_description = 1; + } + + if (ecflags & (ECONV_UNIVERSAL_NEWLINE_DECORATOR| + ECONV_CRLF_NEWLINE_DECORATOR| + ECONV_CR_NEWLINE_DECORATOR| + ECONV_XML_TEXT_DECORATOR| + ECONV_XML_ATTR_CONTENT_DECORATOR| + ECONV_XML_ATTR_QUOTE_DECORATOR)) { + const char *pre = ""; + if (has_description) + rb_str_cat2(mesg, " with "); + if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR) { + rb_str_cat2(mesg, pre); pre = ","; + rb_str_cat2(mesg, "universal_newline"); + } + if (ecflags & ECONV_CRLF_NEWLINE_DECORATOR) { + rb_str_cat2(mesg, pre); pre = ","; + rb_str_cat2(mesg, "crlf_newline"); + } + if (ecflags & ECONV_CR_NEWLINE_DECORATOR) { + rb_str_cat2(mesg, pre); pre = ","; + rb_str_cat2(mesg, "cr_newline"); + } + if (ecflags & ECONV_XML_TEXT_DECORATOR) { + rb_str_cat2(mesg, pre); pre = ","; + rb_str_cat2(mesg, "xml_text"); + } + if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR) { + rb_str_cat2(mesg, pre); pre = ","; + rb_str_cat2(mesg, "xml_attr_content"); + } + if (ecflags & ECONV_XML_ATTR_QUOTE_DECORATOR) { + rb_str_cat2(mesg, pre); pre = ","; + rb_str_cat2(mesg, "xml_attr_quote"); + } + has_description = 1; + } + if (!has_description) 
{ + rb_str_cat2(mesg, "no-conversion"); + } + + return mesg; +} + +/* + * Build (but do not raise) the exception for a converter that could not + * be opened.  The message is "code converter not found (<description>)", + * where the description is produced by econv_description from the + * encoding names and decorator flags; the exception class is + * rb_eConverterNotFoundError. + */ +VALUE +rb_econv_open_exc(const char *sname, const char *dname, int ecflags) +{ + VALUE mesg, exc; + mesg = rb_str_new_cstr("code converter not found ("); + econv_description(sname, dname, ecflags, mesg); + rb_str_cat2(mesg, ")"); + exc = rb_exc_new3(rb_eConverterNotFoundError, mesg); + return exc; +} + +static VALUE +make_econv_exception(rb_econv_t *ec) +{ + VALUE mesg, exc; + if (ec->last_error.result == econv_invalid_byte_sequence || + ec->last_error.result == econv_incomplete_input) { + const char *err = (const char *)ec->last_error.error_bytes_start; + size_t error_len = ec->last_error.error_bytes_len; + VALUE bytes = rb_str_new(err, error_len); + VALUE dumped = rb_str_dump(bytes); + size_t readagain_len = ec->last_error.readagain_len; + VALUE bytes2 = Qnil; + VALUE dumped2; + int idx; + if (ec->last_error.result == econv_incomplete_input) { + mesg = rb_sprintf("incomplete %s on %s", + StringValueCStr(dumped), + ec->last_error.source_encoding); + } + else if (readagain_len) { + bytes2 = rb_str_new(err+error_len, readagain_len); + dumped2 = rb_str_dump(bytes2); + mesg = rb_sprintf("%s followed by %s on %s", + StringValueCStr(dumped), + StringValueCStr(dumped2), + ec->last_error.source_encoding); + } + else { + mesg = rb_sprintf("%s on %s", + StringValueCStr(dumped), + ec->last_error.source_encoding); + } + + exc = rb_exc_new3(rb_eInvalidByteSequenceError, mesg); + rb_ivar_set(exc, rb_intern("error_bytes"), bytes); + rb_ivar_set(exc, rb_intern("readagain_bytes"), bytes2); + rb_ivar_set(exc, rb_intern("incomplete_input"), ec->last_error.result == econv_incomplete_input ?
Qtrue : Qfalse); + + set_encs: + rb_ivar_set(exc, rb_intern("source_encoding_name"), rb_str_new2(ec->last_error.source_encoding)); + rb_ivar_set(exc, rb_intern("destination_encoding_name"), rb_str_new2(ec->last_error.destination_encoding)); + idx = rb_enc_find_index(ec->last_error.source_encoding); + if (0 <= idx) + rb_ivar_set(exc, rb_intern("source_encoding"), rb_enc_from_encoding(rb_enc_from_index(idx))); + idx = rb_enc_find_index(ec->last_error.destination_encoding); + if (0 <= idx) + rb_ivar_set(exc, rb_intern("destination_encoding"), rb_enc_from_encoding(rb_enc_from_index(idx))); + return exc; + } + if (ec->last_error.result == econv_undefined_conversion) { + VALUE bytes = rb_str_new((const char *)ec->last_error.error_bytes_start, + ec->last_error.error_bytes_len); + VALUE dumped; + int idx; + dumped = rb_str_dump(bytes); + mesg = rb_sprintf("%s from %s to %s", + StringValueCStr(dumped), + ec->last_error.source_encoding, + ec->last_error.destination_encoding); + if (strcmp(ec->last_error.source_encoding, + ec->source_encoding_name) != 0 || + strcmp(ec->last_error.destination_encoding, + ec->destination_encoding_name) != 0) { + rb_str_catf(mesg, " in conversion from %s to %s", + ec->source_encoding_name, + ec->destination_encoding_name); + } + exc = rb_exc_new3(rb_eUndefinedConversionError, mesg); + idx = rb_enc_find_index(ec->last_error.source_encoding); + if (0 <= idx) + rb_enc_associate_index(bytes, idx); + rb_ivar_set(exc, rb_intern("error_char"), bytes); + goto set_encs; + } + return Qnil; +} + +/* + * Grow the output storage of `destination`. + * + * The number of bytes already written (*out_pos - *out_start_ptr) is + * kept; the new size is (written + max_output) * 2.  resize_destination + * is called with the written length and the new size and returns the + * new start of the storage, after which all three pointers are + * re-based onto it. + * + * NOTE(review): both lengths are size_t here but are passed through the + * callback's int parameters - confirm callers never exceed INT_MAX. + */ +static void +more_output_buffer( + VALUE destination, + unsigned char *(*resize_destination)(VALUE, int, int), + int max_output, + unsigned char **out_start_ptr, + unsigned char **out_pos, + unsigned char **out_stop_ptr) +{ + size_t len = (*out_pos - *out_start_ptr); + size_t new_len = (len + max_output) * 2; + *out_start_ptr = resize_destination(destination, len, new_len); + *out_pos = *out_start_ptr + len; + *out_stop_ptr = *out_start_ptr + new_len; +} + +static
int +make_replacement(rb_econv_t *ec) +{ + rb_transcoding *tc; + const rb_transcoder *tr; + rb_encoding *enc; + const unsigned char *replacement; + const char *repl_enc; + const char *ins_enc; + size_t len; + + if (ec->replacement_str) + return 0; + + ins_enc = rb_econv_encoding_to_insert_output(ec); + + tc = ec->last_tc; + if (*ins_enc) { + tr = tc->transcoder; + enc = rb_enc_find(tr->dst_encoding); + replacement = (const unsigned char *)get_replacement_character(ins_enc, &len, &repl_enc); + } + else { + replacement = (unsigned char *)"?"; + len = 1; + repl_enc = ""; + } + + ec->replacement_str = replacement; + ec->replacement_len = len; + ec->replacement_enc = repl_enc; + ec->replacement_allocated = 0; + return 0; +} + +int +rb_econv_set_replacement(rb_econv_t *ec, + const unsigned char *str, size_t len, const char *encname) +{ + unsigned char *str2; + size_t len2; + const char *encname2; + + encname2 = rb_econv_encoding_to_insert_output(ec); + + if (encoding_equal(encname, encname2)) { + str2 = xmalloc(len); + MEMCPY(str2, str, unsigned char, len); /* xxx: str may be invalid */ + len2 = len; + encname2 = encname; + } + else { + str2 = allocate_converted_string(encname, encname2, str, len, NULL, 0, &len2); + if (!str2) + return -1; + } + + if (ec->replacement_allocated) { + xfree((void *)ec->replacement_str); + } + ec->replacement_allocated = 1; + ec->replacement_str = str2; + ec->replacement_len = len2; + ec->replacement_enc = encname2; + return 0; +} + +static int +output_replacement_character(rb_econv_t *ec) +{ + int ret; + + if (make_replacement(ec) == -1) + return -1; + + ret = rb_econv_insert_output(ec, ec->replacement_str, ec->replacement_len, ec->replacement_enc); + if (ret == -1) + return -1; + + return 0; +} + +#if 1 +static void +transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, + const unsigned char *in_stop, unsigned char *out_stop, + VALUE destination, + unsigned char *(*resize_destination)(VALUE, int, int), + const char 
*src_encoding, + const char *dst_encoding, + int ecflags, + VALUE ecopts) +{ + rb_econv_t *ec; + rb_transcoding *last_tc; + rb_econv_result_t ret; + unsigned char *out_start = *out_pos; + int max_output; + VALUE exc; + + ec = rb_econv_open_opts(src_encoding, dst_encoding, ecflags, ecopts); + if (!ec) + rb_exc_raise(rb_econv_open_exc(src_encoding, dst_encoding, ecflags)); + + last_tc = ec->last_tc; + max_output = last_tc ? last_tc->transcoder->max_output : 1; + + resume: + ret = rb_econv_convert(ec, in_pos, in_stop, out_pos, out_stop, 0); + + if (ret == econv_invalid_byte_sequence || + ret == econv_incomplete_input || + ret == econv_undefined_conversion) { + exc = make_econv_exception(ec); + rb_econv_close(ec); + rb_exc_raise(exc); + } + + if (ret == econv_destination_buffer_full) { + more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop); + goto resume; + } + + rb_econv_close(ec); + return; +} +#else +/* sample transcode_loop implementation in byte-by-byte stream style */ +static void +transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, + const unsigned char *in_stop, unsigned char *out_stop, + VALUE destination, + unsigned char *(*resize_destination)(VALUE, int, int), + const char *src_encoding, + const char *dst_encoding, + int ecflags, + VALUE ecopts) +{ + rb_econv_t *ec; + rb_transcoding *last_tc; + rb_econv_result_t ret; + unsigned char *out_start = *out_pos; + const unsigned char *ptr; + int max_output; + VALUE exc; + + ec = rb_econv_open_opts(src_encoding, dst_encoding, ecflags, ecopts); + if (!ec) + rb_exc_raise(rb_econv_open_exc(src_encoding, dst_encoding, ecflags)); + + last_tc = ec->last_tc; + max_output = last_tc ? 
last_tc->transcoder->max_output : 1; + + ret = econv_source_buffer_empty; + ptr = *in_pos; + while (ret != econv_finished) { + unsigned char input_byte; + const unsigned char *p = &input_byte; + + if (ret == econv_source_buffer_empty) { + if (ptr < in_stop) { + input_byte = *ptr; + ret = rb_econv_convert(ec, &p, p+1, out_pos, out_stop, ECONV_PARTIAL_INPUT); + } + else { + ret = rb_econv_convert(ec, NULL, NULL, out_pos, out_stop, 0); + } + } + else { + ret = rb_econv_convert(ec, NULL, NULL, out_pos, out_stop, ECONV_PARTIAL_INPUT); + } + if (&input_byte != p) + ptr += p - &input_byte; + switch (ret) { + case econv_invalid_byte_sequence: + case econv_incomplete_input: + case econv_undefined_conversion: + exc = make_econv_exception(ec); + rb_econv_close(ec); + rb_exc_raise(exc); + break; + + case econv_destination_buffer_full: + more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop); + break; + + case econv_source_buffer_empty: + break; + + case econv_finished: + break; + } + } + rb_econv_close(ec); + *in_pos = in_stop; + return; +} +#endif + + +/* + * String-specific code + */ + +/* + * resize_destination callback used when the transcoding destination is + * a String: resizes the string to new_len bytes and returns a pointer + * to its (possibly relocated) data.  The len argument is not used. + */ +static unsigned char * +str_transcoding_resize(VALUE destination, int len, int new_len) +{ + rb_str_resize(destination, new_len); + return (unsigned char *)RSTRING_PTR(destination); +} + +static int +econv_opts(VALUE opt) +{ + VALUE v; + int ecflags = 0; + + v = rb_hash_aref(opt, sym_invalid); + if (NIL_P(v)) { + } + else if (v==sym_replace) { + ecflags |= ECONV_INVALID_REPLACE; + } + else { + rb_raise(rb_eArgError, "unknown value for invalid character option"); + } + + v = rb_hash_aref(opt, sym_undef); + if (NIL_P(v)) { + } + else if (v==sym_replace) { + ecflags |= ECONV_UNDEF_REPLACE; + } + else { + rb_raise(rb_eArgError, "unknown value for undefined character option"); + } + + v = rb_hash_aref(opt, sym_xml); + if (!NIL_P(v)) { + if (v==sym_text) { + ecflags |= ECONV_XML_TEXT_DECORATOR|ECONV_UNDEF_HEX_CHARREF; + } + else if (v==sym_attr) { + ecflags
|= ECONV_XML_ATTR_CONTENT_DECORATOR|ECONV_XML_ATTR_QUOTE_DECORATOR|ECONV_UNDEF_HEX_CHARREF; + } + else if (TYPE(v) == T_SYMBOL) { + rb_raise(rb_eArgError, "unexpected value for xml option: %s", rb_id2name(SYM2ID(v))); + } + else { + rb_raise(rb_eArgError, "unexpected value for xml option"); + } + } + + v = rb_hash_aref(opt, sym_universal_newline); + if (RTEST(v)) + ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR; + + v = rb_hash_aref(opt, sym_crlf_newline); + if (RTEST(v)) + ecflags |= ECONV_CRLF_NEWLINE_DECORATOR; + + v = rb_hash_aref(opt, sym_cr_newline); + if (RTEST(v)) + ecflags |= ECONV_CR_NEWLINE_DECORATOR; + + return ecflags; +} +
/*
 * Turn a user-supplied options hash into converter flags plus a frozen
 * options hash suitable for rb_econv_open_opts.
 *
 * A nil `opthash` yields flags 0 and *opts = nil.  Otherwise the flags
 * come from econv_opts.  When a :replace value is present it is coerced
 * to a String, rejected with ArgumentError if its code range is broken,
 * and stored (frozen) under :replace in a new frozen hash, which becomes
 * *opts; without :replace, *opts is nil.
 *
 * Returns the converter flags.
 */
int
rb_econv_prepare_opts(VALUE opthash, VALUE *opts)
{
    int ecflags;
    VALUE prepared = Qnil;
    VALUE repl;

    if (NIL_P(opthash)) {
        *opts = Qnil;
        return 0;
    }

    ecflags = econv_opts(opthash);

    repl = rb_hash_aref(opthash, sym_replace);
    if (!NIL_P(repl)) {
        StringValue(repl);
        if (rb_enc_str_coderange(repl) == ENC_CODERANGE_BROKEN) {
            VALUE dumped = rb_str_dump(repl);
            rb_raise(rb_eArgError, "replacement string is broken: %s as %s",
                     StringValueCStr(dumped),
                     rb_enc_name(rb_enc_get(repl)));
        }
        repl = rb_str_new_frozen(repl);
        prepared = rb_hash_new();
        rb_hash_aset(prepared, sym_replace, repl);
        rb_hash_freeze(prepared);
    }

    *opts = prepared;
    return ecflags;
}

+rb_econv_t * +rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE opthash) +{ + rb_econv_t *ec; + VALUE replacement; + + if (NIL_P(opthash)) { + replacement = Qnil; + } + else { + if (TYPE(opthash) != T_HASH || !OBJ_FROZEN(opthash)) + rb_bug("rb_econv_open_opts called with invalid opthash"); + replacement = rb_hash_aref(opthash, sym_replace); + } + + ec = rb_econv_open(source_encoding, destination_encoding, ecflags); + if (!ec) + return ec; + + if (!NIL_P(replacement)) { + int ret; + rb_encoding *enc = rb_enc_get(replacement); + + ret = rb_econv_set_replacement(ec, + (const unsigned char
*)RSTRING_PTR(replacement), + RSTRING_LEN(replacement), + rb_enc_name(enc)); + if (ret == -1) { + rb_econv_close(ec); + return NULL; + } + } + return ec; +} + +static int +enc_arg(volatile VALUE *arg, const char **name_p, rb_encoding **enc_p) +{ + rb_encoding *enc; + const char *n; + int encidx; + VALUE encval; + + if (((encidx = rb_to_encoding_index(encval = *arg)) < 0) || + !(enc = rb_enc_from_index(encidx))) { + enc = NULL; + encidx = 0; + n = StringValueCStr(*arg); + } + else { + n = rb_enc_name(enc); + } + + *name_p = n; + *enc_p = enc; + + return encidx; +} + +static int +str_transcode_enc_args(VALUE str, volatile VALUE *arg1, volatile VALUE *arg2, + const char **sname_p, rb_encoding **senc_p, + const char **dname_p, rb_encoding **denc_p) +{ + rb_encoding *senc, *denc; + const char *sname, *dname; + int sencidx, dencidx; + + dencidx = enc_arg(arg1, &dname, &denc); + + if (NIL_P(*arg2)) { + sencidx = rb_enc_get_index(str); + senc = rb_enc_from_index(sencidx); + sname = rb_enc_name(senc); + } + else { + sencidx = enc_arg(arg2, &sname, &senc); + } + + *sname_p = sname; + *senc_p = senc; + *dname_p = dname; + *denc_p = denc; + return dencidx; +} + +static int +str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts) +{ + VALUE dest; + VALUE str = *self; + volatile VALUE arg1, arg2; + long blen, slen; + unsigned char *buf, *bp, *sp; + const unsigned char *fromp; + rb_encoding *senc, *denc; + const char *sname, *dname; + int dencidx; + + if (argc <0 || argc > 2) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 0..2)", argc); + } + + if (argc == 0) { + arg1 = rb_enc_default_internal(); + if (NIL_P(arg1)) { + return -1; + } + ecflags |= ECONV_INVALID_REPLACE | ECONV_UNDEF_REPLACE; + } + else { + arg1 = argv[0]; + } + arg2 = argc<=1 ? 
Qnil : argv[1]; + dencidx = str_transcode_enc_args(str, &arg1, &arg2, &sname, &senc, &dname, &denc); + + if ((ecflags & (ECONV_UNIVERSAL_NEWLINE_DECORATOR| + ECONV_CRLF_NEWLINE_DECORATOR| + ECONV_CR_NEWLINE_DECORATOR| + ECONV_XML_TEXT_DECORATOR| + ECONV_XML_ATTR_CONTENT_DECORATOR| + ECONV_XML_ATTR_QUOTE_DECORATOR)) == 0) { + if (senc && senc == denc) { + return NIL_P(arg2) ? -1 : dencidx; + } + if (senc && denc && rb_enc_asciicompat(senc) && rb_enc_asciicompat(denc)) { + if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) { + return dencidx; + } + } + if (encoding_equal(sname, dname)) { + return NIL_P(arg2) ? -1 : dencidx; + } + } + else { + if (encoding_equal(sname, dname)) { + sname = ""; + dname = ""; + } + } + + fromp = sp = (unsigned char *)RSTRING_PTR(str); + slen = RSTRING_LEN(str); + blen = slen + 30; /* len + margin */ + dest = rb_str_tmp_new(blen); + bp = (unsigned char *)RSTRING_PTR(dest); + + transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), dest, str_transcoding_resize, sname, dname, ecflags, ecopts); + if (fromp != sp+slen) { + rb_raise(rb_eArgError, "not fully converted, %"PRIdPTRDIFF" bytes left", sp+slen-fromp); + } + buf = (unsigned char *)RSTRING_PTR(dest); + *bp = '\0'; + rb_str_set_len(dest, bp - buf); + + /* set encoding */ + if (!denc) { + dencidx = rb_define_dummy_encoding(dname); + } + *self = dest; + + return dencidx; +} + +static int +str_transcode(int argc, VALUE *argv, VALUE *self) +{ + VALUE opt; + int ecflags = 0; + VALUE ecopts = Qnil; + + if (0 < argc) { + opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash"); + if (!NIL_P(opt)) { + argc--; + ecflags = rb_econv_prepare_opts(opt, &ecopts); + } + } + return str_transcode0(argc, argv, self, ecflags, ecopts); +} + +static inline VALUE +str_encode_associate(VALUE str, int encidx) +{ + int cr = 0; + + rb_enc_associate_index(str, encidx); + + /* transcoded string never be broken. 
*/ + if (rb_enc_asciicompat(rb_enc_from_index(encidx))) { + rb_str_coderange_scan_restartable(RSTRING_PTR(str), RSTRING_END(str), 0, &cr); + } + else { + cr = ENC_CODERANGE_VALID; + } + ENC_CODERANGE_SET(str, cr); + return str; +} + +/* + * call-seq: + * str.encode!(encoding [, options] ) => str + * str.encode!(dst_encoding, src_encoding [, options] ) => str + * + * The first form transcodes the contents of str from + * str.encoding to +encoding+. + * The second form transcodes the contents of str from + * src_encoding to dst_encoding. + * The options Hash gives details for conversion. See String#encode + * for details. + * Returns the string even if no changes were made. + */ + +static VALUE +str_encode_bang(int argc, VALUE *argv, VALUE str) +{ + VALUE newstr = str; + int encidx = str_transcode(argc, argv, &newstr); + + if (encidx < 0) return str; + rb_str_shared_replace(str, newstr); + return str_encode_associate(str, encidx); +} + +/* + * call-seq: + * str.encode(encoding [, options] ) => str + * str.encode(dst_encoding, src_encoding [, options] ) => str + * str.encode([options]) => str + * + * The first form returns a copy of str transcoded + * to encoding +encoding+. + * The second form returns a copy of str transcoded + * from src_encoding to dst_encoding. + * The last form returns a copy of str transcoded to + * Encoding.default_internal. + * By default, the first and second form raise + * Encoding::UndefinedConversionError for characters that are + * undefined in the destination encoding, and + * Encoding::InvalidByteSequenceError for invalid byte sequences + * in the source encoding. The last form by default does not raise + * exceptions but uses replacement strings. + * The options Hash gives details for conversion. + * + * === options + * The hash options can have the following keys: + * :invalid :: + * If the value is :replace, #encode replaces + * invalid byte sequences in str with the replacement character. 
+ * The default is to raise the exception + * :undef :: + * If the value is :replace, #encode replaces + * characters which are undefined in the destination encoding with + * the replacement character. + * :replace :: + * Sets the replacement string to the value. The default replacement + * string is "\uFFFD" for Unicode encoding forms, and "?" otherwise. + * :xml :: + * The value must be :text or :attr. + * If the value is :text #encode replaces + * undefined characters with their (upper-case hexadecimal) numeric + * character references. '&', '<', and '>' are converted to "&", + * "<", and ">", respectively. + * If the value is :attr, #encode also quotes + * the replacement result (using '"'), and replaces '"' with """. + * :cr_newline :: + * Replaces LF ("\n") with CR ("\r") if value is true. + * :crlf_newline :: + * Replaces LF ("\n") with CRLF ("\r\n") if value is true. + * :universal_newline :: + * Replaces CRLF ("\r\n") and CR ("\r") with LF ("\n") if value is true. + */ + +static VALUE +str_encode(int argc, VALUE *argv, VALUE str) +{ + VALUE newstr = str; + int encidx = str_transcode(argc, argv, &newstr); + + if (encidx < 0) return rb_str_dup(str); + if (newstr == str) { + newstr = rb_str_dup(str); + } + else { + RBASIC(newstr)->klass = rb_obj_class(str); + } + return str_encode_associate(newstr, encidx); +} + +VALUE +rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts) +{ + int argc = 1; + VALUE *argv = &to; + VALUE newstr = str; + int encidx = str_transcode0(argc, argv, &newstr, ecflags, ecopts); + + if (encidx < 0) return rb_str_dup(str); + RBASIC(newstr)->klass = rb_obj_class(str); + return str_encode_associate(newstr, encidx); +} + +static void +econv_free(rb_econv_t *ec) +{ + rb_econv_close(ec); +} + +static VALUE +econv_s_allocate(VALUE klass) +{ + return Data_Wrap_Struct(klass, NULL, econv_free, NULL); +} + +static rb_encoding * +make_dummy_encoding(const char *name) +{ + rb_encoding *enc; + int idx; + idx = rb_define_dummy_encoding(name); 
+ enc = rb_enc_from_index(idx); + return enc; +} + +static rb_encoding * +make_encoding(const char *name) +{ + rb_encoding *enc; + enc = rb_enc_find(name); + if (!enc) + enc = make_dummy_encoding(name); + return enc; +} + +static VALUE +make_encobj(const char *name) +{ + return rb_enc_from_encoding(make_encoding(name)); +} + +/* + * call-seq: + * Encoding::Converter.asciicompat_encoding(string) => encoding or nil + * Encoding::Converter.asciicompat_encoding(encoding) => encoding or nil + * + * Returns the corresponding ASCII compatible encoding. + * + * Returns nil if the argument is an ASCII compatible encoding. + * + * "corresponding ASCII compatible encoding" is a ASCII compatible encoding which + * can represents exactly the same characters as the given ASCII incompatible encoding. + * So, no conversion undefined error occurs when converting between the two encodings. + * + * Encoding::Converter.asciicompat_encoding("ISO-2022-JP") #=> # + * Encoding::Converter.asciicompat_encoding("UTF-16BE") #=> # + * Encoding::Converter.asciicompat_encoding("UTF-8") #=> nil + * + */ +static VALUE +econv_s_asciicompat_encoding(VALUE klass, VALUE arg) +{ + const char *arg_name, *result_name; + rb_encoding *arg_enc, *result_enc; + + enc_arg(&arg, &arg_name, &arg_enc); + + result_name = rb_econv_asciicompat_encoding(arg_name); + + if (result_name == NULL) + return Qnil; + + result_enc = make_encoding(result_name); + + return rb_enc_from_encoding(result_enc); +} + +static void +econv_args(int argc, VALUE *argv, + volatile VALUE *snamev_p, volatile VALUE *dnamev_p, + const char **sname_p, const char **dname_p, + rb_encoding **senc_p, rb_encoding **denc_p, + int *ecflags_p, + VALUE *ecopts_p) +{ + VALUE opt, opthash, flags_v, ecopts; + int sidx, didx; + const char *sname, *dname; + rb_encoding *senc, *denc; + int ecflags; + + rb_scan_args(argc, argv, "21", snamev_p, dnamev_p, &opt); + + if (NIL_P(opt)) { + ecflags = 0; + ecopts = Qnil; + } + else if (!NIL_P(flags_v = 
rb_check_to_integer(opt, "to_int"))) { + ecflags = NUM2INT(flags_v); + ecopts = Qnil; + } + else { + opthash = rb_convert_type(opt, T_HASH, "Hash", "to_hash"); + ecflags = rb_econv_prepare_opts(opthash, &ecopts); + } + + senc = NULL; + sidx = rb_to_encoding_index(*snamev_p); + if (0 <= sidx) { + senc = rb_enc_from_index(sidx); + } + else { + StringValue(*snamev_p); + } + + denc = NULL; + didx = rb_to_encoding_index(*dnamev_p); + if (0 <= didx) { + denc = rb_enc_from_index(didx); + } + else { + StringValue(*dnamev_p); + } + + sname = senc ? rb_enc_name(senc) : StringValueCStr(*snamev_p); + dname = denc ? rb_enc_name(denc) : StringValueCStr(*dnamev_p); + + *sname_p = sname; + *dname_p = dname; + *senc_p = senc; + *denc_p = denc; + *ecflags_p = ecflags; + *ecopts_p = ecopts; +} + +static int +decorate_convpath(VALUE convpath, int ecflags) +{ + int num_decorators; + const char *decorators[MAX_ECFLAGS_DECORATORS]; + int i; + int n, len; + + num_decorators = decorator_names(ecflags, decorators); + if (num_decorators == -1) + return -1; + + len = n = RARRAY_LEN(convpath); + if (n != 0) { + VALUE pair = RARRAY_PTR(convpath)[n-1]; + if (TYPE(pair) == T_ARRAY) { + const char *sname = rb_enc_name(rb_to_encoding(RARRAY_PTR(pair)[0])); + const char *dname = rb_enc_name(rb_to_encoding(RARRAY_PTR(pair)[1])); + transcoder_entry_t *entry = get_transcoder_entry(sname, dname); + const rb_transcoder *tr = load_transcoder_entry(entry); + if (!tr) + return -1; + if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) && + tr->asciicompat_type == asciicompat_encoder) { + n--; + rb_ary_store(convpath, len + num_decorators - 1, pair); + } + } + else { + rb_ary_store(convpath, len + num_decorators - 1, pair); + } + } + + for (i = 0; i < num_decorators; i++) + rb_ary_store(convpath, n + i, rb_str_new_cstr(decorators[i])); + + return 0; +} + +static void +search_convpath_i(const char *sname, const char *dname, int depth, void *arg) +{ + VALUE *ary_p = arg; + VALUE v; + + if (*ary_p == Qnil) { + 
*ary_p = rb_ary_new(); + } + + if (DECORATOR_P(sname, dname)) { + v = rb_str_new_cstr(dname); + } + else { + v = rb_assoc_new(make_encobj(sname), make_encobj(dname)); + } + rb_ary_store(*ary_p, depth, v); +} + +/* + * call-seq: + * Encoding::Converter.search_convpath(source_encoding, destination_encoding) -> ary + * Encoding::Converter.search_convpath(source_encoding, destination_encoding, opt) -> ary + * + * Returns a conversion path. + * + * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP") + * #=> [[#, #], + * # [#, #]] + * + * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", universal_newline: true) + * #=> [[#, #], + * # [#, #], + * # "universal_newline"] + * + * p Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", universal_newline: true) + * #=> [[#, #], + * # "universal_newline", + * # [#, #]] + */ +static VALUE +econv_s_search_convpath(int argc, VALUE *argv, VALUE klass) +{ + volatile VALUE snamev, dnamev; + const char *sname, *dname; + rb_encoding *senc, *denc; + int ecflags; + VALUE ecopts; + VALUE convpath; + + econv_args(argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts); + + convpath = Qnil; + transcode_search_path(sname, dname, search_convpath_i, &convpath); + + if (NIL_P(convpath)) + rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags)); + + if (decorate_convpath(convpath, ecflags) == -1) + rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags)); + + return convpath; +} + +/* + * Check the existence of a conversion path. + * Returns the number of converters in the conversion path. 
+ * result: >=0:success -1:failure + */ +int +rb_econv_has_convpath_p(const char* from_encoding, const char* to_encoding) +{ + VALUE convpath = Qnil; + transcode_search_path(from_encoding, to_encoding, search_convpath_i, + &convpath); + return RTEST(convpath); +} + +struct rb_econv_init_by_convpath_t { + rb_econv_t *ec; + int index; + int ret; +}; + +void rb_econv_init_by_convpath_i(const char *sname, const char *dname, int depth, void *arg) +{ + struct rb_econv_init_by_convpath_t *a = (struct rb_econv_init_by_convpath_t *)arg; + int ret; + + if (a->ret == -1) + return; + + ret = rb_econv_add_converter(a->ec, sname, dname, a->index); + + a->ret = ret; + return; +} + +static rb_econv_t * +rb_econv_init_by_convpath(VALUE self, VALUE convpath, + const char **sname_p, const char **dname_p, + rb_encoding **senc_p, rb_encoding**denc_p) +{ + rb_econv_t *ec; + long i; + int ret, first=1; + VALUE elt; + rb_encoding *senc = 0, *denc = 0; + const char *sname, *dname; + + ec = rb_econv_alloc(RARRAY_LEN(convpath)); + DATA_PTR(self) = ec; + + for (i = 0; i < RARRAY_LEN(convpath); i++) { + volatile VALUE snamev, dnamev; + VALUE pair; + elt = rb_ary_entry(convpath, i); + if (!NIL_P(pair = rb_check_array_type(elt))) { + if (RARRAY_LEN(pair) != 2) + rb_raise(rb_eArgError, "not a 2-element array in convpath"); + snamev = rb_ary_entry(pair, 0); + enc_arg(&snamev, &sname, &senc); + dnamev = rb_ary_entry(pair, 1); + enc_arg(&dnamev, &dname, &denc); + } + else { + sname = ""; + dname = StringValueCStr(elt); + } + if (DECORATOR_P(sname, dname)) { + ret = rb_econv_add_converter(ec, sname, dname, ec->num_trans); + if (ret == -1) + rb_raise(rb_eArgError, "decoration failed: %s", dname); + } + else { + int j = ec->num_trans; + struct rb_econv_init_by_convpath_t arg; + arg.ec = ec; + arg.index = ec->num_trans; + arg.ret = 0; + ret = transcode_search_path(sname, dname, rb_econv_init_by_convpath_i, &arg); + if (ret == -1 || arg.ret == -1) + rb_raise(rb_eArgError, "adding conversion failed: %s to 
%s", sname, dname); + if (first) { + first = 0; + *senc_p = senc; + *sname_p = ec->elems[j].tc->transcoder->src_encoding; + } + *denc_p = denc; + *dname_p = ec->elems[ec->num_trans-1].tc->transcoder->dst_encoding; + } + } + + if (first) { + *senc_p = NULL; + *denc_p = NULL; + *sname_p = ""; + *dname_p = ""; + } + + ec->source_encoding_name = *sname_p; + ec->destination_encoding_name = *dname_p; + + return ec; +} + +/* + * call-seq: + * Encoding::Converter.new(source_encoding, destination_encoding) + * Encoding::Converter.new(source_encoding, destination_encoding, opt) + * Encoding::Converter.new(convpath) + * + * possible options elements: + * hash form: + * :invalid => nil # raise error on invalid byte sequence (default) + * :invalid => :replace # replace invalid byte sequence + * :undef => nil # raise error on undefined conversion (default) + * :undef => :replace # replace undefined conversion + * :replace => string # replacement string ("?" or "\uFFFD" if not specified) + * :universal_newline => true # decorator for converting CRLF and CR to LF + * :crlf_newline => true # decorator for converting LF to CRLF + * :cr_newline => true # decorator for converting LF to CR + * :xml => :text # escape as XML CharData. + * :xml => :attr # escape as XML AttValue + * integer form: + * Encoding::Converter::INVALID_REPLACE + * Encoding::Converter::UNDEF_REPLACE + * Encoding::Converter::UNDEF_HEX_CHARREF + * Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR + * Encoding::Converter::CRLF_NEWLINE_DECORATOR + * Encoding::Converter::CR_NEWLINE_DECORATOR + * Encoding::Converter::XML_TEXT_DECORATOR + * Encoding::Converter::XML_ATTR_CONTENT_DECORATOR + * Encoding::Converter::XML_ATTR_QUOTE_DECORATOR + * + * Encoding::Converter.new creates an instance of Encoding::Converter. + * + * Source_encoding and destination_encoding should be a string or + * Encoding object. + * + * opt should be nil, a hash or an integer. + * + * convpath should be an array. 
+ * convpath may contain + * - two-element arrays which contain encodings or encoding names, or + * - strings representing decorator names. + * + * Encoding::Converter.new optionally takes an option. + * The option should be a hash or an integer. + * The option hash can contain :invalid => nil, etc. + * The option integer should be logical-or of constants such as + * Encoding::Converter::INVALID_REPLACE, etc. + * + * [:invalid => nil] + * Raise error on invalid byte sequence. This is a default behavior. + * [:invalid => :replace] + * Replace invalid byte sequence by replacement string. + * [:undef => nil] + * Raise an error if a character in source_encoding is not defined in destination_encoding. + * This is a default behavior. + * [:undef => :replace] + * Replace undefined character in destination_encoding with replacement string. + * [:replace => string] + * Specify the replacement string. + * If not specified, "\uFFFD" is used for Unicode encodings and "?" for others. + * [:universal_newline => true] + * Convert CRLF and CR to LF. + * [:crlf_newline => true] + * Convert LF to CRLF. + * [:cr_newline => true] + * Convert LF to CR. + * [:xml => :text] + * Escape as XML CharData. + * This form can be used as a HTML 4.0 #PCDATA. + * - '&' -> '&' + * - '<' -> '<' + * - '>' -> '>' + * - undefined characters in destination_encoding -> hexadecimal CharRef such as &#xHH; + * [:xml => :attr] + * Escape as XML AttValue. + * The converted result is quoted as "...". + * This form can be used as a HTML 4.0 attribute value. + * - '&' -> '&' + * - '<' -> '<' + * - '>' -> '>' + * - '"' -> '"' + * - undefined characters in destination_encoding -> hexadecimal CharRef such as &#xHH; + * + * Examples: + * # UTF-16BE to UTF-8 + * ec = Encoding::Converter.new("UTF-16BE", "UTF-8") + * + * # Usually, decorators such as newline conversion are inserted last. 
+ * ec = Encoding::Converter.new("UTF-16BE", "UTF-8", :universal_newline => true) + * p ec.convpath #=> [[#, #], + * # "universal_newline"] + * + * # But, if the last encoding is ASCII incompatible, + * # decorators are inserted before the last conversion. + * ec = Encoding::Converter.new("UTF-8", "UTF-16BE", :crlf_newline => true) + * p ec.convpath #=> ["crlf_newline", + * # [#, #]] + * + * # Conversion path can be specified directly. + * ec = Encoding::Converter.new(["universal_newline", ["EUC-JP", "UTF-8"], ["UTF-8", "UTF-16BE"]]) + * p ec.convpath #=> ["universal_newline", + * # [#, #], + * # [#, #]] + */ +static VALUE +econv_init(int argc, VALUE *argv, VALUE self) +{ + VALUE ecopts; + volatile VALUE snamev, dnamev; + const char *sname, *dname; + rb_encoding *senc, *denc; + rb_econv_t *ec; + int ecflags; + VALUE convpath; + + if (DATA_PTR(self)) { + rb_raise(rb_eTypeError, "already initialized"); + } + + if (argc == 1 && !NIL_P(convpath = rb_check_array_type(argv[0]))) { + ec = rb_econv_init_by_convpath(self, convpath, &sname, &dname, &senc, &denc); + ecflags = 0; + ecopts = Qnil; + } + else { + econv_args(argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts); + ec = rb_econv_open_opts(sname, dname, ecflags, ecopts); + } + + if (!ec) { + rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags)); + } + + if (!DECORATOR_P(sname, dname)) { + if (!senc) + senc = make_dummy_encoding(sname); + if (!denc) + denc = make_dummy_encoding(dname); + } + + ec->source_encoding = senc; + ec->destination_encoding = denc; + + DATA_PTR(self) = ec; + + return self; +} + +/* + * call-seq: + * ec.inspect -> string + * + * Returns a printable version of ec + * + * ec = Encoding::Converter.new("iso-8859-1", "utf-8") + * puts ec.inspect #=> # + * + */ +static VALUE +econv_inspect(VALUE self) +{ + const char *cname = rb_obj_classname(self); + rb_econv_t *ec = DATA_PTR(self); + + if (!ec) + return rb_sprintf("#<%s: uninitialized>", cname); + else { + const char 
*sname = ec->source_encoding_name; + const char *dname = ec->destination_encoding_name; + VALUE str; + str = rb_sprintf("#<%s: ", cname); + econv_description(sname, dname, ec->flags, str); + rb_str_cat2(str, ">"); + return str; + } +} + +#define IS_ECONV(obj) (RDATA(obj)->dfree == (RUBY_DATA_FUNC)econv_free) + +static rb_econv_t * +check_econv(VALUE self) +{ + Check_Type(self, T_DATA); + if (!IS_ECONV(self)) { + rb_raise(rb_eTypeError, "wrong argument type %s (expected Encoding::Converter)", + rb_class2name(CLASS_OF(self))); + } + if (!DATA_PTR(self)) { + rb_raise(rb_eTypeError, "uninitialized encoding converter"); + } + return DATA_PTR(self); +} + +/* + * call-seq: + * ec.source_encoding -> encoding + * + * Returns the source encoding as an Encoding object. + */ +static VALUE +econv_source_encoding(VALUE self) +{ + rb_econv_t *ec = check_econv(self); + if (!ec->source_encoding) + return Qnil; + return rb_enc_from_encoding(ec->source_encoding); +} + +/* + * call-seq: + * ec.destination_encoding -> encoding + * + * Returns the destination encoding as an Encoding object. + */ +static VALUE +econv_destination_encoding(VALUE self) +{ + rb_econv_t *ec = check_econv(self); + if (!ec->destination_encoding) + return Qnil; + return rb_enc_from_encoding(ec->destination_encoding); +} + +/* + * call-seq: + * ec.convpath -> ary + * + * Returns the conversion path of ec. + * + * The result is an array of conversions. + * + * ec = Encoding::Converter.new("ISo-8859-1", "EUC-JP", crlf_newline: true) + * p ec.convpath + * #=> [[#, #], + * # [#, #], + * # "crlf_newline"] + * + * Each element of the array is a pair of encodings or a string. + * A pair means an encoding conversion. + * A string means a decorator. + * + * In the above example, [#, #] means + * a converter from ISO-8859-1 to UTF-8. + * "crlf_newline" means newline converter from LF to CRLF. 
+ */ +static VALUE +econv_convpath(VALUE self) +{ + rb_econv_t *ec = check_econv(self); + VALUE result; + int i; + + result = rb_ary_new(); + for (i = 0; i < ec->num_trans; i++) { + const rb_transcoder *tr = ec->elems[i].tc->transcoder; + VALUE v; + if (DECORATOR_P(tr->src_encoding, tr->dst_encoding)) + v = rb_str_new_cstr(tr->dst_encoding); + else + v = rb_assoc_new(make_encobj(tr->src_encoding), make_encobj(tr->dst_encoding)); + rb_ary_push(result, v); + } + return result; +} + +static VALUE +econv_result_to_symbol(rb_econv_result_t res) +{ + switch (res) { + case econv_invalid_byte_sequence: return sym_invalid_byte_sequence; + case econv_incomplete_input: return sym_incomplete_input; + case econv_undefined_conversion: return sym_undefined_conversion; + case econv_destination_buffer_full: return sym_destination_buffer_full; + case econv_source_buffer_empty: return sym_source_buffer_empty; + case econv_finished: return sym_finished; + case econv_after_output: return sym_after_output; + default: return INT2NUM(res); /* should not be reached */ + } +} + +/* + * call-seq: + * ec.primitive_convert(source_buffer, destination_buffer) -> symbol + * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset) -> symbol + * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize) -> symbol + * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, opt) -> symbol + * + * possible opt elements: + * hash form: + * :partial_input => true # source buffer may be part of larger source + * :after_output => true # stop conversion after output before input + * integer form: + * Encoding::Converter::PARTIAL_INPUT + * Encoding::Converter::AFTER_OUTPUT + * + * possible results: + * :invalid_byte_sequence + * :incomplete_input + * :undefined_conversion + * :after_output + * :destination_buffer_full + * :source_buffer_empty + * :finished + * + * primitive_convert converts 
source_buffer into destination_buffer. + * + * source_buffer should be a string or nil. + * nil means a empty string. + * + * destination_buffer should be a string. + * + * destination_byteoffset should be an integer or nil. + * nil means the end of destination_buffer. + * If it is omitted, nil is assumed. + * + * destination_bytesize should be an integer or nil. + * nil means unlimited. + * If it is omitted, nil is assumed. + * + * opt should be nil, a hash or an integer. + * nil means no flags. + * If it is omitted, nil is assumed. + * + * primitive_convert converts the content of source_buffer from beginning + * and store the result into destination_buffer. + * + * destination_byteoffset and destination_bytesize specify the region which + * the converted result is stored. + * destination_byteoffset specifies the start position in destination_buffer in bytes. + * If destination_byteoffset is nil, + * destination_buffer.bytesize is used for appending the result. + * destination_bytesize specifies maximum number of bytes. + * If destination_bytesize is nil, + * destination size is unlimited. + * After conversion, destination_buffer is resized to + * destination_byteoffset + actually produced number of bytes. + * Also destination_buffer's encoding is set to destination_encoding. + * + * primitive_convert drops the converted part of source_buffer. + * the dropped part is converted in destination_buffer or + * buffered in Encoding::Converter object. + * + * primitive_convert stops conversion when one of following condition met. + * - invalid byte sequence found in source buffer (:invalid_byte_sequence) + * - unexpected end of source buffer (:incomplete_input) + * this occur only when :partial_input is not specified. + * - character not representable in output encoding (:undefined_conversion) + * - after some output is generated, before input is done (:after_output) + * this occur only when :after_output is specified. 
+ * - destination buffer is full (:destination_buffer_full) + * this occur only when destination_bytesize is non-nil. + * - source buffer is empty (:source_buffer_empty) + * this occur only when :partial_input is specified. + * - conversion is finished (:finished) + * + * example: + * ec = Encoding::Converter.new("UTF-8", "UTF-16BE") + * ret = ec.primitive_convert(src="pi", dst="", nil, 100) + * p [ret, src, dst] #=> [:finished, "", "\x00p\x00i"] + * + * ec = Encoding::Converter.new("UTF-8", "UTF-16BE") + * ret = ec.primitive_convert(src="pi", dst="", nil, 1) + * p [ret, src, dst] #=> [:destination_buffer_full, "i", "\x00"] + * ret = ec.primitive_convert(src, dst="", nil, 1) + * p [ret, src, dst] #=> [:destination_buffer_full, "", "p"] + * ret = ec.primitive_convert(src, dst="", nil, 1) + * p [ret, src, dst] #=> [:destination_buffer_full, "", "\x00"] + * ret = ec.primitive_convert(src, dst="", nil, 1) + * p [ret, src, dst] #=> [:finished, "", "i"] + * + */ +static VALUE +econv_primitive_convert(int argc, VALUE *argv, VALUE self) +{ + VALUE input, output, output_byteoffset_v, output_bytesize_v, opt, flags_v; + rb_econv_t *ec = check_econv(self); + rb_econv_result_t res; + const unsigned char *ip, *is; + unsigned char *op, *os; + long output_byteoffset, output_bytesize; + unsigned long output_byteend; + int flags; + + rb_scan_args(argc, argv, "23", &input, &output, &output_byteoffset_v, &output_bytesize_v, &opt); + + if (NIL_P(output_byteoffset_v)) + output_byteoffset = 0; /* dummy */ + else + output_byteoffset = NUM2LONG(output_byteoffset_v); + + if (NIL_P(output_bytesize_v)) + output_bytesize = 0; /* dummy */ + else + output_bytesize = NUM2LONG(output_bytesize_v); + + if (NIL_P(opt)) { + flags = 0; + } + else if (!NIL_P(flags_v = rb_check_to_integer(opt, "to_int"))) { + flags = NUM2INT(flags_v); + } + else { + VALUE v; + opt = rb_convert_type(opt, T_HASH, "Hash", "to_hash"); + flags = 0; + v = rb_hash_aref(opt, sym_partial_input); + if (RTEST(v)) + flags |= 
ECONV_PARTIAL_INPUT; + v = rb_hash_aref(opt, sym_after_output); + if (RTEST(v)) + flags |= ECONV_AFTER_OUTPUT; + } + + StringValue(output); + if (!NIL_P(input)) + StringValue(input); + rb_str_modify(output); + + if (NIL_P(output_bytesize_v)) { + output_bytesize = RSTRING_EMBED_LEN_MAX; + if (!NIL_P(input) && output_bytesize < RSTRING_LEN(input)) + output_bytesize = RSTRING_LEN(input); + } + + retry: + + if (NIL_P(output_byteoffset_v)) + output_byteoffset = RSTRING_LEN(output); + + if (output_byteoffset < 0) + rb_raise(rb_eArgError, "negative output_byteoffset"); + + if (RSTRING_LEN(output) < output_byteoffset) + rb_raise(rb_eArgError, "output_byteoffset too big"); + + if (output_bytesize < 0) + rb_raise(rb_eArgError, "negative output_bytesize"); + + output_byteend = (unsigned long)output_byteoffset + + (unsigned long)output_bytesize; + + if (output_byteend < (unsigned long)output_byteoffset || + LONG_MAX < output_byteend) + rb_raise(rb_eArgError, "output_byteoffset+output_bytesize too big"); + + if (rb_str_capacity(output) < output_byteend) + rb_str_resize(output, output_byteend); + + if (NIL_P(input)) { + ip = is = NULL; + } + else { + ip = (const unsigned char *)RSTRING_PTR(input); + is = ip + RSTRING_LEN(input); + } + + op = (unsigned char *)RSTRING_PTR(output) + output_byteoffset; + os = op + output_bytesize; + + res = rb_econv_convert(ec, &ip, is, &op, os, flags); + rb_str_set_len(output, op-(unsigned char *)RSTRING_PTR(output)); + if (!NIL_P(input)) + rb_str_drop_bytes(input, ip - (unsigned char *)RSTRING_PTR(input)); + + if (NIL_P(output_bytesize_v) && res == econv_destination_buffer_full) { + if (LONG_MAX / 2 < output_bytesize) + rb_raise(rb_eArgError, "too long conversion result"); + output_bytesize *= 2; + output_byteoffset_v = Qnil; + goto retry; + } + + if (ec->destination_encoding) { + rb_enc_associate(output, ec->destination_encoding); + } + + return econv_result_to_symbol(res); +} + +/* + * call-seq: + * ec.convert(source_string) -> 
destination_string + * + * Convert source_string and return destination_string. + * + * source_string is assumed as a part of source. + * i.e. :partial_input=>true is specified internally. + * finish method should be used last. + * + * ec = Encoding::Converter.new("utf-8", "euc-jp") + * puts ec.convert("\u3042").dump #=> "\xA4\xA2" + * puts ec.finish.dump #=> "" + * + * ec = Encoding::Converter.new("euc-jp", "utf-8") + * puts ec.convert("\xA4").dump #=> "" + * puts ec.convert("\xA2").dump #=> "\xE3\x81\x82" + * puts ec.finish.dump #=> "" + * + * ec = Encoding::Converter.new("utf-8", "iso-2022-jp") + * puts ec.convert("\xE3").dump #=> "".force_encoding("ISO-2022-JP") + * puts ec.convert("\x81").dump #=> "".force_encoding("ISO-2022-JP") + * puts ec.convert("\x82").dump #=> "\e$B$\"".force_encoding("ISO-2022-JP") + * puts ec.finish.dump #=> "\e(B".force_encoding("ISO-2022-JP") + * + * If a conversion error occur, + * Encoding::UndefinedConversionError or + * Encoding::InvalidByteSequenceError is raised. + * Encoding::Converter#convert doesn't supply methods to recover or restart + * from these exceptions. + * When you want to handle these conversion errors, + * use Encoding::Converter#primitive_convert. 
+ * + */ +static VALUE +econv_convert(VALUE self, VALUE source_string) +{ + VALUE ret, dst; + VALUE av[5]; + int ac; + rb_econv_t *ec = check_econv(self); + + StringValue(source_string); + + dst = rb_str_new(NULL, 0); + + av[0] = rb_str_dup(source_string); + av[1] = dst; + av[2] = Qnil; + av[3] = Qnil; + av[4] = INT2NUM(ECONV_PARTIAL_INPUT); + ac = 5; + + ret = econv_primitive_convert(ac, av, self); + + if (ret == sym_invalid_byte_sequence || + ret == sym_undefined_conversion || + ret == sym_incomplete_input) { + VALUE exc = make_econv_exception(ec); + rb_exc_raise(exc); + } + + if (ret == sym_finished) { + rb_raise(rb_eArgError, "converter already finished"); + } + + if (ret != sym_source_buffer_empty) { + rb_bug("unexpected result of econv_primitive_convert"); + } + + return dst; +} + +/* + * call-seq: + * ec.finish -> string + * + * Finishes the converter. + * It returns the last part of the converted string. + * + * ec = Encoding::Converter.new("utf-8", "iso-2022-jp") + * p ec.convert("\u3042") #=> "\e$B$\"" + * p ec.finish #=> "\e(B" + */ +static VALUE +econv_finish(VALUE self) +{ + VALUE ret, dst; + VALUE av[5]; + int ac; + rb_econv_t *ec = check_econv(self); + + dst = rb_str_new(NULL, 0); + + av[0] = Qnil; + av[1] = dst; + av[2] = Qnil; + av[3] = Qnil; + av[4] = INT2NUM(0); + ac = 5; + + ret = econv_primitive_convert(ac, av, self); + + if (ret == sym_invalid_byte_sequence || + ret == sym_undefined_conversion || + ret == sym_incomplete_input) { + VALUE exc = make_econv_exception(ec); + rb_exc_raise(exc); + } + + if (ret != sym_finished) { + rb_bug("unexpected result of econv_primitive_convert"); + } + + return dst; +} + +/* + * call-seq: + * ec.primitive_errinfo -> array + * + * primitive_errinfo returns important information regarding the last error + * as a 5-element array: + * + * [result, enc1, enc2, error_bytes, readagain_bytes] + * + * result is the last result of primitive_convert. 
+ * + * Other elements are only meaningful when result is + * :invalid_byte_sequence, :incomplete_input or :undefined_conversion. + * + * enc1 and enc2 indicate a conversion step as a pair of strings. + * For example, a converter from EUC-JP to ISO-8859-1 converts + * a string as follows: EUC-JP -> UTF-8 -> ISO-8859-1. + * So [enc1, enc2] is either ["EUC-JP", "UTF-8"] or ["UTF-8", "ISO-8859-1"]. + * + * error_bytes and readagain_bytes indicate the byte sequences which caused the error. + * error_bytes is the discarded portion. + * readagain_bytes is the buffered portion which is read again on the next conversion. + * + * Example: + * + * # \xff is invalid as EUC-JP. + * ec = Encoding::Converter.new("EUC-JP", "Shift_JIS") + * ec.primitive_convert(src="\xff", dst="", nil, 10) + * p ec.primitive_errinfo + * #=> [:invalid_byte_sequence, "EUC-JP", "UTF-8", "\xFF", ""] + * + * # HIRAGANA LETTER A (\xa4\xa2 in EUC-JP) is not representable in ISO-8859-1. + * # Since this error occurs in the UTF-8 to ISO-8859-1 conversion, + * # error_bytes is HIRAGANA LETTER A in UTF-8 (\xE3\x81\x82). + * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + * ec.primitive_convert(src="\xa4\xa2", dst="", nil, 10) + * p ec.primitive_errinfo + * #=> [:undefined_conversion, "UTF-8", "ISO-8859-1", "\xE3\x81\x82", ""] + * + * # partial character is invalid + * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + * ec.primitive_convert(src="\xa4", dst="", nil, 10) + * p ec.primitive_errinfo + * #=> [:incomplete_input, "EUC-JP", "UTF-8", "\xA4", ""] + * + * # Encoding::Converter::PARTIAL_INPUT prevents invalid errors by + * # partial characters. + * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + * ec.primitive_convert(src="\xa4", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT) + * p ec.primitive_errinfo + * #=> [:source_buffer_empty, nil, nil, nil, nil] + * + * # \xd8\x00\x00@ is invalid as UTF-16BE because + * # no low surrogate after high surrogate (\xd8\x00). 
+ * # It is detected by 3rd byte (\00) which is part of next character. + * # So the high surrogate (\xd8\x00) is discarded and + * # the 3rd byte is read again later. + * # Since the byte is buffered in ec, it is dropped from src. + * ec = Encoding::Converter.new("UTF-16BE", "UTF-8") + * ec.primitive_convert(src="\xd8\x00\x00@", dst="", nil, 10) + * p ec.primitive_errinfo + * #=> [:invalid_byte_sequence, "UTF-16BE", "UTF-8", "\xD8\x00", "\x00"] + * p src + * #=> "@" + * + * # Similar to UTF-16BE, \x00\xd8@\x00 is invalid as UTF-16LE. + * # The problem is detected by 4th byte. + * ec = Encoding::Converter.new("UTF-16LE", "UTF-8") + * ec.primitive_convert(src="\x00\xd8@\x00", dst="", nil, 10) + * p ec.primitive_errinfo + * #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "@\x00"] + * p src + * #=> "" + * + */ +static VALUE +econv_primitive_errinfo(VALUE self) +{ + rb_econv_t *ec = check_econv(self); + + VALUE ary; + + ary = rb_ary_new2(5); + + rb_ary_store(ary, 0, econv_result_to_symbol(ec->last_error.result)); + rb_ary_store(ary, 4, Qnil); + + if (ec->last_error.source_encoding) + rb_ary_store(ary, 1, rb_str_new2(ec->last_error.source_encoding)); + + if (ec->last_error.destination_encoding) + rb_ary_store(ary, 2, rb_str_new2(ec->last_error.destination_encoding)); + + if (ec->last_error.error_bytes_start) { + rb_ary_store(ary, 3, rb_str_new((const char *)ec->last_error.error_bytes_start, ec->last_error.error_bytes_len)); + rb_ary_store(ary, 4, rb_str_new((const char *)ec->last_error.error_bytes_start + ec->last_error.error_bytes_len, ec->last_error.readagain_len)); + } + + return ary; +} + +/* + * call-seq: + * ec.insert_output(string) -> nil + * + * Inserts string into the encoding converter. + * The string will be converted to the destination encoding and + * output on later conversions. + * + * If the destination encoding is stateful, + * string is converted according to the state and the state is updated. 
+ * + * This method should be used only when a conversion error occurs. + * + * ec = Encoding::Converter.new("utf-8", "iso-8859-1") + * src = "HIRAGANA LETTER A is \u{3042}." + * dst = "" + * p ec.primitive_convert(src, dst) #=> :undefined_conversion + * puts "[#{dst.dump}, #{src.dump}]" #=> ["HIRAGANA LETTER A is ", "."] + * ec.insert_output("") + * p ec.primitive_convert(src, dst) #=> :finished + * puts "[#{dst.dump}, #{src.dump}]" #=> ["HIRAGANA LETTER A is .", ""] + * + * ec = Encoding::Converter.new("utf-8", "iso-2022-jp") + * src = "\u{306F 3041 3068 2661 3002}" # U+2661 is not representable in iso-2022-jp + * dst = "" + * p ec.primitive_convert(src, dst) #=> :undefined_conversion + * puts "[#{dst.dump}, #{src.dump}]" #=> ["\e$B$O$!$H".force_encoding("ISO-2022-JP"), "\xE3\x80\x82"] + * ec.insert_output "?" # state change required to output "?". + * p ec.primitive_convert(src, dst) #=> :finished + * puts "[#{dst.dump}, #{src.dump}]" #=> ["\e$B$O$!$H\e(B?\e$B!#\e(B".force_encoding("ISO-2022-JP"), ""] + * + */ +static VALUE +econv_insert_output(VALUE self, VALUE string) +{ + const char *insert_enc; + + int ret; + + rb_econv_t *ec = check_econv(self); + + StringValue(string); + insert_enc = rb_econv_encoding_to_insert_output(ec); + string = rb_str_encode(string, rb_enc_from_encoding(rb_enc_find(insert_enc)), 0, Qnil); + + ret = rb_econv_insert_output(ec, (const unsigned char *)RSTRING_PTR(string), RSTRING_LEN(string), insert_enc); + if (ret == -1) { + rb_raise(rb_eArgError, "too big string"); + } + + return Qnil; +} + +/* + * call-seq + * ec.putback => string + * ec.putback(max_numbytes) => string + * + * Put back the bytes which will be converted. + * + * The bytes are caused by invalid_byte_sequence error. + * When invalid_byte_sequence error, some bytes are discarded and + * some bytes are buffered to be converted later. + * The latter bytes can be put back. 
+ * It can be observed by + * Encoding::InvalidByteSequenceError#readagain_bytes and + * Encoding::Converter#primitive_errinfo. + * + * ec = Encoding::Converter.new("utf-16le", "iso-8859-1") + * src = "\x00\xd8\x61\x00" + * dst = "" + * p ec.primitive_convert(src, dst) #=> :invalid_byte_sequence + * p ec.primitive_errinfo #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "a\x00"] + * p ec.putback #=> "a\x00" + * p ec.putback #=> "" # no more bytes to put back + * + */ +static VALUE +econv_putback(int argc, VALUE *argv, VALUE self) +{ + rb_econv_t *ec = check_econv(self); + int n; + int putbackable; + VALUE str, max; + + rb_scan_args(argc, argv, "01", &max); + + if (NIL_P(max)) + n = rb_econv_putbackable(ec); + else { + n = NUM2INT(max); + putbackable = rb_econv_putbackable(ec); + if (putbackable < n) + n = putbackable; + } + + str = rb_str_new(NULL, n); + rb_econv_putback(ec, (unsigned char *)RSTRING_PTR(str), n); + + if (ec->source_encoding) { + rb_enc_associate(str, ec->source_encoding); + } + + return str; +} + +/* + * call-seq: + * ec.last_error -> exception or nil + * + * Returns an exception object for the last conversion. + * Returns nil if the last conversion did not produce an error. + * + * "error" means that + * Encoding::InvalidByteSequenceError and Encoding::UndefinedConversionError for + * Encoding::Converter#convert and + * :invalid_byte_sequence, :incomplete_input and :undefined_conversion for + * Encoding::Converter#primitive_convert. 
+ * + * ec = Encoding::Converter.new("utf-8", "iso-8859-1") + * p ec.primitive_convert(src="\xf1abcd", dst="") #=> :invalid_byte_sequence + * p ec.last_error #=> # + * p ec.primitive_convert(src, dst, nil, 1) #=> :destination_buffer_full + * p ec.last_error #=> nil + * + */ +static VALUE +econv_last_error(VALUE self) +{ + rb_econv_t *ec = check_econv(self); + VALUE exc; + + exc = make_econv_exception(ec); + if (NIL_P(exc)) + return Qnil; + return exc; +} + +/* + * call-seq: + * ec.replacement -> string + * + * Returns the replacement string. + * + * ec = Encoding::Converter.new("euc-jp", "us-ascii") + * p ec.replacement #=> "?" + * + * ec = Encoding::Converter.new("euc-jp", "utf-8") + * p ec.replacement #=> "\uFFFD" + */ +static VALUE +econv_get_replacement(VALUE self) +{ + rb_econv_t *ec = check_econv(self); + int ret; + rb_encoding *enc; + + ret = make_replacement(ec); + if (ret == -1) { + rb_raise(rb_eUndefinedConversionError, "replacement character setup failed"); + } + + enc = rb_enc_find(ec->replacement_enc); + return rb_enc_str_new((const char *)ec->replacement_str, (long)ec->replacement_len, enc); +} + +/* + * call-seq: + * ec.replacement = string + * + * Sets the replacement string. + * + * ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace) + * ec.replacement = "" + * p ec.convert("a \u3042 b") #=> "a b" + */ +static VALUE +econv_set_replacement(VALUE self, VALUE arg) +{ + rb_econv_t *ec = check_econv(self); + VALUE string = arg; + int ret; + rb_encoding *enc; + + StringValue(string); + enc = rb_enc_get(string); + + ret = rb_econv_set_replacement(ec, + (const unsigned char *)RSTRING_PTR(string), + RSTRING_LEN(string), + rb_enc_name(enc)); + + if (ret == -1) { + /* xxx: rb_eInvalidByteSequenceError? 
*/ + rb_raise(rb_eUndefinedConversionError, "replacement character setup failed"); + } + + return arg; +} + +VALUE +rb_econv_make_exception(rb_econv_t *ec) +{ + return make_econv_exception(ec); +} + +void +rb_econv_check_error(rb_econv_t *ec) +{ + VALUE exc; + + exc = make_econv_exception(ec); + if (NIL_P(exc)) + return; + rb_exc_raise(exc); +} + +/* + * call-seq: + * ecerr.source_encoding_name -> string + * + * Returns the source encoding name as a string. + */ +static VALUE +ecerr_source_encoding_name(VALUE self) +{ + return rb_attr_get(self, rb_intern("source_encoding_name")); +} + +/* + * call-seq: + * ecerr.source_encoding -> encoding + * + * Returns the source encoding as an encoding object. + * + * Note that the result may not be equal to the source encoding of + * the encoding converter if the conversion has multiple steps. + * + * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP") # ISO-8859-1 -> UTF-8 -> EUC-JP + * begin + * ec.convert("\xa0") # NO-BREAK SPACE, which is available in UTF-8 but not in EUC-JP. + * rescue Encoding::UndefinedConversionError + * p $!.source_encoding #=> # + * p $!.destination_encoding #=> # + * p $!.source_encoding_name #=> "UTF-8" + * p $!.destination_encoding_name #=> "EUC-JP" + * end + * + */ +static VALUE +ecerr_source_encoding(VALUE self) +{ + return rb_attr_get(self, rb_intern("source_encoding")); +} + +/* + * call-seq: + * ecerr.destination_encoding_name -> string + * + * Returns the destination encoding name as a string. + */ +static VALUE +ecerr_destination_encoding_name(VALUE self) +{ + return rb_attr_get(self, rb_intern("destination_encoding_name")); +} + +/* + * call-seq: + * ecerr.destination_encoding -> string + * + * Returns the destination encoding as an encoding object. 
+ */ +static VALUE +ecerr_destination_encoding(VALUE self) +{ + return rb_attr_get(self, rb_intern("destination_encoding")); +} + +/* + * call-seq: + * ecerr.error_char -> string + * + * Returns the one-character string which cause Encoding::UndefinedConversionError. + * + * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP") + * begin + * ec.convert("\xa0") + * rescue Encoding::UndefinedConversionError + * puts $!.error_char.dump #=> "\xC2\xA0" + * p $!.error_char.encoding #=> # + * end + * + */ +static VALUE +ecerr_error_char(VALUE self) +{ + return rb_attr_get(self, rb_intern("error_char")); +} + +/* + * call-seq: + * ecerr.error_bytes -> string + * + * Returns the discarded bytes when Encoding::InvalidByteSequenceError occurs. + * + * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + * begin + * ec.convert("abc\xA1\xFFdef") + * rescue Encoding::InvalidByteSequenceError + * p $! #=> # + * puts $!.error_bytes.dump #=> "\xA1" + * puts $!.readagain_bytes.dump #=> "\xFF" + * end + */ +static VALUE +ecerr_error_bytes(VALUE self) +{ + return rb_attr_get(self, rb_intern("error_bytes")); +} + +/* + * call-seq: + * ecerr.readagain_bytes -> string + * + * Returns the bytes to be read again when Encoding::InvalidByteSequenceError occurs. + */ +static VALUE +ecerr_readagain_bytes(VALUE self) +{ + return rb_attr_get(self, rb_intern("readagain_bytes")); +} + +/* + * call-seq: + * ecerr.incomplete_input? -> true or false + * + * Returns true if the invalid byte sequence error is caused by + * premature end of string. + * + * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + * + * begin + * ec.convert("abc\xA1z") + * rescue Encoding::InvalidByteSequenceError + * p $! #=> # + * p $!.incomplete_input? #=> false + * end + * + * begin + * ec.convert("abc\xA1") + * ec.finish + * rescue Encoding::InvalidByteSequenceError + * p $! #=> # + * p $!.incomplete_input? 
#=> true + * end + */ +static VALUE +ecerr_incomplete_input(VALUE self) +{ + return rb_attr_get(self, rb_intern("incomplete_input")); +} +/* Init_newline is only declared here; it is called as the last statement of Init_transcode. */ +extern void Init_newline(void); +/* Init_transcode: defines the three Encoding error classes, creates transcoder_table, caches the option/result symbols, and registers String#encode, String#encode!, Encoding::Converter (methods and constants) and the reader methods of the error classes. */ +void +Init_transcode(void) +{ + rb_eUndefinedConversionError = rb_define_class_under(rb_cEncoding, "UndefinedConversionError", rb_eEncodingError); + rb_eInvalidByteSequenceError = rb_define_class_under(rb_cEncoding, "InvalidByteSequenceError", rb_eEncodingError); + rb_eConverterNotFoundError = rb_define_class_under(rb_cEncoding, "ConverterNotFoundError", rb_eEncodingError); + + transcoder_table = st_init_strcasetable(); + /* Symbols are interned once and kept in variables declared outside this function. */ + sym_invalid = ID2SYM(rb_intern("invalid")); + sym_undef = ID2SYM(rb_intern("undef")); + sym_replace = ID2SYM(rb_intern("replace")); + sym_xml = ID2SYM(rb_intern("xml")); + sym_text = ID2SYM(rb_intern("text")); + sym_attr = ID2SYM(rb_intern("attr")); + + sym_invalid_byte_sequence = ID2SYM(rb_intern("invalid_byte_sequence")); + sym_undefined_conversion = ID2SYM(rb_intern("undefined_conversion")); + sym_destination_buffer_full = ID2SYM(rb_intern("destination_buffer_full")); + sym_source_buffer_empty = ID2SYM(rb_intern("source_buffer_empty")); + sym_finished = ID2SYM(rb_intern("finished")); + sym_after_output = ID2SYM(rb_intern("after_output")); + sym_incomplete_input = ID2SYM(rb_intern("incomplete_input")); + sym_universal_newline = ID2SYM(rb_intern("universal_newline")); + sym_crlf_newline = ID2SYM(rb_intern("crlf_newline")); + sym_cr_newline = ID2SYM(rb_intern("cr_newline")); + sym_partial_input = ID2SYM(rb_intern("partial_input")); + /* An arity of -1 registers a C function that receives (argc, argv, self). */ + rb_define_method(rb_cString, "encode", str_encode, -1); + rb_define_method(rb_cString, "encode!", str_encode_bang, -1); + + rb_cEncodingConverter = rb_define_class_under(rb_cEncoding, "Converter", rb_cData); + rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate); + rb_define_singleton_method(rb_cEncodingConverter, "asciicompat_encoding", econv_s_asciicompat_encoding, 1); + rb_define_singleton_method(rb_cEncodingConverter, 
"search_convpath", econv_s_search_convpath, -1); + rb_define_method(rb_cEncodingConverter, "initialize", econv_init, -1); + rb_define_method(rb_cEncodingConverter, "inspect", econv_inspect, 0); + rb_define_method(rb_cEncodingConverter, "convpath", econv_convpath, 0); + rb_define_method(rb_cEncodingConverter, "source_encoding", econv_source_encoding, 0); + rb_define_method(rb_cEncodingConverter, "destination_encoding", econv_destination_encoding, 0); + rb_define_method(rb_cEncodingConverter, "primitive_convert", econv_primitive_convert, -1); + rb_define_method(rb_cEncodingConverter, "convert", econv_convert, 1); + rb_define_method(rb_cEncodingConverter, "finish", econv_finish, 0); + rb_define_method(rb_cEncodingConverter, "primitive_errinfo", econv_primitive_errinfo, 0); + rb_define_method(rb_cEncodingConverter, "insert_output", econv_insert_output, 1); + rb_define_method(rb_cEncodingConverter, "putback", econv_putback, -1); + rb_define_method(rb_cEncodingConverter, "last_error", econv_last_error, 0); + rb_define_method(rb_cEncodingConverter, "replacement", econv_get_replacement, 0); + rb_define_method(rb_cEncodingConverter, "replacement=", econv_set_replacement, 1); + /* The ECONV_* flag values are exposed as Integer constants on Encoding::Converter. */ + rb_define_const(rb_cEncodingConverter, "INVALID_MASK", INT2FIX(ECONV_INVALID_MASK)); + rb_define_const(rb_cEncodingConverter, "INVALID_REPLACE", INT2FIX(ECONV_INVALID_REPLACE)); + rb_define_const(rb_cEncodingConverter, "UNDEF_MASK", INT2FIX(ECONV_UNDEF_MASK)); + rb_define_const(rb_cEncodingConverter, "UNDEF_REPLACE", INT2FIX(ECONV_UNDEF_REPLACE)); + rb_define_const(rb_cEncodingConverter, "UNDEF_HEX_CHARREF", INT2FIX(ECONV_UNDEF_HEX_CHARREF)); + rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(ECONV_PARTIAL_INPUT)); + rb_define_const(rb_cEncodingConverter, "AFTER_OUTPUT", INT2FIX(ECONV_AFTER_OUTPUT)); + rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECORATOR", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECORATOR)); + rb_define_const(rb_cEncodingConverter, "CRLF_NEWLINE_DECORATOR", 
INT2FIX(ECONV_CRLF_NEWLINE_DECORATOR)); + rb_define_const(rb_cEncodingConverter, "CR_NEWLINE_DECORATOR", INT2FIX(ECONV_CR_NEWLINE_DECORATOR)); + rb_define_const(rb_cEncodingConverter, "XML_TEXT_DECORATOR", INT2FIX(ECONV_XML_TEXT_DECORATOR)); + rb_define_const(rb_cEncodingConverter, "XML_ATTR_CONTENT_DECORATOR", INT2FIX(ECONV_XML_ATTR_CONTENT_DECORATOR)); + rb_define_const(rb_cEncodingConverter, "XML_ATTR_QUOTE_DECORATOR", INT2FIX(ECONV_XML_ATTR_QUOTE_DECORATOR)); + /* Both error classes get the four encoding readers; error_char is specific to UndefinedConversionError, the byte readers and incomplete_input? to InvalidByteSequenceError. */ + rb_define_method(rb_eUndefinedConversionError, "source_encoding_name", ecerr_source_encoding_name, 0); + rb_define_method(rb_eUndefinedConversionError, "destination_encoding_name", ecerr_destination_encoding_name, 0); + rb_define_method(rb_eUndefinedConversionError, "source_encoding", ecerr_source_encoding, 0); + rb_define_method(rb_eUndefinedConversionError, "destination_encoding", ecerr_destination_encoding, 0); + rb_define_method(rb_eUndefinedConversionError, "error_char", ecerr_error_char, 0); + + rb_define_method(rb_eInvalidByteSequenceError, "source_encoding_name", ecerr_source_encoding_name, 0); + rb_define_method(rb_eInvalidByteSequenceError, "destination_encoding_name", ecerr_destination_encoding_name, 0); + rb_define_method(rb_eInvalidByteSequenceError, "source_encoding", ecerr_source_encoding, 0); + rb_define_method(rb_eInvalidByteSequenceError, "destination_encoding", ecerr_destination_encoding, 0); + rb_define_method(rb_eInvalidByteSequenceError, "error_bytes", ecerr_error_bytes, 0); + rb_define_method(rb_eInvalidByteSequenceError, "readagain_bytes", ecerr_readagain_bytes, 0); + rb_define_method(rb_eInvalidByteSequenceError, "incomplete_input?", ecerr_incomplete_input, 0); + + Init_newline(); +} diff --git a/transcode_data.h b/transcode_data.h new file mode 100644 index 0000000..7a40456 --- /dev/null +++ b/transcode_data.h @@ -0,0 +1,106 @@ +/********************************************************************** + + transcode_data.h - + + $Author: yugui $ + created at: Mon 10 Dec 2007 
14:01:47 JST 2007 + + Copyright (C) 2007 Martin Duerst + +**********************************************************************/ + +#include "ruby/ruby.h" + +#ifndef RUBY_TRANSCODE_DATA_H +#define RUBY_TRANSCODE_DATA_H 1 + +#define WORDINDEX_SHIFT_BITS 2 +#define WORDINDEX2INFO(widx) ((widx) << WORDINDEX_SHIFT_BITS) +#define INFO2WORDINDEX(info) ((info) >> WORDINDEX_SHIFT_BITS) +#define BYTE_LOOKUP_BASE(bl) ((bl)[0]) +#define BYTE_LOOKUP_INFO(bl) ((bl)[1]) +/* PType is a cast prefix, not a type name: "PType 0x01" expands to "(unsigned int) 0x01". */ +#define PType (unsigned int) + +#define NOMAP (PType 0x01) /* single byte direct map */ +#define ONEbt (0x02) /* one byte payload */ +#define TWObt (0x03) /* two bytes payload */ +#define THREEbt (0x05) /* three bytes payload */ +#define FOURbt (0x06) /* four bytes payload, UTF-8 only, macros start at getBT0 */ +#define INVALID (PType 0x07) /* invalid byte sequence */ +#define UNDEF (PType 0x09) /* legal but undefined */ +#define ZERObt (PType 0x0A) /* zero bytes of payload, i.e. remove */ +#define FUNii (PType 0x0B) /* function from info to info */ +#define FUNsi (PType 0x0D) /* function from start to info */ +#define FUNio (PType 0x0E) /* function from info to output */ +#define FUNso (PType 0x0F) /* function from start to output */ +#define STR1 (PType 0x11) /* string 4 <= len <= 259 bytes: 1byte length + content */ +#define GB4bt (PType 0x12) /* GB18030 four bytes payload */ + +#define STR1_LENGTH(byte_addr) (*(byte_addr) + 4) +#define STR1_BYTEINDEX(w) ((w) >> 6) +#define makeSTR1(bi) (((bi) << 6) | STR1) +#define makeSTR1LEN(len) ((len)-4) +/* o1..o4 pack payload bytes into one word above the type tag; o4 keeps only the low 3 bits of b0 (bits 5-7), which getBT0 restores by OR-ing 0xF0. */ +#define o1(b1) (PType((((unsigned char)(b1))<<8)|ONEbt)) +#define o2(b1,b2) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|TWObt)) +#define o3(b1,b2,b3) (PType(((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|THREEbt)&0xffffffffU)) +#define o4(b0,b1,b2,b3) (PType(((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|((((unsigned char)(b0))&0x07)<<5)|FOURbt)&0xffffffffU)) +#define 
g4(b0,b1,b2,b3) (PType(((((unsigned char)(b0))<<8)|(((unsigned char)(b2))<<16)|((((unsigned char)(b1))&0x0f)<<24)|((((unsigned char)(b3))&0x0f)<<28)|GB4bt)&0xffffffffU)) + +#define getBT1(a) (((a)>> 8)&0xFF) +#define getBT2(a) (((a)>>16)&0xFF) +#define getBT3(a) (((a)>>24)&0xFF) +#define getBT0(a) ((((a)>> 5)&0x07)|0xF0) /* for UTF-8 only!!! */ +/* NOTE: in getGB4bt1/getGB4bt3 "&" binds tighter than "|", so they evaluate as ((x)&0x0F)|0x30, matching the 4-bit fields written by g4. */ +#define getGB4bt0(a) (((a)>> 8)&0xFF) +#define getGB4bt1(a) (((a)>>24)&0x0F|0x30) +#define getGB4bt2(a) (((a)>>16)&0xFF) +#define getGB4bt3(a) (((a)>>28)&0x0F|0x30) + +#define o2FUNii(b1,b2) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|FUNii)) + +/* do we need these??? maybe not, can be done with simple tables */ +#define ONETRAIL /* legal but undefined if one more trailing UTF-8 */ +#define TWOTRAIL /* legal but undefined if two more trailing UTF-8 */ +#define THREETRAIL /* legal but undefined if three more trailing UTF-8 */ + +typedef enum { + asciicompat_converter, /* ASCII-compatible -> ASCII-compatible */ + asciicompat_decoder, /* ASCII-incompatible -> ASCII-compatible */ + asciicompat_encoder /* ASCII-compatible -> ASCII-incompatible */ + /* ASCII-incompatible -> ASCII-incompatible is intentionally omitted. 
*/ +} rb_transcoder_asciicompat_type_t; + +typedef struct rb_transcoder rb_transcoder; + +/* static structure, one per supported encoding pair */ +struct rb_transcoder { + const char *src_encoding; + const char *dst_encoding; + unsigned int conv_tree_start; + const unsigned char *byte_array; + unsigned int byte_array_length; + const unsigned int *word_array; + unsigned int word_array_length; + int word_size; + int input_unit_length; + int max_input; + int max_output; + rb_transcoder_asciicompat_type_t asciicompat_type; + size_t state_size; + int (*state_init_func)(void*); /* ret==0:success ret!=0:failure(errno) */ + int (*state_fini_func)(void*); /* ret==0:success ret!=0:failure(errno) */ + VALUE (*func_ii)(void*, VALUE); /* info -> info */ + VALUE (*func_si)(void*, const unsigned char*, size_t); /* start -> info */ + ssize_t (*func_io)(void*, VALUE, const unsigned char*, size_t); /* info -> output */ + ssize_t (*func_so)(void*, const unsigned char*, size_t, unsigned char*, size_t); /* start -> output */ + ssize_t (*finish_func)(void*, unsigned char*, size_t); /* -> output */ + ssize_t (*resetsize_func)(void*); /* -> len */ + ssize_t (*resetstate_func)(void*, unsigned char*, size_t); /* -> output */ +}; + +void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib); +void rb_register_transcoder(const rb_transcoder *); + +#endif /* RUBY_TRANSCODE_DATA_H */ diff --git a/util.c b/util.c new file mode 100644 index 0000000..1669adc --- /dev/null +++ b/util.c @@ -0,0 +1,3830 @@ +/********************************************************************** + + util.c - + + $Author: yugui $ + created at: Fri Mar 10 17:22:34 JST 1995 + + Copyright (C) 1993-2008 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby/ruby.h" + +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#include "missing/file.h" +#endif +#if defined(__CYGWIN32__) +#define _open open +#define _close close +#define 
_unlink unlink +#define _access access +#elif defined(_WIN32) +#include +#endif + +#include "ruby/util.h" + +unsigned long +ruby_scan_oct(const char *start, int len, int *retlen) +{ + register const char *s = start; + register unsigned long retval = 0; + + while (len-- && *s >= '0' && *s <= '7') { + retval <<= 3; + retval |= *s++ - '0'; + } + *retlen = s - start; + return retval; +} + +unsigned long +ruby_scan_hex(const char *start, int len, int *retlen) +{ + static const char hexdigit[] = "0123456789abcdef0123456789ABCDEF"; + register const char *s = start; + register unsigned long retval = 0; + char *tmp; + + while (len-- && *s && (tmp = strchr(hexdigit, *s))) { + retval <<= 4; + retval |= (tmp - hexdigit) & 15; + s++; + } + *retlen = s - start; + return retval; +} + +static unsigned long +scan_digits(const char *str, int base, size_t *retlen, int *overflow) +{ + static signed char table[] = { + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /*0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + /*1*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + /*2*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + /*3*/ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, + /*4*/ -1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, + /*5*/ 25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1, + /*6*/ -1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, + /*7*/ 25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1, + /*8*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + /*9*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + /*a*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + /*b*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + /*c*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + /*d*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + /*e*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + /*f*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + }; + + const char *start = str; + unsigned long ret = 0, x; + unsigned long mul_overflow = (~(unsigned long)0) / base; + int c; + 
*overflow = 0; + + while ((c = (unsigned char)*str++) != '\0') { + int d = table[c]; + if (d == -1 || base <= d) { + *retlen = (str-1) - start; + return ret; + } + if (mul_overflow < ret) + *overflow = 1; + ret *= base; + x = ret; + ret += d; + if (ret < x) + *overflow = 1; + } + *retlen = (str-1) - start; + return ret; +} + +unsigned long +ruby_strtoul(const char *str, char **endptr, int base) +{ + int c, b, overflow; + int sign = 0; + size_t len; + unsigned long ret; + const char *subject_found = str; + + if (base == 1 || 36 < base) { + errno = EINVAL; + return 0; + } + + while ((c = *str) && ISSPACE(c)) + str++; + + if (c == '+') { + sign = 1; + str++; + } + else if (c == '-') { + sign = -1; + str++; + } + + if (str[0] == '0') { + subject_found = str+1; + if (base == 0 || base == 16) { + if (str[1] == 'x' || str[1] == 'X') { + b = 16; + str += 2; + } + else { + b = base == 0 ? 8 : 16; + str++; + } + } + else { + b = base; + str++; + } + } + else { + b = base == 0 ? 10 : base; + } + + ret = scan_digits(str, b, &len, &overflow); + + if (0 < len) + subject_found = str+len; + + if (endptr) + *endptr = (char*)subject_found; + + if (overflow) { + errno = ERANGE; + return ULONG_MAX; + } + + if (sign < 0) { + ret = -ret; + return ret; + } + else { + return ret; + } +} + +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#if defined(HAVE_FCNTL_H) +#include +#endif + +#ifndef S_ISDIR +# define S_ISDIR(m) ((m & S_IFMT) == S_IFDIR) +#endif + +#if defined(__CYGWIN32__) || defined(_WIN32) +/* + * Copyright (c) 1993, Intergraph Corporation + * + * You may distribute under the terms of either the GNU General Public + * License or the Artistic License, as specified in the perl README file. + * + * Various Unix compatibility functions and NT specific functions. + * + * Some of this code was derived from the MSDOS port(s) and the OS/2 port. + * + */ + + +/* + * Suffix appending for in-place editing under MS-DOS and OS/2 (and now NT!). 
+ * + * Here are the rules: + * + * Style 0: Append the suffix exactly as standard perl would do it. + * If the filesystem groks it, use it. (HPFS will always + * grok it. So will NTFS. FAT will rarely accept it.) + * + * Style 1: The suffix begins with a '.'. The extension is replaced. + * If the name matches the original name, use the fallback method. + * + * Style 2: The suffix is a single character, not a '.'. Try to add the + * suffix to the following places, using the first one that works. + * [1] Append to extension. + * [2] Append to filename, + * [3] Replace end of extension, + * [4] Replace end of filename. + * If the name matches the original name, use the fallback method. + * + * Style 3: Any other case: Ignore the suffix completely and use the + * fallback method. + * + * Fallback method: Change the extension to ".$$$". If that matches the + * original name, then change the extension to ".~~~". + * + * If filename is more than 1000 characters long, we die a horrible + * death. Sorry. + * + * The filename restriction is a cheat so that we can use buf[] to store + * assorted temporary goo. + * + * Examples, assuming style 0 failed. 
+ * + * suffix = ".bak" (style 1) + * foo.bar => foo.bak + * foo.bak => foo.$$$ (fallback) + * foo.$$$ => foo.~~~ (fallback) + * makefile => makefile.bak + * + * suffix = "~" (style 2) + * foo.c => foo.c~ + * foo.c~ => foo.c~~ + * foo.c~~ => foo~.c~~ + * foo~.c~~ => foo~~.c~~ + * foo~~~~~.c~~ => foo~~~~~.$$$ (fallback) + * + * foo.pas => foo~.pas + * makefile => makefile.~ + * longname.fil => longname.fi~ + * longname.fi~ => longnam~.fi~ + * longnam~.fi~ => longnam~.$$$ + * + */ + + +static int valid_filename(const char *s); + +static const char suffix1[] = ".$$$"; +static const char suffix2[] = ".~~~"; + +#define ext (&buf[1000]) + +#define strEQ(s1,s2) (strcmp(s1,s2) == 0) + +void +ruby_add_suffix(VALUE str, const char *suffix) +{ + int baselen; + int extlen = strlen(suffix); + char *s, *t, *p; + long slen; + char buf[1024]; + + if (RSTRING_LEN(str) > 1000) + rb_fatal("Cannot do inplace edit on long filename (%ld characters)", + RSTRING_LEN(str)); + +#if defined(__CYGWIN32__) || defined(_WIN32) + /* Style 0 */ + slen = RSTRING_LEN(str); + rb_str_cat(str, suffix, extlen); + if (valid_filename(RSTRING_PTR(str))) return; + + /* Fooey, style 0 failed. Fix str before continuing. 
*/ + rb_str_resize(str, slen); +#endif + + slen = extlen; + t = buf; baselen = 0; s = RSTRING_PTR(str); + while ((*t = *s) && *s != '.') { + baselen++; + if (*s == '\\' || *s == '/') baselen = 0; + s++; t++; + } + p = t; + + t = ext; extlen = 0; + while ((*t++ = *s++) != 0) extlen++; + if (extlen == 0) { ext[0] = '.'; ext[1] = 0; extlen++; } + + if (*suffix == '.') { /* Style 1 */ + if (strEQ(ext, suffix)) goto fallback; + strcpy(p, suffix); + } + else if (suffix[1] == '\0') { /* Style 2 */ + if (extlen < 4) { + ext[extlen] = *suffix; + ext[++extlen] = '\0'; + } + else if (baselen < 8) { + *p++ = *suffix; + } + else if (ext[3] != *suffix) { + ext[3] = *suffix; + } + else if (buf[7] != *suffix) { + buf[7] = *suffix; + } + else goto fallback; + strcpy(p, ext); + } + else { /* Style 3: Panic */ +fallback: + (void)memcpy(p, strEQ(ext, suffix1) ? suffix2 : suffix1, 5); + } + rb_str_resize(str, strlen(buf)); + memcpy(RSTRING_PTR(str), buf, RSTRING_LEN(str)); +} + +#if defined(__CYGWIN32__) || defined(_WIN32) +static int +valid_filename(const char *s) +{ + int fd; + + /* + // It doesn't exist, so see if we can open it. + */ + + if ((fd = _open(s, O_CREAT|O_EXCL, 0666)) >= 0) { + _close(fd); + _unlink(s); /* don't leave it laying around */ + return 1; + } + else if (errno == EEXIST) { + /* if the file exists, then it's a valid filename! 
*/ + return 1; + } + return 0; +} +#endif +#endif + + +/* mm.c */ + +#define A ((int*)a) +#define B ((int*)b) +#define C ((int*)c) +#define D ((int*)d) + +#define mmprepare(base, size) do {\ + if (((long)base & (0x3)) == 0)\ + if (size >= 16) mmkind = 1;\ + else mmkind = 0;\ + else mmkind = -1;\ + high = (size & (~0xf));\ + low = (size & 0x0c);\ +} while (0)\ + +#define mmarg mmkind, size, high, low + +static void mmswap_(register char *a, register char *b, int mmkind, int size, int high, int low) +{ + register int s; + if (a == b) return; + if (mmkind >= 0) { + if (mmkind > 0) { + register char *t = a + high; + do { + s = A[0]; A[0] = B[0]; B[0] = s; + s = A[1]; A[1] = B[1]; B[1] = s; + s = A[2]; A[2] = B[2]; B[2] = s; + s = A[3]; A[3] = B[3]; B[3] = s; a += 16; b += 16; + } while (a < t); + } + if (low != 0) { s = A[0]; A[0] = B[0]; B[0] = s; + if (low >= 8) { s = A[1]; A[1] = B[1]; B[1] = s; + if (low == 12) {s = A[2]; A[2] = B[2]; B[2] = s;}}} + } + else { + register char *t = a + size; + do {s = *a; *a++ = *b; *b++ = s;} while (a < t); + } +} +#define mmswap(a,b) mmswap_((a),(b),mmarg) + +static void mmrot3_(register char *a, register char *b, register char *c, int mmkind, int size, int high, int low) +{ + register int s; + if (mmkind >= 0) { + if (mmkind > 0) { + register char *t = a + high; + do { + s = A[0]; A[0] = B[0]; B[0] = C[0]; C[0] = s; + s = A[1]; A[1] = B[1]; B[1] = C[1]; C[1] = s; + s = A[2]; A[2] = B[2]; B[2] = C[2]; C[2] = s; + s = A[3]; A[3] = B[3]; B[3] = C[3]; C[3] = s; a += 16; b += 16; c += 16; + } while (a < t); + } + if (low != 0) { s = A[0]; A[0] = B[0]; B[0] = C[0]; C[0] = s; + if (low >= 8) { s = A[1]; A[1] = B[1]; B[1] = C[1]; C[1] = s; + if (low == 12) {s = A[2]; A[2] = B[2]; B[2] = C[2]; C[2] = s;}}} + } + else { + register char *t = a + size; + do {s = *a; *a++ = *b; *b++ = *c; *c++ = s;} while (a < t); + } +} +#define mmrot3(a,b,c) mmrot3_((a),(b),(c),mmarg) + +/* qs6.c */ +/*****************************************************/ 
+/* */ +/* qs6 (Quick sort function) */ +/* */ +/* by Tomoyuki Kawamura 1995.4.21 */ +/* kawamura@tokuyama.ac.jp */ +/*****************************************************/ + +typedef struct { char *LL, *RR; } stack_node; /* Stack structure for L,l,R,r */ +#define PUSH(ll,rr) do { top->LL = (ll); top->RR = (rr); ++top; } while (0) /* Push L,l,R,r */ +#define POP(ll,rr) do { --top; ll = top->LL; rr = top->RR; } while (0) /* Pop L,l,R,r */ + +#define med3(a,b,c) ((*cmp)(a,b,d)<0 ? \ + ((*cmp)(b,c,d)<0 ? b : ((*cmp)(a,c,d)<0 ? c : a)) : \ + ((*cmp)(b,c,d)>0 ? b : ((*cmp)(a,c,d)<0 ? a : c))) + +void +ruby_qsort(void* base, const int nel, const int size, + int (*cmp)(const void*, const void*, void*), void *d) +{ + register char *l, *r, *m; /* l,r:left,right group m:median point */ + register int t, eq_l, eq_r; /* eq_l: all items in left group are equal to S */ + char *L = base; /* left end of curren region */ + char *R = (char*)base + size*(nel-1); /* right end of current region */ + int chklim = 63; /* threshold of ordering element check */ + stack_node stack[32], *top = stack; /* 32 is enough for 32bit CPU */ + int mmkind, high, low; + + if (nel <= 1) return; /* need not to sort */ + mmprepare(base, size); + goto start; + + nxt: + if (stack == top) return; /* return if stack is empty */ + POP(L,R); + + for (;;) { + start: + if (L + size == R) { /* 2 elements */ + if ((*cmp)(L,R,d) > 0) mmswap(L,R); goto nxt; + } + + l = L; r = R; + t = (r - l + size) / size; /* number of elements */ + m = l + size * (t >> 1); /* calculate median value */ + + if (t >= 60) { + register char *m1; + register char *m3; + if (t >= 200) { + t = size*(t>>3); /* number of bytes in splitting 8 */ + { + register char *p1 = l + t; + register char *p2 = p1 + t; + register char *p3 = p2 + t; + m1 = med3(p1, p2, p3); + p1 = m + t; + p2 = p1 + t; + p3 = p2 + t; + m3 = med3(p1, p2, p3); + } + } + else { + t = size*(t>>2); /* number of bytes in splitting 4 */ + m1 = l + t; + m3 = m + t; + } + m = 
med3(m1, m, m3); + } + + if ((t = (*cmp)(l,m,d)) < 0) { /*3-5-?*/ + if ((t = (*cmp)(m,r,d)) < 0) { /*3-5-7*/ + if (chklim && nel >= chklim) { /* check if already ascending order */ + char *p; + chklim = 0; + for (p=l; p 0) goto fail; + goto nxt; + } + fail: goto loopA; /*3-5-7*/ + } + if (t > 0) { + if ((*cmp)(l,r,d) <= 0) {mmswap(m,r); goto loopA;} /*3-5-4*/ + mmrot3(r,m,l); goto loopA; /*3-5-2*/ + } + goto loopB; /*3-5-5*/ + } + + if (t > 0) { /*7-5-?*/ + if ((t = (*cmp)(m,r,d)) > 0) { /*7-5-3*/ + if (chklim && nel >= chklim) { /* check if already ascending order */ + char *p; + chklim = 0; + for (p=l; p 0) {mmswap(l,r); goto loopB;} /*5-5-3*/ + + /* determining splitting type in case 5-5-5 */ /*5-5-5*/ + for (;;) { + if ((l += size) == r) goto nxt; /*5-5-5*/ + if (l == m) continue; + if ((t = (*cmp)(l,m,d)) > 0) {mmswap(l,r); l = L; goto loopA;}/*575-5*/ + if (t < 0) {mmswap(L,l); l = L; goto loopB;} /*535-5*/ + } + + loopA: eq_l = 1; eq_r = 1; /* splitting type A */ /* left <= median < right */ + for (;;) { + for (;;) { + if ((l += size) == r) + {l -= size; if (l != m) mmswap(m,l); l -= size; goto fin;} + if (l == m) continue; + if ((t = (*cmp)(l,m,d)) > 0) {eq_r = 0; break;} + if (t < 0) eq_l = 0; + } + for (;;) { + if (l == (r -= size)) + {l -= size; if (l != m) mmswap(m,l); l -= size; goto fin;} + if (r == m) {m = l; break;} + if ((t = (*cmp)(r,m,d)) < 0) {eq_l = 0; break;} + if (t == 0) break; + } + mmswap(l,r); /* swap left and right */ + } + + loopB: eq_l = 1; eq_r = 1; /* splitting type B */ /* left < median <= right */ + for (;;) { + for (;;) { + if (l == (r -= size)) + {r += size; if (r != m) mmswap(r,m); r += size; goto fin;} + if (r == m) continue; + if ((t = (*cmp)(r,m,d)) < 0) {eq_l = 0; break;} + if (t > 0) eq_r = 0; + } + for (;;) { + if ((l += size) == r) + {r += size; if (r != m) mmswap(r,m); r += size; goto fin;} + if (l == m) {m = r; break;} + if ((t = (*cmp)(l,m,d)) > 0) {eq_r = 0; break;} + if (t == 0) break; + } + mmswap(l,r); /* swap left 
and right */ + } + + fin: + if (eq_l == 0) /* need to sort left side */ + if (eq_r == 0) /* need to sort right side */ + if (l-L < R-r) {PUSH(r,R); R = l;} /* sort left side first */ + else {PUSH(L,l); L = r;} /* sort right side first */ + else R = l; /* need to sort left side only */ + else if (eq_r == 0) L = r; /* need to sort right side only */ + else goto nxt; /* need not to sort both sides */ + } +} + +char * +ruby_strdup(const char *str) +{ + char *tmp; + int len = strlen(str) + 1; + + tmp = xmalloc(len); + memcpy(tmp, str, len); + + return tmp; +} + +char * +ruby_getcwd(void) +{ +#ifdef HAVE_GETCWD + int size = 200; + char *buf = xmalloc(size); + + while (!getcwd(buf, size)) { + if (errno != ERANGE) { + xfree(buf); + rb_sys_fail("getcwd"); + } + size *= 2; + buf = xrealloc(buf, size); + } +#else +# ifndef PATH_MAX +# define PATH_MAX 8192 +# endif + char *buf = xmalloc(PATH_MAX+1); + + if (!getwd(buf)) { + xfree(buf); + rb_sys_fail("getwd"); + } +#endif + return buf; +} + +/**************************************************************** + * + * The author of this software is David M. Gay. + * + * Copyright (c) 1991, 2000, 2001 by Lucent Technologies. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHOR NOR LUCENT MAKES ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + * + ***************************************************************/ + +/* Please send bug reports to David M. Gay (dmg at acm dot org, + * with " at " changed at "@" and " dot " changed to "."). 
*/ + +/* On a machine with IEEE extended-precision registers, it is + * necessary to specify double-precision (53-bit) rounding precision + * before invoking strtod or dtoa. If the machine uses (the equivalent + * of) Intel 80x87 arithmetic, the call + * _control87(PC_53, MCW_PC); + * does this with many compilers. Whether this or another call is + * appropriate depends on the compiler; for this to work, it may be + * necessary to #include "float.h" or another system-dependent header + * file. + */ + +/* strtod for IEEE-, VAX-, and IBM-arithmetic machines. + * + * This strtod returns a nearest machine number to the input decimal + * string (or sets errno to ERANGE). With IEEE arithmetic, ties are + * broken by the IEEE round-even rule. Otherwise ties are broken by + * biased rounding (add half and chop). + * + * Inspired loosely by William D. Clinger's paper "How to Read Floating + * Point Numbers Accurately" [Proc. ACM SIGPLAN '90, pp. 92-101]. + * + * Modifications: + * + * 1. We only require IEEE, IBM, or VAX double-precision + * arithmetic (not IEEE double-extended). + * 2. We get by with floating-point arithmetic in a case that + * Clinger missed -- when we're computing d * 10^n + * for a small integer d and the integer n is not too + * much larger than 22 (the maximum integer k for which + * we can represent 10^k exactly), we may be able to + * compute (d*10^k) * 10^(e-k) with just one roundoff. + * 3. Rather than a bit-at-a-time adjustment of the binary + * result in the hard case, we use floating-point + * arithmetic to determine the adjustment to within + * one bit; only in really hard cases do we need to + * compute a second residual. + * 4. Because of 3., we don't need a large table of powers of 10 + * for ten-to-e (just some small tables, e.g. of 10^k + * for 0 <= k <= 22). + */ + +/* + * #define IEEE_LITTLE_ENDIAN for IEEE-arithmetic machines where the least + * significant byte has the lowest address. 
+ * #define IEEE_BIG_ENDIAN for IEEE-arithmetic machines where the most + * significant byte has the lowest address. + * #define Long int on machines with 32-bit ints and 64-bit longs. + * #define IBM for IBM mainframe-style floating-point arithmetic. + * #define VAX for VAX-style floating-point arithmetic (D_floating). + * #define No_leftright to omit left-right logic in fast floating-point + * computation of dtoa. + * #define Honor_FLT_ROUNDS if FLT_ROUNDS can assume the values 2 or 3 + * and strtod and dtoa should round accordingly. + * #define Check_FLT_ROUNDS if FLT_ROUNDS can assume the values 2 or 3 + * and Honor_FLT_ROUNDS is not #defined. + * #define RND_PRODQUOT to use rnd_prod and rnd_quot (assembly routines + * that use extended-precision instructions to compute rounded + * products and quotients) with IBM. + * #define ROUND_BIASED for IEEE-format with biased rounding. + * #define Inaccurate_Divide for IEEE-format with correctly rounded + * products but inaccurate quotients, e.g., for Intel i860. + * #define NO_LONG_LONG on machines that do not have a "long long" + * integer type (of >= 64 bits). On such machines, you can + * #define Just_16 to store 16 bits per 32-bit Long when doing + * high-precision integer arithmetic. Whether this speeds things + * up or slows things down depends on the machine and the number + * being converted. If long long is available and the name is + * something other than "long long", #define Llong to be the name, + * and if "unsigned Llong" does not work as an unsigned version of + * Llong, #define ULLong to be the corresponding unsigned type. + * #define KR_headers for old-style C function headers. + * #define Bad_float_h if your system lacks a float.h or if it does not + * define some or all of DBL_DIG, DBL_MAX_10_EXP, DBL_MAX_EXP, + * FLT_RADIX, FLT_ROUNDS, and DBL_MAX. 
+ * #define MALLOC your_malloc, where your_malloc(n) acts like malloc(n) + * if memory is available and otherwise does something you deem + * appropriate. If MALLOC is undefined, malloc will be invoked + * directly -- and assumed always to succeed. + * #define Omit_Private_Memory to omit logic (added Jan. 1998) for making + * memory allocations from a private pool of memory when possible. + * When used, the private pool is PRIVATE_MEM bytes long: 2304 bytes, + * unless #defined to be a different length. This default length + * suffices to get rid of MALLOC calls except for unusual cases, + * such as decimal-to-binary conversion of a very long string of + * digits. The longest string dtoa can return is about 751 bytes + * long. For conversions by strtod of strings of 800 digits and + * all dtoa conversions in single-threaded executions with 8-byte + * pointers, PRIVATE_MEM >= 7400 appears to suffice; with 4-byte + * pointers, PRIVATE_MEM >= 7112 appears adequate. + * #define INFNAN_CHECK on IEEE systems to cause strtod to check for + * Infinity and NaN (case insensitively). On some systems (e.g., + * some HP systems), it may be necessary to #define NAN_WORD0 + * appropriately -- to the most significant word of a quiet NaN. + * (On HP Series 700/800 machines, -DNAN_WORD0=0x7ff40000 works.) + * When INFNAN_CHECK is #defined and No_Hex_NaN is not #defined, + * strtod also accepts (case insensitively) strings of the form + * NaN(x), where x is a string of hexadecimal digits and spaces; + * if there is only one string of hexadecimal digits, it is taken + * for the 52 fraction bits of the resulting NaN; if there are two + * or more strings of hex digits, the first is for the high 20 bits, + * the second and subsequent for the low 32 bits, with intervening + * white space ignored; but if this results in none of the 52 + * fraction bits being on (an IEEE Infinity symbol), then NAN_WORD0 + * and NAN_WORD1 are used instead. 
+ * #define MULTIPLE_THREADS if the system offers preemptively scheduled + * multiple threads. In this case, you must provide (or suitably + * #define) two locks, acquired by ACQUIRE_DTOA_LOCK(n) and freed + * by FREE_DTOA_LOCK(n) for n = 0 or 1. (The second lock, accessed + * in pow5mult, ensures lazy evaluation of only one copy of high + * powers of 5; omitting this lock would introduce a small + * probability of wasting memory, but would otherwise be harmless.) + * You must also invoke freedtoa(s) to free the value s returned by + * dtoa. You may do so whether or not MULTIPLE_THREADS is #defined. + * #define NO_IEEE_Scale to disable new (Feb. 1997) logic in strtod that + * avoids underflows on inputs whose result does not underflow. + * If you #define NO_IEEE_Scale on a machine that uses IEEE-format + * floating-point numbers and flushes underflows to zero rather + * than implementing gradual underflow, then you must also #define + * Sudden_Underflow. + * #define YES_ALIAS to permit aliasing certain double values with + * arrays of ULongs. This leads to slightly better code with + * some compilers and was always used prior to 19990916, but it + * is not strictly legal and can cause trouble with aggressively + * optimizing compilers (e.g., gcc 2.95.1 under -O2). + * #define USE_LOCALE to use the current locale's decimal_point value. + * #define SET_INEXACT if IEEE arithmetic is being used and extra + * computation should be done to set the inexact flag when the + * result is inexact and avoid setting inexact when the result + * is exact. In this case, dtoa.c must be compiled in + * an environment, perhaps provided by #include "dtoa.c" in a + * suitable wrapper, that defines two functions, + * int get_inexact(void); + * void clear_inexact(void); + * such that get_inexact() returns a nonzero value if the + * inexact bit is already set, and clear_inexact() sets the + * inexact bit to 0. 
When SET_INEXACT is #defined, strtod
+ *	also does extra computations to set the underflow and overflow
+ *	flags when appropriate (i.e., when the result is tiny and
+ *	inexact or when it is a numeric value rounded to +-infinity).
+ * #define NO_ERRNO if strtod should not assign errno = ERANGE when
+ *	the result overflows to +-Infinity or underflows to 0.
+ */
+
+/* Default format selection: IEEE, with the word order taken from the
+ * build's WORDS_BIGENDIAN setting. */
+#ifdef WORDS_BIGENDIAN
+#define IEEE_BIG_ENDIAN
+#else
+#define IEEE_LITTLE_ENDIAN
+#endif
+
+/* VAX builds use the VAX format instead of either IEEE variant. */
+#ifdef __vax__
+#define VAX
+#undef IEEE_BIG_ENDIAN
+#undef IEEE_LITTLE_ENDIAN
+#endif
+
+/* ARM without __VFP_FP__ is forced to the big-endian word order regardless
+ * of WORDS_BIGENDIAN (presumably the legacy FPA double layout -- confirm). */
+#if defined(__arm__) && !defined(__VFP_FP__)
+#define IEEE_BIG_ENDIAN
+#undef IEEE_LITTLE_ENDIAN
+#endif
+
+/* Long/ULong: a 4-byte integer type, int if SIZEOF_INT is 4, else long if
+ * SIZEOF_LONG is 4.  If neither test holds they are left undefined. */
+#undef Long
+#undef ULong
+
+#if SIZEOF_INT == 4
+#define Long int
+#define ULong unsigned int
+#elif SIZEOF_LONG == 4
+#define Long long int
+#define ULong unsigned long int
+#endif
+
+/* Name of the long long type, taken from the build's LONG_LONG macro. */
+#if HAVE_LONG_LONG
+#define Llong LONG_LONG
+#endif
+
+/* DEBUG builds: Bug(x) prints the message x to stderr and exits with 1. */
+#ifdef DEBUG
+#include "stdio.h"
+#define Bug(x) {fprintf(stderr, "%s\n", x); exit(1);}
+#endif
+
+#include "stdlib.h"
+#include "string.h"
+
+#ifdef USE_LOCALE
+#include "locale.h"
+#endif
+
+/* Allocator hook: a build-supplied MALLOC is only declared here; otherwise
+ * MALLOC is plain malloc. */
+#ifdef MALLOC
+extern void *MALLOC(size_t);
+#else
+#define MALLOC malloc
+#endif
+
+/* Private allocation pool: PRIVATE_mem is PRIVATE_MEM bytes rounded up to a
+ * whole number of doubles, private_mem is the static pool itself, and
+ * pmem_next starts out pointing at its beginning. */
+#ifndef Omit_Private_Memory
+#ifndef PRIVATE_MEM
+#define PRIVATE_MEM 2304
+#endif
+#define PRIVATE_mem ((PRIVATE_MEM+sizeof(double)-1)/sizeof(double))
+static double private_mem[PRIVATE_mem], *pmem_next = private_mem;
+#endif
+
+/* IEEE_Arith is (re)defined whenever either IEEE word order was selected. */
+#undef IEEE_Arith
+#undef Avoid_Underflow
+#ifdef IEEE_BIG_ENDIAN
+#define IEEE_Arith
+#endif
+#ifdef IEEE_LITTLE_ENDIAN
+#define IEEE_Arith
+#endif
+
+/* With Bad_float_h the float.h limits are supplied by hand for the selected
+ * format; otherwise float.h itself is included. */
+#ifdef Bad_float_h
+
+#ifdef IEEE_Arith
+#define DBL_DIG 15
+#define DBL_MAX_10_EXP 308
+#define DBL_MAX_EXP 1024
+#define FLT_RADIX 2
+#endif /*IEEE_Arith*/
+
+#ifdef IBM
+#define DBL_DIG 16
+#define DBL_MAX_10_EXP 75
+#define DBL_MAX_EXP 63
+#define FLT_RADIX 16
+#define DBL_MAX 7.2370055773322621e+75
+#endif
+
+#ifdef VAX
+#define DBL_DIG 16
+#define DBL_MAX_10_EXP 38
+#define DBL_MAX_EXP 127
+#define FLT_RADIX 2
+#define DBL_MAX 1.7014118346046923e+38
+#endif
+
+#ifndef LONG_MAX
+#define LONG_MAX 2147483647
+#endif
+
+#else /* ifndef Bad_float_h */
+#include "float.h"
+#endif /* Bad_float_h */
+
+#ifndef __MATH_H__
+#include "math.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Configuration check: when the number of selected formats is not exactly
+ * one, the plain-text line below becomes part of the translation unit and
+ * stops the build. */
+#if defined(IEEE_LITTLE_ENDIAN) + defined(IEEE_BIG_ENDIAN) + defined(VAX) + defined(IBM) != 1
+Exactly one of IEEE_LITTLE_ENDIAN, IEEE_BIG_ENDIAN, VAX, or IBM should be defined.
+#endif
+
+/* A double overlaid with two ULong words. */
+typedef union { double d; ULong L[2]; } U;
+
+/* double_u is the working type for doubles whose words are inspected:
+ *   dval(x)  - the value as a double
+ *   word0(x) - the word at index 1 under IEEE_LITTLE_ENDIAN, index 0 otherwise
+ *   word1(x) - the other word
+ * With YES_ALIAS it is a plain double accessed through ULong pointer casts;
+ * otherwise it is the union U.  The macro arguments are not parenthesized,
+ * so pass a simple lvalue. */
+#ifdef YES_ALIAS
+typedef double double_u;
+#  define dval(x) x
+#  ifdef IEEE_LITTLE_ENDIAN
+#    define word0(x) (((ULong *)&x)[1])
+#    define word1(x) (((ULong *)&x)[0])
+#  else
+#    define word0(x) (((ULong *)&x)[0])
+#    define word1(x) (((ULong *)&x)[1])
+#  endif
+#else
+typedef U double_u;
+#  ifdef IEEE_LITTLE_ENDIAN
+#    define word0(x) (x.L[1])
+#    define word1(x) (x.L[0])
+#  else
+#    define word0(x) (x.L[0])
+#    define word1(x) (x.L[1])
+#  endif
+#  define dval(x) (x.d)
+#endif
+
+/* The following definition of Storeinc is appropriate for MIPS processors.
+ * An alternative that might be better on some machines is
+ * #define Storeinc(a,b,c) (*a++ = b << 16 | c & 0xffff)
+ */
+/* Storeinc(a,b,c): write b and c, each converted to unsigned short, into the
+ * two unsigned-short slots at a, then advance a.  Which slot receives b
+ * depends on the selected format.  The condition adds up defined() results,
+ * so it holds when any one of the three macros is defined. */
+#if defined(IEEE_LITTLE_ENDIAN) + defined(VAX) + defined(__arm__)
+#define Storeinc(a,b,c) (((unsigned short *)a)[1] = (unsigned short)b, \
+((unsigned short *)a)[0] = (unsigned short)c, a++)
+#else
+#define Storeinc(a,b,c) (((unsigned short *)a)[0] = (unsigned short)b, \
+((unsigned short *)a)[1] = (unsigned short)c, a++)
+#endif
+
+/* #define P DBL_MANT_DIG */
+/* Ten_pmax = floor(P*log(2)/log(5)) */
+/* Bletch = (highest power of 2 < DBL_MAX_10_EXP) / 16 */
+/* Quick_max = floor((P-1)*log(FLT_RADIX)/log(10) - 1) */
+/* Int_max = floor(P*log(FLT_RADIX)/log(10) - 1) */
+
+/* Per-format constants.  The same set of names is defined three times, once
+ * each for IEEE, IBM and VAX; the formulas above say how P, Ten_pmax,
+ * Bletch, Quick_max and Int_max are derived. */
+#ifdef IEEE_Arith
+#define Exp_shift  20
+#define Exp_shift1 20
+#define Exp_msk1    0x100000
+#define Exp_msk11   0x100000
+#define Exp_mask  0x7ff00000
+#define P 53
+#define Bias 1023
+#define Emin (-1022)
+#define Exp_1  0x3ff00000
+#define Exp_11 0x3ff00000
+#define Ebits 11
+#define Frac_mask  0xfffff
+#define Frac_mask1 0xfffff
+#define Ten_pmax 22
+#define Bletch 0x10
+#define Bndry_mask  0xfffff
+#define Bndry_mask1 0xfffff
+#define LSB 1
+#define Sign_bit 0x80000000
+#define Log2P 1
+#define Tiny0 0
+#define Tiny1 1
+#define Quick_max 14
+#define Int_max 14
+/* Avoid_Underflow is enabled for IEEE unless NO_IEEE_Scale is defined. */
+#ifndef NO_IEEE_Scale
+#define Avoid_Underflow
+#ifdef Flush_Denorm	/* debugging option */
+#undef Sudden_Underflow
+#endif
+#endif
+
+/* Flt_Rounds: FLT_ROUNDS when float.h provides it, otherwise 1. */
+#ifndef Flt_Rounds
+#ifdef FLT_ROUNDS
+#define Flt_Rounds FLT_ROUNDS
+#else
+#define Flt_Rounds 1
+#endif
+#endif /*Flt_Rounds*/
+
+/* Rounding names the rounding mode in use: an identifier called `rounding`
+ * when Honor_FLT_ROUNDS is set (which also forces Check_FLT_ROUNDS on),
+ * otherwise the constant Flt_Rounds. */
+#ifdef Honor_FLT_ROUNDS
+#define Rounding rounding
+#undef Check_FLT_ROUNDS
+#define Check_FLT_ROUNDS
+#else
+#define Rounding Flt_Rounds
+#endif
+
+#else /* ifndef IEEE_Arith */
+/* Non-IEEE formats: the rounding-mode and inexact options are switched off
+ * and Sudden_Underflow is always on. */
+#undef Check_FLT_ROUNDS
+#undef Honor_FLT_ROUNDS
+#undef SET_INEXACT
+#undef  Sudden_Underflow
+#define Sudden_Underflow
+#ifdef IBM
+#undef Flt_Rounds
+#define Flt_Rounds 0
+#define Exp_shift  24
+#define Exp_shift1 24
+#define Exp_msk1   0x1000000
+#define Exp_msk11  0x1000000
+#define Exp_mask  0x7f000000
+#define P 14
+#define Bias 65
+#define Exp_1  0x41000000
+#define Exp_11 0x41000000
+#define Ebits 8	/* exponent has 7 bits, but 8 is the right value in b2d */
+#define Frac_mask  0xffffff
+#define Frac_mask1 0xffffff
+#define Bletch 4
+#define Ten_pmax 22
+#define Bndry_mask  0xefffff
+#define Bndry_mask1 0xffffff
+#define LSB 1
+#define Sign_bit 0x80000000
+#define Log2P 4
+#define Tiny0 0x100000
+#define Tiny1 0
+#define Quick_max 14
+#define Int_max 15
+#else /* VAX */
+#undef Flt_Rounds
+#define Flt_Rounds 1
+#define Exp_shift  23
+#define Exp_shift1 7
+#define Exp_msk1    0x80
+#define Exp_msk11   0x800000
+#define Exp_mask  0x7f80
+#define P 56
+#define Bias 129
+#define Exp_1  0x40800000
+#define Exp_11 0x4080
+#define Ebits 8
+#define Frac_mask  0x7fffff
+#define Frac_mask1 0xffff007f
+#define Ten_pmax 24
+#define Bletch 2
+#define Bndry_mask  0xffff007f
+#define Bndry_mask1 0xffff007f
+#define LSB 0x10000
+#define Sign_bit 0x8000
+#define Log2P 1
+#define Tiny0 0x80
+#define Tiny1 0
+#define Quick_max 15
+#define Int_max 15
+#endif /* IBM, VAX */
+#endif /* IEEE_Arith */
+
+#ifndef IEEE_Arith
+#define ROUND_BIASED
+#endif
+
+/* rounded_product/rounded_quotient update `a` in place: through the external
+ * rnd_prod/rnd_quot routines when RND_PRODQUOT is defined, otherwise with
+ * the ordinary *= and /= operators. */
+#ifdef RND_PRODQUOT
+#define rounded_product(a,b) a = rnd_prod(a, b)
+#define rounded_quotient(a,b) a = rnd_quot(a, b)
+extern double rnd_prod(double, double), rnd_quot(double, double);
+#else
+#define rounded_product(a,b) a *= b
+#define rounded_quotient(a,b) a /= b
+#endif
+
+/* Big0/Big1: a word pair built from the fraction mask and the exponent
+ * DBL_MAX_EXP+Bias-1 (presumably word0/word1 of the largest finite double
+ * -- confirm at the use sites). */
+#define Big0 (Frac_mask1 | Exp_msk1*(DBL_MAX_EXP+Bias-1))
+#define Big1 0xffffffff
+
+/* Pack_32 is on by default; it is withdrawn below only when both
+ * NO_LONG_LONG and Just_16 are defined. */
+#ifndef Pack_32
+#define Pack_32
+#endif
+
+/* Mask selecting the low 32 bits of a wider value. */
+#define FFFFFFFF 0xffffffffUL
+
+#ifdef NO_LONG_LONG
+#undef ULLong
+#ifdef Just_16
+#undef Pack_32
+/* When Pack_32 is not defined, we store 16 bits per 32-bit Long.
+ * This makes some inner loops simpler and sometimes saves work
+ * during multiplications, but it often seems to make things slightly
+ * slower.  Hence the default is now to store 32 bits per Long.
+ */ +#endif +#else /* long long available */ +#ifndef Llong +#define Llong long long +#endif +#ifndef ULLong +#define ULLong unsigned Llong +#endif +#endif /* NO_LONG_LONG */ + +#define MULTIPLE_THREADS 1 + +#ifndef MULTIPLE_THREADS +#define ACQUIRE_DTOA_LOCK(n) /*nothing*/ +#define FREE_DTOA_LOCK(n) /*nothing*/ +#else +#define ACQUIRE_DTOA_LOCK(n) /*unused right now*/ +#define FREE_DTOA_LOCK(n) /*unused right now*/ +#endif + +#define Kmax 15 + +struct Bigint { + struct Bigint *next; + int k, maxwds, sign, wds; + ULong x[1]; +}; + +typedef struct Bigint Bigint; + +static Bigint *freelist[Kmax+1]; + +static Bigint * +Balloc(int k) +{ + int x; + Bigint *rv; +#ifndef Omit_Private_Memory + unsigned int len; +#endif + + ACQUIRE_DTOA_LOCK(0); + if ((rv = freelist[k]) != 0) { + freelist[k] = rv->next; + } + else { + x = 1 << k; +#ifdef Omit_Private_Memory + rv = (Bigint *)MALLOC(sizeof(Bigint) + (x-1)*sizeof(ULong)); +#else + len = (sizeof(Bigint) + (x-1)*sizeof(ULong) + sizeof(double) - 1) + /sizeof(double); + if (pmem_next - private_mem + len <= PRIVATE_mem) { + rv = (Bigint*)pmem_next; + pmem_next += len; + } + else + rv = (Bigint*)MALLOC(len*sizeof(double)); +#endif + rv->k = k; + rv->maxwds = x; + } + FREE_DTOA_LOCK(0); + rv->sign = rv->wds = 0; + return rv; +} + +static void +Bfree(Bigint *v) +{ + if (v) { + ACQUIRE_DTOA_LOCK(0); + v->next = freelist[v->k]; + freelist[v->k] = v; + FREE_DTOA_LOCK(0); + } +} + +#define Bcopy(x,y) memcpy((char *)&x->sign, (char *)&y->sign, \ +y->wds*sizeof(Long) + 2*sizeof(int)) + +static Bigint * +multadd(Bigint *b, int m, int a) /* multiply by m and add a */ +{ + int i, wds; +#ifdef ULLong + ULong *x; + ULLong carry, y; +#else + ULong carry, *x, y; +#ifdef Pack_32 + ULong xi, z; +#endif +#endif + Bigint *b1; + + wds = b->wds; + x = b->x; + i = 0; + carry = a; + do { +#ifdef ULLong + y = *x * (ULLong)m + carry; + carry = y >> 32; + *x++ = y & FFFFFFFF; +#else +#ifdef Pack_32 + xi = *x; + y = (xi & 0xffff) * m + carry; + z = (xi >> 
16) * m + (y >> 16); + carry = z >> 16; + *x++ = (z << 16) + (y & 0xffff); +#else + y = *x * m + carry; + carry = y >> 16; + *x++ = y & 0xffff; +#endif +#endif + } while (++i < wds); + if (carry) { + if (wds >= b->maxwds) { + b1 = Balloc(b->k+1); + Bcopy(b1, b); + Bfree(b); + b = b1; + } + b->x[wds++] = carry; + b->wds = wds; + } + return b; +} + +static Bigint * +s2b(const char *s, int nd0, int nd, ULong y9) +{ + Bigint *b; + int i, k; + Long x, y; + + x = (nd + 8) / 9; + for (k = 0, y = 1; x > y; y <<= 1, k++) ; +#ifdef Pack_32 + b = Balloc(k); + b->x[0] = y9; + b->wds = 1; +#else + b = Balloc(k+1); + b->x[0] = y9 & 0xffff; + b->wds = (b->x[1] = y9 >> 16) ? 2 : 1; +#endif + + i = 9; + if (9 < nd0) { + s += 9; + do { + b = multadd(b, 10, *s++ - '0'); + } while (++i < nd0); + s++; + } + else + s += 10; + for (; i < nd; i++) + b = multadd(b, 10, *s++ - '0'); + return b; +} + +static int +hi0bits(register ULong x) +{ + register int k = 0; + + if (!(x & 0xffff0000)) { + k = 16; + x <<= 16; + } + if (!(x & 0xff000000)) { + k += 8; + x <<= 8; + } + if (!(x & 0xf0000000)) { + k += 4; + x <<= 4; + } + if (!(x & 0xc0000000)) { + k += 2; + x <<= 2; + } + if (!(x & 0x80000000)) { + k++; + if (!(x & 0x40000000)) + return 32; + } + return k; +} + +static int +lo0bits(ULong *y) +{ + register int k; + register ULong x = *y; + + if (x & 7) { + if (x & 1) + return 0; + if (x & 2) { + *y = x >> 1; + return 1; + } + *y = x >> 2; + return 2; + } + k = 0; + if (!(x & 0xffff)) { + k = 16; + x >>= 16; + } + if (!(x & 0xff)) { + k += 8; + x >>= 8; + } + if (!(x & 0xf)) { + k += 4; + x >>= 4; + } + if (!(x & 0x3)) { + k += 2; + x >>= 2; + } + if (!(x & 1)) { + k++; + x >>= 1; + if (!x) + return 32; + } + *y = x; + return k; +} + +static Bigint * +i2b(int i) +{ + Bigint *b; + + b = Balloc(1); + b->x[0] = i; + b->wds = 1; + return b; +} + +static Bigint * +mult(Bigint *a, Bigint *b) +{ + Bigint *c; + int k, wa, wb, wc; + ULong *x, *xa, *xae, *xb, *xbe, *xc, *xc0; + ULong y; +#ifdef ULLong + 
ULLong carry, z; +#else + ULong carry, z; +#ifdef Pack_32 + ULong z2; +#endif +#endif + + if (a->wds < b->wds) { + c = a; + a = b; + b = c; + } + k = a->k; + wa = a->wds; + wb = b->wds; + wc = wa + wb; + if (wc > a->maxwds) + k++; + c = Balloc(k); + for (x = c->x, xa = x + wc; x < xa; x++) + *x = 0; + xa = a->x; + xae = xa + wa; + xb = b->x; + xbe = xb + wb; + xc0 = c->x; +#ifdef ULLong + for (; xb < xbe; xc0++) { + if ((y = *xb++) != 0) { + x = xa; + xc = xc0; + carry = 0; + do { + z = *x++ * (ULLong)y + *xc + carry; + carry = z >> 32; + *xc++ = z & FFFFFFFF; + } while (x < xae); + *xc = carry; + } + } +#else +#ifdef Pack_32 + for (; xb < xbe; xb++, xc0++) { + if (y = *xb & 0xffff) { + x = xa; + xc = xc0; + carry = 0; + do { + z = (*x & 0xffff) * y + (*xc & 0xffff) + carry; + carry = z >> 16; + z2 = (*x++ >> 16) * y + (*xc >> 16) + carry; + carry = z2 >> 16; + Storeinc(xc, z2, z); + } while (x < xae); + *xc = carry; + } + if (y = *xb >> 16) { + x = xa; + xc = xc0; + carry = 0; + z2 = *xc; + do { + z = (*x & 0xffff) * y + (*xc >> 16) + carry; + carry = z >> 16; + Storeinc(xc, z, z2); + z2 = (*x++ >> 16) * y + (*xc & 0xffff) + carry; + carry = z2 >> 16; + } while (x < xae); + *xc = z2; + } + } +#else + for (; xb < xbe; xc0++) { + if (y = *xb++) { + x = xa; + xc = xc0; + carry = 0; + do { + z = *x++ * y + *xc + carry; + carry = z >> 16; + *xc++ = z & 0xffff; + } while (x < xae); + *xc = carry; + } + } +#endif +#endif + for (xc0 = c->x, xc = xc0 + wc; wc > 0 && !*--xc; --wc) ; + c->wds = wc; + return c; +} + +static Bigint *p5s; + +static Bigint * +pow5mult(Bigint *b, int k) +{ + Bigint *b1, *p5, *p51; + int i; + static int p05[3] = { 5, 25, 125 }; + + if ((i = k & 3) != 0) + b = multadd(b, p05[i-1], 0); + + if (!(k >>= 2)) + return b; + if (!(p5 = p5s)) { + /* first time */ +#ifdef MULTIPLE_THREADS + ACQUIRE_DTOA_LOCK(1); + if (!(p5 = p5s)) { + p5 = p5s = i2b(625); + p5->next = 0; + } + FREE_DTOA_LOCK(1); +#else + p5 = p5s = i2b(625); + p5->next = 0; +#endif + } + 
for (;;) { + if (k & 1) { + b1 = mult(b, p5); + Bfree(b); + b = b1; + } + if (!(k >>= 1)) + break; + if (!(p51 = p5->next)) { +#ifdef MULTIPLE_THREADS + ACQUIRE_DTOA_LOCK(1); + if (!(p51 = p5->next)) { + p51 = p5->next = mult(p5,p5); + p51->next = 0; + } + FREE_DTOA_LOCK(1); +#else + p51 = p5->next = mult(p5,p5); + p51->next = 0; +#endif + } + p5 = p51; + } + return b; +} + +static Bigint * +lshift(Bigint *b, int k) +{ + int i, k1, n, n1; + Bigint *b1; + ULong *x, *x1, *xe, z; + +#ifdef Pack_32 + n = k >> 5; +#else + n = k >> 4; +#endif + k1 = b->k; + n1 = n + b->wds + 1; + for (i = b->maxwds; n1 > i; i <<= 1) + k1++; + b1 = Balloc(k1); + x1 = b1->x; + for (i = 0; i < n; i++) + *x1++ = 0; + x = b->x; + xe = x + b->wds; +#ifdef Pack_32 + if (k &= 0x1f) { + k1 = 32 - k; + z = 0; + do { + *x1++ = *x << k | z; + z = *x++ >> k1; + } while (x < xe); + if ((*x1 = z) != 0) + ++n1; + } +#else + if (k &= 0xf) { + k1 = 16 - k; + z = 0; + do { + *x1++ = *x << k & 0xffff | z; + z = *x++ >> k1; + } while (x < xe); + if (*x1 = z) + ++n1; + } +#endif + else + do { + *x1++ = *x++; + } while (x < xe); + b1->wds = n1 - 1; + Bfree(b); + return b1; +} + +static int +cmp(Bigint *a, Bigint *b) +{ + ULong *xa, *xa0, *xb, *xb0; + int i, j; + + i = a->wds; + j = b->wds; +#ifdef DEBUG + if (i > 1 && !a->x[i-1]) + Bug("cmp called with a->x[a->wds-1] == 0"); + if (j > 1 && !b->x[j-1]) + Bug("cmp called with b->x[b->wds-1] == 0"); +#endif + if (i -= j) + return i; + xa0 = a->x; + xa = xa0 + j; + xb0 = b->x; + xb = xb0 + j; + for (;;) { + if (*--xa != *--xb) + return *xa < *xb ? 
-1 : 1; + if (xa <= xa0) + break; + } + return 0; +} + +static Bigint * +diff(Bigint *a, Bigint *b) +{ + Bigint *c; + int i, wa, wb; + ULong *xa, *xae, *xb, *xbe, *xc; +#ifdef ULLong + ULLong borrow, y; +#else + ULong borrow, y; +#ifdef Pack_32 + ULong z; +#endif +#endif + + i = cmp(a,b); + if (!i) { + c = Balloc(0); + c->wds = 1; + c->x[0] = 0; + return c; + } + if (i < 0) { + c = a; + a = b; + b = c; + i = 1; + } + else + i = 0; + c = Balloc(a->k); + c->sign = i; + wa = a->wds; + xa = a->x; + xae = xa + wa; + wb = b->wds; + xb = b->x; + xbe = xb + wb; + xc = c->x; + borrow = 0; +#ifdef ULLong + do { + y = (ULLong)*xa++ - *xb++ - borrow; + borrow = y >> 32 & (ULong)1; + *xc++ = y & FFFFFFFF; + } while (xb < xbe); + while (xa < xae) { + y = *xa++ - borrow; + borrow = y >> 32 & (ULong)1; + *xc++ = y & FFFFFFFF; + } +#else +#ifdef Pack_32 + do { + y = (*xa & 0xffff) - (*xb & 0xffff) - borrow; + borrow = (y & 0x10000) >> 16; + z = (*xa++ >> 16) - (*xb++ >> 16) - borrow; + borrow = (z & 0x10000) >> 16; + Storeinc(xc, z, y); + } while (xb < xbe); + while (xa < xae) { + y = (*xa & 0xffff) - borrow; + borrow = (y & 0x10000) >> 16; + z = (*xa++ >> 16) - borrow; + borrow = (z & 0x10000) >> 16; + Storeinc(xc, z, y); + } +#else + do { + y = *xa++ - *xb++ - borrow; + borrow = (y & 0x10000) >> 16; + *xc++ = y & 0xffff; + } while (xb < xbe); + while (xa < xae) { + y = *xa++ - borrow; + borrow = (y & 0x10000) >> 16; + *xc++ = y & 0xffff; + } +#endif +#endif + while (!*--xc) + wa--; + c->wds = wa; + return c; +} + +static double +ulp(double x_) +{ + register Long L; + double_u x, a; + dval(x) = x_; + + L = (word0(x) & Exp_mask) - (P-1)*Exp_msk1; +#ifndef Avoid_Underflow +#ifndef Sudden_Underflow + if (L > 0) { +#endif +#endif +#ifdef IBM + L |= Exp_msk1 >> 4; +#endif + word0(a) = L; + word1(a) = 0; +#ifndef Avoid_Underflow +#ifndef Sudden_Underflow + } + else { + L = -L >> Exp_shift; + if (L < Exp_shift) { + word0(a) = 0x80000 >> L; + word1(a) = 0; + } + else { + word0(a) = 0; + L 
-= Exp_shift; + word1(a) = L >= 31 ? 1 : 1 << 31 - L; + } + } +#endif +#endif + return dval(a); +} + +static double +b2d(Bigint *a, int *e) +{ + ULong *xa, *xa0, w, y, z; + int k; + double_u d; +#ifdef VAX + ULong d0, d1; +#else +#define d0 word0(d) +#define d1 word1(d) +#endif + + xa0 = a->x; + xa = xa0 + a->wds; + y = *--xa; +#ifdef DEBUG + if (!y) Bug("zero y in b2d"); +#endif + k = hi0bits(y); + *e = 32 - k; +#ifdef Pack_32 + if (k < Ebits) { + d0 = Exp_1 | y >> (Ebits - k); + w = xa > xa0 ? *--xa : 0; + d1 = y << ((32-Ebits) + k) | w >> (Ebits - k); + goto ret_d; + } + z = xa > xa0 ? *--xa : 0; + if (k -= Ebits) { + d0 = Exp_1 | y << k | z >> (32 - k); + y = xa > xa0 ? *--xa : 0; + d1 = z << k | y >> (32 - k); + } + else { + d0 = Exp_1 | y; + d1 = z; + } +#else + if (k < Ebits + 16) { + z = xa > xa0 ? *--xa : 0; + d0 = Exp_1 | y << k - Ebits | z >> Ebits + 16 - k; + w = xa > xa0 ? *--xa : 0; + y = xa > xa0 ? *--xa : 0; + d1 = z << k + 16 - Ebits | w << k - Ebits | y >> 16 + Ebits - k; + goto ret_d; + } + z = xa > xa0 ? *--xa : 0; + w = xa > xa0 ? *--xa : 0; + k -= Ebits + 16; + d0 = Exp_1 | y << k + 16 | z << k | w >> 16 - k; + y = xa > xa0 ? 
*--xa : 0; + d1 = w << k + 16 | y << k; +#endif +ret_d: +#ifdef VAX + word0(d) = d0 >> 16 | d0 << 16; + word1(d) = d1 >> 16 | d1 << 16; +#else +#undef d0 +#undef d1 +#endif + return dval(d); +} + +static Bigint * +d2b(double d_, int *e, int *bits) +{ + double_u d; + Bigint *b; + int de, k; + ULong *x, y, z; +#ifndef Sudden_Underflow + int i; +#endif +#ifdef VAX + ULong d0, d1; +#endif + dval(d) = d_; +#ifdef VAX + d0 = word0(d) >> 16 | word0(d) << 16; + d1 = word1(d) >> 16 | word1(d) << 16; +#else +#define d0 word0(d) +#define d1 word1(d) +#endif + +#ifdef Pack_32 + b = Balloc(1); +#else + b = Balloc(2); +#endif + x = b->x; + + z = d0 & Frac_mask; + d0 &= 0x7fffffff; /* clear sign bit, which we ignore */ +#ifdef Sudden_Underflow + de = (int)(d0 >> Exp_shift); +#ifndef IBM + z |= Exp_msk11; +#endif +#else + if ((de = (int)(d0 >> Exp_shift)) != 0) + z |= Exp_msk1; +#endif +#ifdef Pack_32 + if ((y = d1) != 0) { + if ((k = lo0bits(&y)) != 0) { + x[0] = y | z << (32 - k); + z >>= k; + } + else + x[0] = y; +#ifndef Sudden_Underflow + i = +#endif + b->wds = (x[1] = z) ? 
2 : 1;
+    }
+    else {
+        /* low word is zero: everything lives in z */
+#ifdef DEBUG
+        if (!z)
+            Bug("Zero passed to d2b");
+#endif
+        k = lo0bits(&z);
+        x[0] = z;
+#ifndef Sudden_Underflow
+        i =
+#endif
+        b->wds = 1;
+        k += 32;        /* account for the skipped all-zero low word */
+    }
+#else
+    if (y = d1) {
+        if (k = lo0bits(&y))
+            if (k >= 16) {
+                x[0] = y | z << 32 - k & 0xffff;
+                x[1] = z >> k - 16 & 0xffff;
+                x[2] = z >> k;
+                i = 2;
+            }
+            else {
+                x[0] = y & 0xffff;
+                x[1] = y >> 16 | z << 16 - k & 0xffff;
+                x[2] = z >> k & 0xffff;
+                x[3] = z >> k+16;
+                i = 3;
+            }
+        else {
+            x[0] = y & 0xffff;
+            x[1] = y >> 16;
+            x[2] = z & 0xffff;
+            x[3] = z >> 16;
+            i = 3;
+        }
+    }
+    else {
+#ifdef DEBUG
+        if (!z)
+            Bug("Zero passed to d2b");
+#endif
+        k = lo0bits(&z);
+        if (k >= 16) {
+            x[0] = z;
+            i = 0;
+        }
+        else {
+            x[0] = z & 0xffff;
+            x[1] = z >> 16;
+            i = 1;
+        }
+        k += 32;
+    }
+    while (!x[i])       /* drop zero words from the top */
+        --i;
+    b->wds = i + 1;
+#endif
+#ifndef Sudden_Underflow
+    if (de) {
+#endif
+#ifdef IBM
+        *e = (de - Bias - (P-1) << 2) + k;
+        *bits = 4*P + 8 - k - hi0bits(word0(d) & Frac_mask);
+#else
+        *e = de - Bias - (P-1) + k;
+        *bits = P - k;
+#endif
+#ifndef Sudden_Underflow
+    }
+    else {
+        /* exponent field was zero: the bit count comes from the top stored word */
+        *e = de - Bias - (P-1) + 1 + k;
+#ifdef Pack_32
+        *bits = 32*i - hi0bits(x[i-1]);
+#else
+        *bits = (i+2)*16 - hi0bits(x[i]);
+#endif
+    }
+#endif
+    return b;
+}
+#undef d0
+#undef d1
+/*
+ * ratio: return the quotient a / b as a double.
+ *
+ * Both operands are first converted with b2d.  The difference in scale
+ * between them, k = ka - kb plus the word-count difference times the word
+ * size (32 or 16 bits), is then applied by adding k*Exp_msk1 directly to
+ * the high word of da (k > 0) or of db (k <= 0) before the division.
+ * The IBM variant applies k >> 2 that way and multiplies by 1 << (k & 3)
+ * for the remainder.
+ */
+static double
+ratio(Bigint *a, Bigint *b)
+{
+    double_u da, db;
+    int k, ka, kb;
+
+    dval(da) = b2d(a, &ka);
+    dval(db) = b2d(b, &kb);
+#ifdef Pack_32
+    k = ka - kb + 32*(a->wds - b->wds);
+#else
+    k = ka - kb + 16*(a->wds - b->wds);
+#endif
+#ifdef IBM
+    if (k > 0) {
+        word0(da) += (k >> 2)*Exp_msk1;
+        if (k &= 3)
+            dval(da) *= 1 << k;
+    }
+    else {
+        k = -k;
+        word0(db) += (k >> 2)*Exp_msk1;
+        if (k &= 3)
+            dval(db) *= 1 << k;
+    }
+#else
+    if (k > 0)
+        word0(da) += k*Exp_msk1;
+    else {
+        k = -k;
+        word0(db) += k*Exp_msk1;
+    }
+#endif
+    return dval(da) / dval(db);
+}
+/* tens[i] is 10^i, for i = 0..22 (0..24 when VAX is defined). */
+static const double
+tens[] = {
+    1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
+    1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
+    1e20,
1e21, 1e22
+#ifdef VAX
+    , 1e23, 1e24
+#endif
+};
+/*
+ * bigtens[j] is 10^(16 * 2^j) and tinytens[j] is its reciprocal, with
+ * n_bigtens entries each.  The one exception is tinytens[4] when
+ * Avoid_Underflow is defined; see the note after the table.
+ */
+static const double
+#ifdef IEEE_Arith
+bigtens[] = { 1e16, 1e32, 1e64, 1e128, 1e256 };
+static const double tinytens[] = { 1e-16, 1e-32, 1e-64, 1e-128,
+#ifdef Avoid_Underflow
+    9007199254740992.*9007199254740992.e-256
+    /* = 2^53 * 2^53 * 1e-256 = 2^106 * 1e-256 */
+#else
+    1e-256
+#endif
+};
+/* The factor of 2^106 in tinytens[4] helps us avoid setting the underflow */
+/* flag unnecessarily.  It leads to a song and dance at the end of strtod. */
+#define Scale_Bit 0x10
+#define n_bigtens 5
+#else
+#ifdef IBM
+bigtens[] = { 1e16, 1e32, 1e64 };
+static const double tinytens[] = { 1e-16, 1e-32, 1e-64 };
+#define n_bigtens 3
+#else
+bigtens[] = { 1e16, 1e32 };
+static const double tinytens[] = { 1e-16, 1e-32 };
+#define n_bigtens 2
+#endif
+#endif
+
+#ifndef IEEE_Arith
+#undef INFNAN_CHECK
+#endif
+
+#ifdef INFNAN_CHECK
+
+#ifndef NAN_WORD0
+#define NAN_WORD0 0x7ff80000
+#endif
+
+#ifndef NAN_WORD1
+#define NAN_WORD1 0
+#endif
+/*
+ * match: compare the characters that FOLLOW **sp with the string t.
+ *
+ * Input characters 'A'..'Z' are folded to lower case before comparing, so
+ * t must be given in lower case.  The character *sp itself is not
+ * examined; the comparison starts at (*sp)[1].
+ *
+ * Returns 1 and advances *sp to one past the last matched character when
+ * all of t matches; returns 0 and leaves *sp untouched otherwise.
+ */
+static int
+match(const char **sp, char *t)
+{
+    int c, d;
+    const char *s = *sp;
+
+    while (d = *t++) {
+        if ((c = *++s) >= 'A' && c <= 'Z')
+            c += 'a' - 'A';
+        if (c != d)
+            return 0;
+    }
+    *sp = s + 1;
+    return 1;
+}
+/*
+ * hexnan: parse a hexadecimal NaN payload and store it through rvp.
+ *
+ * Scanning starts at the character after *sp (the caller in this file only
+ * calls it when **sp is an opening parenthesis).  Hex digits are shifted
+ * into the two-word accumulator x[0] (high) and x[1] (low).  Characters
+ * <= ' ' are skipped; the first such gap after a digit starts a second
+ * word.  A closing parenthesis after at least one digit ends the payload
+ * and advances *sp past it.  Any other character aborts without touching
+ * *sp or *rvp.
+ *
+ * If the collected payload (x[0] masked to 20 bits, or x[1]) is nonzero,
+ * the words of *rvp are overwritten with Exp_mask | x[0] and x[1].
+ *
+ * NOTE(review): rvp is declared double * while ruby_strtod passes the
+ * address of a double_u -- confirm the two types are interchangeable in
+ * every configuration that enables INFNAN_CHECK.
+ */
+#ifndef No_Hex_NaN
+static void
+hexnan(double *rvp, const char **sp)
+{
+    ULong c, x[2];
+    const char *s;
+    int havedig, udx0, xshift;
+
+    x[0] = x[1] = 0;
+    havedig = xshift = 0;
+    udx0 = 1;
+    s = *sp;
+    while (c = *(const unsigned char*)++s) {
+        if (c >= '0' && c <= '9')
+            c -= '0';
+        else if (c >= 'a' && c <= 'f')
+            c += 10 - 'a';
+        else if (c >= 'A' && c <= 'F')
+            c += 10 - 'A';
+        else if (c <= ' ') {
+            if (udx0 && havedig) {
+                udx0 = 0;
+                xshift = 1;
+            }
+            continue;
+        }
+        else if (/*(*/ c == ')' && havedig) {
+            *sp = s + 1;
+            break;
+        }
+        else
+            return;	/* invalid form: don't change *sp */
+        havedig = 1;
+        if (xshift) {
+            /* first digit of the second word: previous digits become the high word */
+            xshift = 0;
+            x[0] = x[1];
+            x[1] = 0;
+        }
+        if (udx0)
+            x[0] = (x[0] << 4) | (x[1] >> 28);
+        x[1] = (x[1] << 4) | c;
+    }
+    if ((x[0] &= 0xfffff) || x[1]) {
+
word0(*rvp) = Exp_mask | x[0];
+        word1(*rvp) = x[1];
+    }
+}
+#endif /*No_Hex_NaN*/
+#endif /* INFNAN_CHECK */
+/*
+ * ruby_strtod: convert the decimal string s00 to a double.
+ *
+ * Leading whitespace (tab, newline, vertical tab, form feed, carriage
+ * return, space) and one optional sign are skipped.  Digits before and
+ * after the decimal point are collected, followed by an optional exponent.
+ *
+ * s00 - NUL-terminated input.
+ * se  - if non-NULL, *se receives a pointer to the first character that
+ *       was not consumed; when nothing could be parsed that is s00.
+ *
+ * errno is reset to 0 on entry and set to ERANGE on overflow or underflow
+ * (unless NO_ERRNO is defined).
+ */
+double
+ruby_strtod(const char *s00, char **se)
+{
+#ifdef Avoid_Underflow
+    int scale;
+#endif
+    int bb2, bb5, bbe, bd2, bd5, bbbits, bs2, c, dsign,
+        e, e1, esign, i, j, k, nd, nd0, nf, nz, nz0, sign;
+    const char *s, *s0, *s1;
+    double aadj, adj;
+    double_u aadj1, rv, rv0;
+    Long L;
+    ULong y, z;
+    Bigint *bb, *bb1, *bd, *bd0, *bs, *delta;
+#ifdef SET_INEXACT
+    int inexact, oldinexact;
+#endif
+#ifdef Honor_FLT_ROUNDS
+    int rounding;
+#endif
+#ifdef USE_LOCALE
+    const char *s2;
+#endif
+
+    errno = 0;
+    sign = nz0 = nz = 0;
+    dval(rv) = 0.;
+    for (s = s00;;s++)
+        switch (*s) {
+          case '-':
+            sign = 1;
+            /* no break */
+          case '+':
+            if (*++s)
+                goto break2;
+            /* no break */
+          case 0:
+            goto ret0;
+          case '\t':
+          case '\n':
+          case '\v':
+          case '\f':
+          case '\r':
+          case ' ':
+            continue;
+          default:
+            goto break2;
+        }
+break2:
+    if (*s == '0') {
+        nz0 = 1;            /* remember that at least one leading zero was seen */
+        while (*++s == '0') ;
+        if (!*s)
+            goto ret;
+    }
+    s0 = s;                 /* start of the digits after any leading zeros */
+    y = z = 0;
+    /* y accumulates the first 9 digits, z the next 7; later digits are only counted */
+    for (nd = nf = 0; (c = *s) >= '0' && c <= '9'; nd++, s++)
+        if (nd < 9)
+            y = 10*y + c - '0';
+        else if (nd < 16)
+            z = 10*z + c - '0';
+    nd0 = nd;               /* number of digits before the decimal point */
+#ifdef USE_LOCALE
+    s1 = localeconv()->decimal_point;
+    if (c == *s1) {
+        c = '.';
+        if (*++s1) {
+            /* multi-character decimal point: all of it must match */
+            s2 = s;
+            for (;;) {
+                if (*++s2 != *s1) {
+                    c = 0;
+                    break;
+                }
+                if (!*++s1) {
+                    s = s2;
+                    break;
+                }
+            }
+        }
+    }
+#endif
+    if (c == '.') {
+        if (!ISDIGIT(s[1]))     /* a '.' with no digit after it is not consumed */
+            goto dig_done;
+        c = *++s;
+        if (!nd) {
+            /* no integer digits: zeros right after the point only move the exponent */
+            for (; c == '0'; c = *++s)
+                nz++;
+            if (c > '0' && c <= '9') {
+                s0 = s;
+                nf += nz;
+                nz = 0;
+                goto have_dig;
+            }
+            goto dig_done;
+        }
+        for (; c >= '0' && c <= '9'; c = *++s) {
+have_dig:
+            nz++;
+            if (c -= '0') {
+                /* nonzero digit: first account for the nz - 1 zeros held back before it */
+                nf += nz;
+                for (i = 1; i < nz; i++)
+                    if (nd++ < 9)
+                        y *= 10;
+                    else if (nd <= DBL_DIG + 1)
+                        z *= 10;
+                if (nd++ < 9)
+                    y = 10*y + c;
+                else if (nd <= DBL_DIG + 1)
+                    z = 10*z + c;
+                nz = 0;
+            }
+        }
+    }
+dig_done:
+    e = 0;
+    if (c == 'e' || c == 'E') {
+        if (!nd &&
!nz && !nz0) {
+            goto ret0;          /* an exponent marker with no digits at all before it */
+        }
+        s00 = s;                /* rewind point if no exponent digits follow */
+        esign = 0;
+        switch (c = *++s) {
+          case '-':
+            esign = 1;
+          case '+':
+            c = *++s;
+        }
+        if (c >= '0' && c <= '9') {
+            while (c == '0')
+                c = *++s;
+            if (c > '0' && c <= '9') {
+                L = c - '0';
+                s1 = s;
+                while ((c = *++s) >= '0' && c <= '9')
+                    L = 10*L + c - '0';
+                if (s - s1 > 8 || L > 19999)
+                    /* Avoid confusion from exponents
+                     * so large that e might overflow.
+                     */
+                    e = 19999; /* safe for 16 bit ints */
+                else
+                    e = (int)L;
+                if (esign)
+                    e = -e;
+            }
+            else
+                e = 0;
+        }
+        else
+            s = s00;            /* no exponent digits: leave the 'e' unconsumed */
+    }
+    if (!nd) {
+        if (!nz && !nz0) {
+#ifdef INFNAN_CHECK
+            /* Check for Nan and Infinity */
+            switch (c) {
+              case 'i':
+              case 'I':
+                if (match(&s,"nf")) {
+                    --s;
+                    if (!match(&s,"inity"))
+                        ++s;
+                    word0(rv) = 0x7ff00000;
+                    word1(rv) = 0;
+                    goto ret;
+                }
+                break;
+              case 'n':
+              case 'N':
+                if (match(&s, "an")) {
+                    word0(rv) = NAN_WORD0;
+                    word1(rv) = NAN_WORD1;
+#ifndef No_Hex_NaN
+                    if (*s == '(') /*)*/
+                        hexnan(&rv, &s);
+#endif
+                    goto ret;
+                }
+            }
+#endif /* INFNAN_CHECK */
+ret0:
+            /* nothing parsed: report s00 as the end pointer and drop the sign */
+            s = s00;
+            sign = 0;
+        }
+        goto ret;
+    }
+    e1 = e -= nf;               /* nf (digits counted after the point) comes off the exponent */
+
+    /* Now we have nd0 digits, starting at s0, followed by a
+     * decimal point, followed by nd-nd0 digits.  The number we're
+     * after is the integer represented by those digits times
+     * 10**e */
+
+    if (!nd0)
+        nd0 = nd;
+    k = nd < DBL_DIG + 1 ?
nd : DBL_DIG + 1;
+    dval(rv) = y;
+    if (k > 9) {
+#ifdef SET_INEXACT
+        if (k > DBL_DIG)
+            oldinexact = get_inexact();
+#endif
+        dval(rv) = tens[k - 9] * dval(rv) + z;  /* combine the two digit accumulators */
+    }
+    bd0 = bb = bd = bs = delta = 0;
+    /*
+     * Fast path: with at most DBL_DIG digits and a small enough power of
+     * ten, one rounded_product / rounded_quotient (macros defined
+     * elsewhere) on rv finishes the conversion.
+     */
+    if (nd <= DBL_DIG
+#ifndef RND_PRODQUOT
+#ifndef Honor_FLT_ROUNDS
+        && Flt_Rounds == 1
+#endif
+#endif
+        ) {
+        if (!e)
+            goto ret;
+        if (e > 0) {
+            if (e <= Ten_pmax) {
+#ifdef VAX
+                goto vax_ovfl_check;
+#else
+#ifdef Honor_FLT_ROUNDS
+                /* round correctly FLT_ROUNDS = 2 or 3 */
+                if (sign) {
+                    /* rv is a double_u: negate through dval(), as everywhere else */
+                    dval(rv) = -dval(rv);
+                    sign = 0;
+                }
+#endif
+                /* rv = */ rounded_product(dval(rv), tens[e]);
+                goto ret;
+#endif
+            }
+            i = DBL_DIG - nd;
+            if (e <= Ten_pmax + i) {
+                /* A fancier test would sometimes let us do
+                 * this for larger i values.
+                 */
+#ifdef Honor_FLT_ROUNDS
+                /* round correctly FLT_ROUNDS = 2 or 3 */
+                if (sign) {
+                    dval(rv) = -dval(rv);
+                    sign = 0;
+                }
+#endif
+                e -= i;
+                dval(rv) *= tens[i];
+#ifdef VAX
+                /* VAX exponent range is so narrow we must
+                 * worry about overflow here...
+                 */
+vax_ovfl_check:
+                word0(rv) -= P*Exp_msk1;
+                /* rv = */ rounded_product(dval(rv), tens[e]);
+                if ((word0(rv) & Exp_mask)
+                    > Exp_msk1*(DBL_MAX_EXP+Bias-1-P))
+                    goto ovfl;
+                word0(rv) += P*Exp_msk1;
+#else
+                /* rv = */ rounded_product(dval(rv), tens[e]);
+#endif
+                goto ret;
+            }
+        }
+#ifndef Inaccurate_Divide
+        else if (e >= -Ten_pmax) {
+#ifdef Honor_FLT_ROUNDS
+            /* round correctly FLT_ROUNDS = 2 or 3 */
+            if (sign) {
+                dval(rv) = -dval(rv);
+                sign = 0;
+            }
+#endif
+            /* rv = */ rounded_quotient(dval(rv), tens[-e]);
+            goto ret;
+        }
+#endif
+    }
+    e1 += nd - k;       /* digits beyond the k kept in rv raise the power of ten */
+
+#ifdef IEEE_Arith
+#ifdef SET_INEXACT
+    inexact = 1;
+    if (k <= DBL_DIG)
+        oldinexact = get_inexact();
+#endif
+#ifdef Avoid_Underflow
+    scale = 0;
+#endif
+#ifdef Honor_FLT_ROUNDS
+    if ((rounding = Flt_Rounds) >= 2) {
+        if (sign)
+            rounding = rounding == 2 ?
0 : 2;
+        else
+            if (rounding != 2)
+                rounding = 0;
+    }
+#endif
+#endif /*IEEE_Arith*/
+
+    /* Get starting approximation = rv * 10**e1 */
+
+    if (e1 > 0) {
+        if ((i = e1 & 15) != 0)
+            dval(rv) *= tens[i];        /* low four bits of e1 come from tens[] */
+        if (e1 &= ~15) {
+            if (e1 > DBL_MAX_10_EXP) {
+ovfl:
+                /* overflow exit; also the target of goto ovfl from later code */
+#ifndef NO_ERRNO
+                errno = ERANGE;
+#endif
+                /* Can't trust HUGE_VAL */
+#ifdef IEEE_Arith
+#ifdef Honor_FLT_ROUNDS
+                switch (rounding) {
+                  case 0: /* toward 0 */
+                  case 3: /* toward -infinity */
+                    word0(rv) = Big0;
+                    word1(rv) = Big1;
+                    break;
+                  default:
+                    word0(rv) = Exp_mask;
+                    word1(rv) = 0;
+                }
+#else /*Honor_FLT_ROUNDS*/
+                word0(rv) = Exp_mask;
+                word1(rv) = 0;
+#endif /*Honor_FLT_ROUNDS*/
+#ifdef SET_INEXACT
+                /* set overflow bit */
+                dval(rv0) = 1e300;
+                dval(rv0) *= dval(rv0);
+#endif
+#else /*IEEE_Arith*/
+                word0(rv) = Big0;
+                word1(rv) = Big1;
+#endif /*IEEE_Arith*/
+                if (bd0)                /* Bigints exist only once bd0 has been built */
+                    goto retfree;
+                goto ret;
+            }
+            e1 >>= 4;
+            /* remaining bits of e1 select entries of bigtens[]; the last one is handled below */
+            for (j = 0; e1 > 1; j++, e1 >>= 1)
+                if (e1 & 1)
+                    dval(rv) *= bigtens[j];
+            /* The last multiplication could overflow.
*/
+            word0(rv) -= P*Exp_msk1;    /* lower the exponent field by P for the multiply */
+            dval(rv) *= bigtens[j];
+            if ((z = word0(rv) & Exp_mask)
+                > Exp_msk1*(DBL_MAX_EXP+Bias-P))
+                goto ovfl;
+            if (z > Exp_msk1*(DBL_MAX_EXP+Bias-1-P)) {
+                /* set to largest number */
+                /* (Can't trust DBL_MAX) */
+                word0(rv) = Big0;
+                word1(rv) = Big1;
+            }
+            else
+                word0(rv) += P*Exp_msk1;    /* restore the exponent field */
+        }
+    }
+    else if (e1 < 0) {
+        e1 = -e1;
+        if ((i = e1 & 15) != 0)
+            dval(rv) /= tens[i];
+        if (e1 >>= 4) {
+            if (e1 >= 1 << n_bigtens)   /* more than tinytens[] can express */
+                goto undfl;
+#ifdef Avoid_Underflow
+            if (e1 & Scale_Bit)
+                scale = 2*P;            /* tinytens[4] will be used; record its 2^(2*P) factor */
+            for (j = 0; e1 > 0; j++, e1 >>= 1)
+                if (e1 & 1)
+                    dval(rv) *= tinytens[j];
+            if (scale && (j = 2*P + 1 - ((word0(rv) & Exp_mask)
+                                         >> Exp_shift)) > 0) {
+                /* scaled rv is denormal; zap j low bits */
+                if (j >= 32) {
+                    word1(rv) = 0;
+                    if (j >= 53)
+                        word0(rv) = (P+2)*Exp_msk1;
+                    else
+                        word0(rv) &= 0xffffffff << (j-32);
+                }
+                else
+                    word1(rv) &= 0xffffffff << j;
+            }
+#else
+            for (j = 0; e1 > 1; j++, e1 >>= 1)
+                if (e1 & 1)
+                    dval(rv) *= tinytens[j];
+            /* The last multiplication could underflow. */
+            dval(rv0) = dval(rv);
+            dval(rv) *= tinytens[j];
+            if (!dval(rv)) {
+                dval(rv) = 2.*dval(rv0);    /* retry once from twice the saved value */
+                dval(rv) *= tinytens[j];
+#endif
+                if (!dval(rv)) {
+undfl:
+                    /* underflow exit: the result is 0 and ERANGE is reported */
+                    dval(rv) = 0.;
+#ifndef NO_ERRNO
+                    errno = ERANGE;
+#endif
+                    if (bd0)
+                        goto retfree;
+                    goto ret;
+                }
+#ifndef Avoid_Underflow
+                word0(rv) = Tiny0;
+                word1(rv) = Tiny1;
+                /* The refinement below will clean
+                 * this approximation up.
+                 */
+            }
+#endif
+        }
+    }
+
+    /* Now the hard part -- adjusting rv to the correct value.*/
+
+    /* Put digits into bd: true value = bd * 10^e */
+
+    bd0 = s2b(s0, nd0, nd, y);
+
+    /*
+     * Correction loop.  Each pass copies bd0 into bd, converts the current
+     * rv into bb, and works out the powers of 2 (bb2, bd2, bs2) and of 5
+     * (bb5, bd5) that are applied to bb, bd and bs before comparing them.
+     */
+    for (;;) {
+        bd = Balloc(bd0->k);
+        Bcopy(bd, bd0);
+        bb = d2b(dval(rv), &bbe, &bbbits);	/* rv = bb * 2^bbe */
+        bs = i2b(1);
+
+        if (e >= 0) {
+            bb2 = bb5 = 0;
+            bd2 = bd5 = e;
+        }
+        else {
+            bb2 = bb5 = -e;
+            bd2 = bd5 = 0;
+        }
+        if (bbe >= 0)
+            bb2 += bbe;
+        else
+            bd2 -= bbe;
+        bs2 = bb2;
+#ifdef Honor_FLT_ROUNDS
+        if (rounding != 1)
+            bs2++;
+#endif
+#ifdef Avoid_Underflow
+        j = bbe - scale;        /* exponent with the extra scaling taken back out */
+        i = j + bbbits - 1;	/* logb(rv) */
+        if (i < Emin)	/* denormal */
+            j += P - Emin;
+        else
+            j = P + 1 - bbbits;
+#else /*Avoid_Underflow*/
+#ifdef Sudden_Underflow
+#ifdef IBM
+        j = 1 + 4*P - 3 - bbbits + ((bbe + bbbits - 1) & 3);
+#else
+        j = P + 1 - bbbits;
+#endif
+#else /*Sudden_Underflow*/
+        j = bbe;
+        i = j + bbbits - 1;	/* logb(rv) */
+        if (i < Emin)	/* denormal */
+            j += P - Emin;
+        else
+            j = P + 1 - bbbits;
+#endif /*Sudden_Underflow*/
+#endif /*Avoid_Underflow*/
+        bb2 += j;
+        bd2 += j;
+#ifdef Avoid_Underflow
+        bd2 += scale;
+#endif
+        i = bb2 < bd2 ?
bb2 : bd2;
+        if (i > bs2)
+            i = bs2;
+        if (i > 0) {
+            /* remove the power of two common to all three shift counts */
+            bb2 -= i;
+            bd2 -= i;
+            bs2 -= i;
+        }
+        if (bb5 > 0) {
+            bs = pow5mult(bs, bb5);
+            bb1 = mult(bs, bb);
+            Bfree(bb);
+            bb = bb1;
+        }
+        if (bb2 > 0)
+            bb = lshift(bb, bb2);
+        if (bd5 > 0)
+            bd = pow5mult(bd, bd5);
+        if (bd2 > 0)
+            bd = lshift(bd, bd2);
+        if (bs2 > 0)
+            bs = lshift(bs, bs2);
+        delta = diff(bb, bd);
+        dsign = delta->sign;    /* keep the sign aside ... */
+        delta->sign = 0;        /* ... and compare magnitudes only */
+        i = cmp(delta, bs);
+#ifdef Honor_FLT_ROUNDS
+        if (rounding != 1) {
+            if (i < 0) {
+                /* Error is less than an ulp */
+                if (!delta->x[0] && delta->wds <= 1) {
+                    /* exact */
+#ifdef SET_INEXACT
+                    inexact = 0;
+#endif
+                    break;
+                }
+                if (rounding) {
+                    if (dsign) {
+                        adj = 1.;
+                        goto apply_adj;
+                    }
+                }
+                else if (!dsign) {
+                    adj = -1.;
+                    if (!word1(rv)
+                        && !(word0(rv) & Frac_mask)) {
+                        y = word0(rv) & Exp_mask;
+#ifdef Avoid_Underflow
+                        if (!scale || y > 2*P*Exp_msk1)
+#else
+                        if (y)
+#endif
+                        {
+                            delta = lshift(delta,Log2P);
+                            if (cmp(delta, bs) <= 0)
+                                adj = -0.5;
+                        }
+                    }
+apply_adj:
+                    /* NOTE(review): adj is declared as a plain double in this
+                     * function, yet word0() is applied to it below; confirm
+                     * this compiles in configurations where double_u is not
+                     * plain double.
+                     */
+#ifdef Avoid_Underflow
+                    if (scale && (y = word0(rv) & Exp_mask)
+                        <= 2*P*Exp_msk1)
+                        word0(adj) += (2*P+1)*Exp_msk1 - y;
+#else
+#ifdef Sudden_Underflow
+                    if ((word0(rv) & Exp_mask) <=
+                        P*Exp_msk1) {
+                        word0(rv) += P*Exp_msk1;
+                        dval(rv) += adj*ulp(dval(rv));
+                        word0(rv) -= P*Exp_msk1;
+                    }
+                    else
+#endif /*Sudden_Underflow*/
+#endif /*Avoid_Underflow*/
+                    dval(rv) += adj*ulp(dval(rv));
+                }
+                break;
+            }
+            adj = ratio(delta, bs);
+            if (adj < 1.)
+                adj = 1.;
+            if (adj <= 0x7ffffffe) {
+                /* adj = rounding ?
ceil(adj) : floor(adj); */
+                y = adj;
+                if (y != adj) {
+                    if (!((rounding>>1) ^ dsign))
+                        y++;
+                    adj = y;
+                }
+            }
+#ifdef Avoid_Underflow
+            if (scale && (y = word0(rv) & Exp_mask) <= 2*P*Exp_msk1)
+                word0(adj) += (2*P+1)*Exp_msk1 - y;
+#else
+#ifdef Sudden_Underflow
+            if ((word0(rv) & Exp_mask) <= P*Exp_msk1) {
+                /* raise the exponent field by P around the adjustment, then restore it */
+                word0(rv) += P*Exp_msk1;
+                adj *= ulp(dval(rv));
+                if (dsign)
+                    dval(rv) += adj;
+                else
+                    dval(rv) -= adj;
+                word0(rv) -= P*Exp_msk1;
+                goto cont;
+            }
+#endif /*Sudden_Underflow*/
+#endif /*Avoid_Underflow*/
+            adj *= ulp(dval(rv));
+            if (dsign)
+                dval(rv) += adj;
+            else
+                dval(rv) -= adj;
+            goto cont;
+        }
+#endif /*Honor_FLT_ROUNDS*/
+
+        if (i < 0) {
+            /* Error is less than half an ulp -- check for
+             * special case of mantissa a power of two.
+             */
+            if (dsign || word1(rv) || word0(rv) & Bndry_mask
+#ifdef IEEE_Arith
+#ifdef Avoid_Underflow
+                || (word0(rv) & Exp_mask) <= (2*P+1)*Exp_msk1
+#else
+                || (word0(rv) & Exp_mask) <= Exp_msk1
+#endif
+#endif
+                ) {
+#ifdef SET_INEXACT
+                if (!delta->x[0] && delta->wds <= 1)
+                    inexact = 0;
+#endif
+                break;
+            }
+            if (!delta->x[0] && delta->wds <= 1) {
+                /* exact result */
+#ifdef SET_INEXACT
+                inexact = 0;
+#endif
+                break;
+            }
+            delta = lshift(delta,Log2P);
+            if (cmp(delta, bs) > 0)
+                goto drop_down;
+            break;
+        }
+        if (i == 0) {
+            /* exactly half-way between */
+            if (dsign) {
+                if ((word0(rv) & Bndry_mask1) == Bndry_mask1
+                    && word1(rv) == (
+#ifdef Avoid_Underflow
+                        (scale && (y = word0(rv) & Exp_mask) <= 2*P*Exp_msk1)
+                        ?
(0xffffffff & (0xffffffff << (2*P+1-(y>>Exp_shift)))) :
+#endif
+                        0xffffffff)) {
+                    /*boundary case -- increment exponent*/
+                    word0(rv) = (word0(rv) & Exp_mask)
+                        + Exp_msk1
+#ifdef IBM
+                        | Exp_msk1 >> 4
+#endif
+                        ;
+                    word1(rv) = 0;
+#ifdef Avoid_Underflow
+                    dsign = 0;
+#endif
+                    break;
+                }
+            }
+            else if (!(word0(rv) & Bndry_mask) && !word1(rv)) {
+drop_down:
+                /* boundary case -- decrement exponent */
+#ifdef Sudden_Underflow /*{{*/
+                L = word0(rv) & Exp_mask;
+#ifdef IBM
+                if (L <  Exp_msk1)
+#else
+#ifdef Avoid_Underflow
+                if (L <= (scale ? (2*P+1)*Exp_msk1 : Exp_msk1))
+#else
+                if (L <= Exp_msk1)
+#endif /*Avoid_Underflow*/
+#endif /*IBM*/
+                    goto undfl;
+                L -= Exp_msk1;
+#else /*Sudden_Underflow}{*/
+#ifdef Avoid_Underflow
+                if (scale) {
+                    L = word0(rv) & Exp_mask;
+                    if (L <= (2*P+1)*Exp_msk1) {
+                        if (L > (P+2)*Exp_msk1)
+                            /* round even ==> */
+                            /* accept rv */
+                            break;
+                        /* rv = smallest denormal */
+                        goto undfl;
+                    }
+                }
+#endif /*Avoid_Underflow*/
+                L = (word0(rv) & Exp_mask) - Exp_msk1;
+#endif /*Sudden_Underflow}}*/
+                /* one exponent step down, with every fraction bit set */
+                word0(rv) = L | Bndry_mask1;
+                word1(rv) = 0xffffffff;
+#ifdef IBM
+                goto cont;
+#else
+                break;
+#endif
+            }
+#ifndef ROUND_BIASED
+            if (!(word1(rv) & LSB))     /* low bit clear: keep rv as it is */
+                break;
+#endif
+            if (dsign)
+                dval(rv) += ulp(dval(rv));
+#ifndef ROUND_BIASED
+            else {
+                dval(rv) -= ulp(dval(rv));
+#ifndef Sudden_Underflow
+                if (!dval(rv))
+                    goto undfl;
+#endif
+            }
+#ifdef Avoid_Underflow
+            dsign = 1 - dsign;
+#endif
+#endif
+            break;
+        }
+        /* off by more than half an ulp: derive an adjustment from the ratio */
+        if ((aadj = ratio(delta, bs)) <= 2.) {
+            if (dsign)
+                aadj = dval(aadj1) = 1.;
+            else if (word1(rv) || word0(rv) & Bndry_mask) {
+#ifndef Sudden_Underflow
+                if (word1(rv) == Tiny1 && !word0(rv))
+                    goto undfl;
+#endif
+                aadj = 1.;
+                dval(aadj1) = -1.;
+            }
+            else {
+                /* special case -- power of FLT_RADIX to be */
+                /* rounded down... */
+
+                if (aadj < 2./FLT_RADIX)
+                    aadj = 1./FLT_RADIX;
+                else
+                    aadj *= 0.5;
+                dval(aadj1) = -aadj;
+            }
+        }
+        else {
+            aadj *= 0.5;
+            dval(aadj1) = dsign ?
aadj : -aadj;
+#ifdef Check_FLT_ROUNDS
+            /* aadj1 is a double_u: go through dval(), as the #else branch does */
+            switch (Rounding) {
+              case 2: /* towards +infinity */
+                dval(aadj1) -= 0.5;
+                break;
+              case 0: /* towards 0 */
+              case 3: /* towards -infinity */
+                dval(aadj1) += 0.5;
+            }
+#else
+            if (Flt_Rounds == 0)
+                dval(aadj1) += 0.5;
+#endif /*Check_FLT_ROUNDS*/
+        }
+        y = word0(rv) & Exp_mask;
+
+        /* Check for overflow */
+
+        if (y == Exp_msk1*(DBL_MAX_EXP+Bias-1)) {
+            /* top exponent: apply the adjustment with the exponent lowered by P */
+            dval(rv0) = dval(rv);
+            word0(rv) -= P*Exp_msk1;
+            adj = dval(aadj1) * ulp(dval(rv));
+            dval(rv) += adj;
+            if ((word0(rv) & Exp_mask) >=
+                Exp_msk1*(DBL_MAX_EXP+Bias-P)) {
+                if (word0(rv0) == Big0 && word1(rv0) == Big1)
+                    goto ovfl;
+                word0(rv) = Big0;
+                word1(rv) = Big1;
+                goto cont;
+            }
+            else
+                word0(rv) += P*Exp_msk1;
+        }
+        else {
+#ifdef Avoid_Underflow
+            if (scale && y <= 2*P*Exp_msk1) {
+                if (aadj <= 0x7fffffff) {
+                    if ((z = aadj) <= 0)
+                        z = 1;
+                    aadj = z;
+                    dval(aadj1) = dsign ? aadj : -aadj;
+                }
+                word0(aadj1) += (2*P+1)*Exp_msk1 - y;
+            }
+            adj = dval(aadj1) * ulp(dval(rv));
+            dval(rv) += adj;
+#else
+#ifdef Sudden_Underflow
+            if ((word0(rv) & Exp_mask) <= P*Exp_msk1) {
+                dval(rv0) = dval(rv);
+                word0(rv) += P*Exp_msk1;
+                adj = dval(aadj1) * ulp(dval(rv));
+                dval(rv) += adj;
+#ifdef IBM
+                if ((word0(rv) & Exp_mask) <  P*Exp_msk1)
+#else
+                if ((word0(rv) & Exp_mask) <= P*Exp_msk1)
+#endif
+                {
+                    if (word0(rv0) == Tiny0 && word1(rv0) == Tiny1)
+                        goto undfl;
+                    word0(rv) = Tiny0;
+                    word1(rv) = Tiny1;
+                    goto cont;
+                }
+                else
+                    word0(rv) -= P*Exp_msk1;
+            }
+            else {
+                adj = dval(aadj1) * ulp(dval(rv));
+                dval(rv) += adj;
+            }
+#else /*Sudden_Underflow*/
+            /* Compute adj so that the IEEE rounding rules will
+             * correctly round rv + adj in some half-way cases.
+             * If rv * ulp(rv) is denormalized (i.e.,
+             * y <= (P-1)*Exp_msk1), we must adjust aadj to avoid
+             * trouble from bits lost to denormalization;
+             * example: 1.2e-307 .
+             */
+            if (y <= (P-1)*Exp_msk1 && aadj > 1.)
{
+                /* aadj1 is a double_u: read and write it through dval() */
+                dval(aadj1) = (double)(int)(aadj + 0.5);
+                if (!dsign)
+                    dval(aadj1) = -dval(aadj1);
+            }
+            adj = dval(aadj1) * ulp(dval(rv));
+            dval(rv) += adj;
+#endif /*Sudden_Underflow*/
+#endif /*Avoid_Underflow*/
+        }
+        z = word0(rv) & Exp_mask;
+#ifndef SET_INEXACT
+#ifdef Avoid_Underflow
+        if (!scale)
+#endif
+        if (y == z) {
+            /* Can we stop now? */
+            L = (Long)aadj;
+            aadj -= L;
+            /* The tolerances below are conservative. */
+            if (dsign || word1(rv) || word0(rv) & Bndry_mask) {
+                if (aadj < .4999999 || aadj > .5000001)
+                    break;
+            }
+            else if (aadj < .4999999/FLT_RADIX)
+                break;
+        }
+#endif
+cont:
+        /* release this pass's Bigints before going round again */
+        Bfree(bb);
+        Bfree(bd);
+        Bfree(bs);
+        Bfree(delta);
+    }
+#ifdef SET_INEXACT
+    if (inexact) {
+        if (!oldinexact) {
+            word0(rv0) = Exp_1 + (70 << Exp_shift);
+            word1(rv0) = 0;
+            dval(rv0) += 1.;
+        }
+    }
+    else if (!oldinexact)
+        clear_inexact();
+#endif
+#ifdef Avoid_Underflow
+    if (scale) {
+        /* take the extra scaling back out: the multiplier's exponent is lowered by 2*P */
+        word0(rv0) = Exp_1 - 2*P*Exp_msk1;
+        word1(rv0) = 0;
+        dval(rv) *= dval(rv0);
+#ifndef NO_ERRNO
+        /* try to avoid the bug of testing an 8087 register value */
+        if (word0(rv) == 0 && word1(rv) == 0)
+            errno = ERANGE;
+#endif
+    }
+#endif /* Avoid_Underflow */
+#ifdef SET_INEXACT
+    if (inexact && !(word0(rv) & Exp_mask)) {
+        /* set underflow bit */
+        dval(rv0) = 1e-300;
+        dval(rv0) *= dval(rv0);
+    }
+#endif
+retfree:
+    /* common exit once Bigints exist: release all of them, including bd0 */
+    Bfree(bb);
+    Bfree(bd);
+    Bfree(bs);
+    Bfree(bd0);
+    Bfree(delta);
+ret:
+    if (se)
+        *se = (char *)s;
+    return sign ?
-dval(rv) : dval(rv);
+}
+/*
+ * quorem: divide b by S in place and return the quotient.
+ *
+ * Returns 0 at once when b has fewer words than S.  Otherwise a first
+ * quotient q is estimated from the top words, using *sxe + 1 as divisor
+ * so that q cannot exceed the true quotient, and q*S is subtracted from
+ * b.  If b is still not below S afterwards (cmp(b, S) >= 0), q is
+ * incremented and S is subtracted once more.  Zero words at the top of b
+ * are trimmed by lowering b->wds.
+ *
+ * b - dividend; overwritten with the remainder.
+ * S - divisor; only read.
+ *
+ * The DEBUG checks show what callers are expected to guarantee: b has no
+ * more words than S and the quotient does not exceed 9.
+ */
+static int
+quorem(Bigint *b, Bigint *S)
+{
+    int n;
+    ULong *bx, *bxe, q, *sx, *sxe;
+#ifdef ULLong
+    ULLong borrow, carry, y, ys;
+#else
+    ULong borrow, carry, y, ys;
+#ifdef Pack_32
+    ULong si, z, zs;
+#endif
+#endif
+
+    n = S->wds;
+#ifdef DEBUG
+    /*debug*/ if (b->wds > n)
+        /*debug*/	Bug("oversize b in quorem");
+#endif
+    if (b->wds < n)
+        return 0;
+    sx = S->x;
+    sxe = sx + --n;         /* top word of S; n is now the top index */
+    bx = b->x;
+    bxe = bx + n;
+    q = *bxe / (*sxe + 1);	/* ensure q <= true quotient */
+#ifdef DEBUG
+    /*debug*/ if (q > 9)
+        /*debug*/	Bug("oversized quotient in quorem");
+#endif
+    if (q) {
+        /* b -= q * S, word by word, carrying the product and borrowing the difference */
+        borrow = 0;
+        carry = 0;
+        do {
+#ifdef ULLong
+            ys = *sx++ * (ULLong)q + carry;
+            carry = ys >> 32;
+            y = *bx - (ys & FFFFFFFF) - borrow;
+            borrow = y >> 32 & (ULong)1;
+            *bx++ = y & FFFFFFFF;
+#else
+#ifdef Pack_32
+            si = *sx++;
+            ys = (si & 0xffff) * q + carry;
+            zs = (si >> 16) * q + (ys >> 16);
+            carry = zs >> 16;
+            y = (*bx & 0xffff) - (ys & 0xffff) - borrow;
+            borrow = (y & 0x10000) >> 16;
+            z = (*bx >> 16) - (zs & 0xffff) - borrow;
+            borrow = (z & 0x10000) >> 16;
+            Storeinc(bx, z, y);
+#else
+            ys = *sx++ * q + carry;
+            carry = ys >> 16;
+            y = *bx - (ys & 0xffff) - borrow;
+            borrow = (y & 0x10000) >> 16;
+            *bx++ = y & 0xffff;
+#endif
+#endif
+        } while (sx <= sxe);
+        if (!*bxe) {
+            /* top word became zero: trim zero words from the top */
+            bx = b->x;
+            while (--bxe > bx && !*bxe)
+                --n;
+            b->wds = n;
+        }
+    }
+    if (cmp(b, S) >= 0) {
+        /* estimate was one too small: take S away once more */
+        q++;
+        borrow = 0;
+        carry = 0;
+        bx = b->x;
+        sx = S->x;
+        do {
+#ifdef ULLong
+            ys = *sx++ + carry;
+            carry = ys >> 32;
+            y = *bx - (ys & FFFFFFFF) - borrow;
+            borrow = y >> 32 & (ULong)1;
+            *bx++ = y & FFFFFFFF;
+#else
+#ifdef Pack_32
+            si = *sx++;
+            ys = (si & 0xffff) + carry;
+            zs = (si >> 16) + (ys >> 16);
+            carry = zs >> 16;
+            y = (*bx & 0xffff) - (ys & 0xffff) - borrow;
+            borrow = (y & 0x10000) >> 16;
+            z = (*bx >> 16) - (zs & 0xffff) - borrow;
+            borrow = (z & 0x10000) >> 16;
+            Storeinc(bx, z, y);
+#else
+            ys = *sx++ + carry;
+            carry = ys >> 16;
+            y = *bx - (ys & 0xffff) - borrow;
+
borrow = (y & 0x10000) >> 16;
+            *bx++ = y & 0xffff;
+#endif
+#endif
+        } while (sx <= sxe);
+        bx = b->x;
+        bxe = bx + n;
+        if (!*bxe) {
+            while (--bxe > bx && !*bxe)
+                --n;
+            b->wds = n;
+        }
+    }
+    return q;
+}
+/*
+ * Without MULTIPLE_THREADS the most recent result buffer is remembered
+ * in dtoa_result; ruby_dtoa frees it at the start of its next call.
+ */
+#ifndef MULTIPLE_THREADS
+static char *dtoa_result;
+#endif
+/*
+ * rv_alloc: obtain i bytes from xmalloc for a result string.  In the
+ * non-threaded build the pointer is also stored in dtoa_result.
+ */
+#ifndef MULTIPLE_THREADS
+static char *
+rv_alloc(int i)
+{
+    return dtoa_result = xmalloc(i);
+}
+#else
+#define rv_alloc(i) xmalloc(i)
+#endif
+/*
+ * nrv_alloc: allocate n bytes with rv_alloc and copy the string s into
+ * them, terminator included.  The copy is not bounded by n, so n must be
+ * at least strlen(s) + 1.  If rve is non-NULL, *rve is set to point at
+ * the terminating NUL of the copy.  Returns the new buffer.
+ */
+static char *
+nrv_alloc(const char *s, char **rve, int n)
+{
+    char *rv, *t;
+
+    t = rv = rv_alloc(n);
+    while ((*t = *s++) != 0) t++;
+    if (rve)
+        *rve = t;
+    return rv;
+}
+/* rv_strdup: nrv_alloc sized to hold exactly s and its terminator. */
+#define rv_strdup(s, rve) nrv_alloc(s, rve, strlen(s)+1)
+
+#ifndef MULTIPLE_THREADS
+/* freedtoa(s) must be used to free values s returned by dtoa
+ * when MULTIPLE_THREADS is #defined.  It should be used in all cases,
+ * but for consistency with earlier versions of dtoa, it is optional
+ * when MULTIPLE_THREADS is not defined.
+ */
+/* In this file freedtoa is only compiled when MULTIPLE_THREADS is NOT defined. */
+static void
+freedtoa(char *s)
+{
+    xfree(s);
+}
+#endif
+
+/* dtoa for IEEE arithmetic (dmg): convert double to ASCII string.
+ *
+ * Inspired by "How to Print Floating-Point Numbers Accurately" by
+ * Guy L. Steele, Jr. and Jon L. White [Proc. ACM SIGPLAN '90, pp. 112-126].
+ *
+ * Modifications:
+ *	1. Rather than iterating, we use a simple numeric overestimate
+ *	   to determine k = floor(log10(d)).  We scale relevant
+ *	   quantities using O(log2(k)) rather than O(k) multiplications.
+ *	2. For some modes > 2 (corresponding to ecvt and fcvt), we don't
+ *	   try to generate digits strictly left to right.  Instead, we
+ *	   compute with fewer bits and propagate the carry if necessary
+ *	   when rounding the final digit up.  This is often faster.
+ *	3. Under the assumption that input will be rounded nearest,
+ *	   mode 0 renders 1e23 as 1e23 rather than 9.999999999999999e22.
+ * That is, we allow equality in stopping tests when the + * round-nearest rule will give the same floating-point value + * as would satisfaction of the stopping test with strict + * inequality. + * 4. We remove common factors of powers of 2 from relevant + * quantities. + * 5. When converting floating-point integers less than 1e16, + * we use floating-point arithmetic rather than resorting + * to multiple-precision integers. + * 6. When asked to produce fewer than 15 digits, we first try + * to get by with floating-point arithmetic; we resort to + * multiple-precision integer arithmetic only if we cannot + * guarantee that the floating-point calculation has given + * the correctly rounded result. For k requested digits and + * "uniformly" distributed input, the probability is + * something like 10^(k-15) that we must resort to the Long + * calculation. + */ + +char * +ruby_dtoa(double d_, int mode, int ndigits, int *decpt, int *sign, char **rve) +{ + /* Arguments ndigits, decpt, sign are similar to those + of ecvt and fcvt; trailing zeros are suppressed from + the returned string. If not null, *rve is set to point + to the end of the return value. If d is +-Infinity or NaN, + then *decpt is set to 9999. + + mode: + 0 ==> shortest string that yields d when read in + and rounded to nearest. + 1 ==> like 0, but with Steele & White stopping rule; + e.g. with IEEE P754 arithmetic , mode 0 gives + 1e23 whereas mode 1 gives 9.999999999999999e22. + 2 ==> max(1,ndigits) significant digits. This gives a + return value similar to that of ecvt, except + that trailing zeros are suppressed. + 3 ==> through ndigits past the decimal point. This + gives a return value similar to that from fcvt, + except that trailing zeros are suppressed, and + ndigits can be negative. + 4,5 ==> similar to 2 and 3, respectively, but (in + round-nearest mode) with the tests of mode 0 to + possibly return a shorter string that rounds to d. 
+ With IEEE arithmetic and compilation with + -DHonor_FLT_ROUNDS, modes 4 and 5 behave the same + as modes 2 and 3 when FLT_ROUNDS != 1. + 6-9 ==> Debugging modes similar to mode - 4: don't try + fast floating-point estimate (if applicable). + + Values of mode other than 0-9 are treated as mode 0. + + Sufficient space is allocated to the return value + to hold the suppressed trailing zeros. + */ + + int bbits, b2, b5, be, dig, i, ieps, ilim, ilim0, ilim1, + j, j1, k, k0, k_check, leftright, m2, m5, s2, s5, + spec_case, try_quick; + Long L; +#ifndef Sudden_Underflow + int denorm; + ULong x; +#endif + Bigint *b, *b1, *delta, *mlo = 0, *mhi = 0, *S; + double ds; + double_u d, d2, eps; + char *s, *s0; +#ifdef Honor_FLT_ROUNDS + int rounding; +#endif +#ifdef SET_INEXACT + int inexact, oldinexact; +#endif + + dval(d) = d_; + +#ifndef MULTIPLE_THREADS + if (dtoa_result) { + freedtoa(dtoa_result); + dtoa_result = 0; + } +#endif + + if (word0(d) & Sign_bit) { + /* set sign for everything, including 0's and NaNs */ + *sign = 1; + word0(d) &= ~Sign_bit; /* clear sign bit */ + } + else + *sign = 0; + +#if defined(IEEE_Arith) + defined(VAX) +#ifdef IEEE_Arith + if ((word0(d) & Exp_mask) == Exp_mask) +#else + if (word0(d) == 0x8000) +#endif + { + /* Infinity or NaN */ + *decpt = 9999; +#ifdef IEEE_Arith + if (!word1(d) && !(word0(d) & 0xfffff)) + return rv_strdup("Infinity", rve); +#endif + return rv_strdup("NaN", rve); + } +#endif +#ifdef IBM + dval(d) += 0; /* normalize */ +#endif + if (!dval(d)) { + *decpt = 1; + return rv_strdup("0", rve); + } + +#ifdef SET_INEXACT + try_quick = oldinexact = get_inexact(); + inexact = 1; +#endif +#ifdef Honor_FLT_ROUNDS + if ((rounding = Flt_Rounds) >= 2) { + if (*sign) + rounding = rounding == 2 ? 
0 : 2; + else + if (rounding != 2) + rounding = 0; + } +#endif + + b = d2b(dval(d), &be, &bbits); +#ifdef Sudden_Underflow + i = (int)(word0(d) >> Exp_shift1 & (Exp_mask>>Exp_shift1)); +#else + if ((i = (int)(word0(d) >> Exp_shift1 & (Exp_mask>>Exp_shift1))) != 0) { +#endif + dval(d2) = dval(d); + word0(d2) &= Frac_mask1; + word0(d2) |= Exp_11; +#ifdef IBM + if (j = 11 - hi0bits(word0(d2) & Frac_mask)) + dval(d2) /= 1 << j; +#endif + + /* log(x) ~=~ log(1.5) + (x-1.5)/1.5 + * log10(x) = log(x) / log(10) + * ~=~ log(1.5)/log(10) + (x-1.5)/(1.5*log(10)) + * log10(d) = (i-Bias)*log(2)/log(10) + log10(d2) + * + * This suggests computing an approximation k to log10(d) by + * + * k = (i - Bias)*0.301029995663981 + * + ( (d2-1.5)*0.289529654602168 + 0.176091259055681 ); + * + * We want k to be too large rather than too small. + * The error in the first-order Taylor series approximation + * is in our favor, so we just round up the constant enough + * to compensate for any error in the multiplication of + * (i - Bias) by 0.301029995663981; since |i - Bias| <= 1077, + * and 1077 * 0.30103 * 2^-52 ~=~ 7.2e-14, + * adding 1e-13 to the constant term more than suffices. + * Hence we adjust the constant term to 0.1760912590558. + * (We could get a more accurate k by invoking log10, + * but this is probably not worthwhile.) + */ + + i -= Bias; +#ifdef IBM + i <<= 2; + i += j; +#endif +#ifndef Sudden_Underflow + denorm = 0; + } + else { + /* d is denormalized */ + + i = bbits + be + (Bias + (P-1) - 1); + x = i > 32 ? word0(d) << (64 - i) | word1(d) >> (i - 32) + : word1(d) << (32 - i); + dval(d2) = x; + word0(d2) -= 31*Exp_msk1; /* adjust exponent */ + i -= (Bias + (P-1) - 1) + 1; + denorm = 1; + } +#endif + ds = (dval(d2)-1.5)*0.289529654602168 + 0.1760912590558 + i*0.301029995663981; + k = (int)ds; + if (ds < 0. 
&& ds != k) + k--; /* want k = floor(ds) */ + k_check = 1; + if (k >= 0 && k <= Ten_pmax) { + if (dval(d) < tens[k]) + k--; + k_check = 0; + } + j = bbits - i - 1; + if (j >= 0) { + b2 = 0; + s2 = j; + } + else { + b2 = -j; + s2 = 0; + } + if (k >= 0) { + b5 = 0; + s5 = k; + s2 += k; + } + else { + b2 -= k; + b5 = -k; + s5 = 0; + } + if (mode < 0 || mode > 9) + mode = 0; + +#ifndef SET_INEXACT +#ifdef Check_FLT_ROUNDS + try_quick = Rounding == 1; +#else + try_quick = 1; +#endif +#endif /*SET_INEXACT*/ + + if (mode > 5) { + mode -= 4; + try_quick = 0; + } + leftright = 1; + ilim = ilim1 = -1; + switch (mode) { + case 0: + case 1: + i = 18; + ndigits = 0; + break; + case 2: + leftright = 0; + /* no break */ + case 4: + if (ndigits <= 0) + ndigits = 1; + ilim = ilim1 = i = ndigits; + break; + case 3: + leftright = 0; + /* no break */ + case 5: + i = ndigits + k + 1; + ilim = i; + ilim1 = i - 1; + if (i <= 0) + i = 1; + } + s = s0 = rv_alloc(i+1); + +#ifdef Honor_FLT_ROUNDS + if (mode > 1 && rounding != 1) + leftright = 0; +#endif + + if (ilim >= 0 && ilim <= Quick_max && try_quick) { + + /* Try to get by with floating-point arithmetic. */ + + i = 0; + dval(d2) = dval(d); + k0 = k; + ilim0 = ilim; + ieps = 2; /* conservative */ + if (k > 0) { + ds = tens[k&0xf]; + j = k >> 4; + if (j & Bletch) { + /* prevent overflows */ + j &= Bletch - 1; + dval(d) /= bigtens[n_bigtens-1]; + ieps++; + } + for (; j; j >>= 1, i++) + if (j & 1) { + ieps++; + ds *= bigtens[i]; + } + dval(d) /= ds; + } + else if ((j1 = -k) != 0) { + dval(d) *= tens[j1 & 0xf]; + for (j = j1 >> 4; j; j >>= 1, i++) + if (j & 1) { + ieps++; + dval(d) *= bigtens[i]; + } + } + if (k_check && dval(d) < 1. 
&& ilim > 0) { + if (ilim1 <= 0) + goto fast_failed; + ilim = ilim1; + k--; + dval(d) *= 10.; + ieps++; + } + dval(eps) = ieps*dval(d) + 7.; + word0(eps) -= (P-1)*Exp_msk1; + if (ilim == 0) { + S = mhi = 0; + dval(d) -= 5.; + if (dval(d) > dval(eps)) + goto one_digit; + if (dval(d) < -dval(eps)) + goto no_digits; + goto fast_failed; + } +#ifndef No_leftright + if (leftright) { + /* Use Steele & White method of only + * generating digits needed. + */ + dval(eps) = 0.5/tens[ilim-1] - dval(eps); + for (i = 0;;) { + L = dval(d); + dval(d) -= L; + *s++ = '0' + (int)L; + if (dval(d) < dval(eps)) + goto ret1; + if (1. - dval(d) < dval(eps)) + goto bump_up; + if (++i >= ilim) + break; + dval(eps) *= 10.; + dval(d) *= 10.; + } + } + else { +#endif + /* Generate ilim digits, then fix them up. */ + dval(eps) *= tens[ilim-1]; + for (i = 1;; i++, dval(d) *= 10.) { + L = (Long)(dval(d)); + if (!(dval(d) -= L)) + ilim = i; + *s++ = '0' + (int)L; + if (i == ilim) { + if (dval(d) > 0.5 + dval(eps)) + goto bump_up; + else if (dval(d) < 0.5 - dval(eps)) { + while (*--s == '0') ; + s++; + goto ret1; + } + break; + } + } +#ifndef No_leftright + } +#endif +fast_failed: + s = s0; + dval(d) = dval(d2); + k = k0; + ilim = ilim0; + } + + /* Do we have a "small" integer? */ + + if (be >= 0 && k <= Int_max) { + /* Yes. */ + ds = tens[k]; + if (ndigits < 0 && ilim <= 0) { + S = mhi = 0; + if (ilim < 0 || dval(d) <= 5*ds) + goto no_digits; + goto one_digit; + } + for (i = 1;; i++, dval(d) *= 10.) 
{ + L = (Long)(dval(d) / ds); + dval(d) -= L*ds; +#ifdef Check_FLT_ROUNDS + /* If FLT_ROUNDS == 2, L will usually be high by 1 */ + if (dval(d) < 0) { + L--; + dval(d) += ds; + } +#endif + *s++ = '0' + (int)L; + if (!dval(d)) { +#ifdef SET_INEXACT + inexact = 0; +#endif + break; + } + if (i == ilim) { +#ifdef Honor_FLT_ROUNDS + if (mode > 1) + switch (rounding) { + case 0: goto ret1; + case 2: goto bump_up; + } +#endif + dval(d) += dval(d); + if (dval(d) > ds || (dval(d) == ds && (L & 1))) { +bump_up: + while (*--s == '9') + if (s == s0) { + k++; + *s = '0'; + break; + } + ++*s++; + } + break; + } + } + goto ret1; + } + + m2 = b2; + m5 = b5; + if (leftright) { + i = +#ifndef Sudden_Underflow + denorm ? be + (Bias + (P-1) - 1 + 1) : +#endif +#ifdef IBM + 1 + 4*P - 3 - bbits + ((bbits + be - 1) & 3); +#else + 1 + P - bbits; +#endif + b2 += i; + s2 += i; + mhi = i2b(1); + } + if (m2 > 0 && s2 > 0) { + i = m2 < s2 ? m2 : s2; + b2 -= i; + m2 -= i; + s2 -= i; + } + if (b5 > 0) { + if (leftright) { + if (m5 > 0) { + mhi = pow5mult(mhi, m5); + b1 = mult(mhi, b); + Bfree(b); + b = b1; + } + if ((j = b5 - m5) != 0) + b = pow5mult(b, j); + } + else + b = pow5mult(b, b5); + } + S = i2b(1); + if (s5 > 0) + S = pow5mult(S, s5); + + /* Check for special case that d is a normalized power of 2. */ + + spec_case = 0; + if ((mode < 2 || leftright) +#ifdef Honor_FLT_ROUNDS + && rounding == 1 +#endif + ) { + if (!word1(d) && !(word0(d) & Bndry_mask) +#ifndef Sudden_Underflow + && word0(d) & (Exp_mask & ~Exp_msk1) +#endif + ) { + /* The special case */ + b2 += Log2P; + s2 += Log2P; + spec_case = 1; + } + } + + /* Arrange for convenient computation of quotients: + * shift left if necessary so divisor has 4 leading 0 bits. + * + * Perhaps we should just compute leading 28 bits of S once + * and for all and pass them and a shift to quorem, so it + * can do shifts and ors to compute the numerator for q. + */ +#ifdef Pack_32 + if ((i = ((s5 ? 
32 - hi0bits(S->x[S->wds-1]) : 1) + s2) & 0x1f) != 0) + i = 32 - i; +#else + if ((i = ((s5 ? 32 - hi0bits(S->x[S->wds-1]) : 1) + s2) & 0xf) != 0) + i = 16 - i; +#endif + if (i > 4) { + i -= 4; + b2 += i; + m2 += i; + s2 += i; + } + else if (i < 4) { + i += 28; + b2 += i; + m2 += i; + s2 += i; + } + if (b2 > 0) + b = lshift(b, b2); + if (s2 > 0) + S = lshift(S, s2); + if (k_check) { + if (cmp(b,S) < 0) { + k--; + b = multadd(b, 10, 0); /* we botched the k estimate */ + if (leftright) + mhi = multadd(mhi, 10, 0); + ilim = ilim1; + } + } + if (ilim <= 0 && (mode == 3 || mode == 5)) { + if (ilim < 0 || cmp(b,S = multadd(S,5,0)) <= 0) { + /* no digits, fcvt style */ +no_digits: + k = -1 - ndigits; + goto ret; + } +one_digit: + *s++ = '1'; + k++; + goto ret; + } + if (leftright) { + if (m2 > 0) + mhi = lshift(mhi, m2); + + /* Compute mlo -- check for special case + * that d is a normalized power of 2. + */ + + mlo = mhi; + if (spec_case) { + mhi = Balloc(mhi->k); + Bcopy(mhi, mlo); + mhi = lshift(mhi, Log2P); + } + + for (i = 1;;i++) { + dig = quorem(b,S) + '0'; + /* Do we yet have the shortest decimal string + * that will round to d? + */ + j = cmp(b, mlo); + delta = diff(S, mhi); + j1 = delta->sign ? 
1 : cmp(b, delta); + Bfree(delta); +#ifndef ROUND_BIASED + if (j1 == 0 && mode != 1 && !(word1(d) & 1) +#ifdef Honor_FLT_ROUNDS + && rounding >= 1 +#endif + ) { + if (dig == '9') + goto round_9_up; + if (j > 0) + dig++; +#ifdef SET_INEXACT + else if (!b->x[0] && b->wds <= 1) + inexact = 0; +#endif + *s++ = dig; + goto ret; + } +#endif + if (j < 0 || (j == 0 && mode != 1 +#ifndef ROUND_BIASED + && !(word1(d) & 1) +#endif + )) { + if (!b->x[0] && b->wds <= 1) { +#ifdef SET_INEXACT + inexact = 0; +#endif + goto accept_dig; + } +#ifdef Honor_FLT_ROUNDS + if (mode > 1) + switch (rounding) { + case 0: goto accept_dig; + case 2: goto keep_dig; + } +#endif /*Honor_FLT_ROUNDS*/ + if (j1 > 0) { + b = lshift(b, 1); + j1 = cmp(b, S); + if ((j1 > 0 || (j1 == 0 && (dig & 1))) && dig++ == '9') + goto round_9_up; + } +accept_dig: + *s++ = dig; + goto ret; + } + if (j1 > 0) { +#ifdef Honor_FLT_ROUNDS + if (!rounding) + goto accept_dig; +#endif + if (dig == '9') { /* possible if i == 1 */ +round_9_up: + *s++ = '9'; + goto roundoff; + } + *s++ = dig + 1; + goto ret; + } +#ifdef Honor_FLT_ROUNDS +keep_dig: +#endif + *s++ = dig; + if (i == ilim) + break; + b = multadd(b, 10, 0); + if (mlo == mhi) + mlo = mhi = multadd(mhi, 10, 0); + else { + mlo = multadd(mlo, 10, 0); + mhi = multadd(mhi, 10, 0); + } + } + } + else + for (i = 1;; i++) { + *s++ = dig = quorem(b,S) + '0'; + if (!b->x[0] && b->wds <= 1) { +#ifdef SET_INEXACT + inexact = 0; +#endif + goto ret; + } + if (i >= ilim) + break; + b = multadd(b, 10, 0); + } + + /* Round off last digit */ + +#ifdef Honor_FLT_ROUNDS + switch (rounding) { + case 0: goto trimzeros; + case 2: goto roundoff; + } +#endif + b = lshift(b, 1); + j = cmp(b, S); + if (j > 0 || (j == 0 && (dig & 1))) { + roundoff: + while (*--s == '9') + if (s == s0) { + k++; + *s++ = '1'; + goto ret; + } + ++*s++; + } + else { + while (*--s == '0') ; + s++; + } +ret: + Bfree(S); + if (mhi) { + if (mlo && mlo != mhi) + Bfree(mlo); + Bfree(mhi); + } +ret1: +#ifdef SET_INEXACT 
+    if (inexact) {
+        if (!oldinexact) {
+            word0(d) = Exp_1 + (70 << Exp_shift);
+            word1(d) = 0;
+            dval(d) += 1.;
+        }
+    }
+    else if (!oldinexact)
+        clear_inexact();
+#endif
+    Bfree(b);
+    *s = 0;
+    *decpt = k + 1;
+    if (rve)
+        *rve = s;
+    return s0;
+}
+/* Calls func(word_start, word_len, arg) for each run of str delimited by whitespace or ','; no-op if str is NULL. */
+void
+ruby_each_words(const char *str, void (*func)(const char*, int, void*), void *arg)
+{
+    const char *end;
+    int len;
+
+    if (!str) return;
+    for (; *str; str = end) {
+        while (ISSPACE(*str) || *str == ',') str++; /* skip leading separators */
+        if (!*str) break;
+        end = str;
+        while (*end && !ISSPACE(*end) && *end != ',') end++;
+        len = end - str; /* the word is not NUL-terminated; its length is passed alongside */
+        (*func)(str, len, arg);
+    }
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/variable.c b/variable.c
new file mode 100644
index 0000000..1c1838d
--- /dev/null
+++ b/variable.c
@@ -0,0 +1,2027 @@
+/**********************************************************************
+
+  variable.c -
+
+  $Author: yugui $
+  created at: Tue Apr 19 23:55:15 JST 1994
+
+  Copyright (C) 1993-2007 Yukihiro Matsumoto
+  Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan
+
+**********************************************************************/
+
+#include "ruby/ruby.h"
+#include "ruby/st.h"
+#include "ruby/util.h"
+#include "ruby/encoding.h"
+#include "node.h"
+
+void rb_vm_change_state(void);
+void rb_vm_inc_const_missing_count(void);
+
+st_table *rb_global_tbl;
+st_table *rb_class_tbl;
+static ID autoload, classpath, tmp_classpath;
+/* Creates rb_global_tbl and rb_class_tbl and interns the autoload/classpath/tmp_classpath IDs. */
+void
+Init_var_tables(void)
+{
+    rb_global_tbl = st_init_numtable();
+    rb_class_tbl = st_init_numtable();
+    CONST_ID(autoload, "__autoload__");
+    CONST_ID(classpath, "__classpath__");
+    CONST_ID(tmp_classpath, "__tmp_classpath__");
+}
+/* State for the fc_i search: klass is the target, track the module being scanned, prev the enclosing scan. */
+struct fc_result {
+    ID name;
+    VALUE klass;
+    VALUE path;
+    VALUE track;
+    struct fc_result *prev;
+};
+/* Builds the frozen "Outer::...::name" string for a hit by prepending names along fc's prev chain. */
+static VALUE
+fc_path(struct fc_result *fc, ID name)
+{
+    VALUE path, tmp;
+
+    path = rb_str_dup(rb_id2str(name));
+    while (fc) {
+        if (fc->track == rb_cObject) break; /* top level reached: no further prefix */
+        if (RCLASS_IV_TBL(fc->track) &&
+            st_lookup(RCLASS_IV_TBL(fc->track), classpath, &tmp)) {
+            /* the scanned module already has a cached path: use it as the whole prefix */
+            tmp = rb_str_dup(tmp);
+            rb_str_cat2(tmp, "::");
+            rb_str_append(tmp, path);
+            path = tmp;
+            break;
+        }
+        tmp = rb_str_dup(rb_id2str(fc->name));
+        rb_str_cat2(tmp, "::");
+        rb_str_append(tmp, path);
+        path = tmp;
+        fc = fc->prev;
+    }
+    OBJ_FREEZE(path);
+    return path;
+}
+/* st_foreach callback: looks for res->klass among constants, recursing into class/module values. */
+static int
+fc_i(ID key, VALUE value, struct fc_result *res)
+{
+    if (!rb_is_const_id(key)) return ST_CONTINUE;
+
+    if (value == res->klass) {
+        res->path = fc_path(res, key);
+        return ST_STOP;
+    }
+    switch (TYPE(value)) {
+      case T_MODULE:
+      case T_CLASS:
+        if (!RCLASS_IV_TBL(value)) return ST_CONTINUE;
+        else {
+            struct fc_result arg;
+            struct fc_result *list;
+
+            list = res; /* skip values already being scanned further up the chain (cycle guard) */
+            while (list) {
+                if (list->track == value) return ST_CONTINUE;
+                list = list->prev;
+            }
+
+            arg.name = key;
+            arg.path = 0;
+            arg.klass = res->klass;
+            arg.track = value;
+            arg.prev = res;
+            st_foreach(RCLASS_IV_TBL(value), fc_i, (st_data_t)&arg);
+            if (arg.path) {
+                res->path = arg.path;
+
return ST_STOP;
+            }
+        }
+        break;
+
+      default:
+        break;
+    }
+    return ST_CONTINUE;
+}
+/* Searches Object's constants, then rb_class_tbl, for klass; caches a found path in klass's classpath entry. */
+static VALUE
+find_class_path(VALUE klass)
+{
+    struct fc_result arg;
+
+    arg.name = 0;
+    arg.path = 0;
+    arg.klass = klass;
+    arg.track = rb_cObject;
+    arg.prev = 0;
+    if (RCLASS_IV_TBL(rb_cObject)) {
+        st_foreach_safe(RCLASS_IV_TBL(rb_cObject), fc_i, (st_data_t)&arg);
+    }
+    if (arg.path == 0) {
+        st_foreach_safe(rb_class_tbl, fc_i, (st_data_t)&arg);
+    }
+    if (arg.path) {
+        if (!RCLASS_IV_TBL(klass)) {
+            RCLASS_IV_TBL(klass) = st_init_numtable();
+        }
+        st_insert(RCLASS_IV_TBL(klass), classpath, arg.path);
+        st_delete(RCLASS_IV_TBL(klass), &tmp_classpath, 0); /* drop any temporary path entry */
+        return arg.path;
+    }
+    return Qnil;
+}
+/* Returns klass's cached path string, deriving it from __classid__ or a constant search; Qnil if none is found. */
+static VALUE
+classname(VALUE klass)
+{
+    VALUE path = Qnil;
+
+    if (!klass) klass = rb_cObject;
+    if (RCLASS_IV_TBL(klass)) {
+        if (!st_lookup(RCLASS_IV_TBL(klass), classpath, &path)) {
+            ID classid;
+            st_data_t n;
+
+            CONST_ID(classid, "__classid__");
+
+            if (!st_lookup(RCLASS_IV_TBL(klass), classid, &path)) {
+                return find_class_path(klass);
+            }
+            /* promote the __classid__ symbol to a frozen classpath string, then drop __classid__ */
+            path = rb_str_dup(rb_id2str(SYM2ID(path)));
+            OBJ_FREEZE(path);
+            st_insert(RCLASS_IV_TBL(klass), classpath, path);
+            n = classid;
+            st_delete(RCLASS_IV_TBL(klass), &n, 0);
+        }
+        if (TYPE(path) != T_STRING) {
+            rb_bug("class path is not set properly");
+        }
+        return path;
+    }
+    return find_class_path(klass);
+}
+
+/*
+ *  call-seq:
+ *     mod.name => string
+ *
+ *  Returns the name of the module mod. Returns nil for anonymous modules.
+ */
+
+VALUE
+rb_mod_name(VALUE mod)
+{
+    VALUE path = classname(mod);
+
+    if (!NIL_P(path)) return rb_str_dup(path); /* return a copy rather than the cached string */
+    return path;
+}
+/* Returns klass's path; when it has none, builds a frozen "#<%s:%p>" string and caches it under tmp_classpath. */
+VALUE
+rb_class_path(VALUE klass)
+{
+    VALUE path = classname(klass);
+
+    if (!NIL_P(path)) return path;
+    if (RCLASS_IV_TBL(klass) && st_lookup(RCLASS_IV_TBL(klass),
+                                          tmp_classpath, &path)) {
+        return path;
+    }
+    else {
+        const char *s = "Class";
+
+        if (TYPE(klass) == T_MODULE) {
+            if (rb_obj_class(klass) == rb_cModule) {
+                s = "Module";
+            }
+            else {
+                s = rb_class2name(RBASIC(klass)->klass);
+            }
+        }
+        path = rb_sprintf("#<%s:%p>", s, (void*)klass);
+        OBJ_FREEZE(path);
+        rb_ivar_set(klass, tmp_classpath, path);
+
+        return path;
+    }
+}
+/* Sets klass's classpath to name, prefixed with "<path of under>::" unless under is Object. */
+void
+rb_set_class_path_string(VALUE klass, VALUE under, VALUE name)
+{
+    VALUE str;
+
+    if (under == rb_cObject) {
+        str = rb_str_new_frozen(name);
+    }
+    else {
+        str = rb_str_dup(rb_class_path(under));
+        rb_str_cat2(str, "::");
+        rb_str_append(str, name);
+        OBJ_FREEZE(str);
+    }
+    rb_ivar_set(klass, classpath, str);
+}
+/* Same as rb_set_class_path_string, but takes name as a C string. */
+void
+rb_set_class_path(VALUE klass, VALUE under, const char *name)
+{
+    VALUE str;
+
+    if (under == rb_cObject) {
+        str = rb_str_new2(name);
+    }
+    else {
+        str = rb_str_dup(rb_class_path(under));
+        rb_str_cat2(str, "::");
+        rb_str_cat2(str, name);
+    }
+    OBJ_FREEZE(str);
+    rb_ivar_set(klass, classpath, str);
+}
+/* Resolves an "A::B::C" path string to the class/module it names, starting from Object; raises on failure. */
+VALUE
+rb_path_to_class(VALUE pathname)
+{
+    rb_encoding *enc = rb_enc_get(pathname);
+    const char *pbeg, *p, *path = RSTRING_PTR(pathname);
+    ID id;
+    VALUE c = rb_cObject;
+
+    if (!rb_enc_asciicompat(enc)) {
+        rb_raise(rb_eArgError, "invalid class path encoding (non ASCII)");
+    }
+    pbeg = p = path;
+    if (path[0] == '#') {
+        rb_raise(rb_eArgError, "can't retrieve anonymous class %s", path);
+    }
+    while (*p) {
+        while (*p && *p != ':') p++; /* p stops at the end of the current path component */
+        id = rb_intern3(pbeg, p-pbeg, enc);
+        if (p[0] == ':') {
+            if (p[1] != ':') goto undefined_class; /* a single ':' is not a valid separator */
+            p += 2;
+            pbeg = p;
+        }
+        if (!rb_const_defined(c, id)) {
+          undefined_class:
+            rb_raise(rb_eArgError, "undefined
class/module %.*s", (int)(p-path), path);
+        }
+        c = rb_const_get_at(c, id);
+        switch (TYPE(c)) {
+          case T_MODULE:
+          case T_CLASS:
+            break;
+          default:
+            rb_raise(rb_eTypeError, "%s does not refer class/module", path);
+        }
+    }
+
+    return c;
+}
+/* C-string convenience wrapper around rb_path_to_class. */
+VALUE
+rb_path2class(const char *path)
+{
+    return rb_path_to_class(rb_usascii_str_new_cstr(path));
+}
+/* Records id, as a Symbol, in klass's "__classid__" instance variable. */
+void
+rb_name_class(VALUE klass, ID id)
+{
+    rb_iv_set(klass, "__classid__", ID2SYM(id));
+}
+/* Returns the class path of rb_class_real(klass). */
+VALUE
+rb_class_name(VALUE klass)
+{
+    return rb_class_path(rb_class_real(klass));
+}
+/* Returns rb_class_name(klass) as a char pointer into that string's buffer. */
+const char *
+rb_class2name(VALUE klass)
+{
+    return RSTRING_PTR(rb_class_name(klass));
+}
+/* Returns the class name of CLASS_OF(obj). */
+const char *
+rb_obj_classname(VALUE obj)
+{
+    return rb_class2name(CLASS_OF(obj));
+}
+
+#define global_variable rb_global_variable
+
+#define gvar_getter_t rb_gvar_getter_t
+#define gvar_setter_t rb_gvar_setter_t
+#define gvar_marker_t rb_gvar_marker_t
+/* One trace hook on a global variable; removed flags it for deletion by remove_trace. */
+struct trace_var {
+    int removed;
+    void (*func)(VALUE arg, VALUE val);
+    VALUE data;
+    struct trace_var *next;
+};
+/* Storage and accessors of a global; counter is the number of entries sharing it (see rb_alias_variable). */
+struct global_variable {
+    int counter;
+    void *data;
+    gvar_getter_t *getter;
+    gvar_setter_t *setter;
+    gvar_marker_t *marker;
+    int block_trace;
+    struct trace_var *trace;
+};
+/* Value stored in rb_global_tbl: binds an ID to its (possibly shared) global_variable. */
+struct global_entry {
+    struct global_variable *var;
+    ID id;
+};
+
+#define undef_getter rb_gvar_undef_getter
+#define undef_setter rb_gvar_undef_setter
+#define undef_marker rb_gvar_undef_marker
+
+#define val_getter rb_gvar_val_getter
+#define val_setter rb_gvar_val_setter
+#define val_marker rb_gvar_val_marker
+
+#define var_getter rb_gvar_var_getter
+#define var_setter rb_gvar_var_setter
+#define var_marker rb_gvar_var_marker
+
+#define readonly_setter rb_gvar_readonly_setter
+/* Looks id up in rb_global_tbl, creating an entry with the undef_* accessors on first use. */
+struct global_entry*
+rb_global_entry(ID id)
+{
+    struct global_entry *entry;
+    st_data_t data;
+
+    if (!st_lookup(rb_global_tbl, id, &data)) {
+        struct global_variable *var;
+        entry = ALLOC(struct global_entry);
+        var = ALLOC(struct global_variable);
+        entry->id = id;
+        entry->var = var;
+        var->counter = 1;
+        var->data
= 0;
+        var->getter = undef_getter;
+        var->setter = undef_setter;
+        var->marker = undef_marker;
+
+        var->block_trace = 0;
+        var->trace = 0;
+        st_add_direct(rb_global_tbl, id, (st_data_t)entry);
+    }
+    else {
+        entry = (struct global_entry *)data;
+    }
+    return entry;
+}
+/* Getter of a never-assigned global: emits a warning and returns Qnil. */
+VALUE
+undef_getter(ID id, void *data, struct global_variable *var)
+{
+    rb_warning("global variable `%s' not initialized", rb_id2name(id));
+
+    return Qnil;
+}
+/* First assignment: switches the variable to the val_* accessors and stores val. */
+void
+undef_setter(VALUE val, ID id, void *data, struct global_variable *var)
+{
+    var->getter = val_getter;
+    var->setter = val_setter;
+    var->marker = val_marker;
+
+    var->data = (void*)val;
+}
+/* Marker of an undefined global: nothing to mark. */
+void
+undef_marker(VALUE *var)
+{
+}
+/* val_* accessors keep the VALUE itself in var->data; the getter just casts it back. */
+VALUE
+val_getter(ID id, void *data, struct global_variable *var)
+{
+    return (VALUE)data;
+}
+/* Stores val directly in var->data. */
+void
+val_setter(VALUE val, ID id, void *data, struct global_variable *var)
+{
+    var->data = (void*)val;
+}
+/* Despite the parameter type, var is the stored VALUE itself (mark_global_entry passes var->data). */
+void
+val_marker(VALUE *var)
+{
+    VALUE data = (VALUE)var;
+    if (data) rb_gc_mark_maybe(data);
+}
+/* var_* accessors treat data as a pointer to a VALUE; a NULL pointer reads as Qnil. */
+VALUE
+var_getter(ID id, void *data, struct global_variable *gvar)
+{
+    VALUE *var = data;
+    if (!var) return Qnil;
+    return *var;
+}
+/* Writes val through data; unlike var_getter there is no NULL check here. */
+void
+var_setter(VALUE val, ID id, void *data, struct global_variable *gvar)
+{
+    *(VALUE *)data = val;
+}
+/* Marks the VALUE that var points to, if var is non-NULL. */
+void
+var_marker(VALUE *var)
+{
+    if (var) rb_gc_mark_maybe(*var);
+}
+/* Setter that rejects every assignment by calling rb_name_error. */
+void
+readonly_setter(VALUE val, ID id, void *data, struct global_variable *gvar)
+{
+    rb_name_error(id, "%s is a read-only variable", rb_id2name(id));
+}
+/* st_foreach callback: runs the global's marker on its data and marks the data of each of its traces. */
+static int
+mark_global_entry(ID key, struct global_entry *entry)
+{
+    struct trace_var *trace;
+    struct global_variable *var = entry->var;
+
+    (*var->marker)(var->data);
+    trace = var->trace;
+    while (trace) {
+        if (trace->data) rb_gc_mark_maybe(trace->data);
+        trace = trace->next;
+    }
+    return ST_CONTINUE;
+}
+/* Applies mark_global_entry to every entry of rb_global_tbl, if the table exists. */
+void
+rb_gc_mark_global_tbl(void)
+{
+    if (rb_global_tbl)
+        st_foreach_safe(rb_global_tbl, mark_global_entry, 0);
+}
+/* Interns name as an ID, prepending '$' when name does not already start with it. */
+static ID
+global_id(const char *name)
+{
+    ID id;
+
+    if (name[0] == '$') id = rb_intern(name);
+
else {
+        char *buf = ALLOCA_N(char, strlen(name)+2); /* room for '$', name and the terminating NUL */
+        buf[0] = '$';
+        strcpy(buf+1, name);
+        id = rb_intern(buf);
+    }
+    return id;
+}
+/* Binds global `name` to the C variable *var; a NULL getter/setter selects var_getter/var_setter. */
+void
+rb_define_hooked_variable(
+    const char *name,
+    VALUE *var,
+    VALUE (*getter)(ANYARGS),
+    void (*setter)(ANYARGS))
+{
+    volatile VALUE tmp = var ? *var : Qnil;
+    ID id = global_id(name);
+    struct global_variable *gvar = rb_global_entry(id)->var;
+
+    gvar->data = (void*)var;
+    gvar->getter = getter?(gvar_getter_t *)getter:var_getter;
+    gvar->setter = setter?(gvar_setter_t *)setter:var_setter;
+    gvar->marker = var_marker;
+
+    RB_GC_GUARD(tmp);
+}
+/* Defines a global backed by *var with the default var_* accessors. */
+void
+rb_define_variable(const char *name, VALUE *var)
+{
+    rb_define_hooked_variable(name, var, 0, 0);
+}
+/* Defines a global backed by *var whose setter is readonly_setter. */
+void
+rb_define_readonly_variable(const char *name, VALUE *var)
+{
+    rb_define_hooked_variable(name, var, 0, readonly_setter);
+}
+/* Defines a global with no backing variable; NULL hooks default to val_getter and readonly_setter. */
+void
+rb_define_virtual_variable(
+    const char *name,
+    VALUE (*getter)(ANYARGS),
+    void (*setter)(ANYARGS))
+{
+    if (!getter) getter = val_getter;
+    if (!setter) setter = readonly_setter;
+    rb_define_hooked_variable(name, 0, getter, setter);
+}
+/* Trace hook installed by rb_f_trace_var: runs cmd through rb_eval_cmd with val as its only argument. */
+static void
+rb_trace_eval(VALUE cmd, VALUE val)
+{
+    rb_eval_cmd(cmd, rb_ary_new3(1, val), 0);
+}
+
+/*
+ *  call-seq:
+ *     trace_var(symbol, cmd ) => nil
+ *     trace_var(symbol) {|val| block } => nil
+ *
+ *  Controls tracing of assignments to global variables. The parameter
+ *  +symbol+ identifies the variable (as either a string name or a
+ *  symbol identifier). _cmd_ (which may be a string or a
+ *  +Proc+ object) or block is executed whenever the variable
+ *  is assigned. The block or +Proc+ object receives the
+ *  variable's new value as a parameter. Also see
+ *  Kernel::untrace_var.
+ *
+ *     trace_var :$_, proc {|v| puts "$_ is now '#{v}'" }
+ *     $_ = "hello"
+ *     $_ = ' there'
+ *
+ *  produces:
+ *
+ *     $_ is now 'hello'
+ *     $_ is now ' there'
+ */
+
+VALUE
+rb_f_trace_var(int argc, VALUE *argv)
+{
+    VALUE var, cmd;
+    struct global_entry *entry;
+    struct trace_var *trace;
+
+    rb_secure(4);
+    if (rb_scan_args(argc, argv, "11", &var, &cmd) == 1) {
+        cmd = rb_block_proc(); /* only the symbol was given: the block is the command */
+    }
+    if (NIL_P(cmd)) {
+        return rb_f_untrace_var(argc, argv); /* an explicit nil command is handled as untrace_var */
+    }
+    entry = rb_global_entry(rb_to_id(var));
+    if (OBJ_TAINTED(cmd)) {
+        rb_raise(rb_eSecurityError, "Insecure: tainted variable trace");
+    }
+    trace = ALLOC(struct trace_var);
+    trace->next = entry->var->trace; /* push onto the front of the variable's trace list */
+    trace->func = rb_trace_eval;
+    trace->data = cmd;
+    trace->removed = 0;
+    entry->var->trace = trace;
+
+    return Qnil;
+}
+/* Unlinks and frees every trace of var whose removed flag is set. */
+static void
+remove_trace(struct global_variable *var)
+{
+    struct trace_var *trace = var->trace;
+    struct trace_var t;
+    struct trace_var *next;
+
+    t.next = trace; /* stack-allocated dummy head, so the first node needs no special case */
+    trace = &t;
+    while (trace->next) {
+        next = trace->next;
+        if (next->removed) {
+            trace->next = next->next;
+            xfree(next);
+        }
+        else {
+            trace = next;
+        }
+    }
+    var->trace = t.next;
+}
+
+/*
+ *  call-seq:
+ *     untrace_var(symbol [, cmd] ) => array or nil
+ *
+ *  Removes tracing for the specified command on the given global
+ *  variable, returning an array holding it (+nil+ if not traced).
+ *  If no command is specified, removes all tracing for that
+ *  variable and returns an array of the commands actually removed.
+ */
+
+VALUE
+rb_f_untrace_var(int argc, VALUE *argv)
+{
+    VALUE var, cmd;
+    ID id;
+    struct global_entry *entry;
+    struct trace_var *trace;
+    st_data_t data;
+
+    rb_secure(4);
+    rb_scan_args(argc, argv, "11", &var, &cmd);
+    id = rb_to_id(var);
+    if (!st_lookup(rb_global_tbl, id, &data)) {
+        rb_name_error(id, "undefined global variable %s", rb_id2name(id));
+    }
+
+    trace = (entry = (struct global_entry *)data)->var->trace;
+    if (NIL_P(cmd)) {
+        VALUE ary = rb_ary_new();
+
+        while (trace) {
+            struct trace_var *next = trace->next;
+            rb_ary_push(ary, (VALUE)trace->data);
+            trace->removed = 1;
+            trace = next;
+        }
+        /* while traces are running (block_trace set) the actual unlinking is left to trace_en */
+        if (!entry->var->block_trace) remove_trace(entry->var);
+        return ary;
+    }
+    else {
+        while (trace) {
+            if (trace->data == cmd) {
+                trace->removed = 1;
+                if (!entry->var->block_trace) remove_trace(entry->var);
+                return rb_ary_new3(1, cmd);
+            }
+            trace = trace->next;
+        }
+    }
+    return Qnil;
+}
+/* Reads a global by calling its getter with the entry's id and the variable's data. */
+VALUE
+rb_gvar_get(struct global_entry *entry)
+{
+    struct global_variable *var = entry->var;
+    return (*var->getter)(entry->id, var->data, var);
+}
+/* Argument bundle handed to trace_ev through rb_ensure. */
+struct trace_data {
+    struct trace_var *trace;
+    VALUE val;
+};
+/* Calls every trace hook in the list with the newly assigned value. */
+static VALUE
+trace_ev(struct trace_data *data)
+{
+    struct trace_var *trace = data->trace;
+
+    while (trace) {
+        (*trace->func)(trace->data, data->val);
+        trace = trace->next;
+    }
+    return Qnil; /* result is ignored by rb_gvar_set */
+}
+/* Ensure handler for rb_gvar_set: clears block_trace and frees traces flagged as removed. */
+static VALUE
+trace_en(struct global_variable *var)
+{
+    var->block_trace = 0;
+    remove_trace(var);
+    return Qnil; /* result is ignored by rb_gvar_set */
+}
+/* Assigns through the setter, then runs the traces unless a trace of this variable is already running. */
+VALUE
+rb_gvar_set(struct global_entry *entry, VALUE val)
+{
+    struct trace_data trace;
+    struct global_variable *var = entry->var;
+
+    if (rb_safe_level() >= 4)
+        rb_raise(rb_eSecurityError, "Insecure: can't change global variable value");
+    (*var->setter)(val, entry->id, var->data, var);
+
+    if (var->trace && !var->block_trace) {
+        var->block_trace = 1; /* an assignment made from inside a hook will not re-run the hooks */
+        trace.trace = var->trace;
+        trace.val = val;
+        rb_ensure(trace_ev, (VALUE)&trace, trace_en, (VALUE)var);
+    }
+    return val;
+}
+
+VALUE
+rb_gv_set(const char *name, VALUE val)
+{
+    struct global_entry *entry;
+    /* global_id accepts name with or without the leading '$' */
+    entry = rb_global_entry(global_id(name));
+    return rb_gvar_set(entry, val);
+}
+/* Reads the global called name (leading '$' optional) through rb_gvar_get. */
+VALUE
+rb_gv_get(const char *name)
+{
+    struct global_entry *entry;
+
+    entry = rb_global_entry(global_id(name));
+    return rb_gvar_get(entry);
+}
+/* Qfalse while the entry still has the undef_getter installed, Qtrue otherwise. */
+VALUE
+rb_gvar_defined(struct global_entry *entry)
+{
+    if (entry->var->getter == undef_getter) return Qfalse;
+    return Qtrue;
+}
+/* st_foreach callback: appends each table key to ary as a Symbol. */
+static int
+gvar_i(ID key, struct global_entry *entry, VALUE ary)
+{
+    rb_ary_push(ary, ID2SYM(key));
+    return ST_CONTINUE;
+}
+
+/*
+ *  call-seq:
+ *     global_variables => array
+ *
+ *  Returns an array of the names of global variables.
+ *
+ *     global_variables.grep /std/ #=> [:$stdin, :$stdout, :$stderr]
+ */
+
+VALUE
+rb_f_global_variables(void)
+{
+    VALUE ary = rb_ary_new();
+    char buf[4];
+    const char *s = "123456789";
+
+    st_foreach_safe(rb_global_tbl, gvar_i, ary);
+    while (*s) { /* then append :$1 .. :$9 explicitly */
+        sprintf(buf, "$%c", *s++);
+        rb_ary_push(ary, ID2SYM(rb_intern(buf)));
+    }
+    return ary;
+}
+/* Makes name1 share name2's global_variable, freeing name1's previous one when its counter reaches zero. */
+void
+rb_alias_variable(ID name1, ID name2)
+{
+    struct global_entry *entry1, *entry2;
+    st_data_t data1;
+
+    if (rb_safe_level() >= 4)
+        rb_raise(rb_eSecurityError, "Insecure: can't alias global variable");
+
+    entry2 = rb_global_entry(name2);
+    if (!st_lookup(rb_global_tbl, name1, &data1)) {
+        entry1 = ALLOC(struct global_entry); /* entry1->var is filled in at the end of the function */
+        entry1->id = name1;
+        st_add_direct(rb_global_tbl, name1, (st_data_t)entry1);
+    }
+    else if ((entry1 = (struct global_entry *)data1)->var != entry2->var) {
+        struct global_variable *var = entry1->var;
+        if (var->block_trace) {
+            rb_raise(rb_eRuntimeError, "can't alias in tracer");
+        }
+        var->counter--;
+        if (var->counter == 0) {
+            struct trace_var *trace = var->trace;
+            while (trace) {
+                struct trace_var *next = trace->next;
+                xfree(trace);
+                trace = next;
+            }
+            xfree(var);
+        }
+    }
+    else {
+        return; /* both names already share the same variable */
+    }
+    entry2->var->counter++;
+    entry1->var = entry2->var;
+}
+
+static int special_generic_ivar = 0;
+static st_table
*generic_iv_tbl;
+/* Returns obj's table in generic_iv_tbl, or 0 when FL_EXIVAR is unset or no table is registered. */
+st_table*
+rb_generic_ivar_table(VALUE obj)
+{
+    st_data_t tbl;
+
+    if (!FL_TEST(obj, FL_EXIVAR)) return 0;
+    if (!generic_iv_tbl) return 0;
+    if (!st_lookup(generic_iv_tbl, obj, &tbl)) return 0;
+    return (st_table *)tbl;
+}
+/* Looks id up in obj's generic table; when it is absent returns Qnil, warning first if warn is set. */
+static VALUE
+generic_ivar_get(VALUE obj, ID id, int warn)
+{
+    st_data_t tbl;
+    VALUE val;
+
+    if (generic_iv_tbl) {
+        if (st_lookup(generic_iv_tbl, obj, &tbl)) {
+            if (st_lookup((st_table *)tbl, id, &val)) {
+                return val;
+            }
+        }
+    }
+    if (warn) {
+        rb_warning("instance variable %s not initialized", rb_id2name(id));
+    }
+    return Qnil;
+}
+/* Stores id => val in obj's generic table, creating generic_iv_tbl and the per-object table as needed. */
+static void
+generic_ivar_set(VALUE obj, ID id, VALUE val)
+{
+    st_table *tbl;
+    st_data_t data;
+
+    if (rb_special_const_p(obj)) {
+        if (rb_obj_frozen_p(obj)) rb_error_frozen("object");
+        special_generic_ivar = 1; /* enables the scan in rb_mark_generic_ivar_tbl */
+    }
+    if (!generic_iv_tbl) {
+        generic_iv_tbl = st_init_numtable();
+    }
+    if (!st_lookup(generic_iv_tbl, obj, &data)) {
+        FL_SET(obj, FL_EXIVAR);
+        tbl = st_init_numtable();
+        st_add_direct(generic_iv_tbl, obj, (st_data_t)tbl);
+        st_add_direct(tbl, id, val);
+        return;
+    }
+    st_insert((st_table *)data, id, val);
+}
+/* Qtrue if obj's generic table has an entry for id, Qfalse otherwise. */
+static VALUE
+generic_ivar_defined(VALUE obj, ID id)
+{
+    st_table *tbl;
+    st_data_t data;
+    VALUE val;
+
+    if (!generic_iv_tbl) return Qfalse;
+    if (!st_lookup(generic_iv_tbl, obj, &data)) return Qfalse;
+    tbl = (st_table *)data;
+    if (st_lookup(tbl, id, &val)) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+/* Deletes id from obj's generic table (value goes to *valp) and frees the table once it is empty. */
+static int
+generic_ivar_remove(VALUE obj, ID id, VALUE *valp)
+{
+    st_table *tbl;
+    st_data_t data;
+    int status;
+
+    if (!generic_iv_tbl) return 0;
+    if (!st_lookup(generic_iv_tbl, obj, &data)) return 0;
+    tbl = (st_table *)data;
+    status = st_delete(tbl, &id, valp);
+    if (tbl->num_entries == 0) {
+        st_delete(generic_iv_tbl, &obj, &data);
+        st_free_table((st_table *)data);
+    }
+    return status;
+}
+/* Passes obj's generic ivar table, if it has one, to rb_mark_tbl. */
+void
+rb_mark_generic_ivar(VALUE obj)
+{
+    st_data_t tbl;
+
+    if (!generic_iv_tbl) return;
+    if (st_lookup(generic_iv_tbl, obj, &tbl)) {
+
rb_mark_tbl((st_table *)tbl);
+    }
+}
+/* st_foreach callback: marks one instance variable value. */
+static int
+givar_mark_i(ID key, VALUE value)
+{
+    rb_gc_mark(value);
+    return ST_CONTINUE;
+}
+/* st_foreach callback over generic_iv_tbl: marks the ivars of special-constant objects only. */
+static int
+givar_i(VALUE obj, st_table *tbl)
+{
+    if (rb_special_const_p(obj)) {
+        st_foreach_safe(tbl, givar_mark_i, 0);
+    }
+    return ST_CONTINUE;
+}
+/* Marks ivars held by special constants; skipped entirely unless generic_ivar_set ever stored one. */
+void
+rb_mark_generic_ivar_tbl(void)
+{
+    if (!generic_iv_tbl) return;
+    if (special_generic_ivar == 0) return;
+    st_foreach_safe(generic_iv_tbl, givar_i, 0);
+}
+/* Removes obj's table from generic_iv_tbl and frees it, if one is registered. */
+void
+rb_free_generic_ivar(VALUE obj)
+{
+    st_data_t tbl;
+
+    if (!generic_iv_tbl) return;
+    if (st_delete(generic_iv_tbl, &obj, &tbl))
+        st_free_table((st_table *)tbl);
+}
+/* Gives clone a copy of obj's generic ivars; clears clone's when obj has the flag unset or an empty table. */
+void
+rb_copy_generic_ivar(VALUE clone, VALUE obj)
+{
+    st_data_t data;
+
+    if (!generic_iv_tbl) return;
+    if (!FL_TEST(obj, FL_EXIVAR)) {
+clear:
+        if (FL_TEST(clone, FL_EXIVAR)) {
+            rb_free_generic_ivar(clone);
+            FL_UNSET(clone, FL_EXIVAR);
+        }
+        return;
+    }
+    if (st_lookup(generic_iv_tbl, obj, &data)) {
+        st_table *tbl = (st_table *)data;
+
+        if (tbl->num_entries == 0)
+            goto clear;
+
+        if (st_lookup(generic_iv_tbl, clone, &data)) {
+            st_free_table((st_table *)data); /* replace clone's existing table */
+            st_insert(generic_iv_tbl, clone, (st_data_t)st_copy(tbl));
+        }
+        else {
+            st_add_direct(generic_iv_tbl, clone, (st_data_t)st_copy(tbl));
+            FL_SET(clone, FL_EXIVAR);
+        }
+    }
+}
+/* Common ivar lookup dispatching on TYPE(obj); returns Qnil when unset, warning first if warn is set. */
+static VALUE
+ivar_get(VALUE obj, ID id, int warn)
+{
+    VALUE val, *ptr;
+    struct st_table *iv_index_tbl;
+    long len;
+    st_data_t index;
+
+    switch (TYPE(obj)) {
+      case T_OBJECT:
+        len = ROBJECT_NUMIV(obj);
+        ptr = ROBJECT_IVPTR(obj);
+        iv_index_tbl = ROBJECT_IV_INDEX_TBL(obj);
+        if (!iv_index_tbl) break;
+        if (!st_lookup(iv_index_tbl, id, &index)) break;
+        if (len <= index) break; /* name has an index but this object has no slot for it */
+        val = ptr[index];
+        if (val != Qundef)
+            return val;
+        break;
+      case T_CLASS:
+      case T_MODULE:
+        if (RCLASS_IV_TBL(obj) && st_lookup(RCLASS_IV_TBL(obj), id, &val))
+            return val;
+        break;
+      default:
+        if (FL_TEST(obj, FL_EXIVAR) || rb_special_const_p(obj))
+            return generic_ivar_get(obj, id, warn);
+        break;
+    }
+    if (warn)
{
+        rb_warning("instance variable %s not initialized", rb_id2name(id));
+    }
+    return Qnil;
+}
+/* Instance variable read that requests the "not initialized" warning. */
+VALUE
+rb_ivar_get(VALUE obj, ID id)
+{
+    return ivar_get(obj, id, Qtrue);
+}
+/* Instance variable read without the "not initialized" warning. */
+VALUE
+rb_attr_get(VALUE obj, ID id)
+{
+    return ivar_get(obj, id, Qfalse);
+}
+/* Sets ivar id of obj to val and returns val; raises for a frozen obj, or a trusted obj at safe level >= 4. */
+VALUE
+rb_ivar_set(VALUE obj, ID id, VALUE val)
+{
+    struct st_table *iv_index_tbl;
+    st_data_t index;
+    long i, len;
+    int ivar_extended;
+
+    if (!OBJ_UNTRUSTED(obj) && rb_safe_level() >= 4)
+        rb_raise(rb_eSecurityError, "Insecure: can't modify instance variable");
+    if (OBJ_FROZEN(obj)) rb_error_frozen("object");
+    switch (TYPE(obj)) {
+      case T_OBJECT:
+        iv_index_tbl = ROBJECT_IV_INDEX_TBL(obj);
+        if (!iv_index_tbl) {
+            /* fall back to the name => index table kept on the object's class, creating it if needed */
+            VALUE klass = rb_obj_class(obj);
+            iv_index_tbl = RCLASS_IV_INDEX_TBL(klass);
+            if (!iv_index_tbl) {
+                iv_index_tbl = RCLASS_IV_INDEX_TBL(klass) = st_init_numtable();
+            }
+        }
+        ivar_extended = 0;
+        if (!st_lookup(iv_index_tbl, id, &index)) {
+            index = iv_index_tbl->num_entries; /* first use of this name: assign the next index */
+            st_add_direct(iv_index_tbl, id, index);
+            ivar_extended = 1;
+        }
+        len = ROBJECT_NUMIV(obj);
+        if (len <= index) { /* no slot for this index yet */
+            VALUE *ptr = ROBJECT_IVPTR(obj);
+            if (index < ROBJECT_EMBED_LEN_MAX) {
+                RBASIC(obj)->flags |= ROBJECT_EMBED;
+                ptr = ROBJECT(obj)->as.ary;
+                for (i = 0; i < ROBJECT_EMBED_LEN_MAX; i++) {
+                    ptr[i] = Qundef;
+                }
+            }
+            else {
+                VALUE *newptr;
+                long newsize = (index+1) + (index+1)/4; /* (index+1)*1.25 */
+                if (!ivar_extended &&
+                    iv_index_tbl->num_entries < newsize) {
+                    newsize = iv_index_tbl->num_entries;
+                }
+                if (RBASIC(obj)->flags & ROBJECT_EMBED) {
+                    /* move from embedded storage to a heap array */
+                    newptr = ALLOC_N(VALUE, newsize);
+                    MEMCPY(newptr, ptr, VALUE, len);
+                    RBASIC(obj)->flags &= ~ROBJECT_EMBED;
+                    ROBJECT(obj)->as.heap.ivptr = newptr;
+                }
+                else {
+                    REALLOC_N(ROBJECT(obj)->as.heap.ivptr, VALUE, newsize);
+                    newptr = ROBJECT(obj)->as.heap.ivptr;
+                }
+                for (; len < newsize; len++)
+                    newptr[len] = Qundef; /* new slots start out unset */
+                ROBJECT(obj)->as.heap.numiv = newsize;
+                ROBJECT(obj)->as.heap.iv_index_tbl = iv_index_tbl;
+            }
+        }
+        ROBJECT_IVPTR(obj)[index] = val;
+        break;
+      case T_CLASS:
+      case T_MODULE:
+        if (!RCLASS_IV_TBL(obj)) RCLASS_IV_TBL(obj) = st_init_numtable();
+        st_insert(RCLASS_IV_TBL(obj), id, val);
+        break;
+      default:
+        generic_ivar_set(obj, id, val);
+        break;
+    }
+    return val;
+}
+/* Qtrue if obj has ivar id set (a slot holding Qundef counts as unset), Qfalse otherwise. */
+VALUE
+rb_ivar_defined(VALUE obj, ID id)
+{
+    VALUE val;
+    struct st_table *iv_index_tbl;
+    st_data_t index;
+    switch (TYPE(obj)) {
+      case T_OBJECT:
+        iv_index_tbl = ROBJECT_IV_INDEX_TBL(obj);
+        if (!iv_index_tbl) break;
+        if (!st_lookup(iv_index_tbl, id, &index)) break;
+        if (ROBJECT_NUMIV(obj) <= index) break;
+        val = ROBJECT_IVPTR(obj)[index];
+        if (val != Qundef)
+            return Qtrue;
+        break;
+      case T_CLASS:
+      case T_MODULE:
+        if (RCLASS_IV_TBL(obj) && st_lookup(RCLASS_IV_TBL(obj), id, 0))
+            return Qtrue;
+        break;
+      default:
+        if (FL_TEST(obj, FL_EXIVAR) || rb_special_const_p(obj))
+            return generic_ivar_defined(obj, id);
+        break;
+    }
+    return Qfalse;
+}
+/* Context handed to obj_ivar_i: the object being iterated plus the caller's callback and argument. */
+struct obj_ivar_tag {
+    VALUE obj;
+    int (*func)(ID key, VALUE val, st_data_t arg);
+    st_data_t arg;
+};
+/* st_foreach callback: turns an (id, slot index) pair into (id, value) and forwards set slots to data->func. */
+static int
+obj_ivar_i(ID key, VALUE index, struct obj_ivar_tag *data)
+{
+    if (index < ROBJECT_NUMIV(data->obj)) {
+        VALUE val = ROBJECT_IVPTR(data->obj)[index];
+        if (val != Qundef) {
+            return (data->func)(key, val, data->arg);
+        }
+    }
+    return ST_CONTINUE;
+}
+/* Iterates the ivars of a T_OBJECT by walking its index table; no-op when it has no index table. */
+static void
+obj_ivar_each(VALUE obj, int (*func)(ANYARGS), st_data_t arg)
+{
+    st_table *tbl;
+    struct obj_ivar_tag data;
+
+    tbl = ROBJECT_IV_INDEX_TBL(obj);
+    if (!tbl)
+        return;
+
+    data.obj = obj;
+    data.func = (int (*)(ID key, VALUE val, st_data_t arg))func;
+    data.arg = arg;
+
+    st_foreach_safe(tbl, obj_ivar_i, (st_data_t)&data);
+}
+/* Calls func(id, value, arg) for each instance variable of obj, whichever storage holds it. */
+void rb_ivar_foreach(VALUE obj, int (*func)(ANYARGS), st_data_t arg)
+{
+    switch (TYPE(obj)) {
+      case T_OBJECT:
+        obj_ivar_each(obj, func, arg);
+        break;
+      case T_CLASS:
+      case T_MODULE:
+        if (RCLASS_IV_TBL(obj)) {
+            st_foreach_safe(RCLASS_IV_TBL(obj), func, arg);
+        }
+        break;
+      default:
+        if (!generic_iv_tbl) break;
+        if (FL_TEST(obj, FL_EXIVAR) || rb_special_const_p(obj))
{ + st_data_t tbl; + + if (st_lookup(generic_iv_tbl, obj, &tbl)) { + st_foreach_safe((st_table *)tbl, func, arg); + } + } + break; + } +} + +static int +ivar_i(ID key, VALUE val, VALUE ary) +{ + if (rb_is_instance_id(key)) { + rb_ary_push(ary, ID2SYM(key)); + } + return ST_CONTINUE; +} + +/* + * call-seq: + * obj.instance_variables => array + * + * Returns an array of instance variable names for the receiver. Note + * that simply defining an accessor does not create the corresponding + * instance variable. + * + * class Fred + * attr_accessor :a1 + * def initialize + * @iv = 3 + * end + * end + * Fred.new.instance_variables #=> [:@iv] + */ + +VALUE +rb_obj_instance_variables(VALUE obj) +{ + VALUE ary; + + ary = rb_ary_new(); + rb_ivar_foreach(obj, ivar_i, ary); + return ary; +} + +/* + * call-seq: + * obj.remove_instance_variable(symbol) => obj + * + * Removes the named instance variable from obj, returning that + * variable's value. + * + * class Dummy + * attr_reader :var + * def initialize + * @var = 99 + * end + * def remove + * remove_instance_variable(:@var) + * end + * end + * d = Dummy.new + * d.var #=> 99 + * d.remove #=> 99 + * d.var #=> nil + */ + +VALUE +rb_obj_remove_instance_variable(VALUE obj, VALUE name) +{ + VALUE val = Qnil; + const ID id = rb_to_id(name); + st_data_t n, v; + struct st_table *iv_index_tbl; + st_data_t index; + + if (!OBJ_UNTRUSTED(obj) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't modify instance variable"); + if (OBJ_FROZEN(obj)) rb_error_frozen("object"); + if (!rb_is_instance_id(id)) { + rb_name_error(id, "`%s' is not allowed as an instance variable name", rb_id2name(id)); + } + + switch (TYPE(obj)) { + case T_OBJECT: + iv_index_tbl = ROBJECT_IV_INDEX_TBL(obj); + if (!iv_index_tbl) break; + if (!st_lookup(iv_index_tbl, id, &index)) break; + if (ROBJECT_NUMIV(obj) <= index) break; + val = ROBJECT_IVPTR(obj)[index]; + if (val != Qundef) { + ROBJECT_IVPTR(obj)[index] = Qundef; + return val; + } + break; + 
case T_CLASS: + case T_MODULE: + n = id; + if (RCLASS_IV_TBL(obj) && st_delete(RCLASS_IV_TBL(obj), &n, &v)) { + return (VALUE)v; + } + break; + default: + if (FL_TEST(obj, FL_EXIVAR) || rb_special_const_p(obj)) { + if (generic_ivar_remove(obj, id, &val)) { + return val; + } + } + break; + } + rb_name_error(id, "instance variable %s not defined", rb_id2name(id)); + return Qnil; /* not reached */ +} + +NORETURN(static void uninitialized_constant(VALUE, ID)); +static void +uninitialized_constant(VALUE klass, ID id) +{ + if (klass && klass != rb_cObject) + rb_name_error(id, "uninitialized constant %s::%s", + rb_class2name(klass), + rb_id2name(id)); + else { + rb_name_error(id, "uninitialized constant %s", rb_id2name(id)); + } +} + +static VALUE +const_missing(VALUE klass, ID id) +{ + return rb_funcall(klass, rb_intern("const_missing"), 1, ID2SYM(id)); +} + + +/* + * call-seq: + * mod.const_missing(sym) => obj + * + * Invoked when a reference is made to an undefined constant in + * mod. It is passed a symbol for the undefined constant, and + * returns a value to be used for that constant. The + * following code is a (very bad) example: if reference is made to + * an undefined constant, it attempts to load a file whose name is + * the lowercase version of the constant (thus class Fred is + * assumed to be in file fred.rb). If found, it returns the + * value of the loaded class. It therefore implements a perverse + * kind of autoload facility. 
+ * + * def Object.const_missing(name) + * @looked_for ||= {} + * str_name = name.to_s + * raise "Class not found: #{name}" if @looked_for[str_name] + * @looked_for[str_name] = 1 + * file = str_name.downcase + * require file + * klass = const_get(name) + * return klass if klass + * raise "Class not found: #{name}" + * end + * + */ + +VALUE +rb_mod_const_missing(VALUE klass, VALUE name) +{ + rb_frame_pop(); /* pop frame for "const_missing" */ + uninitialized_constant(klass, rb_to_id(name)); + return Qnil; /* not reached */ +} + +static struct st_table * +check_autoload_table(VALUE av) +{ + Check_Type(av, T_DATA); + if (RDATA(av)->dmark != (RUBY_DATA_FUNC)rb_mark_tbl || + RDATA(av)->dfree != (RUBY_DATA_FUNC)st_free_table) { + VALUE desc = rb_inspect(av); + rb_raise(rb_eTypeError, "wrong autoload table: %s", RSTRING_PTR(desc)); + } + return (struct st_table *)DATA_PTR(av); +} + +void +rb_autoload(VALUE mod, ID id, const char *file) +{ + VALUE av, fn; + struct st_table *tbl; + + if (!rb_is_const_id(id)) { + rb_raise(rb_eNameError, "autoload must be constant name: %s", rb_id2name(id)); + } + if (!file || !*file) { + rb_raise(rb_eArgError, "empty file name"); + } + + if ((tbl = RCLASS_IV_TBL(mod)) && st_lookup(tbl, id, &av) && av != Qundef) + return; + + rb_const_set(mod, id, Qundef); + tbl = RCLASS_IV_TBL(mod); + if (st_lookup(tbl, autoload, &av)) { + tbl = check_autoload_table(av); + } + else { + av = Data_Wrap_Struct(0, rb_mark_tbl, st_free_table, 0); + st_add_direct(tbl, autoload, av); + DATA_PTR(av) = tbl = st_init_numtable(); + } + fn = rb_str_new2(file); + FL_UNSET(fn, FL_TAINT); + OBJ_FREEZE(fn); + st_insert(tbl, id, (st_data_t)rb_node_newnode(NODE_MEMO, fn, rb_safe_level(), 0)); +} + +static NODE* +autoload_delete(VALUE mod, ID id) +{ + st_data_t val, load = 0, n = id; + + st_delete(RCLASS_IV_TBL(mod), &n, 0); + if (st_lookup(RCLASS_IV_TBL(mod), autoload, &val)) { + struct st_table *tbl = check_autoload_table((VALUE)val); + + st_delete(tbl, &n, &load); + + if 
(tbl->num_entries == 0) { + n = autoload; + st_delete(RCLASS_IV_TBL(mod), &n, &val); + } + } + + return (NODE *)load; +} + +static VALUE +autoload_provided(VALUE arg) +{ + const char **p = (const char **)arg; + return rb_feature_provided(*p, p); +} + +static VALUE +reset_safe(VALUE safe) +{ + rb_set_safe_level_force((int)safe); + return safe; +} + +static NODE * +autoload_node(VALUE mod, ID id, const char **loadingpath) +{ + VALUE file; + struct st_table *tbl; + st_data_t val; + NODE *load; + const char *loading; + int safe; + + if (!st_lookup(RCLASS_IV_TBL(mod), autoload, &val) || + !(tbl = check_autoload_table((VALUE)val)) || !st_lookup(tbl, (st_data_t)id, &val)) { + return 0; + } + load = (NODE *)val; + file = load->nd_lit; + Check_Type(file, T_STRING); + if (!RSTRING_PTR(file) || !*RSTRING_PTR(file)) { + rb_raise(rb_eArgError, "empty file name"); + } + loading = RSTRING_PTR(file); + safe = rb_safe_level(); + rb_set_safe_level_force(0); + if (!rb_ensure(autoload_provided, (VALUE)&loading, reset_safe, (VALUE)safe)) { + return load; + } + if (loadingpath && loading) { + *loadingpath = loading; + return load; + } + return 0; +} + +static int +autoload_node_id(VALUE mod, ID id) +{ + struct st_table *tbl = RCLASS_IV_TBL(mod); + st_data_t val; + + if (!tbl || !st_lookup(tbl, id, &val) || val != Qundef) { + return 0; + } + return 1; +} + +VALUE +rb_autoload_load(VALUE mod, ID id) +{ + VALUE file; + NODE *load; + const char *loading = 0, *src; + + if (!autoload_node_id(mod, id)) return Qfalse; + load = autoload_node(mod, id, &loading); + if (!load) return Qfalse; + src = rb_sourcefile(); + if (src && loading && strcmp(src, loading) == 0) return Qfalse; + file = load->nd_lit; + return rb_require_safe(file, load->nd_nth); +} + +VALUE +rb_autoload_p(VALUE mod, ID id) +{ + VALUE file; + NODE *load; + const char *loading = 0; + + if (!autoload_node_id(mod, id)) return Qnil; + load = autoload_node(mod, id, &loading); + if (!load) return Qnil; + return load && (file = 
load->nd_lit) ? file : Qnil; +} + +static VALUE +rb_const_get_0(VALUE klass, ID id, int exclude, int recurse) +{ + VALUE value, tmp; + int mod_retry = 0; + + tmp = klass; + retry: + while (RTEST(tmp)) { + VALUE am = 0; + while (RCLASS_IV_TBL(tmp) && st_lookup(RCLASS_IV_TBL(tmp),id,&value)) { + if (value == Qundef) { + if (am == tmp) break; + am = tmp; + rb_autoload_load(tmp, id); + continue; + } + if (exclude && tmp == rb_cObject && klass != rb_cObject) { + rb_warn("toplevel constant %s referenced by %s::%s", + rb_id2name(id), rb_class2name(klass), rb_id2name(id)); + } + return value; + } + if (!recurse && klass != rb_cObject) break; + tmp = RCLASS_SUPER(tmp); + } + if (!exclude && !mod_retry && BUILTIN_TYPE(klass) == T_MODULE) { + mod_retry = 1; + tmp = rb_cObject; + goto retry; + } + + value = const_missing(klass, id); + rb_vm_inc_const_missing_count(); + return value; +} + +VALUE +rb_const_get_from(VALUE klass, ID id) +{ + return rb_const_get_0(klass, id, Qtrue, Qtrue); +} + +VALUE +rb_const_get(VALUE klass, ID id) +{ + return rb_const_get_0(klass, id, Qfalse, Qtrue); +} + +VALUE +rb_const_get_at(VALUE klass, ID id) +{ + return rb_const_get_0(klass, id, Qtrue, Qfalse); +} + +/* + * call-seq: + * remove_const(sym) => obj + * + * Removes the definition of the given constant, returning that + * constant's value. Predefined classes and singleton objects (such as + * true) cannot be removed. 
+ */ + +VALUE +rb_mod_remove_const(VALUE mod, VALUE name) +{ + const ID id = rb_to_id(name); + VALUE val; + st_data_t v, n = id; + + rb_vm_change_state(); + + if (!rb_is_const_id(id)) { + rb_name_error(id, "`%s' is not allowed as a constant name", rb_id2name(id)); + } + if (!OBJ_UNTRUSTED(mod) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't remove constant"); + if (OBJ_FROZEN(mod)) rb_error_frozen("class/module"); + + if (RCLASS_IV_TBL(mod) && st_delete(RCLASS_IV_TBL(mod), &n, &v)) { + val = (VALUE)v; + if (val == Qundef) { + autoload_delete(mod, id); + val = Qnil; + } + return val; + } + if (rb_const_defined_at(mod, id)) { + rb_name_error(id, "cannot remove %s::%s", + rb_class2name(mod), rb_id2name(id)); + } + rb_name_error(id, "constant %s::%s not defined", + rb_class2name(mod), rb_id2name(id)); + return Qnil; /* not reached */ +} + +static int +sv_i(ID key, VALUE value, st_table *tbl) +{ + if (rb_is_const_id(key)) { + if (!st_lookup(tbl, key, 0)) { + st_insert(tbl, key, key); + } + } + return ST_CONTINUE; +} + +void* +rb_mod_const_at(VALUE mod, void *data) +{ + st_table *tbl = data; + if (!tbl) { + tbl = st_init_numtable(); + } + if (RCLASS_IV_TBL(mod)) { + st_foreach_safe(RCLASS_IV_TBL(mod), sv_i, (st_data_t)tbl); + } + return tbl; +} + +void* +rb_mod_const_of(VALUE mod, void *data) +{ + VALUE tmp = mod; + for (;;) { + data = rb_mod_const_at(tmp, data); + tmp = RCLASS_SUPER(tmp); + if (!tmp) break; + if (tmp == rb_cObject && mod != rb_cObject) break; + } + return data; +} + +static int +list_i(ID key, ID value, VALUE ary) +{ + rb_ary_push(ary, ID2SYM(key)); + return ST_CONTINUE; +} + +VALUE +rb_const_list(void *data) +{ + st_table *tbl = data; + VALUE ary; + + if (!tbl) return rb_ary_new2(0); + ary = rb_ary_new2(tbl->num_entries); + st_foreach_safe(tbl, list_i, ary); + st_free_table(tbl); + + return ary; +} + +/* + * call-seq: + * mod.constants(inherit=true) => array + * + * Returns an array of the names of the constants accessible in + 
* mod. This includes the names of constants in any included + * modules (example at start of section), unless the all + * parameter is set to false. + * + * IO.constants.include?(:SYNC) => true + * IO.constants(false).include?(:SYNC) => false + * + * Also see Module::const_defined?. + */ + +VALUE +rb_mod_constants(int argc, VALUE *argv, VALUE mod) +{ + VALUE inherit; + st_table *tbl; + + if (argc == 0) { + inherit = Qtrue; + } + else { + rb_scan_args(argc, argv, "01", &inherit); + } + if (RTEST(inherit)) { + tbl = rb_mod_const_of(mod, 0); + } + else { + tbl = rb_mod_const_at(mod, 0); + } + return rb_const_list(tbl); +} + +static int +rb_const_defined_0(VALUE klass, ID id, int exclude, int recurse) +{ + VALUE value, tmp; + int mod_retry = 0; + + tmp = klass; + retry: + while (tmp) { + if (RCLASS_IV_TBL(tmp) && st_lookup(RCLASS_IV_TBL(tmp), id, &value)) { + if (value == Qundef && !autoload_node(klass, id, 0)) + return Qfalse; + return Qtrue; + } + if (!recurse && klass != rb_cObject) break; + tmp = RCLASS_SUPER(tmp); + } + if (!exclude && !mod_retry && BUILTIN_TYPE(klass) == T_MODULE) { + mod_retry = 1; + tmp = rb_cObject; + goto retry; + } + return Qfalse; +} + +int +rb_const_defined_from(VALUE klass, ID id) +{ + return rb_const_defined_0(klass, id, Qtrue, Qtrue); +} + +int +rb_const_defined(VALUE klass, ID id) +{ + return rb_const_defined_0(klass, id, Qfalse, Qtrue); +} + +int +rb_const_defined_at(VALUE klass, ID id) +{ + return rb_const_defined_0(klass, id, Qtrue, Qfalse); +} + +static void +mod_av_set(VALUE klass, ID id, VALUE val, int isconst) +{ + const char *dest = isconst ? 
"constant" : "class variable"; + + if (!OBJ_UNTRUSTED(klass) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't set %s", dest); + if (OBJ_FROZEN(klass)) { + if (BUILTIN_TYPE(klass) == T_MODULE) { + rb_error_frozen("module"); + } + else { + rb_error_frozen("class"); + } + } + if (!RCLASS_IV_TBL(klass)) { + RCLASS_IV_TBL(klass) = st_init_numtable(); + } + else if (isconst) { + VALUE value = Qfalse; + + if (st_lookup(RCLASS_IV_TBL(klass), id, &value)) { + if (value == Qundef) + autoload_delete(klass, id); + else + rb_warn("already initialized %s %s", dest, rb_id2name(id)); + } + } + + if (isconst){ + rb_vm_change_state(); + } + st_insert(RCLASS_IV_TBL(klass), id, val); +} + +void +rb_const_set(VALUE klass, ID id, VALUE val) +{ + if (NIL_P(klass)) { + rb_raise(rb_eTypeError, "no class/module to define constant %s", + rb_id2name(id)); + } + mod_av_set(klass, id, val, Qtrue); +} + +void +rb_define_const(VALUE klass, const char *name, VALUE val) +{ + ID id = rb_intern(name); + + if (!rb_is_const_id(id)) { + rb_warn("rb_define_const: invalid name `%s' for constant", name); + } + if (klass == rb_cObject) { + rb_secure(4); + } + rb_const_set(klass, id, val); +} + +void +rb_define_global_const(const char *name, VALUE val) +{ + rb_define_const(rb_cObject, name, val); +} + +static VALUE +original_module(VALUE c) +{ + if (TYPE(c) == T_ICLASS) + return RBASIC(c)->klass; + return c; +} + +#define CVAR_LOOKUP(v,r) do {\ + if (RCLASS_IV_TBL(klass) && st_lookup(RCLASS_IV_TBL(klass),id,(v))) {\ + r;\ + }\ + if (FL_TEST(klass, FL_SINGLETON) ) {\ + VALUE obj = rb_iv_get(klass, "__attached__");\ + switch (TYPE(obj)) {\ + case T_MODULE:\ + case T_CLASS:\ + klass = obj;\ + break;\ + default:\ + klass = RCLASS_SUPER(klass);\ + break;\ + }\ + }\ + else {\ + klass = RCLASS_SUPER(klass);\ + }\ + while (klass) {\ + if (RCLASS_IV_TBL(klass) && st_lookup(RCLASS_IV_TBL(klass),id,(v))) {\ + r;\ + }\ + klass = RCLASS_SUPER(klass);\ + }\ +} while(0) + +void +rb_cvar_set(VALUE 
klass, ID id, VALUE val) +{ + VALUE tmp, front = 0, target = 0; + + tmp = klass; + CVAR_LOOKUP(0, {if (!front) front = klass; target = klass;}); + if (target) { + if (front && target != front) { + ID did = id; + + if (RTEST(ruby_verbose)) { + rb_warning("class variable %s of %s is overtaken by %s", + rb_id2name(id), rb_class2name(original_module(front)), + rb_class2name(original_module(target))); + } + if (BUILTIN_TYPE(front) == T_CLASS) { + st_delete(RCLASS_IV_TBL(front),&did,0); + } + } + } + else { + target = tmp; + } + mod_av_set(target, id, val, Qfalse); +} + +VALUE +rb_cvar_get(VALUE klass, ID id) +{ + VALUE value, tmp, front = 0, target = 0; + + tmp = klass; + CVAR_LOOKUP(&value, {if (!front) front = klass; target = klass;}); + if (!target) { + rb_name_error(id,"uninitialized class variable %s in %s", + rb_id2name(id), rb_class2name(tmp)); + } + if (front && target != front) { + ID did = id; + + if (RTEST(ruby_verbose)) { + rb_warning("class variable %s of %s is overtaken by %s", + rb_id2name(id), rb_class2name(original_module(front)), + rb_class2name(original_module(target))); + } + if (BUILTIN_TYPE(front) == T_CLASS) { + st_delete(RCLASS_IV_TBL(front),&did,0); + } + } + return value; +} + +VALUE +rb_cvar_defined(VALUE klass, ID id) +{ + if (!klass) return Qfalse; + CVAR_LOOKUP(0,return Qtrue); + return Qfalse; +} + +void +rb_cv_set(VALUE klass, const char *name, VALUE val) +{ + ID id = rb_intern(name); + if (!rb_is_class_id(id)) { + rb_name_error(id, "wrong class variable name %s", name); + } + rb_cvar_set(klass, id, val); +} + +VALUE +rb_cv_get(VALUE klass, const char *name) +{ + ID id = rb_intern(name); + if (!rb_is_class_id(id)) { + rb_name_error(id, "wrong class variable name %s", name); + } + return rb_cvar_get(klass, id); +} + +void +rb_define_class_variable(VALUE klass, const char *name, VALUE val) +{ + ID id = rb_intern(name); + + if (!rb_is_class_id(id)) { + rb_name_error(id, "wrong class variable name %s", name); + } + rb_cvar_set(klass, id, 
val); +} + +static int +cv_i(ID key, VALUE value, VALUE ary) +{ + if (rb_is_class_id(key)) { + VALUE kval = ID2SYM(key); + if (!rb_ary_includes(ary, kval)) { + rb_ary_push(ary, kval); + } + } + return ST_CONTINUE; +} + +/* + * call-seq: + * mod.class_variables => array + * + * Returns an array of the names of class variables in mod. + * + * class One + * @@var1 = 1 + * end + * class Two < One + * @@var2 = 2 + * end + * One.class_variables #=> [:@@var1] + * Two.class_variables #=> [:@@var2] + */ + +VALUE +rb_mod_class_variables(VALUE obj) +{ + VALUE ary = rb_ary_new(); + + if (RCLASS_IV_TBL(obj)) { + st_foreach_safe(RCLASS_IV_TBL(obj), cv_i, ary); + } + return ary; +} + +/* + * call-seq: + * remove_class_variable(sym) => obj + * + * Removes the definition of the sym, returning that + * constant's value. + * + * class Dummy + * @@var = 99 + * puts @@var + * remove_class_variable(:@@var) + * p(defined? @@var) + * end + * + * produces: + * + * 99 + * nil + */ + +VALUE +rb_mod_remove_cvar(VALUE mod, VALUE name) +{ + const ID id = rb_to_id(name); + st_data_t val, n = id; + + if (!rb_is_class_id(id)) { + rb_name_error(id, "wrong class variable name %s", rb_id2name(id)); + } + if (!OBJ_UNTRUSTED(mod) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't remove class variable"); + if (OBJ_FROZEN(mod)) rb_error_frozen("class/module"); + + if (RCLASS_IV_TBL(mod) && st_delete(RCLASS_IV_TBL(mod), &n, &val)) { + return (VALUE)val; + } + if (rb_cvar_defined(mod, id)) { + rb_name_error(id, "cannot remove %s for %s", + rb_id2name(id), rb_class2name(mod)); + } + rb_name_error(id, "class variable %s not defined for %s", + rb_id2name(id), rb_class2name(mod)); + return Qnil; /* not reached */ +} + +VALUE +rb_iv_get(VALUE obj, const char *name) +{ + ID id = rb_intern(name); + + return rb_ivar_get(obj, id); +} + +VALUE +rb_iv_set(VALUE obj, const char *name, VALUE val) +{ + ID id = rb_intern(name); + + return rb_ivar_set(obj, id, val); +} diff --git a/version.c 
b/version.c new file mode 100644 index 0000000..b3143ae --- /dev/null +++ b/version.c @@ -0,0 +1,52 @@ +/********************************************************************** + + version.c - + + $Author: matz $ + created at: Thu Sep 30 20:08:01 JST 1993 + + Copyright (C) 1993-2007 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "version.h" +#include <stdio.h> + +#define PRINT(type) puts(ruby_##type) +#define MKSTR(type) rb_obj_freeze(rb_usascii_str_new(ruby_##type, sizeof(ruby_##type)-1)) + +const char ruby_version[] = RUBY_VERSION; +const char ruby_release_date[] = RUBY_RELEASE_DATE; +const char ruby_platform[] = RUBY_PLATFORM; +const int ruby_patchlevel = RUBY_PATCHLEVEL; +const char ruby_description[] = RUBY_DESCRIPTION; +const char ruby_copyright[] = RUBY_COPYRIGHT; +const char ruby_engine[] = "ruby"; + +void +Init_version(void) +{ + rb_define_global_const("RUBY_VERSION", MKSTR(version)); + rb_define_global_const("RUBY_RELEASE_DATE", MKSTR(release_date)); + rb_define_global_const("RUBY_PLATFORM", MKSTR(platform)); + rb_define_global_const("RUBY_PATCHLEVEL", INT2FIX(RUBY_PATCHLEVEL)); + rb_define_global_const("RUBY_REVISION", INT2FIX(RUBY_REVISION)); + rb_define_global_const("RUBY_DESCRIPTION", MKSTR(description)); + rb_define_global_const("RUBY_COPYRIGHT", MKSTR(copyright)); + rb_define_global_const("RUBY_ENGINE", MKSTR(engine)); +} + +void +ruby_show_version(void) +{ + PRINT(description); + fflush(stdout); +} + +void +ruby_show_copyright(void) +{ + PRINT(copyright); + exit(0); +} diff --git a/version.h b/version.h new file mode 100644 index 0000000..650d4d1 --- /dev/null +++ b/version.h @@ -0,0 +1,55 @@ +#define RUBY_VERSION "1.9.1" +#define RUBY_PATCHLEVEL 376 +#define RUBY_VERSION_MAJOR 1 +#define RUBY_VERSION_MINOR 9 +#define RUBY_VERSION_TEENY 1 + +#define RUBY_RELEASE_YEAR 2009 +#define RUBY_RELEASE_MONTH 12 +#define RUBY_RELEASE_DAY 7 +#define RUBY_RELEASE_DATE "2009-12-07" +
+#ifdef RUBY_EXTERN +RUBY_EXTERN const char ruby_version[]; +RUBY_EXTERN const char ruby_release_date[]; +RUBY_EXTERN const char ruby_platform[]; +RUBY_EXTERN const int ruby_patchlevel; +RUBY_EXTERN const char ruby_description[]; +RUBY_EXTERN const char ruby_copyright[]; +#endif + +#define RUBY_AUTHOR "Yukihiro Matsumoto" +#define RUBY_BIRTH_YEAR 1993 +#define RUBY_BIRTH_MONTH 2 +#define RUBY_BIRTH_DAY 24 + +#define RUBY_PATCHLEVEL_STR "p"STRINGIZE(RUBY_PATCHLEVEL) + +#ifndef RUBY_REVISION +# include "revision.h" +#endif +# ifndef RUBY_REVISION +# define RUBY_REVISION 0 +#endif + +#if RUBY_REVISION +# ifdef RUBY_BRANCH_NAME +# define RUBY_REVISION_STR " "RUBY_BRANCH_NAME" "STRINGIZE(RUBY_REVISION) +# else +# define RUBY_REVISION_STR " revision "STRINGIZE(RUBY_REVISION) +# endif +#else +# define RUBY_REVISION_STR "" +#endif + +# define RUBY_DESCRIPTION \ + "ruby "RUBY_VERSION \ + RUBY_PATCHLEVEL_STR \ + " ("RUBY_RELEASE_DATE \ + RUBY_REVISION_STR") " \ + "["RUBY_PLATFORM"]" +# define RUBY_COPYRIGHT \ + "ruby - Copyright (C) " \ + STRINGIZE(RUBY_BIRTH_YEAR)"-" \ + STRINGIZE(RUBY_RELEASE_YEAR)" " \ + RUBY_AUTHOR diff --git a/vm.c b/vm.c new file mode 100644 index 0000000..1fbcf95 --- /dev/null +++ b/vm.c @@ -0,0 +1,2029 @@ +/********************************************************************** + + vm.c - + + $Author: yugui $ + + Copyright (C) 2004-2007 Koichi Sasada + +**********************************************************************/ + +#include "ruby/ruby.h" +#include "ruby/st.h" +#include "ruby/encoding.h" + +#include "gc.h" +#include "vm_core.h" +#include "iseq.h" +#include "eval_intern.h" + +#include "vm_insnhelper.h" +#include "vm_insnhelper.c" +#include "vm_exec.h" +#include "vm_exec.c" + +#include "vm_method.c" +#include "vm_eval.c" + +#define BUFSIZE 0x100 +#define PROCDEBUG 0 + +VALUE rb_cRubyVM; +VALUE rb_cThread; +VALUE rb_cEnv; +VALUE rb_mRubyVMFrozenCore; + +VALUE ruby_vm_global_state_version = 1; +VALUE ruby_vm_const_missing_count = 0; + +char 
ruby_vm_redefined_flag[BOP_LAST_]; + +rb_thread_t *ruby_current_thread = 0; +rb_vm_t *ruby_current_vm = 0; + +VALUE rb_insns_name_array(void); + +void vm_analysis_operand(int insn, int n, VALUE op); +void vm_analysis_register(int reg, int isset); +void vm_analysis_insn(int insn); + +void +rb_vm_change_state(void) +{ + INC_VM_STATE_VERSION(); +} + +void +rb_vm_inc_const_missing_count(void) +{ + ruby_vm_const_missing_count +=1; +} + +/* control stack frame */ + +static inline VALUE +rb_vm_set_finish_env(rb_thread_t * th) +{ + vm_push_frame(th, 0, VM_FRAME_MAGIC_FINISH, + Qnil, th->cfp->lfp[0], 0, + th->cfp->sp, 0, 1); + th->cfp->pc = (VALUE *)&finish_insn_seq[0]; + return Qtrue; +} + +static void +vm_set_top_stack(rb_thread_t * th, VALUE iseqval) +{ + rb_iseq_t *iseq; + GetISeqPtr(iseqval, iseq); + + if (iseq->type != ISEQ_TYPE_TOP) { + rb_raise(rb_eTypeError, "Not a toplevel InstructionSequence"); + } + + /* for return */ + rb_vm_set_finish_env(th); + + vm_push_frame(th, iseq, VM_FRAME_MAGIC_TOP, + th->top_self, 0, iseq->iseq_encoded, + th->cfp->sp, 0, iseq->local_size); + + CHECK_STACK_OVERFLOW(th->cfp, iseq->stack_max); +} + +static void +vm_set_eval_stack(rb_thread_t * th, VALUE iseqval, const NODE *cref) +{ + rb_iseq_t *iseq; + rb_block_t * const block = th->base_block; + GetISeqPtr(iseqval, iseq); + + /* for return */ + rb_vm_set_finish_env(th); + vm_push_frame(th, iseq, VM_FRAME_MAGIC_EVAL, block->self, + GC_GUARDED_PTR(block->dfp), iseq->iseq_encoded, + th->cfp->sp, block->lfp, iseq->local_size); + + if (cref) { + th->cfp->dfp[-1] = (VALUE)cref; + } + + CHECK_STACK_OVERFLOW(th->cfp, iseq->stack_max); +} + +static void +vm_set_main_stack(rb_thread_t *th, VALUE iseqval) +{ + VALUE toplevel_binding = rb_const_get(rb_cObject, rb_intern("TOPLEVEL_BINDING")); + rb_binding_t *bind; + rb_iseq_t *iseq; + rb_env_t *env; + + GetBindingPtr(toplevel_binding, bind); + GetEnvPtr(bind->env, env); + th->base_block = &env->block; + vm_set_eval_stack(th, iseqval, 0); + 
th->base_block = 0; + + /* save binding */ + GetISeqPtr(iseqval, iseq); + if (bind && iseq->local_size > 0) { + bind->env = rb_vm_make_env_object(th, th->cfp); + } + + CHECK_STACK_OVERFLOW(th->cfp, iseq->stack_max); +} + +rb_control_frame_t * +rb_vm_get_ruby_level_next_cfp(rb_thread_t *th, rb_control_frame_t *cfp) +{ + while (!RUBY_VM_CONTROL_FRAME_STACK_OVERFLOW_P(th, cfp)) { + if (RUBY_VM_NORMAL_ISEQ_P(cfp->iseq)) { + return cfp; + } + cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp); + } + return 0; +} + +static rb_control_frame_t * +vm_get_ruby_level_caller_cfp(rb_thread_t *th, rb_control_frame_t *cfp) +{ + if (RUBY_VM_NORMAL_ISEQ_P(cfp->iseq)) { + return cfp; + } + + cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp); + + while (!RUBY_VM_CONTROL_FRAME_STACK_OVERFLOW_P(th, cfp)) { + if (RUBY_VM_NORMAL_ISEQ_P(cfp->iseq)) { + return cfp; + } + + if ((cfp->flag & VM_FRAME_FLAG_PASSED) == 0) { + break; + } + cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp); + } + return 0; +} + +/* Env */ + +/* + env{ + env[0] // special (block or prev env) + env[1] // env object + env[2] // prev env val + }; + */ + +#define ENV_IN_HEAP_P(th, env) \ + (!((th)->stack < (env) && (env) < ((th)->stack + (th)->stack_size))) +#define ENV_VAL(env) ((env)[1]) + +static void +env_free(void * const ptr) +{ + RUBY_FREE_ENTER("env"); + if (ptr) { + const rb_env_t * const env = ptr; + RUBY_FREE_UNLESS_NULL(env->env); + ruby_xfree(ptr); + } + RUBY_FREE_LEAVE("env"); +} + +static void +env_mark(void * const ptr) +{ + RUBY_MARK_ENTER("env"); + if (ptr) { + const rb_env_t * const env = ptr; + + if (env->env) { + /* TODO: should mark more restricted range */ + RUBY_GC_INFO("env->env\n"); + rb_gc_mark_locations(env->env, env->env + env->env_size); + } + + RUBY_GC_INFO("env->prev_envval\n"); + RUBY_MARK_UNLESS_NULL(env->prev_envval); + RUBY_MARK_UNLESS_NULL(env->block.self); + RUBY_MARK_UNLESS_NULL(env->block.proc); + + if (env->block.iseq) { + if (BUILTIN_TYPE(env->block.iseq) == T_NODE) { + 
RUBY_MARK_UNLESS_NULL((VALUE)env->block.iseq); + } + else { + RUBY_MARK_UNLESS_NULL(env->block.iseq->self); + } + } + } + RUBY_MARK_LEAVE("env"); +} + +static VALUE +env_alloc(void) +{ + VALUE obj; + rb_env_t *env; + obj = Data_Make_Struct(rb_cEnv, rb_env_t, env_mark, env_free, env); + env->env = 0; + env->prev_envval = 0; + env->block.iseq = 0; + return obj; +} + +static VALUE check_env_value(VALUE envval); + +static int +check_env(rb_env_t * const env) +{ + printf("---\n"); + printf("envptr: %p\n", (void *)&env->block.dfp[0]); + printf("orphan: %p\n", (void *)env->block.dfp[1]); + printf("inheap: %p\n", (void *)env->block.dfp[2]); + printf("envval: %10p ", (void *)env->block.dfp[3]); + dp(env->block.dfp[3]); + printf("penvv : %10p ", (void *)env->block.dfp[4]); + dp(env->block.dfp[4]); + printf("lfp: %10p\n", (void *)env->block.lfp); + printf("dfp: %10p\n", (void *)env->block.dfp); + if (env->block.dfp[4]) { + printf(">>\n"); + check_env_value(env->block.dfp[4]); + printf("<<\n"); + } + return 1; +} + +static VALUE +check_env_value(VALUE envval) +{ + rb_env_t *env; + GetEnvPtr(envval, env); + + if (check_env(env)) { + return envval; + } + rb_bug("invalid env"); + return Qnil; /* unreachable */ +} + +static VALUE +vm_make_env_each(rb_thread_t * const th, rb_control_frame_t * const cfp, + VALUE *envptr, VALUE * const endptr) +{ + VALUE envval, penvval = 0; + rb_env_t *env; + VALUE *nenvptr; + int i, local_size; + + if (ENV_IN_HEAP_P(th, envptr)) { + return ENV_VAL(envptr); + } + + if (envptr != endptr) { + VALUE *penvptr = GC_GUARDED_PTR_REF(*envptr); + rb_control_frame_t *pcfp = cfp; + + if (ENV_IN_HEAP_P(th, penvptr)) { + penvval = ENV_VAL(penvptr); + } + else { + while (pcfp->dfp != penvptr) { + pcfp++; + if (pcfp->dfp == 0) { + SDR(); + rb_bug("invalid dfp"); + } + } + penvval = vm_make_env_each(th, pcfp, penvptr, endptr); + cfp->lfp = pcfp->lfp; + *envptr = GC_GUARDED_PTR(pcfp->dfp); + } + } + + /* allocate env */ + envval = env_alloc(); + GetEnvPtr(envval, 
env); + + if (!RUBY_VM_NORMAL_ISEQ_P(cfp->iseq)) { + local_size = 2; + } + else { + local_size = cfp->iseq->local_size; + } + + env->env_size = local_size + 1 + 2; + env->local_size = local_size; + env->env = ALLOC_N(VALUE, env->env_size); + env->prev_envval = penvval; + + for (i = 0; i <= local_size; i++) { + env->env[i] = envptr[-local_size + i]; +#if 0 + fprintf(stderr, "%2d ", &envptr[-local_size + i] - th->stack); dp(env->env[i]); + if (RUBY_VM_NORMAL_ISEQ_P(cfp->iseq)) { + /* clear value stack for GC */ + envptr[-local_size + i] = 0; + } +#endif + } + + *envptr = envval; /* GC mark */ + nenvptr = &env->env[i - 1]; + nenvptr[1] = envval; /* frame self */ + nenvptr[2] = penvval; /* frame prev env object */ + + /* reset lfp/dfp in cfp */ + cfp->dfp = nenvptr; + if (envptr == endptr) { + cfp->lfp = nenvptr; + } + + /* as Binding */ + env->block.self = cfp->self; + env->block.lfp = cfp->lfp; + env->block.dfp = cfp->dfp; + env->block.iseq = cfp->iseq; + + if (!RUBY_VM_NORMAL_ISEQ_P(cfp->iseq)) { + /* TODO */ + env->block.iseq = 0; + } + return envval; +} + +static int +collect_local_variables_in_env(rb_env_t * const env, const VALUE ary) +{ + int i; + for (i = 0; i < env->block.iseq->local_table_size; i++) { + ID lid = env->block.iseq->local_table[i]; + if (rb_is_local_id(lid)) { + rb_ary_push(ary, ID2SYM(lid)); + } + } + if (env->prev_envval) { + rb_env_t *prevenv; + GetEnvPtr(env->prev_envval, prevenv); + collect_local_variables_in_env(prevenv, ary); + } + return 0; +} + +static int +vm_collect_local_variables_in_heap(rb_thread_t *th, VALUE *dfp, VALUE ary) +{ + if (ENV_IN_HEAP_P(th, dfp)) { + rb_env_t *env; + GetEnvPtr(ENV_VAL(dfp), env); + collect_local_variables_in_env(env, ary); + return 1; + } + else { + return 0; + } +} + +VALUE +rb_vm_make_env_object(rb_thread_t * th, rb_control_frame_t *cfp) +{ + VALUE envval; + + if (VM_FRAME_TYPE(cfp) == VM_FRAME_MAGIC_FINISH) { + /* for method_missing */ + cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp); + } + + envval = 
vm_make_env_each(th, cfp, cfp->dfp, cfp->lfp); + + if (PROCDEBUG) { + check_env_value(envval); + } + + return envval; +} + +void +rb_vm_stack_to_heap(rb_thread_t * const th) +{ + rb_control_frame_t *cfp = th->cfp; + while ((cfp = rb_vm_get_ruby_level_next_cfp(th, cfp)) != 0) { + rb_vm_make_env_object(th, cfp); + cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp); + } +} + +/* Proc */ + +static VALUE +vm_make_proc_from_block(rb_thread_t *th, rb_block_t *block) +{ + VALUE proc = block->proc; + + if (block->proc) { + return block->proc; + } + + proc = rb_vm_make_proc(th, block, rb_cProc); + block->proc = proc; + + return proc; +} + +VALUE +rb_vm_make_proc(rb_thread_t *th, const rb_block_t *block, VALUE klass) +{ + VALUE procval, envval, blockprocval = 0; + rb_proc_t *proc; + rb_control_frame_t *cfp = RUBY_VM_GET_CFP_FROM_BLOCK_PTR(block); + + if (block->proc) { + rb_bug("rb_vm_make_proc: Proc value is already created."); + } + + if (GC_GUARDED_PTR_REF(cfp->lfp[0])) { + if (!RUBY_VM_CLASS_SPECIAL_P(cfp->lfp[0])) { + rb_proc_t *p; + + blockprocval = vm_make_proc_from_block( + th, (rb_block_t *)GC_GUARDED_PTR_REF(*cfp->lfp)); + + GetProcPtr(blockprocval, p); + *cfp->lfp = GC_GUARDED_PTR(&p->block); + } + } + + envval = rb_vm_make_env_object(th, cfp); + + if (PROCDEBUG) { + check_env_value(envval); + } + procval = rb_proc_alloc(klass); + GetProcPtr(procval, proc); + proc->blockprocval = blockprocval; + proc->block.self = block->self; + proc->block.lfp = block->lfp; + proc->block.dfp = block->dfp; + proc->block.iseq = block->iseq; + proc->block.proc = procval; + proc->envval = envval; + proc->safe_level = th->safe_level; + + if (VMDEBUG) { + if (th->stack < block->dfp && block->dfp < th->stack + th->stack_size) { + rb_bug("invalid ptr: block->dfp"); + } + if (th->stack < block->lfp && block->lfp < th->stack + th->stack_size) { + rb_bug("invalid ptr: block->lfp"); + } + } + + return procval; +} + +/* C -> Ruby: block */ + +static inline VALUE +invoke_block_from_c(rb_thread_t *th, 
const rb_block_t *block, + VALUE self, int argc, const VALUE *argv, + const rb_block_t *blockptr, const NODE *cref) +{ + if (BUILTIN_TYPE(block->iseq) != T_NODE) { + const rb_iseq_t *iseq = block->iseq; + const rb_control_frame_t *cfp; + int i, opt_pc, arg_size = iseq->arg_size; + int type = block_proc_is_lambda(block->proc) ? + VM_FRAME_MAGIC_LAMBDA : VM_FRAME_MAGIC_BLOCK; + + rb_vm_set_finish_env(th); + + cfp = th->cfp; + CHECK_STACK_OVERFLOW(cfp, argc + iseq->stack_max); + + for (i=0; isp[i] = argv[i]; + } + + opt_pc = vm_yield_setup_args(th, iseq, argc, cfp->sp, blockptr, + type == VM_FRAME_MAGIC_LAMBDA); + + vm_push_frame(th, iseq, type, + self, GC_GUARDED_PTR(block->dfp), + iseq->iseq_encoded + opt_pc, cfp->sp + arg_size, block->lfp, + iseq->local_size - arg_size); + + if (cref) { + th->cfp->dfp[-1] = (VALUE)cref; + } + + return vm_exec(th); + } + else { + return vm_yield_with_cfunc(th, block, self, argc, argv, blockptr); + } +} + +static inline const rb_block_t * +check_block(rb_thread_t *th) +{ + const rb_block_t *blockptr = GC_GUARDED_PTR_REF(th->cfp->lfp[0]); + + if (blockptr == 0) { + rb_vm_localjump_error("no block given", Qnil, 0); + } + + return blockptr; +} + +static inline VALUE +vm_yield_with_cref(rb_thread_t *th, int argc, const VALUE *argv, const NODE *cref) +{ + const rb_block_t *blockptr = check_block(th); + return invoke_block_from_c(th, blockptr, blockptr->self, argc, argv, 0, cref); +} + +static inline VALUE +vm_yield(rb_thread_t *th, int argc, const VALUE *argv) +{ + const rb_block_t *blockptr = check_block(th); + return invoke_block_from_c(th, blockptr, blockptr->self, argc, argv, 0, 0); +} + +VALUE +rb_vm_invoke_proc(rb_thread_t *th, rb_proc_t *proc, VALUE self, + int argc, const VALUE *argv, rb_block_t * blockptr) +{ + VALUE val = Qundef; + int state; + volatile int stored_safe = th->safe_level; + rb_control_frame_t * volatile cfp = th->cfp; + + TH_PUSH_TAG(th); + if ((state = EXEC_TAG()) == 0) { + if (!proc->is_from_method) { + 
th->safe_level = proc->safe_level; + } + val = invoke_block_from_c(th, &proc->block, self, argc, argv, blockptr, 0); + } + TH_POP_TAG(); + + if (!proc->is_from_method) { + th->safe_level = stored_safe; + } + + if (state) { + if (state == TAG_RETURN && proc->is_lambda) { + VALUE err = th->errinfo; + VALUE *escape_dfp = GET_THROWOBJ_CATCH_POINT(err); + + if (escape_dfp == cfp->dfp) { + printf("ok\n"); + state = 0; + th->errinfo = Qnil; + th->cfp = cfp; + val = GET_THROWOBJ_VAL(err); + } + } + } + + if (state) { + JUMP_TAG(state); + } + return val; +} + +/* special variable */ + +static rb_control_frame_t * +vm_normal_frame(rb_thread_t *th, rb_control_frame_t *cfp) +{ + while (cfp->pc == 0) { + cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp); + if (RUBY_VM_CONTROL_FRAME_STACK_OVERFLOW_P(th, cfp)) { + return 0; + } + } + return cfp; +} + +static VALUE +vm_cfp_svar_get(rb_thread_t *th, rb_control_frame_t *cfp, VALUE key) +{ + cfp = vm_normal_frame(th, cfp); + return lfp_svar_get(th, cfp ? cfp->lfp : 0, key); +} + +static void +vm_cfp_svar_set(rb_thread_t *th, rb_control_frame_t *cfp, VALUE key, const VALUE val) +{ + cfp = vm_normal_frame(th, cfp); + lfp_svar_set(th, cfp ? 
cfp->lfp : 0, key, val); +} + +static VALUE +vm_svar_get(VALUE key) +{ + rb_thread_t *th = GET_THREAD(); + return vm_cfp_svar_get(th, th->cfp, key); +} + +static void +vm_svar_set(VALUE key, VALUE val) +{ + rb_thread_t *th = GET_THREAD(); + vm_cfp_svar_set(th, th->cfp, key, val); +} + +VALUE +rb_backref_get(void) +{ + return vm_svar_get(1); +} + +void +rb_backref_set(VALUE val) +{ + vm_svar_set(1, val); +} + +VALUE +rb_lastline_get(void) +{ + return vm_svar_get(0); +} + +void +rb_lastline_set(VALUE val) +{ + vm_svar_set(0, val); +} + +/* backtrace */ + +int +rb_vm_get_sourceline(const rb_control_frame_t *cfp) +{ + int line_no = 0; + const rb_iseq_t *iseq = cfp->iseq; + + if (RUBY_VM_NORMAL_ISEQ_P(iseq)) { + rb_num_t i; + size_t pos = cfp->pc - cfp->iseq->iseq_encoded; + + for (i = 0; i < iseq->insn_info_size; i++) { + if (iseq->insn_info_table[i].position == pos) { + if (i == 0) goto found; + line_no = iseq->insn_info_table[i - 1].line_no; + goto found; + } + } + line_no = iseq->insn_info_table[i - 1].line_no; + } + found: + return line_no; +} + +static VALUE +vm_backtrace_each(rb_thread_t *th, + const rb_control_frame_t *limit_cfp, const rb_control_frame_t *cfp, + const char * file, int line_no, VALUE ary) +{ + VALUE str; + + while (cfp > limit_cfp) { + str = 0; + if (cfp->iseq != 0) { + if (cfp->pc != 0) { + rb_iseq_t *iseq = cfp->iseq; + + line_no = rb_vm_get_sourceline(cfp); + file = RSTRING_PTR(iseq->filename); + str = rb_sprintf("%s:%d:in `%s'", + file, line_no, RSTRING_PTR(iseq->name)); + rb_ary_push(ary, str); + } + } + else if (RUBYVM_CFUNC_FRAME_P(cfp)) { + str = rb_sprintf("%s:%d:in `%s'", + file, line_no, + rb_id2name(cfp->method_id)); + rb_ary_push(ary, str); + } + cfp = RUBY_VM_NEXT_CONTROL_FRAME(cfp); + } + return rb_ary_reverse(ary); +} + +static inline VALUE +vm_backtrace(rb_thread_t *th, int lev) +{ + VALUE ary; + const rb_control_frame_t *cfp = th->cfp; + const rb_control_frame_t *top_of_cfp = (void *)(th->stack + th->stack_size); + top_of_cfp -= 
2; + + if (lev < 0) { + /* TODO ?? */ + ary = rb_ary_new(); + } + else { + while (lev-- >= 0) { + cfp++; + if (cfp >= top_of_cfp) { + return Qnil; + } + } + ary = rb_ary_new(); + } + + ary = vm_backtrace_each(th, RUBY_VM_NEXT_CONTROL_FRAME(cfp), + top_of_cfp, RSTRING_PTR(th->vm->progname), 0, ary); + return ary; +} + +const char * +rb_sourcefile(void) +{ + rb_thread_t *th = GET_THREAD(); + rb_control_frame_t *cfp = rb_vm_get_ruby_level_next_cfp(th, th->cfp); + + if (cfp) { + return RSTRING_PTR(cfp->iseq->filename); + } + else { + return 0; + } +} + +int +rb_sourceline(void) +{ + rb_thread_t *th = GET_THREAD(); + rb_control_frame_t *cfp = rb_vm_get_ruby_level_next_cfp(th, th->cfp); + + if (cfp) { + return rb_vm_get_sourceline(cfp); + } + else { + return 0; + } +} + +NODE * +rb_vm_cref(void) +{ + rb_thread_t *th = GET_THREAD(); + rb_control_frame_t *cfp = rb_vm_get_ruby_level_next_cfp(th, th->cfp); + return vm_get_cref(cfp->iseq, cfp->lfp, cfp->dfp); +} + +#if 0 +void +debug_cref(NODE *cref) +{ + while (cref) { + dp(cref->nd_clss); + printf("%ld\n", cref->nd_visi); + cref = cref->nd_next; + } +} +#endif + +VALUE +rb_vm_cbase(void) +{ + rb_thread_t *th = GET_THREAD(); + rb_control_frame_t *cfp = rb_vm_get_ruby_level_next_cfp(th, th->cfp); + + return vm_get_cbase(cfp->iseq, cfp->lfp, cfp->dfp); +} + +/* jump */ + +static VALUE +make_localjump_error(const char *mesg, VALUE value, int reason) +{ + extern VALUE rb_eLocalJumpError; + VALUE exc = rb_exc_new2(rb_eLocalJumpError, mesg); + ID id; + + switch (reason) { + case TAG_BREAK: + CONST_ID(id, "break"); + break; + case TAG_REDO: + CONST_ID(id, "redo"); + break; + case TAG_RETRY: + CONST_ID(id, "retry"); + break; + case TAG_NEXT: + CONST_ID(id, "next"); + break; + case TAG_RETURN: + CONST_ID(id, "return"); + break; + default: + CONST_ID(id, "noreason"); + break; + } + rb_iv_set(exc, "@exit_value", value); + rb_iv_set(exc, "@reason", ID2SYM(id)); + return exc; +} + +void +rb_vm_localjump_error(const char *mesg, VALUE 
value, int reason) +{ + VALUE exc = make_localjump_error(mesg, value, reason); + rb_exc_raise(exc); +} + +VALUE +rb_vm_make_jump_tag_but_local_jump(int state, VALUE val) +{ + VALUE result = Qnil; + + if (val == Qundef) { + val = GET_THREAD()->tag->retval; + } + switch (state) { + case 0: + break; + case TAG_RETURN: + result = make_localjump_error("unexpected return", val, state); + break; + case TAG_BREAK: + result = make_localjump_error("unexpected break", val, state); + break; + case TAG_NEXT: + result = make_localjump_error("unexpected next", val, state); + break; + case TAG_REDO: + result = make_localjump_error("unexpected redo", Qnil, state); + break; + case TAG_RETRY: + result = make_localjump_error("retry outside of rescue clause", Qnil, state); + break; + default: + break; + } + return result; +} + +void +rb_vm_jump_tag_but_local_jump(int state, VALUE val) +{ + if (val != Qnil) { + VALUE exc = rb_vm_make_jump_tag_but_local_jump(state, val); + rb_exc_raise(exc); + } + JUMP_TAG(state); +} + +NORETURN(static void vm_iter_break(rb_thread_t *th)); + +static void +vm_iter_break(rb_thread_t *th) +{ + rb_control_frame_t *cfp = th->cfp; + VALUE *dfp = GC_GUARDED_PTR_REF(*cfp->dfp); + + th->state = TAG_BREAK; + th->errinfo = (VALUE)NEW_THROW_OBJECT(Qnil, (VALUE)dfp, TAG_BREAK); + TH_JUMP_TAG(th, TAG_BREAK); +} + +void +rb_iter_break(void) +{ + vm_iter_break(GET_THREAD()); +} + +/* optimization: redefine management */ + +static st_table *vm_opt_method_table = 0; + +static void +rb_vm_check_redefinition_opt_method(const NODE *node) +{ + VALUE bop; + + if (st_lookup(vm_opt_method_table, (st_data_t)node, &bop)) { + ruby_vm_redefined_flag[bop] = 1; + } +} + +static void +add_opt_method(VALUE klass, ID mid, VALUE bop) +{ + NODE *node; + if (st_lookup(RCLASS_M_TBL(klass), mid, (void *)&node) && + nd_type(node->nd_body->nd_body) == NODE_CFUNC) { + st_insert(vm_opt_method_table, (st_data_t)node, (st_data_t)bop); + } + else { + rb_bug("undefined optimized method: %s", 
rb_id2name(mid)); + } +} + +static void +vm_init_redefined_flag(void) +{ + ID mid; + VALUE bop; + + vm_opt_method_table = st_init_numtable(); + +#define OP(mid_, bop_) (mid = id##mid_, bop = BOP_##bop_, ruby_vm_redefined_flag[bop] = 0) +#define C(k) add_opt_method(rb_c##k, mid, bop) + OP(PLUS, PLUS), (C(Fixnum), C(Float), C(String), C(Array)); + OP(MINUS, MINUS), (C(Fixnum)); + OP(MULT, MULT), (C(Fixnum), C(Float)); + OP(DIV, DIV), (C(Fixnum), C(Float)); + OP(MOD, MOD), (C(Fixnum), C(Float)); + OP(Eq, EQ), (C(Fixnum), C(Float), C(String)); + OP(Eqq, EQQ), (C(Fixnum), C(Bignum), C(Float), C(Symbol), C(String)); + OP(LT, LT), (C(Fixnum)); + OP(LE, LE), (C(Fixnum)); + OP(LTLT, LTLT), (C(String), C(Array)); + OP(AREF, AREF), (C(Array), C(Hash)); + OP(ASET, ASET), (C(Array), C(Hash)); + OP(Length, LENGTH), (C(Array), C(String), C(Hash)); + OP(Succ, SUCC), (C(Fixnum), C(String), C(Time)); + OP(GT, GT), (C(Fixnum)); + OP(GE, GE), (C(Fixnum)); +#undef C +#undef OP +} + +/* evaluator body */ + +/* finish + VMe (h1) finish + VM finish F1 F2 + cfunc finish F1 F2 C1 + rb_funcall finish F1 F2 C1 + VMe finish F1 F2 C1 + VM finish F1 F2 C1 F3 + + F1 - F3 : pushed by VM + C1 : pushed by send insn (CFUNC) + + struct CONTROL_FRAME { + VALUE *pc; // cfp[0], program counter + VALUE *sp; // cfp[1], stack pointer + VALUE *bp; // cfp[2], base pointer + rb_iseq_t *iseq; // cfp[3], iseq + VALUE flag; // cfp[4], magic + VALUE self; // cfp[5], self + VALUE *lfp; // cfp[6], local frame pointer + VALUE *dfp; // cfp[7], dynamic frame pointer + rb_iseq_t * block_iseq; // cfp[8], block iseq + VALUE proc; // cfp[9], always 0 + }; + + struct BLOCK { + VALUE self; + VALUE *lfp; + VALUE *dfp; + rb_iseq_t *block_iseq; + VALUE proc; + }; + + struct METHOD_CONTROL_FRAME { + rb_control_frame_t frame; + }; + + struct METHOD_FRAME { + VALUE arg0; + ... + VALUE argM; + VALUE param0; + ... 
+ VALUE paramN; + VALUE cref; + VALUE special; // lfp [1] + struct block_object *block_ptr | 0x01; // lfp [0] + }; + + struct BLOCK_CONTROL_FRAME { + rb_control_frame_t frame; + }; + + struct BLOCK_FRAME { + VALUE arg0; + ... + VALUE argM; + VALUE param0; + ... + VALUE paramN; + VALUE cref; + VALUE *(prev_ptr | 0x01); // DFP[0] + }; + + struct CLASS_CONTROL_FRAME { + rb_control_frame_t frame; + }; + + struct CLASS_FRAME { + VALUE param0; + ... + VALUE paramN; + VALUE cref; + VALUE prev_dfp; // for frame jump + }; + + struct C_METHOD_CONTROL_FRAME { + VALUE *pc; // 0 + VALUE *sp; // stack pointer + VALUE *bp; // base pointer (used in exception) + rb_iseq_t *iseq; // cmi + VALUE magic; // C_METHOD_FRAME + VALUE self; // ? + VALUE *lfp; // lfp + VALUE *dfp; // == lfp + rb_iseq_t * block_iseq; // + VALUE proc; // always 0 + }; + + struct C_BLOCK_CONTROL_FRAME { + VALUE *pc; // point only "finish" insn + VALUE *sp; // sp + rb_iseq_t *iseq; // ? + VALUE magic; // C_METHOD_FRAME + VALUE self; // needed? 
+ VALUE *lfp; // lfp + VALUE *dfp; // lfp + rb_iseq_t * block_iseq; // 0 + }; + */ + + +static VALUE +vm_exec(rb_thread_t *th) +{ + int state; + VALUE result, err; + VALUE initial = 0; + VALUE *escape_dfp = NULL; + + TH_PUSH_TAG(th); + _tag.retval = Qnil; + if ((state = EXEC_TAG()) == 0) { + vm_loop_start: + result = vm_exec_core(th, initial); + if ((state = th->state) != 0) { + err = result; + th->state = 0; + goto exception_handler; + } + } + else { + int i; + struct iseq_catch_table_entry *entry; + unsigned long epc, cont_pc, cont_sp; + VALUE catch_iseqval; + rb_control_frame_t *cfp; + VALUE type; + + err = th->errinfo; + + exception_handler: + cont_pc = cont_sp = catch_iseqval = 0; + + while (th->cfp->pc == 0 || th->cfp->iseq == 0) { + th->cfp++; + } + + cfp = th->cfp; + epc = cfp->pc - cfp->iseq->iseq_encoded; + + if (state == TAG_BREAK || state == TAG_RETURN) { + escape_dfp = GET_THROWOBJ_CATCH_POINT(err); + + if (cfp->dfp == escape_dfp) { + if (state == TAG_RETURN) { + if ((cfp + 1)->pc != &finish_insn_seq[0]) { + SET_THROWOBJ_CATCH_POINT(err, (VALUE)(cfp + 1)->dfp); + SET_THROWOBJ_STATE(err, state = TAG_BREAK); + } + else { + for (i = 0; i < cfp->iseq->catch_table_size; i++) { + entry = &cfp->iseq->catch_table[i]; + if (entry->start < epc && entry->end >= epc) { + if (entry->type == CATCH_TYPE_ENSURE) { + catch_iseqval = entry->iseq; + cont_pc = entry->cont; + cont_sp = entry->sp; + break; + } + } + } + if (!catch_iseqval) { + result = GET_THROWOBJ_VAL(err); + th->errinfo = Qnil; + th->cfp += 2; + goto finish_vme; + } + } + /* through */ + } + else { + /* TAG_BREAK */ +#if OPT_STACK_CACHING + initial = (GET_THROWOBJ_VAL(err)); +#else + *th->cfp->sp++ = (GET_THROWOBJ_VAL(err)); +#endif + th->errinfo = Qnil; + goto vm_loop_start; + } + } + } + + if (state == TAG_RAISE) { + for (i = 0; i < cfp->iseq->catch_table_size; i++) { + entry = &cfp->iseq->catch_table[i]; + if (entry->start < epc && entry->end >= epc) { + + if (entry->type == CATCH_TYPE_RESCUE || + 
entry->type == CATCH_TYPE_ENSURE) { + catch_iseqval = entry->iseq; + cont_pc = entry->cont; + cont_sp = entry->sp; + break; + } + } + } + } + else if (state == TAG_RETRY) { + for (i = 0; i < cfp->iseq->catch_table_size; i++) { + entry = &cfp->iseq->catch_table[i]; + if (entry->start < epc && entry->end >= epc) { + + if (entry->type == CATCH_TYPE_ENSURE) { + catch_iseqval = entry->iseq; + cont_pc = entry->cont; + cont_sp = entry->sp; + break; + } + else if (entry->type == CATCH_TYPE_RETRY) { + VALUE *escape_dfp; + escape_dfp = GET_THROWOBJ_CATCH_POINT(err); + if (cfp->dfp == escape_dfp) { + cfp->pc = cfp->iseq->iseq_encoded + entry->cont; + th->errinfo = Qnil; + goto vm_loop_start; + } + } + } + } + } + else if (state == TAG_BREAK && ((VALUE)escape_dfp & ~0x03) == 0) { + type = CATCH_TYPE_BREAK; + + search_restart_point: + for (i = 0; i < cfp->iseq->catch_table_size; i++) { + entry = &cfp->iseq->catch_table[i]; + + if (entry->start < epc && entry->end >= epc) { + if (entry->type == CATCH_TYPE_ENSURE) { + catch_iseqval = entry->iseq; + cont_pc = entry->cont; + cont_sp = entry->sp; + break; + } + else if (entry->type == type) { + cfp->pc = cfp->iseq->iseq_encoded + entry->cont; + cfp->sp = cfp->bp + entry->sp; + + if (state != TAG_REDO) { +#if OPT_STACK_CACHING + initial = (GET_THROWOBJ_VAL(err)); +#else + *th->cfp->sp++ = (GET_THROWOBJ_VAL(err)); +#endif + } + th->errinfo = Qnil; + goto vm_loop_start; + } + } + } + } + else if (state == TAG_REDO) { + type = CATCH_TYPE_REDO; + goto search_restart_point; + } + else if (state == TAG_NEXT) { + type = CATCH_TYPE_NEXT; + goto search_restart_point; + } + else { + for (i = 0; i < cfp->iseq->catch_table_size; i++) { + entry = &cfp->iseq->catch_table[i]; + if (entry->start < epc && entry->end >= epc) { + + if (entry->type == CATCH_TYPE_ENSURE) { + catch_iseqval = entry->iseq; + cont_pc = entry->cont; + cont_sp = entry->sp; + break; + } + } + } + } + + if (catch_iseqval != 0) { + /* found catch table */ + rb_iseq_t *catch_iseq; 
+ + /* enter catch scope */ + GetISeqPtr(catch_iseqval, catch_iseq); + cfp->sp = cfp->bp + cont_sp; + cfp->pc = cfp->iseq->iseq_encoded + cont_pc; + + /* push block frame */ + cfp->sp[0] = err; + vm_push_frame(th, catch_iseq, VM_FRAME_MAGIC_BLOCK, + cfp->self, (VALUE)cfp->dfp, catch_iseq->iseq_encoded, + cfp->sp + 1 /* push value */, cfp->lfp, catch_iseq->local_size - 1); + + state = 0; + th->errinfo = Qnil; + goto vm_loop_start; + } + else { + th->cfp++; + if (th->cfp->pc != &finish_insn_seq[0]) { + goto exception_handler; + } + else { + vm_pop_frame(th); + th->errinfo = err; + TH_POP_TAG2(); + JUMP_TAG(state); + } + } + } + finish_vme: + TH_POP_TAG(); + return result; +} + +/* misc */ + +VALUE +rb_iseq_eval(VALUE iseqval) +{ + rb_thread_t *th = GET_THREAD(); + VALUE val; + volatile VALUE tmp; + + vm_set_top_stack(th, iseqval); + + val = vm_exec(th); + tmp = iseqval; /* prohibit tail call optimization */ + return val; + } + +VALUE +rb_iseq_eval_main(VALUE iseqval) +{ + rb_thread_t *th = GET_THREAD(); + VALUE val; + volatile VALUE tmp; + + vm_set_main_stack(th, iseqval); + + val = vm_exec(th); + tmp = iseqval; /* prohibit tail call optimization */ + return val; +} + +int +rb_thread_method_id_and_class(rb_thread_t *th, + ID *idp, VALUE *klassp) +{ + rb_control_frame_t *cfp = th->cfp; + rb_iseq_t *iseq = cfp->iseq; + if (!iseq) { + if (idp) *idp = cfp->method_id; + if (klassp) *klassp = cfp->method_class; + return 1; + } + while (iseq) { + if (RUBY_VM_IFUNC_P(iseq)) { + if (idp) CONST_ID(*idp, ""); + if (klassp) *klassp = 0; + return 1; + } + if (iseq->defined_method_id) { + if (idp) *idp = iseq->defined_method_id; + if (klassp) *klassp = iseq->klass; + return 1; + } + if (iseq->local_iseq == iseq) { + break; + } + iseq = iseq->parent_iseq; + } + return 0; +} + +int +rb_frame_method_id_and_class(ID *idp, VALUE *klassp) +{ + return rb_thread_method_id_and_class(GET_THREAD(), idp, klassp); +} + +VALUE +rb_thread_current_status(const rb_thread_t *th) +{ + const 
rb_control_frame_t *cfp = th->cfp; + VALUE str = Qnil; + + if (cfp->iseq != 0) { + if (cfp->pc != 0) { + rb_iseq_t *iseq = cfp->iseq; + int line_no = rb_vm_get_sourceline(cfp); + char *file = RSTRING_PTR(iseq->filename); + str = rb_sprintf("%s:%d:in `%s'", + file, line_no, RSTRING_PTR(iseq->name)); + } + } + else if (cfp->method_id) { + str = rb_sprintf("`%s#%s' (cfunc)", + RSTRING_PTR(rb_class_name(cfp->method_class)), + rb_id2name(cfp->method_id)); + } + + return str; +} + +VALUE +rb_vm_call_cfunc(VALUE recv, VALUE (*func)(VALUE), VALUE arg, + const rb_block_t *blockptr, VALUE filename) +{ + rb_thread_t *th = GET_THREAD(); + const rb_control_frame_t *reg_cfp = th->cfp; + volatile VALUE iseqval = rb_iseq_new(0, filename, filename, 0, ISEQ_TYPE_TOP); + VALUE val; + + vm_push_frame(th, DATA_PTR(iseqval), VM_FRAME_MAGIC_TOP, + recv, (VALUE)blockptr, 0, reg_cfp->sp, 0, 1); + + val = (*func)(arg); + + vm_pop_frame(th); + return val; +} + +/* vm */ + +static void +vm_free(void *ptr) +{ + RUBY_FREE_ENTER("vm"); + if (ptr) { + rb_vm_t *vmobj = ptr; + + st_free_table(vmobj->living_threads); + vmobj->living_threads = 0; + /* TODO: MultiVM Instance */ + /* VM object should not be cleaned by GC */ + /* ruby_xfree(ptr); */ + /* ruby_current_vm = 0; */ + } + RUBY_FREE_LEAVE("vm"); +} + +static int +vm_mark_each_thread_func(st_data_t key, st_data_t value, st_data_t dummy) +{ + VALUE thval = (VALUE)key; + rb_gc_mark(thval); + return ST_CONTINUE; +} + +static void +mark_event_hooks(rb_event_hook_t *hook) +{ + while (hook) { + rb_gc_mark(hook->data); + hook = hook->next; + } +} + +void +rb_vm_mark(void *ptr) +{ + int i; + + RUBY_MARK_ENTER("vm"); + RUBY_GC_INFO("-------------------------------------------------\n"); + if (ptr) { + rb_vm_t *vm = ptr; + if (vm->living_threads) { + st_foreach(vm->living_threads, vm_mark_each_thread_func, 0); + } + RUBY_MARK_UNLESS_NULL(vm->thgroup_default); + RUBY_MARK_UNLESS_NULL(vm->mark_object_ary); + RUBY_MARK_UNLESS_NULL(vm->load_path); + 
RUBY_MARK_UNLESS_NULL(vm->loaded_features); + RUBY_MARK_UNLESS_NULL(vm->top_self); + RUBY_MARK_UNLESS_NULL(vm->coverages); + rb_gc_mark_locations(vm->special_exceptions, vm->special_exceptions + ruby_special_error_count); + + if (vm->loading_table) { + rb_mark_tbl(vm->loading_table); + } + + mark_event_hooks(vm->event_hooks); + + for (i = 0; i < RUBY_NSIG; i++) { + if (vm->trap_list[i].cmd) + rb_gc_mark(vm->trap_list[i].cmd); + } + } + + RUBY_MARK_LEAVE("vm"); +} + +static void +vm_init2(rb_vm_t *vm) +{ + MEMZERO(vm, rb_vm_t, 1); + vm->src_encoding_index = -1; +} + +/* Thread */ + +#define USE_THREAD_DATA_RECYCLE 1 + +#if USE_THREAD_DATA_RECYCLE +#define RECYCLE_MAX 64 +static VALUE *thread_recycle_stack_slot[RECYCLE_MAX]; +static int thread_recycle_stack_count = 0; + +static VALUE * +thread_recycle_stack(size_t size) +{ + if (thread_recycle_stack_count) { + return thread_recycle_stack_slot[--thread_recycle_stack_count]; + } + else { + return ALLOC_N(VALUE, size); + } +} + +#else +#define thread_recycle_stack(size) ALLOC_N(VALUE, (size)) +#endif + +void +rb_thread_recycle_stack_release(VALUE *stack) +{ +#if USE_THREAD_DATA_RECYCLE + if (thread_recycle_stack_count < RECYCLE_MAX) { + thread_recycle_stack_slot[thread_recycle_stack_count++] = stack; + return; + } +#endif + ruby_xfree(stack); +} + +#ifdef USE_THREAD_RECYCLE +static rb_thread_t * +thread_recycle_struct(void) +{ + void *p = ALLOC_N(rb_thread_t, 1); + memset(p, 0, sizeof(rb_thread_t)); + return p; +} +#endif + +static void +thread_free(void *ptr) +{ + rb_thread_t *th; + RUBY_FREE_ENTER("thread"); + + if (ptr) { + th = ptr; + + if (!th->root_fiber) { + RUBY_FREE_UNLESS_NULL(th->stack); + } + + if (th->locking_mutex != Qfalse) { + rb_bug("thread_free: locking_mutex must be NULL (%p:%ld)", (void *)th, th->locking_mutex); + } + if (th->keeping_mutexes != NULL) { + rb_bug("thread_free: keeping_mutexes must be NULL (%p:%ld)", (void *)th, th->locking_mutex); + } + + if (th->local_storage) { + 
st_free_table(th->local_storage); + } + +#if USE_VALUE_CACHE + { + VALUE *ptr = th->value_cache_ptr; + while (*ptr) { + VALUE v = *ptr; + RBASIC(v)->flags = 0; + RBASIC(v)->klass = 0; + ptr++; + } + } +#endif + + if (th->vm && th->vm->main_thread == th) { + RUBY_GC_INFO("main thread\n"); + } + else { + ruby_xfree(ptr); + } + } + RUBY_FREE_LEAVE("thread"); +} + +void rb_gc_mark_machine_stack(rb_thread_t *th); + +void +rb_thread_mark(void *ptr) +{ + rb_thread_t *th = NULL; + RUBY_MARK_ENTER("thread"); + if (ptr) { + th = ptr; + if (th->stack) { + VALUE *p = th->stack; + VALUE *sp = th->cfp->sp; + rb_control_frame_t *cfp = th->cfp; + rb_control_frame_t *limit_cfp = (void *)(th->stack + th->stack_size); + + while (p < sp) { + rb_gc_mark(*p++); + } + rb_gc_mark_locations(p, p + th->mark_stack_len); + + while (cfp != limit_cfp) { + rb_gc_mark(cfp->proc); + cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp); + } + } + + /* mark ruby objects */ + RUBY_MARK_UNLESS_NULL(th->first_proc); + if (th->first_proc) RUBY_MARK_UNLESS_NULL(th->first_args); + + RUBY_MARK_UNLESS_NULL(th->thgroup); + RUBY_MARK_UNLESS_NULL(th->value); + RUBY_MARK_UNLESS_NULL(th->errinfo); + RUBY_MARK_UNLESS_NULL(th->thrown_errinfo); + RUBY_MARK_UNLESS_NULL(th->local_svar); + RUBY_MARK_UNLESS_NULL(th->top_self); + RUBY_MARK_UNLESS_NULL(th->top_wrapper); + RUBY_MARK_UNLESS_NULL(th->fiber); + RUBY_MARK_UNLESS_NULL(th->root_fiber); + RUBY_MARK_UNLESS_NULL(th->stat_insn_usage); + RUBY_MARK_UNLESS_NULL(th->last_status); + + RUBY_MARK_UNLESS_NULL(th->locking_mutex); + + rb_mark_tbl(th->local_storage); + + if (GET_THREAD() != th && th->machine_stack_start && th->machine_stack_end) { + rb_gc_mark_machine_stack(th); + rb_gc_mark_locations((VALUE *)&th->machine_regs, + (VALUE *)(&th->machine_regs) + + sizeof(th->machine_regs) / sizeof(VALUE)); + } + + mark_event_hooks(th->event_hooks); + } + + RUBY_MARK_LEAVE("thread"); +} + +static VALUE +thread_alloc(VALUE klass) +{ + VALUE volatile obj; +#ifdef USE_THREAD_RECYCLE + 
rb_thread_t *th = thread_recycle_struct(); + obj = Data_Wrap_Struct(klass, rb_thread_mark, thread_free, th); +#else + rb_thread_t *th; + obj = Data_Make_Struct(klass, rb_thread_t, rb_thread_mark, thread_free, th); +#endif + return obj; +} + +static void +th_init2(rb_thread_t *th, VALUE self) +{ + th->self = self; + + /* allocate thread stack */ + th->stack_size = RUBY_VM_THREAD_STACK_SIZE; + th->stack = thread_recycle_stack(th->stack_size); + + th->cfp = (void *)(th->stack + th->stack_size); + + vm_push_frame(th, 0, VM_FRAME_MAGIC_TOP, Qnil, 0, 0, + th->stack, 0, 1); + + th->status = THREAD_RUNNABLE; + th->errinfo = Qnil; + th->last_status = Qnil; + +#if USE_VALUE_CACHE + th->value_cache_ptr = &th->value_cache[0]; +#endif +} + +static void +th_init(rb_thread_t *th, VALUE self) +{ + th_init2(th, self); +} + +static VALUE +ruby_thread_init(VALUE self) +{ + rb_thread_t *th; + rb_vm_t *vm = GET_THREAD()->vm; + GetThreadPtr(self, th); + + th_init(th, self); + th->vm = vm; + + th->top_wrapper = 0; + th->top_self = rb_vm_top_self(); + return self; +} + +VALUE +rb_thread_alloc(VALUE klass) +{ + VALUE self = thread_alloc(klass); + ruby_thread_init(self); + return self; +} + +static void +vm_define_method(rb_thread_t *th, VALUE obj, ID id, VALUE iseqval, + rb_num_t is_singleton, NODE *cref) +{ + NODE *newbody; + VALUE klass = cref->nd_clss; + int noex = (int)cref->nd_visi; + rb_iseq_t *miseq; + GetISeqPtr(iseqval, miseq); + + if (NIL_P(klass)) { + rb_raise(rb_eTypeError, "no class/module to add method"); + } + + if (is_singleton) { + if (FIXNUM_P(obj) || SYMBOL_P(obj)) { + rb_raise(rb_eTypeError, + "can't define singleton method \"%s\" for %s", + rb_id2name(id), rb_obj_classname(obj)); + } + + if (OBJ_FROZEN(obj)) { + rb_error_frozen("object"); + } + + klass = rb_singleton_class(obj); + noex = NOEX_PUBLIC; + } + + /* dup */ + COPY_CREF(miseq->cref_stack, cref); + miseq->klass = klass; + miseq->defined_method_id = id; + newbody = NEW_NODE(RUBY_VM_METHOD_NODE, 0, miseq->self, 
0); + rb_add_method(klass, id, newbody, noex); + + if (!is_singleton && noex == NOEX_MODFUNC) { + rb_add_method(rb_singleton_class(klass), id, newbody, NOEX_PUBLIC); + } + INC_VM_STATE_VERSION(); +} + +#define REWIND_CFP(expr) do { \ + rb_thread_t *th__ = GET_THREAD(); \ + th__->cfp++; expr; th__->cfp--; \ +} while (0) + +static VALUE +m_core_define_method(VALUE self, VALUE cbase, VALUE sym, VALUE iseqval) +{ + REWIND_CFP({ + vm_define_method(GET_THREAD(), cbase, SYM2ID(sym), iseqval, 0, rb_vm_cref()); + }); + return Qnil; +} + +static VALUE +m_core_define_singleton_method(VALUE self, VALUE cbase, VALUE sym, VALUE iseqval) +{ + REWIND_CFP({ + vm_define_method(GET_THREAD(), cbase, SYM2ID(sym), iseqval, 1, rb_vm_cref()); + }); + return Qnil; +} + +static VALUE +m_core_set_method_alias(VALUE self, VALUE cbase, VALUE sym1, VALUE sym2) +{ + REWIND_CFP({ + rb_alias(cbase, SYM2ID(sym1), SYM2ID(sym2)); + }); + return Qnil; +} + +static VALUE +m_core_set_variable_alias(VALUE self, VALUE sym1, VALUE sym2) +{ + REWIND_CFP({ + rb_alias_variable(SYM2ID(sym1), SYM2ID(sym2)); + }); + return Qnil; +} + +static VALUE +m_core_undef_method(VALUE self, VALUE cbase, VALUE sym) +{ + REWIND_CFP({ + rb_undef(cbase, SYM2ID(sym)); + INC_VM_STATE_VERSION(); + }); + return Qnil; +} + +static VALUE +m_core_set_postexe(VALUE self, VALUE iseqval) +{ + REWIND_CFP({ + rb_iseq_t *blockiseq; + rb_block_t *blockptr; + rb_thread_t *th = GET_THREAD(); + rb_control_frame_t *cfp = rb_vm_get_ruby_level_next_cfp(th, th->cfp); + VALUE proc; + extern void rb_call_end_proc(VALUE data); + + GetISeqPtr(iseqval, blockiseq); + + blockptr = RUBY_VM_GET_BLOCK_PTR_IN_CFP(cfp); + blockptr->iseq = blockiseq; + blockptr->proc = 0; + + proc = rb_vm_make_proc(th, blockptr, rb_cProc); + rb_set_end_proc(rb_call_end_proc, proc); + }); + return Qnil; +} + +extern VALUE *rb_gc_stack_start; +extern size_t rb_gc_stack_maxsize; +#ifdef __ia64 +extern VALUE *rb_gc_register_stack_start; +#endif + +/* debug functions */ + +static 
VALUE +sdr(void) +{ + rb_vm_bugreport(); + return Qnil; +} + +static VALUE +nsdr(void) +{ + VALUE ary = rb_ary_new(); +#if HAVE_BACKTRACE +#include +#define MAX_NATIVE_TRACE 1024 + static void *trace[MAX_NATIVE_TRACE]; + int n = backtrace(trace, MAX_NATIVE_TRACE); + char **syms = backtrace_symbols(trace, n); + int i; + + if (syms == 0) { + rb_memerror(); + } + + for (i=0; iflags = T_ICLASS; + klass = rb_singleton_class(fcore); + rb_define_method_id(klass, id_core_set_method_alias, m_core_set_method_alias, 3); + rb_define_method_id(klass, id_core_set_variable_alias, m_core_set_variable_alias, 2); + rb_define_method_id(klass, id_core_undef_method, m_core_undef_method, 2); + rb_define_method_id(klass, id_core_define_method, m_core_define_method, 3); + rb_define_method_id(klass, id_core_define_singleton_method, m_core_define_singleton_method, 3); + rb_define_method_id(klass, id_core_set_postexe, m_core_set_postexe, 1); + rb_obj_freeze(fcore); + rb_gc_register_mark_object(fcore); + rb_mRubyVMFrozenCore = fcore; + + /* ::VM::Env */ + rb_cEnv = rb_define_class_under(rb_cRubyVM, "Env", rb_cObject); + rb_undef_alloc_func(rb_cEnv); + + /* ::Thread */ + rb_cThread = rb_define_class("Thread", rb_cObject); + rb_undef_alloc_func(rb_cThread); + + /* ::VM::USAGE_ANALYSIS_* */ + rb_define_const(rb_cRubyVM, "USAGE_ANALYSIS_INSN", rb_hash_new()); + rb_define_const(rb_cRubyVM, "USAGE_ANALYSIS_REGS", rb_hash_new()); + rb_define_const(rb_cRubyVM, "USAGE_ANALYSIS_INSN_BIGRAM", rb_hash_new()); + rb_define_const(rb_cRubyVM, "OPTS", opts = rb_ary_new()); + +#if OPT_DIRECT_THREADED_CODE + rb_ary_push(opts, rb_str_new2("direct threaded code")); +#elif OPT_TOKEN_THREADED_CODE + rb_ary_push(opts, rb_str_new2("token threaded code")); +#elif OPT_CALL_THREADED_CODE + rb_ary_push(opts, rb_str_new2("call threaded code")); +#endif + +#if OPT_BASIC_OPERATIONS + rb_ary_push(opts, rb_str_new2("optimize basic operation")); +#endif + +#if OPT_STACK_CACHING + rb_ary_push(opts, rb_str_new2("stack 
caching")); +#endif +#if OPT_OPERANDS_UNIFICATION + rb_ary_push(opts, rb_str_new2("operands unification]")); +#endif +#if OPT_INSTRUCTIONS_UNIFICATION + rb_ary_push(opts, rb_str_new2("instructions unification")); +#endif +#if OPT_INLINE_METHOD_CACHE + rb_ary_push(opts, rb_str_new2("inline method cache")); +#endif +#if OPT_BLOCKINLINING + rb_ary_push(opts, rb_str_new2("block inlining")); +#endif + + /* ::VM::InsnNameArray */ + rb_define_const(rb_cRubyVM, "INSTRUCTION_NAMES", rb_insns_name_array()); + + /* debug functions ::VM::SDR(), ::VM::NSDR() */ +#if VMDEBUG + rb_define_singleton_method(rb_cRubyVM, "SDR", sdr, 0); + rb_define_singleton_method(rb_cRubyVM, "NSDR", nsdr, 0); +#else + (void)sdr; + (void)nsdr; +#endif + + /* VM bootstrap: phase 2 */ + { + rb_vm_t *vm = ruby_current_vm; + rb_thread_t *th = GET_THREAD(); + VALUE filename = rb_str_new2("
"); + volatile VALUE iseqval = rb_iseq_new(0, filename, filename, 0, ISEQ_TYPE_TOP); + volatile VALUE th_self; + rb_iseq_t *iseq; + + /* create vm object */ + vm->self = Data_Wrap_Struct(rb_cRubyVM, rb_vm_mark, vm_free, vm); + + /* create main thread */ + th_self = th->self = Data_Wrap_Struct(rb_cThread, rb_thread_mark, thread_free, th); + vm->main_thread = th; + vm->running_thread = th; + th->vm = vm; + th->top_wrapper = 0; + th->top_self = rb_vm_top_self(); + rb_thread_set_current(th); + + vm->living_threads = st_init_numtable(); + st_insert(vm->living_threads, th_self, (st_data_t) th->thread_id); + + rb_gc_register_mark_object(iseqval); + GetISeqPtr(iseqval, iseq); + th->cfp->iseq = iseq; + th->cfp->pc = iseq->iseq_encoded; + th->cfp->self = th->top_self; + + rb_define_global_const("TOPLEVEL_BINDING", rb_binding_new()); + } + vm_init_redefined_flag(); +} + +#if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE +struct rb_objspace *rb_objspace_alloc(void); +#endif +void ruby_thread_init_stack(rb_thread_t *th); + +void +Init_BareVM(void) +{ + /* VM bootstrap: phase 1 */ + rb_vm_t * vm = malloc(sizeof(*vm)); + rb_thread_t * th = malloc(sizeof(*th)); + if (!vm || !th) { + fprintf(stderr, "[FATAL] failed to allocate memory\n"); + exit(EXIT_FAILURE); + } + MEMZERO(th, rb_thread_t, 1); + + rb_thread_set_current_raw(th); + + vm_init2(vm); +#if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE + vm->objspace = rb_objspace_alloc(); +#endif + ruby_current_vm = vm; + + th_init2(th, 0); + th->vm = vm; + ruby_thread_init_stack(th); +} + +/* top self */ + +static VALUE +main_to_s(VALUE obj) +{ + return rb_str_new2("main"); +} + +VALUE +rb_vm_top_self(void) +{ + return GET_VM()->top_self; +} + +void +Init_top_self(void) +{ + rb_vm_t *vm = GET_VM(); + + vm->top_self = rb_obj_alloc(rb_cObject); + rb_define_singleton_method(rb_vm_top_self(), "to_s", main_to_s, 0); + + /* initialize mark object array */ + vm->mark_object_ary = rb_ary_tmp_new(1); +} + +VALUE * 
/*
 * System headers needed by the declarations below: jmp_buf is used by
 * rb_thread_t (machine_regs) and NSIG sizes rb_vm_t's trap_list.
 * NOTE(review): the header names were missing from this copy of the file
 * and are restored from those usages -- confirm against upstream.
 */
#include <setjmp.h>
#include <signal.h>

#ifndef NSIG
# define NSIG (_SIGMAX + 1)      /* For QNX */
#endif

#define RUBY_NSIG NSIG

/* va_init_list(): start a va_list with either <stdarg.h> or legacy <varargs.h> */
#ifdef HAVE_STDARG_PROTOTYPES
#include <stdarg.h>
#define va_init_list(a,b) va_start(a,b)
#else
#include <varargs.h>
#define va_init_list(a,b) va_start(a)
#endif
/*
 * Branch-prediction hints.
 *
 * The condition is normalised to 0/1 with `!!` before it reaches
 * __builtin_expect(), whose parameters are `long`: passing the raw value
 * would convert pointers to integers and could truncate a wider non-zero
 * value to 0.  The fallback is normalised the same way so the macros
 * yield the same value with every compiler.
 */
#if __GNUC__ >= 3
#define LIKELY(x)   (__builtin_expect(!!(x), 1))
#define UNLIKELY(x) (__builtin_expect(!!(x), 0))
#else /* __GNUC__ >= 3 */
#define LIKELY(x)   (!!(x))
#define UNLIKELY(x) (!!(x))
#endif /* __GNUC__ >= 3 */
    /**
     * argument information
     *
     *  def m(a1, a2, ..., aM,                    # mandatory
     *        b1=(...), b2=(...), ..., bN=(...),  # optional
     *        *c,                                 # rest
     *        d1, d2, ..., dO,                    # post
     *        &e)                                 # block
     * =>
     *
     *  argc           = M
     *  arg_rest       = M+N+1 // or -1 if no rest arg
     *  arg_opts       = N
     *  arg_opt_table  = [ (N entries) ]
     *  arg_post_len   = O // 0 if no post arguments
     *  arg_post_start = M+N+2
     *  arg_block      = M+N + 1 + O + 1 // -1 if no block arg
     *  arg_simple     = 0 if not simple arguments.
     *                 = 1 if no opt, rest, post, block.
     *                 = 2 if ambiguous block parameter ({|a|}).
     *  arg_size       = argument size.
     */
/*
 * One entry of a thread's control frame stack.
 *
 * The "cfp[n]" notes give each member's slot index inside the frame.
 * Slots 5-9 (self, lfp, dfp, block_iseq, proc) have the same order and
 * types as the members of rb_block_t: RUBY_VM_GET_BLOCK_PTR_IN_CFP()
 * casts &cfp->self to rb_block_t *, and RUBY_VM_GET_CFP_FROM_BLOCK_PTR()
 * steps back 5 VALUE slots from a block pointer to recover the frame.
 * Members must therefore not be reordered, and nothing may be inserted
 * before `proc`.
 */
typedef struct {
    VALUE *pc;			/* cfp[0] */
    VALUE *sp;			/* cfp[1] */
    VALUE *bp;			/* cfp[2] */
    rb_iseq_t *iseq;		/* cfp[3] */
    VALUE flag;			/* cfp[4] */
    VALUE self;			/* cfp[5] / block[0] */
    VALUE *lfp;			/* cfp[6] / block[1] */
    VALUE *dfp;			/* cfp[7] / block[2] */
    rb_iseq_t *block_iseq;	/* cfp[8] / block[3] */
    VALUE proc;			/* cfp[9] / block[4] */
    ID method_id;               /* cfp[10] saved in special case */
    VALUE method_class;         /* cfp[11] saved in special case */
} rb_control_frame_t;
/*
 * Jump-tag record.  Records form a singly linked list through `prev`;
 * rb_thread_t keeps a pointer to one in its `tag` member.
 * NOTE(review): presumably `buf` is the longjmp target and `tag`/`retval`
 * carry the thrown tag and its value -- the code that fills them is not
 * visible here, confirm before relying on it.
 */
struct rb_vm_tag {
    rb_jmpbuf_t buf;
    VALUE tag;
    VALUE retval;
    struct rb_vm_tag *prev;
};
/*
 * Payload of an environment object (fetched with GetEnvPtr()).
 *
 * env points at env_size VALUE slots; rb_vmdebug_env_dump_raw() labels
 * slot i as (i - local_size).  prev_envval, when non-zero, is the VALUE of
 * the previous environment object and is followed with GetEnvPtr() to walk
 * the chain.
 */
typedef struct {
    VALUE *env;
    int env_size;
    int local_size;
    VALUE prev_envval;		/* for GC mark */
    rb_block_t block;
} rb_env_t;
/*
 * Frame type tags.  The tag occupies the low VM_FRAME_MAGIC_MASK_BITS bits
 * of rb_control_frame_t's `flag`; VM_FRAME_TYPE() extracts it.
 */
#define VM_FRAME_MAGIC_METHOD 0x11
#define VM_FRAME_MAGIC_BLOCK  0x21
#define VM_FRAME_MAGIC_CLASS  0x31
#define VM_FRAME_MAGIC_TOP    0x41
#define VM_FRAME_MAGIC_FINISH 0x51
#define VM_FRAME_MAGIC_CFUNC  0x61
#define VM_FRAME_MAGIC_PROC   0x71
#define VM_FRAME_MAGIC_IFUNC  0x81
#define VM_FRAME_MAGIC_EVAL   0x91
#define VM_FRAME_MAGIC_LAMBDA 0xa1
#define VM_FRAME_MAGIC_MASK_BITS   8
/* low-bits mask (0xff, type int); built without left-shifting a negative value */
#define VM_FRAME_MAGIC_MASK   ((1 << VM_FRAME_MAGIC_MASK_BITS) - 1)

/* frame type tag (one of VM_FRAME_MAGIC_*) stored in cfp->flag */
#define VM_FRAME_TYPE(cfp) ((cfp)->flag & VM_FRAME_MAGIC_MASK)

/* other frame flag */
#define VM_FRAME_FLAG_PASSED 0x0100
/* true when the object `ptr` points at has builtin type T_NODE */
#define RUBY_VM_IFUNC_P(ptr)        (BUILTIN_TYPE(ptr) == T_NODE)
/*
 * true when `ptr` is non-null and is not a T_NODE object.
 * The argument is parenthesised so that compound expressions (e.g. a
 * conditional) are tested as a whole.  `ptr` is evaluated twice.
 */
#define RUBY_VM_NORMAL_ISEQ_P(ptr) \
  ((ptr) && !RUBY_VM_IFUNC_P(ptr))
/*
 * Run rb_thread_execute_interrupts() for thread `th` when any bit of its
 * interrupt_flag is set.  `th` is parenthesised so that expressions such
 * as `*pp` or `&obj` work; it is evaluated more than once, so it must not
 * have side effects.
 */
#define RUBY_VM_CHECK_INTS_TH(th) do { \
  if (UNLIKELY((th)->interrupt_flag)) { \
    rb_thread_execute_interrupts(th); \
  } \
} while (0)

/* Same check for the thread returned by GET_THREAD(). */
#define RUBY_VM_CHECK_INTS() \
  RUBY_VM_CHECK_INTS_TH(GET_THREAD())
*magic, *iseq_name = "-", *selfstr = "-", *biseq_name = "-"; + VALUE tmp; + + if (cfp->block_iseq != 0 && BUILTIN_TYPE(cfp->block_iseq) != T_NODE) { + biseq_name = ""; /* RSTRING(cfp->block_iseq->name)->ptr; */ + } + + if (lfp < 0 || lfp > th->stack_size) { + lfp = (ptrdiff_t)cfp->lfp; + lfp_in_heap = 'p'; + } + if (dfp < 0 || dfp > th->stack_size) { + dfp = (ptrdiff_t)cfp->dfp; + dfp_in_heap = 'p'; + } + if (cfp->bp) { + bp = cfp->bp - th->stack; + } + + switch (VM_FRAME_TYPE(cfp)) { + case VM_FRAME_MAGIC_TOP: + magic = "TOP"; + break; + case VM_FRAME_MAGIC_METHOD: + magic = "METHOD"; + break; + case VM_FRAME_MAGIC_CLASS: + magic = "CLASS"; + break; + case VM_FRAME_MAGIC_BLOCK: + magic = "BLOCK"; + break; + case VM_FRAME_MAGIC_FINISH: + magic = "FINISH"; + nopos = 1; + break; + case VM_FRAME_MAGIC_CFUNC: + magic = "CFUNC"; + break; + case VM_FRAME_MAGIC_PROC: + magic = "PROC"; + break; + case VM_FRAME_MAGIC_LAMBDA: + magic = "LAMBDA"; + break; + case VM_FRAME_MAGIC_IFUNC: + magic = "IFUNC"; + break; + case VM_FRAME_MAGIC_EVAL: + magic = "EVAL"; + break; + case 0: + magic = "------"; + break; + default: + magic = "(none)"; + break; + } + + if (0) { + tmp = rb_inspect(cfp->self); + selfstr = StringValueCStr(tmp); + } + else { + selfstr = ""; + } + + if (nopos) { + /* no name */ + } + else if (cfp->iseq != 0) { + if (RUBY_VM_IFUNC_P(cfp->iseq)) { + iseq_name = ""; + } + else { + int rb_vm_get_sourceline(rb_control_frame_t *); + + pc = cfp->pc - cfp->iseq->iseq_encoded; + iseq_name = RSTRING_PTR(cfp->iseq->name); + line = rb_vm_get_sourceline(cfp); + if (line) { + snprintf(posbuf, MAX_POSBUF, "%s:%d", RSTRING_PTR(cfp->iseq->filename), line); + } + } + } + else if (cfp->method_id) { + iseq_name = rb_id2name(cfp->method_id); + snprintf(posbuf, MAX_POSBUF, ":%s", rb_id2name(cfp->method_id)); + line = -1; + } + + fprintf(stderr, "c:%04"PRIdPTRDIFF" ", + ((rb_control_frame_t *)(th->stack + th->stack_size) - cfp)); + if (pc == -1) { + fprintf(stderr, "p:---- "); + } + else 
{ + fprintf(stderr, "p:%04d ", pc); + } + fprintf(stderr, "s:%04"PRIdPTRDIFF" b:%04d ", (cfp->sp - th->stack), bp); + fprintf(stderr, lfp_in_heap == ' ' ? "l:%06"PRIdPTRDIFF" " : "l:%06"PRIxPTRDIFF" ", lfp % 10000); + fprintf(stderr, dfp_in_heap == ' ' ? "d:%06"PRIdPTRDIFF" " : "d:%06"PRIxPTRDIFF" ", dfp % 10000); + fprintf(stderr, "%-6s", magic); + if (line && !nopos) { + fprintf(stderr, " %s", posbuf); + } + if (0) { + fprintf(stderr, " \t"); + fprintf(stderr, "iseq: %-24s ", iseq_name); + fprintf(stderr, "self: %-24s ", selfstr); + fprintf(stderr, "%-1s ", biseq_name); + } + fprintf(stderr, "\n"); +} + +void +rb_vmdebug_stack_dump_raw(rb_thread_t *th, rb_control_frame_t *cfp) +{ +#if 0 + VALUE *sp = cfp->sp, *bp = cfp->bp; + VALUE *lfp = cfp->lfp; + VALUE *dfp = cfp->dfp; + VALUE *p, *st, *t; + + fprintf(stderr, "-- stack frame ------------\n"); + for (p = st = th->stack; p < sp; p++) { + fprintf(stderr, "%04ld (%p): %08"PRIxVALUE, (long)(p - st), p, *p); + + t = (VALUE *)*p; + if (th->stack <= t && t < sp) { + fprintf(stderr, " (= %ld)", (long)((VALUE *)GC_GUARDED_PTR_REF(t) - th->stack)); + } + + if (p == lfp) + fprintf(stderr, " <- lfp"); + if (p == dfp) + fprintf(stderr, " <- dfp"); + if (p == bp) + fprintf(stderr, " <- bp"); /* should not be */ + + fprintf(stderr, "\n"); + } +#endif + + fprintf(stderr, "-- control frame ----------\n"); + while ((void *)cfp < (void *)(th->stack + th->stack_size)) { + control_frame_dump(th, cfp); + cfp++; + } + fprintf(stderr, "---------------------------\n"); +} + +void +rb_vmdebug_stack_dump_raw_current(void) +{ + rb_thread_t *th = GET_THREAD(); + rb_vmdebug_stack_dump_raw(th, th->cfp); +} + +void +rb_vmdebug_env_dump_raw(rb_env_t *env, VALUE *lfp, VALUE *dfp) +{ + int i; + fprintf(stderr, "-- env --------------------\n"); + + while (env) { + fprintf(stderr, "--\n"); + for (i = 0; i < env->env_size; i++) { + fprintf(stderr, "%04d: %08lx (%p)", -env->local_size + i, env->env[i], + (void *)&env->env[i]); + if (&env->env[i] == 
lfp) + fprintf(stderr, " <- lfp"); + if (&env->env[i] == dfp) + fprintf(stderr, " <- dfp"); + fprintf(stderr, "\n"); + } + + if (env->prev_envval != 0) { + GetEnvPtr(env->prev_envval, env); + } + else { + env = 0; + } + } + fprintf(stderr, "---------------------------\n"); +} + +void +rb_vmdebug_proc_dump_raw(rb_proc_t *proc) +{ + rb_env_t *env; + char *selfstr; + VALUE val = rb_inspect(proc->block.self); + selfstr = StringValueCStr(val); + + fprintf(stderr, "-- proc -------------------\n"); + fprintf(stderr, "self: %s\n", selfstr); + GetEnvPtr(proc->envval, env); + rb_vmdebug_env_dump_raw(env, proc->block.lfp, proc->block.dfp); +} + +void +rb_vmdebug_stack_dump_th(VALUE thval) +{ + rb_thread_t *th; + GetThreadPtr(thval, th); + rb_vmdebug_stack_dump_raw(th, th->cfp); +} + +static void +vm_stack_dump_each(rb_thread_t *th, rb_control_frame_t *cfp) +{ + int i; + + VALUE rstr; + VALUE *sp = cfp->sp; + VALUE *lfp = cfp->lfp; + VALUE *dfp = cfp->dfp; + + int argc = 0, local_size = 0; + const char *name; + rb_iseq_t *iseq = cfp->iseq; + + if (iseq == 0) { + if (RUBYVM_CFUNC_FRAME_P(cfp)) { + name = rb_id2name(cfp->method_id); + } + else { + name = "?"; + } + } + else if (RUBY_VM_IFUNC_P(iseq)) { + name = ""; + } + else { + argc = iseq->argc; + local_size = iseq->local_size; + name = RSTRING_PTR(iseq->name); + } + + /* stack trace header */ + + if (VM_FRAME_TYPE(cfp) == VM_FRAME_MAGIC_METHOD || + VM_FRAME_TYPE(cfp) == VM_FRAME_MAGIC_TOP || + VM_FRAME_TYPE(cfp) == VM_FRAME_MAGIC_BLOCK || + VM_FRAME_TYPE(cfp) == VM_FRAME_MAGIC_CLASS || + VM_FRAME_TYPE(cfp) == VM_FRAME_MAGIC_PROC || + VM_FRAME_TYPE(cfp) == VM_FRAME_MAGIC_LAMBDA || + VM_FRAME_TYPE(cfp) == VM_FRAME_MAGIC_CFUNC || + VM_FRAME_TYPE(cfp) == VM_FRAME_MAGIC_IFUNC || + VM_FRAME_TYPE(cfp) == VM_FRAME_MAGIC_EVAL) { + + VALUE *ptr = dfp - local_size; + + vm_stack_dump_each(th, cfp + 1); + control_frame_dump(th, cfp); + + if (lfp != dfp) { + local_size++; + } + for (i = 0; i < argc; i++) { + rstr = rb_inspect(*ptr); + 
fprintf(stderr, " arg %2d: %8s (%p)\n", i, StringValueCStr(rstr), + (void *)ptr++); + } + for (; i < local_size - 1; i++) { + rstr = rb_inspect(*ptr); + fprintf(stderr, " local %2d: %8s (%p)\n", i, StringValueCStr(rstr), + (void *)ptr++); + } + + ptr = cfp->bp; + for (; ptr < sp; ptr++, i++) { + if (*ptr == Qundef) { + rstr = rb_str_new2("undef"); + } + else { + rstr = rb_inspect(*ptr); + } + fprintf(stderr, " stack %2d: %8s (%"PRIdPTRDIFF")\n", i, StringValueCStr(rstr), + (ptr - th->stack)); + } + } + else if (VM_FRAME_TYPE(cfp) == VM_FRAME_MAGIC_FINISH) { + if ((th)->stack + (th)->stack_size > (VALUE *)(cfp + 2)) { + vm_stack_dump_each(th, cfp + 1); + } + else { + /* SDR(); */ + } + } + else { + rb_bug("unsupport frame type: %08lx", VM_FRAME_TYPE(cfp)); + } +} + + +void +rb_vmdebug_debug_print_register(rb_thread_t *th) +{ + rb_control_frame_t *cfp = th->cfp; + int pc = -1; + int lfp = cfp->lfp - th->stack; + int dfp = cfp->dfp - th->stack; + int cfpi; + + if (RUBY_VM_NORMAL_ISEQ_P(cfp->iseq)) { + pc = cfp->pc - cfp->iseq->iseq_encoded; + } + + if (lfp < 0 || lfp > th->stack_size) + lfp = -1; + if (dfp < 0 || dfp > th->stack_size) + dfp = -1; + + cfpi = ((rb_control_frame_t *)(th->stack + th->stack_size)) - cfp; + fprintf(stderr, " [PC] %04d, [SP] %04"PRIdPTRDIFF", [LFP] %04d, [DFP] %04d, [CFP] %04d\n", + pc, (cfp->sp - th->stack), lfp, dfp, cfpi); +} + +void +rb_vmdebug_thread_dump_regs(VALUE thval) +{ + rb_thread_t *th; + GetThreadPtr(thval, th); + rb_vmdebug_debug_print_register(th); +} + +void +rb_vmdebug_debug_print_pre(rb_thread_t *th, rb_control_frame_t *cfp) +{ + rb_iseq_t *iseq = cfp->iseq; + + if (iseq != 0 && VM_FRAME_TYPE(cfp) != VM_FRAME_MAGIC_FINISH) { + VALUE *seq = iseq->iseq; + int pc = cfp->pc - iseq->iseq_encoded; + + printf("%3"PRIdPTRDIFF" ", VM_CFP_CNT(th, cfp)); + rb_iseq_disasm_insn(0, seq, pc, iseq, 0); + } + +#if VMDEBUG > 3 + fprintf(stderr, " (1)"); + rb_vmdebug_debug_print_register(th); +#endif +} + +void 
+rb_vmdebug_debug_print_post(rb_thread_t *th, rb_control_frame_t *cfp +#if OPT_STACK_CACHING + , VALUE reg_a, VALUE reg_b +#endif + ) +{ +#if VMDEBUG > 9 + SDR2(cfp); +#endif + +#if VMDEBUG > 3 + fprintf(stderr, " (2)"); + rb_vmdebug_debug_print_register(th); +#endif + /* stack_dump_raw(th, cfp); */ + +#if VMDEBUG > 2 + /* stack_dump_thobj(th); */ + vm_stack_dump_each(th, th->cfp); +#if OPT_STACK_CACHING + { + VALUE rstr; + rstr = rb_inspect(reg_a); + fprintf(stderr, " sc reg A: %s\n", StringValueCStr(rstr)); + rstr = rb_inspect(reg_b); + fprintf(stderr, " sc reg B: %s\n", StringValueCStr(rstr)); + } +#endif + printf + ("--------------------------------------------------------------\n"); +#endif +} + +#ifdef COLLECT_USAGE_ANALYSIS +/* uh = { + * insn(Fixnum) => ihash(Hash) + * } + * ihash = { + * -1(Fixnum) => count, # insn usage + * 0(Fixnum) => ophash, # operand usage + * } + * ophash = { + * val(interned string) => count(Fixnum) + * } + */ +void +vm_analysis_insn(int insn) +{ + ID usage_hash; + ID bigram_hash; + static int prev_insn = -1; + + VALUE uh; + VALUE ihash; + VALUE cv; + + CONST_ID(usage_hash, "USAGE_ANALYSIS_INSN"); + CONST_ID(bigram_hash, "USAGE_ANALYSIS_INSN_BIGRAM"); + uh = rb_const_get(rb_cRubyVM, usage_hash); + if ((ihash = rb_hash_aref(uh, INT2FIX(insn))) == Qnil) { + ihash = rb_hash_new(); + rb_hash_aset(uh, INT2FIX(insn), ihash); + } + if ((cv = rb_hash_aref(ihash, INT2FIX(-1))) == Qnil) { + cv = INT2FIX(0); + } + rb_hash_aset(ihash, INT2FIX(-1), INT2FIX(FIX2INT(cv) + 1)); + + /* calc bigram */ + if (prev_insn != -1) { + VALUE bi; + VALUE ary[2]; + VALUE cv; + + ary[0] = INT2FIX(prev_insn); + ary[1] = INT2FIX(insn); + bi = rb_ary_new4(2, &ary[0]); + + uh = rb_const_get(rb_cRubyVM, bigram_hash); + if ((cv = rb_hash_aref(uh, bi)) == Qnil) { + cv = INT2FIX(0); + } + rb_hash_aset(uh, bi, INT2FIX(FIX2INT(cv) + 1)); + } + prev_insn = insn; +} + +/* from disasm.c */ +extern VALUE insn_operand_intern(int insn, int op_no, VALUE op, + int len, int pos, 
VALUE child); + +void +vm_analysis_operand(int insn, int n, VALUE op) +{ + ID usage_hash; + + VALUE uh; + VALUE ihash; + VALUE ophash; + VALUE valstr; + VALUE cv; + + CONST_ID(usage_hash, "USAGE_ANALYSIS_INSN"); + + uh = rb_const_get(rb_cRubyVM, usage_hash); + if ((ihash = rb_hash_aref(uh, INT2FIX(insn))) == Qnil) { + ihash = rb_hash_new(); + rb_hash_aset(uh, INT2FIX(insn), ihash); + } + if ((ophash = rb_hash_aref(ihash, INT2FIX(n))) == Qnil) { + ophash = rb_hash_new(); + rb_hash_aset(ihash, INT2FIX(n), ophash); + } + /* intern */ + valstr = insn_operand_intern(insn, n, op, 0, 0, 0); + + /* set count */ + if ((cv = rb_hash_aref(ophash, valstr)) == Qnil) { + cv = INT2FIX(0); + } + rb_hash_aset(ophash, valstr, INT2FIX(FIX2INT(cv) + 1)); +} + +void +vm_analysis_register(int reg, int isset) +{ + ID usage_hash; + VALUE uh; + VALUE rhash; + VALUE valstr; + static const char regstrs[][5] = { + "pc", /* 0 */ + "sp", /* 1 */ + "cfp", /* 2 */ + "lfp", /* 3 */ + "dfp", /* 4 */ + "self", /* 5 */ + "iseq", /* 6 */ + }; + static const char getsetstr[][4] = { + "get", + "set", + }; + static VALUE syms[sizeof(regstrs) / sizeof(regstrs[0])][2]; + + VALUE cv; + + CONST_ID(usage_hash, "USAGE_ANALYSIS_REGS"); + if (syms[0] == 0) { + char buff[0x10]; + int i; + + for (i = 0; i < sizeof(regstrs) / sizeof(regstrs[0]); i++) { + int j; + for (j = 0; j < 2; j++) { + snfprintf(stderr, buff, 0x10, "%d %s %-4s", i, getsetstr[j], + regstrs[i]); + syms[i][j] = ID2SYM(rb_intern(buff)); + } + } + } + valstr = syms[reg][isset]; + + uh = rb_const_get(rb_cRubyVM, usage_hash); + if ((cv = rb_hash_aref(uh, valstr)) == Qnil) { + cv = INT2FIX(0); + } + rb_hash_aset(uh, valstr, INT2FIX(FIX2INT(cv) + 1)); +} + + +#endif + +VALUE +rb_vmdebug_thread_dump_state(VALUE self) +{ + rb_thread_t *th; + rb_control_frame_t *cfp; + GetThreadPtr(self, th); + cfp = th->cfp; + + fprintf(stderr, "Thread state dump:\n"); + fprintf(stderr, "pc : %p, sp : %p\n", (void *)cfp->pc, (void *)cfp->sp); + fprintf(stderr, "cfp: %p, 
lfp: %p, dfp: %p\n", (void *)cfp, (void *)cfp->lfp, (void *)cfp->dfp); + + return Qnil; +} + +VALUE rb_make_backtrace(void); + +void +rb_vm_bugreport(void) +{ + VALUE bt; + + if (GET_THREAD()->vm) { + int i; + SDR(); + + bt = rb_make_backtrace(); + + if (bt) { + fprintf(stderr, "-- Ruby level backtrace information" + "-----------------------------------------\n"); + + for (i = 0; i < RARRAY_LEN(bt); i++) { + VALUE str = RARRAY_PTR(bt)[i]; + fprintf(stderr, "%s\n", RSTRING_PTR(str)); + } + fprintf(stderr, "\n"); + } + } + +#if HAVE_BACKTRACE +#include +#define MAX_NATIVE_TRACE 1024 + { + static void *trace[MAX_NATIVE_TRACE]; + int n = backtrace(trace, MAX_NATIVE_TRACE); + char **syms = backtrace_symbols(trace, n); + int i; + + fprintf(stderr, "-- C level backtrace information " + "-------------------------------------------\n"); + for (i=0; ipassed_block); + + if (th->passed_block) { + blockptr = th->passed_block; + th->passed_block = 0; + } + again: + switch (nd_type(body)) { + case RUBY_VM_METHOD_NODE:{ + rb_control_frame_t *reg_cfp; + VALUE iseqval = (VALUE)body->nd_body; + int i; + + rb_vm_set_finish_env(th); + reg_cfp = th->cfp; + + CHECK_STACK_OVERFLOW(reg_cfp, argc + 1); + + *reg_cfp->sp++ = recv; + for (i = 0; i < argc; i++) { + *reg_cfp->sp++ = argv[i]; + } + + vm_setup_method(th, reg_cfp, argc, blockptr, 0, iseqval, recv); + val = vm_exec(th); + break; + } + case NODE_CFUNC: { + EXEC_EVENT_HOOK(th, RUBY_EVENT_C_CALL, recv, id, klass); + { + rb_control_frame_t *reg_cfp = th->cfp; + rb_control_frame_t *cfp = + vm_push_frame(th, 0, VM_FRAME_MAGIC_CFUNC, + recv, (VALUE)blockptr, 0, reg_cfp->sp, 0, 1); + + cfp->method_id = oid; + cfp->method_class = klass; + + val = call_cfunc(body->nd_cfnc, recv, body->nd_argc, argc, argv); + + if (reg_cfp != th->cfp + 1) { + SDR2(reg_cfp); + SDR2(th->cfp-5); + rb_bug("cfp consistency error - call0"); + th->cfp = reg_cfp; + } + vm_pop_frame(th); + } + EXEC_EVENT_HOOK(th, RUBY_EVENT_C_RETURN, recv, id, klass); + break; + } + 
case NODE_ATTRSET:{ + if (argc != 1) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc); + } + val = rb_ivar_set(recv, body->nd_vid, argv[0]); + break; + } + case NODE_IVAR: { + if (argc != 0) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", + argc); + } + val = rb_attr_get(recv, body->nd_vid); + break; + } + case NODE_BMETHOD:{ + val = vm_call_bmethod(th, oid, body->nd_cval, + recv, klass, argc, (VALUE *)argv, blockptr); + break; + } + case NODE_ZSUPER:{ + klass = RCLASS_SUPER(klass); + if (!klass || !(body = rb_method_node(klass, id))) { + return method_missing(recv, id, argc, argv, 0); + } + RUBY_VM_CHECK_INTS(); + nosuper = CALL_SUPER; + body = body->nd_body; + goto again; + } + default: + rb_bug("unsupported: vm_call0(%s)", ruby_node_name(nd_type(body))); + } + RUBY_VM_CHECK_INTS(); + return val; +} + +VALUE +rb_vm_call(rb_thread_t * th, VALUE klass, VALUE recv, VALUE id, ID oid, + int argc, const VALUE *argv, const NODE *body, int nosuper) +{ + return vm_call0(th, klass, recv, id, oid, argc, argv, body, nosuper); +} + +static inline VALUE +vm_call_super(rb_thread_t * const th, const int argc, const VALUE * const argv) +{ + VALUE recv = th->cfp->self; + VALUE klass; + ID id; + NODE *body; + rb_control_frame_t *cfp = th->cfp; + + if (!cfp->iseq) { + klass = cfp->method_class; + klass = RCLASS_SUPER(klass); + + if (klass == 0) { + klass = vm_search_normal_superclass(cfp->method_class, recv); + } + + id = cfp->method_id; + } + else { + rb_bug("vm_call_super: should not be reached"); + } + + body = rb_method_node(klass, id); /* this returns NODE_METHOD */ + if (!body) { + return method_missing(recv, id, argc, argv, 0); + } + + return vm_call0(th, klass, recv, id, (ID)body->nd_file, + argc, argv, body->nd_body, CALL_SUPER); +} + +VALUE +rb_call_super(int argc, const VALUE *argv) +{ + PASS_PASSED_BLOCK(); + return vm_call_super(GET_THREAD(), argc, argv); +} + +static inline void +stack_check(void) +{ + rb_thread_t *th = 
GET_THREAD(); +/* the RAISED_STACKOVERFLOW flag is tested first and set before raising, so sysstack_error is not raised again while the flag is set */ + if (!rb_thread_raised_p(th, RAISED_STACKOVERFLOW) && ruby_stack_check()) { + rb_thread_raised_set(th, RAISED_STACKOVERFLOW); + rb_exc_raise(sysstack_error); + } +} +/* rb_call0: resolve method mid for klass (method cache first, then rb_get_method_body()), apply the visibility and safe-level checks and run the method through vm_call0(). Lookup and visibility failures are routed to method_missing() with a NOEX_* reason. */ +static inline VALUE +rb_call0(VALUE klass, VALUE recv, ID mid, int argc, const VALUE *argv, + int scope, VALUE self) +{ + NODE *body, *method; + int noex; + ID id = mid; + struct cache_entry *ent; + rb_thread_t *th = GET_THREAD(); +/* a zero klass is reported as a call on a terminated object */ + if (!klass) { + rb_raise(rb_eNotImpError, + "method `%s' called on terminated object (%p)", + rb_id2name(mid), (void *)recv); + } + /* is it in the method cache? */ + ent = cache + EXPR1(klass, mid); +/* cache hit: an entry without a method goes straight to method_missing(); on a miss fall back to rb_get_method_body() */ + if (ent->mid == mid && ent->klass == klass) { + if (!ent->method) + return method_missing(recv, mid, argc, argv, + scope == 2 ? NOEX_VCALL : 0); + id = ent->mid0; + noex = ent->method->nd_noex; + klass = ent->method->nd_clss; + body = ent->method->nd_body; + } + else if ((method = rb_get_method_body(klass, id, &id)) != 0) { + noex = method->nd_noex; + klass = method->nd_clss; + body = method->nd_body; + } + else { + if (scope == 3) { + return method_missing(recv, mid, argc, argv, NOEX_SUPER); + } + return method_missing(recv, mid, argc, argv, + scope == 2 ? 
NOEX_VCALL : 0); + } +/* visibility and safe-level checks; skipped entirely when the method being called is method_missing itself */ + + if (mid != idMethodMissing) { + /* receiver specified form for private method */ + if (UNLIKELY(noex)) { + if (((noex & NOEX_MASK) & NOEX_PRIVATE) && scope == 0) { + return method_missing(recv, mid, argc, argv, NOEX_PRIVATE); + } + + /* self must be kind of a specified form for protected method */ + if (((noex & NOEX_MASK) & NOEX_PROTECTED) && scope == 0) { + VALUE defined_class = klass; +/* for an include class (T_ICLASS) compare against the class stored in its klass field */ + if (TYPE(defined_class) == T_ICLASS) { + defined_class = RBASIC(defined_class)->klass; + } +/* callers that pass Qundef get self from the current control frame */ + if (self == Qundef) { + self = th->cfp->self; + } + if (!rb_obj_is_kind_of(self, rb_class_real(defined_class))) { + return method_missing(recv, mid, argc, argv, NOEX_PROTECTED); + } + } + + if (NOEX_SAFE(noex) > th->safe_level) { + rb_raise(rb_eSecurityError, "calling insecure method: %s", rb_id2name(mid)); + } + } + } +/* all checks passed: run the stack overflow check, then dispatch */ + stack_check(); + return vm_call0(th, klass, recv, mid, id, argc, argv, body, noex & NOEX_NOSUPER); +} +/* rb_call: rb_call0() with self passed as Qundef, so the protected-method check reads self from th->cfp->self. */ +static inline VALUE +rb_call(VALUE klass, VALUE recv, ID mid, int argc, const VALUE *argv, int scope) +{ + return rb_call0(klass, recv, mid, argc, argv, scope, Qundef); +} + +NORETURN(static void raise_method_missing(rb_thread_t *th, int argc, const VALUE *argv, + VALUE obj, int call_status)); + +/* + * call-seq: + * obj.method_missing(symbol [, *args] ) => result + * + * Invoked by Ruby when obj is sent a message it cannot handle. + * symbol is the symbol for the method called, and args + * are any arguments that were passed to it. By default, the interpreter + * raises an error when this method is called. However, it is possible + * to override the method to provide more dynamic behavior. + * If it is decided that a particular method should not be handled, then + * super should be called, so that ancestors can pick up the + * missing method. + * The example below creates + * a class Roman, which responds to methods with names + * consisting of roman numerals, returning the corresponding integer + * values. 
+ * + * class Roman + * def romanToInt(str) + * # ... + * end + * def method_missing(methId) + * str = methId.id2name + * romanToInt(str) + * end + * end + * + * r = Roman.new + * r.iv #=> 4 + * r.xxiii #=> 23 + * r.mm #=> 2000 + */ + +static VALUE +rb_method_missing(int argc, const VALUE *argv, VALUE obj) +{ + rb_thread_t *th = GET_THREAD(); + raise_method_missing(th, argc, argv, obj, th->method_missing_reason); + return Qnil; /* not reached */ +} +/* extra call-status bit: when it is set raise_method_missing() leaves th->cfp untouched instead of stepping back one control frame before raising */ +#define NOEX_MISSING 0x80 +/* raise_method_missing: build and raise the exception for a failed call. argv[0] must be the method name Symbol (ArgumentError otherwise). last_call_status selects the message text and, for NOEX_VCALL, NameError instead of NoMethodError. Declared NORETURN. */ +static void +raise_method_missing(rb_thread_t *th, int argc, const VALUE *argv, VALUE obj, + int last_call_status) +{ + ID id; + VALUE exc = rb_eNoMethodError; + const char *format = 0; + + if (argc == 0 || !SYMBOL_P(argv[0])) { + rb_raise(rb_eArgError, "no id given"); + } + + stack_check(); + + id = SYM2ID(argv[0]); +/* pick the message format from the reason the call failed */ + if (last_call_status & NOEX_PRIVATE) { + format = "private method `%s' called for %s"; + } + else if (last_call_status & NOEX_PROTECTED) { + format = "protected method `%s' called for %s"; + } + else if (last_call_status & NOEX_VCALL) { + format = "undefined local variable or method `%s' for %s"; + exc = rb_eNameError; + } + else if (last_call_status & NOEX_SUPER) { + format = "super: no superclass method `%s' for %s"; + } + if (!format) { + format = "undefined method `%s' for %s"; + } +/* constructor arguments: message object, method name and, for NoMethodError only, the remaining call arguments */ + { + int n = 0; + VALUE args[3]; + args[n++] = rb_funcall(rb_const_get(exc, rb_intern("message")), '!', + 3, rb_str_new2(format), obj, argv[0]); + args[n++] = argv[0]; + if (exc == rb_eNoMethodError) { + args[n++] = rb_ary_new4(argc - 1, argv + 1); + } + exc = rb_class_new_instance(n, args, exc); + + if (!(last_call_status & NOEX_MISSING)) { + th->cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(th->cfp); + } + rb_exc_raise(exc); + } +} +/* method_missing: record call_status in th->method_missing_reason, clear th->passed_block and dispatch the failed call to the method_missing method of obj with the method name Symbol prepended to argv. */ +static inline VALUE +method_missing(VALUE obj, ID id, int argc, const VALUE *argv, int call_status) +{ + VALUE *nargv, result, argv_ary = 0; + rb_thread_t *th = GET_THREAD(); + + th->method_missing_reason = call_status; + th->passed_block = 0; + + if (id == 
idMethodMissing) { + raise_method_missing(th, argc, argv, obj, call_status | NOEX_MISSING); + } + else if (id == ID_ALLOCATOR) { + rb_raise(rb_eTypeError, "allocator undefined for %s", + rb_class2name(obj)); + } +/* build the new argument vector (symbol followed by argv): stack buffer for fewer than 0x100 arguments, a temporary array otherwise */ + if (argc < 0x100) { + nargv = ALLOCA_N(VALUE, argc + 1); + } + else { + argv_ary = rb_ary_tmp_new(argc + 1); + nargv = RARRAY_PTR(argv_ary); + } + nargv[0] = ID2SYM(id); + MEMCPY(nargv + 1, argv, VALUE, argc); + + result = rb_funcall2(obj, idMethodMissing, argc + 1, nargv); + if (argv_ary) rb_ary_clear(argv_ary); + return result; +} +/* rb_raise_method_missing: non-static wrapper; clears th->passed_block and raises through raise_method_missing() with NOEX_MISSING added to call_status. */ +void +rb_raise_method_missing(rb_thread_t *th, int argc, VALUE *argv, + VALUE obj, int call_status) +{ + th->passed_block = 0; + raise_method_missing(th, argc, argv, obj, call_status | NOEX_MISSING); +} +/* rb_apply: call method mid on recv with the elements of the array args as arguments (copied into a stack buffer), using CALL_FCALL scope. */ +VALUE +rb_apply(VALUE recv, ID mid, VALUE args) +{ + int argc; + VALUE *argv; + + argc = RARRAY_LEN(args); /* NOTE: RARRAY_LEN() yields a long but argc is an int, so the length is narrowed here without a range check */ + argv = ALLOCA_N(VALUE, argc); + MEMCPY(argv, RARRAY_PTR(args), VALUE, argc); + return rb_call(CLASS_OF(recv), recv, mid, argc, argv, CALL_FCALL); +} +/* rb_funcall: call method mid on recv with n VALUE arguments taken from the variadic list, using CALL_FCALL scope. */ +VALUE +rb_funcall(VALUE recv, ID mid, int n, ...) 
+{ + VALUE *argv; + va_list ar; + va_init_list(ar, n); +/* copy the n variadic VALUE arguments into a stack buffer; with n <= 0 no argument vector is passed */ + if (n > 0) { + long i; + + argv = ALLOCA_N(VALUE, n); + + for (i = 0; i < n; i++) { + argv[i] = va_arg(ar, VALUE); + } + } + else { + argv = 0; + } + va_end(ar); /* always pairs with va_init_list() above, also when n <= 0 (it used to be skipped on that path) */ + return rb_call(CLASS_OF(recv), recv, mid, n, argv, CALL_FCALL); +} +/* rb_funcall2: call method mid on recv with an explicit argument vector, using CALL_FCALL scope. */ +VALUE +rb_funcall2(VALUE recv, ID mid, int argc, const VALUE *argv) +{ + return rb_call(CLASS_OF(recv), recv, mid, argc, argv, CALL_FCALL); +} +/* rb_funcall3: same as rb_funcall2() but with CALL_PUBLIC scope. */ +VALUE +rb_funcall3(VALUE recv, ID mid, int argc, const VALUE *argv) +{ + return rb_call(CLASS_OF(recv), recv, mid, argc, argv, CALL_PUBLIC); +} +/* send_internal: shared body of send and public_send. argv[0] is the method name (converted with rb_to_id()), the remaining arguments are passed on, and self of the previous control frame is handed to rb_call0() for its protected-method check. Raises ArgumentError when no method name is given. */ +static VALUE +send_internal(int argc, VALUE *argv, VALUE recv, int scope) +{ + VALUE vid; + VALUE self = RUBY_VM_PREVIOUS_CONTROL_FRAME(GET_THREAD()->cfp)->self; + rb_thread_t *th = GET_THREAD(); + + if (argc == 0) { + rb_raise(rb_eArgError, "no method name given"); + } +/* shift the method name off the argument vector */ + vid = *argv++; argc--; + PASS_PASSED_BLOCK_TH(th); + + return rb_call0(CLASS_OF(recv), recv, rb_to_id(vid), argc, argv, scope, self); +} + +/* + * call-seq: + * obj.send(symbol [, args...]) => obj + * obj.__send__(symbol [, args...]) => obj + * + * Invokes the method identified by _symbol_, passing it any + * arguments specified. You can use __send__ if the name + * +send+ clashes with an existing method in _obj_. + * + * class Klass + * def hello(*args) + * "Hello " + args.join(' ') + * end + * end + * k = Klass.new + * k.send :hello, "gentle", "readers" #=> "Hello gentle readers" + */ + +VALUE +rb_f_send(int argc, VALUE *argv, VALUE recv) +{ + return send_internal(argc, argv, recv, NOEX_NOSUPER | NOEX_PRIVATE); +} + +/* + * call-seq: + * obj.public_send(symbol [, args...]) => obj + * + * Invokes the method identified by _symbol_, passing it any + * arguments specified. Unlike send, public_send calls public + * methods only. 
+ * + * 1.public_send(:puts, "hello") # causes NoMethodError + */ + +VALUE +rb_f_public_send(int argc, VALUE *argv, VALUE recv) +{ + return send_internal(argc, argv, recv, NOEX_PUBLIC); +} + +/* yield */ + +static inline VALUE +rb_yield_0(int argc, const VALUE * argv) +{ + return vm_yield(GET_THREAD(), argc, argv); +} + +VALUE +rb_yield(VALUE val) +{ + if (val == Qundef) { + return rb_yield_0(0, 0); + } + else { + return rb_yield_0(1, &val); + } +} + +VALUE +rb_yield_values(int n, ...) +{ + if (n == 0) { + return rb_yield_0(0, 0); + } + else { + int i; + VALUE *argv; + va_list args; + argv = ALLOCA_N(VALUE, n); + + va_init_list(args, n); + for (i=0; icfp; + + TH_PUSH_TAG(th); + state = TH_EXEC_TAG(); + if (state == 0) { + iter_retry: + { + rb_block_t *blockptr = RUBY_VM_GET_BLOCK_PTR_IN_CFP(th->cfp); + blockptr->iseq = (void *)node; + blockptr->proc = 0; + th->passed_block = blockptr; + } + retval = (*it_proc) (data1); + } + else { + VALUE err = th->errinfo; + if (state == TAG_BREAK) { + VALUE *escape_dfp = GET_THROWOBJ_CATCH_POINT(err); + VALUE *cdfp = cfp->dfp; + + if (cdfp == escape_dfp) { + state = 0; + th->state = 0; + th->errinfo = Qnil; + th->cfp = cfp; + } + else{ + /* SDR(); printf("%p, %p\n", cdfp, escape_dfp); */ + } + } + else if (state == TAG_RETRY) { + VALUE *escape_dfp = GET_THROWOBJ_CATCH_POINT(err); + VALUE *cdfp = cfp->dfp; + + if (cdfp == escape_dfp) { + state = 0; + th->state = 0; + th->errinfo = Qnil; + th->cfp = cfp; + goto iter_retry; + } + } + } + TH_POP_TAG(); + + switch (state) { + case 0: + break; + default: + TH_JUMP_TAG(th, state); + } + return retval; +} + +struct iter_method_arg { + VALUE obj; + ID mid; + int argc; + VALUE *argv; +}; + +static VALUE +iterate_method(VALUE obj) +{ + const struct iter_method_arg * arg = + (struct iter_method_arg *) obj; + + return rb_call(CLASS_OF(arg->obj), arg->obj, arg->mid, + arg->argc, arg->argv, CALL_FCALL); +} + +VALUE +rb_block_call(VALUE obj, ID mid, int argc, VALUE * argv, + VALUE (*bl_proc) 
(ANYARGS), VALUE data2) +{ + struct iter_method_arg arg; +/* pack the call description so that iterate_method() can perform the call from inside rb_iterate() */ + arg.obj = obj; + arg.mid = mid; + arg.argc = argc; + arg.argv = argv; + return rb_iterate(iterate_method, (VALUE)&arg, bl_proc, data2); +} +/* rb_each: call the each method of obj with no arguments, using CALL_FCALL scope. */ +VALUE +rb_each(VALUE obj) +{ + return rb_call(CLASS_OF(obj), obj, idEach, 0, 0, CALL_FCALL); +} +/* eval_string_with_cref: compile src and run it with the given self. scope is nil or a Binding (anything else raises TypeError); with nil, the block of the frame returned by rb_vm_get_ruby_level_next_cfp() is copied and used as the base block. file == 0 means use the current source file and line. cref is handed to vm_set_eval_stack(). */ +static VALUE +eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char *file, int line) +{ + int state; + VALUE result = Qundef; + VALUE envval; + rb_binding_t *bind = 0; + rb_thread_t *th = GET_THREAD(); + rb_env_t *env = NULL; + rb_block_t block; + volatile int parse_in_eval; + volatile int mild_compile_error; + + if (file == 0) { + file = rb_sourcefile(); + line = rb_sourceline(); + } +/* remember both counters so they can be restored after POP_TAG(), also when the eval jumps out */ + parse_in_eval = th->parse_in_eval; + mild_compile_error = th->mild_compile_error; + PUSH_TAG(); + if ((state = EXEC_TAG()) == 0) { + rb_iseq_t *iseq; + volatile VALUE iseqval; + + if (scope != Qnil) { + if (rb_obj_is_kind_of(scope, rb_cBinding)) { + GetBindingPtr(scope, bind); + envval = bind->env; + } + else { + rb_raise(rb_eTypeError, + "wrong argument type %s (expected Binding)", + rb_obj_classname(scope)); + } + GetEnvPtr(envval, env); + th->base_block = &env->block; + } + else { + rb_control_frame_t *cfp = rb_vm_get_ruby_level_next_cfp(th, th->cfp); +/* work on a local copy of the frame block so that self and iseq can be overridden */ + if (cfp != 0) { + block = *RUBY_VM_GET_BLOCK_PTR_IN_CFP(cfp); + th->base_block = &block; + th->base_block->self = self; + th->base_block->iseq = cfp->iseq; /* TODO */ + } + else { + rb_raise(rb_eRuntimeError, "Can't eval on top of Fiber or Thread"); + } + } + + /* make eval iseq */ + th->parse_in_eval++; + th->mild_compile_error++; + iseqval = rb_iseq_compile(src, rb_str_new2(file), INT2FIX(line)); + th->mild_compile_error--; + th->parse_in_eval--; + + vm_set_eval_stack(th, iseqval, cref); + th->base_block = 0; + + if (0) { /* for debug */ + printf("%s\n", RSTRING_PTR(rb_iseq_disasm(iseqval))); + } + + /* save new env */ + GetISeqPtr(iseqval, iseq); + if (bind && iseq->local_table_size > 0) { + bind->env = 
rb_vm_make_env_object(th, th->cfp); + } + + /* kick: run the compiled eval iseq */ + CHECK_STACK_OVERFLOW(th->cfp, iseq->stack_max); + result = vm_exec(th); + } + POP_TAG(); + th->mild_compile_error = mild_compile_error; + th->parse_in_eval = parse_in_eval; +/* on TAG_RAISE from an eval that uses the default (eval) file name, replace the first backtrace entry with the first entry of vm_backtrace(th, -2) and, when the message is an empty String, prefix it with the old first entry, then re-raise */ + if (state) { + if (state == TAG_RAISE) { + VALUE errinfo = th->errinfo; + if (strcmp(file, "(eval)") == 0) { + VALUE mesg, errat, bt2; + extern VALUE rb_get_backtrace(VALUE info); + ID id_mesg; + + CONST_ID(id_mesg, "mesg"); + errat = rb_get_backtrace(errinfo); + mesg = rb_attr_get(errinfo, id_mesg); + if (!NIL_P(errat) && TYPE(errat) == T_ARRAY && + (bt2 = vm_backtrace(th, -2), RARRAY_LEN(bt2) > 0)) { + if (!NIL_P(mesg) && TYPE(mesg) == T_STRING && !RSTRING_LEN(mesg)) { + if (OBJ_FROZEN(mesg)) { + VALUE m = rb_str_cat(rb_str_dup(RARRAY_PTR(errat)[0]), ": ", 2); + rb_ivar_set(errinfo, id_mesg, rb_str_append(m, mesg)); + } + else { + rb_str_update(mesg, 0, 0, rb_str_new2(": ")); + rb_str_update(mesg, 0, 0, RARRAY_PTR(errat)[0]); + } + } + RARRAY_PTR(errat)[0] = RARRAY_PTR(bt2)[0]; + } + } + rb_exc_raise(errinfo); + } + JUMP_TAG(state); + } + return result; +} +/* eval_string: eval_string_with_cref() with a zero cref. */ +static VALUE +eval_string(VALUE self, VALUE src, VALUE scope, const char *file, int line) +{ + return eval_string_with_cref(self, src, scope, 0, file, line); +} + +/* + * call-seq: + * eval(string [, binding [, filename [,lineno]]]) => obj + * + * Evaluates the Ruby expression(s) in string. If + * binding is given, the evaluation is performed in its + * context. The binding must be a Binding object; any other + * non-nil value raises TypeError. If the optional filename and + * lineno parameters are present, they will be used when + * reporting syntax errors. 
+ * + * def getBinding(str) + * return binding + * end + * str = "hello" + * eval "str + ' Fred'" #=> "hello Fred" + * eval "str + ' Fred'", getBinding("bye") #=> "bye Fred" + */ + +VALUE +rb_f_eval(int argc, VALUE *argv, VALUE self) +{ + VALUE src, scope, vfile, vline; + const char *file = "(eval)"; + int line = 1; +/* one required and up to three optional arguments: src, then scope, vfile and vline */ + rb_scan_args(argc, argv, "13", &src, &scope, &vfile, &vline); + if (rb_safe_level() >= 4) { + StringValue(src); + if (!NIL_P(scope) && !OBJ_TAINTED(scope)) { + rb_raise(rb_eSecurityError, + "Insecure: can't modify trusted binding"); + } + } + else { + SafeStringValue(src); + } + if (argc >= 3) { + StringValue(vfile); + } + if (argc >= 4) { + line = NUM2INT(vline); + } + + if (!NIL_P(vfile)) + file = RSTRING_PTR(vfile); + return eval_string(self, src, scope, file, line); +} +/* rb_eval_string: evaluate the C string str with the top-level self, no binding, file name (eval) and line 1. */ +VALUE +rb_eval_string(const char *str) +{ + return eval_string(rb_vm_top_self(), rb_str_new2(str), Qnil, "(eval)", 1); +} +/* rb_eval_string_protect: rb_eval_string() run under rb_protect(); state is passed through to rb_protect(). */ +VALUE +rb_eval_string_protect(const char *str, int *state) +{ + return rb_protect((VALUE (*)(VALUE))rb_eval_string, (VALUE)str, state); +} +/* rb_eval_string_wrap: evaluate str while th->top_self is a clone of the top-level self extended with a new module that is installed as th->top_wrapper; both fields are restored afterwards. The status is stored in *state when state is non-NULL, otherwise a non-zero status is re-thrown with JUMP_TAG(). */ +VALUE +rb_eval_string_wrap(const char *str, int *state) +{ + int status; + rb_thread_t *th = GET_THREAD(); + VALUE self = th->top_self; + VALUE wrapper = th->top_wrapper; + VALUE val; + + th->top_wrapper = rb_module_new(); + th->top_self = rb_obj_clone(rb_vm_top_self()); + rb_extend_object(th->top_self, th->top_wrapper); + + val = rb_eval_string_protect(str, &status); + + th->top_self = self; + th->top_wrapper = wrapper; + + if (state) { + *state = status; + } + else if (status) { + JUMP_TAG(status); + } + return val; +} +/* rb_eval_cmd: run cmd. A non-String cmd has its call method invoked with the elements of arg at safe level level (forced to 4 when cmd is tainted); a String cmd is evaluated with the top-level self. The previous safe level is restored on both paths. */ +VALUE +rb_eval_cmd(VALUE cmd, VALUE arg, int level) +{ + int state; + VALUE val = Qnil; /* OK */ + volatile int safe = rb_safe_level(); + + if (OBJ_TAINTED(cmd)) { + level = 4; + } + + if (TYPE(cmd) != T_STRING) { + PUSH_TAG(); + rb_set_safe_level_force(level); + if ((state = EXEC_TAG()) == 0) { + val = rb_funcall2(cmd, rb_intern("call"), RARRAY_LEN(arg), + RARRAY_PTR(arg)); + } + POP_TAG(); + + 
rb_set_safe_level_force(safe); + + if (state) + JUMP_TAG(state); + return val; + } +/* String command: evaluate it with the top-level self; file 0 makes eval_string_with_cref() use the current source position */ + PUSH_TAG(); + if ((state = EXEC_TAG()) == 0) { + val = eval_string(rb_vm_top_self(), cmd, Qnil, 0, 0); + } + POP_TAG(); + + rb_set_safe_level_force(safe); + if (state) rb_vm_jump_tag_but_local_jump(state, val); + return val; +} + +/* block eval under the class/module context: push a cref for under, give the block of the current frame (if any) a copy whose self is replaced, then yield. values == Qundef yields no arguments, otherwise values is an array of block arguments. */ + +static VALUE +yield_under(VALUE under, VALUE self, VALUE values) +{ + rb_thread_t *th = GET_THREAD(); + rb_block_t block, *blockptr; + NODE *cref = vm_cref_push(th, under, NOEX_PUBLIC); +/* lfp[0] of the current frame is redirected to a local copy of the block so that only the copy gets the new self */ + if ((blockptr = GC_GUARDED_PTR_REF(th->cfp->lfp[0])) != 0) { + block = *blockptr; + block.self = self; + th->cfp->lfp[0] = GC_GUARDED_PTR(&block); + } + + if (values == Qundef) { + return vm_yield_with_cref(th, 0, 0, cref); + } + else { + return vm_yield_with_cref(th, RARRAY_LEN(values), RARRAY_PTR(values), cref); + } +} + +/* string eval under the class/module context: push a cref for under, convert src with StringValue (safe level 4 and above) or SafeStringValue, and evaluate it with no binding */ +static VALUE +eval_under(VALUE under, VALUE self, VALUE src, const char *file, int line) +{ + NODE *cref = vm_cref_push(GET_THREAD(), under, NOEX_PUBLIC); + + if (rb_safe_level() >= 4) { + StringValue(src); + } + else { + SafeStringValue(src); + } + + return eval_string_with_cref(self, src, Qnil, cref, file, line); +} +/* specific_eval: shared body of instance_eval and module_eval / class_eval. With a block no arguments are allowed and the block is run by yield_under(). Without a block argv holds the source string, then an optional file name and an optional line number, and the string is run by eval_under(). */ +static VALUE +specific_eval(int argc, VALUE *argv, VALUE klass, VALUE self) +{ + if (rb_block_given_p()) { + if (argc > 0) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", argc); + } + return yield_under(klass, self, Qundef); + } + else { + const char *file = "(eval)"; + int line = 1; + + if (argc == 0) { + rb_raise(rb_eArgError, "block not supplied"); + } + else { + if (rb_safe_level() >= 4) { + StringValue(argv[0]); + } + else { + SafeStringValue(argv[0]); + } + if (argc > 3) { + const char *name = rb_id2name(rb_frame_callee()); + rb_raise(rb_eArgError, + "wrong number of arguments: %s(src) or %s{..}", + name, name); + } + if (argc > 2) + line = NUM2INT(argv[2]); + if (argc > 1) { + file = 
StringValuePtr(argv[1]); + } + } + return eval_under(klass, self, argv[0], file, line); + } +} + +/* + * call-seq: + * obj.instance_eval(string [, filename [, lineno]] ) => obj + * obj.instance_eval {| | block } => obj + * + * Evaluates a string containing Ruby source code, or the given block, + * within the context of the receiver (_obj_). In order to set the + * context, the variable +self+ is set to _obj_ while + * the code is executing, giving the code access to _obj_'s + * instance variables. In the version of instance_eval + * that takes a +String+, the optional second and third + * parameters supply a filename and starting line number that are used + * when reporting compilation errors. + * + * class KlassWithSecret + * def initialize + * @secret = 99 + * end + * end + * k = KlassWithSecret.new + * k.instance_eval { @secret } #=> 99 + */ + +VALUE +rb_obj_instance_eval(int argc, VALUE *argv, VALUE self) +{ + VALUE klass; +/* special constants are evaluated with Qnil as the class context, every other object with its singleton class */ + if (SPECIAL_CONST_P(self)) { + klass = Qnil; + } + else { + klass = rb_singleton_class(self); + } + return specific_eval(argc, argv, klass, self); +} + +/* + * call-seq: + * obj.instance_exec(arg...) {|var...| block } => obj + * + * Executes the given block within the context of the receiver + * (_obj_). In order to set the context, the variable +self+ is set + * to _obj_ while the code is executing, giving the code access to + * _obj_'s instance variables. Arguments are passed as block parameters. 
+ * + * class KlassWithSecret + * def initialize + * @secret = 99 + * end + * end + * k = KlassWithSecret.new + * k.instance_exec(5) {|x| @secret+x } #=> 104 + */ + +VALUE +rb_obj_instance_exec(int argc, VALUE *argv, VALUE self) +{ + VALUE klass; +/* special constants use Qnil as the class context, every other object its singleton class; the arguments are passed to the block as a new array */ + if (SPECIAL_CONST_P(self)) { + klass = Qnil; + } + else { + klass = rb_singleton_class(self); + } + return yield_under(klass, self, rb_ary_new4(argc, argv)); +} + +/* + * call-seq: + * mod.class_eval(string [, filename [, lineno]]) => obj + * mod.module_eval {|| block } => obj + * + * Evaluates the string or block in the context of _mod_. This can + * be used to add methods to a class. module_eval returns + * the result of evaluating its argument. The optional _filename_ + * and _lineno_ parameters set the text for error messages. + * + * class Thing + * end + * a = %q{def hello() "Hello there!" end} + * Thing.module_eval(a) + * puts Thing.new.hello() + * Thing.module_eval("invalid code", "dummy", 123) + * + * produces: + * + * Hello there! + * dummy:123:in `module_eval': undefined local variable + * or method `code' for Thing:Class + */ + +VALUE +rb_mod_module_eval(int argc, VALUE *argv, VALUE mod) +{ + return specific_eval(argc, argv, mod, mod); +} + +/* + * call-seq: + * mod.module_exec(arg...) {|var...| block } => obj + * mod.class_exec(arg...) {|var...| block } => obj + * + * Evaluates the given block in the context of the class/module. + * Methods defined in the block will belong to the receiver. + * + * class Thing + * end + * Thing.class_exec{ + * def hello() "Hello there!" end + * } + * puts Thing.new.hello() + * + * produces: + * + * Hello there! + */ + +VALUE +rb_mod_module_exec(int argc, VALUE *argv, VALUE mod) +{ + return yield_under(mod, mod, rb_ary_new4(argc, argv)); +} + +NORETURN(static VALUE rb_f_throw _((int, VALUE *))); + +/* + * call-seq: + * throw(tag [, obj]) + * + * Transfers control to the end of the active +catch+ block + * waiting for _tag_. 
Raises +ArgumentError+ if there + * is no +catch+ block for the _tag_. The optional second + * parameter supplies a return value for the +catch+ block, + * which otherwise defaults to +nil+. For examples, see + * Kernel::catch. + */ + +static VALUE +rb_f_throw(int argc, VALUE *argv) +{ + VALUE tag, value; + rb_thread_t *th = GET_THREAD(); + struct rb_vm_tag *tt = th->tag; +/* walk the tag chain of the thread for a matching tag and store the return value there */ + rb_scan_args(argc, argv, "11", &tag, &value); + while (tt) { + if (tt->tag == tag) { + tt->retval = value; + break; + } + tt = tt->prev; + } + if (!tt) { + VALUE desc = rb_inspect(tag); + rb_raise(rb_eArgError, "uncaught throw %s", RSTRING_PTR(desc)); + } + rb_trap_restore_mask(); + th->errinfo = NEW_THROW_OBJECT(tag, 0, TAG_THROW); + + JUMP_TAG(TAG_THROW); +#ifndef __GNUC__ + return Qnil; /* not reached */ +#endif +} +/* rb_throw: throw to the catch whose tag is the Symbol named by the C string tag, with val as the value. */ +void +rb_throw(const char *tag, VALUE val) +{ + VALUE argv[2]; + + argv[0] = ID2SYM(rb_intern(tag)); + argv[1] = val; + rb_f_throw(2, argv); +} +/* rb_throw_obj: same as rb_throw() but the tag is an arbitrary VALUE. */ +void +rb_throw_obj(VALUE tag, VALUE val) +{ + VALUE argv[2]; + + argv[0] = tag; + argv[1] = val; + rb_f_throw(2, argv); +} + +/* + * call-seq: + * catch([arg]) {|tag| block } => obj + * + * +catch+ executes its block. If a +throw+ is + * executed, Ruby searches up its stack for a +catch+ block + * with a tag corresponding to the +throw+'s + * _tag_. If found, that block is terminated, and + * +catch+ returns the value given to +throw+. If + * +throw+ is not called, the block terminates normally, and + * the value of +catch+ is the value of the last expression + * evaluated. +catch+ expressions may be nested, and the + * +throw+ call need not be in lexical scope. + * + * def routine(n) + * puts n + * throw :done if n <= 0 + * routine(n-1) + * end + * + * + * catch(:done) { routine(3) } + * + * produces: + * + * 3 + * 2 + * 1 + * 0 + * + * When _arg_ is given, +catch+ yields it as is; when no + * _arg_ is given, +catch+ allocates a new unique object and yields + * it as the tag. This is useful for nested +catch+. 
_arg_ can + * be an arbitrary object, not only Symbol. + * + */ + +static VALUE +rb_f_catch(int argc, VALUE *argv) +{ + VALUE tag; + int state; + volatile VALUE val = Qnil; /* OK */ + rb_thread_t *th = GET_THREAD(); + rb_control_frame_t *saved_cfp = th->cfp; +/* without an argument a freshly allocated Object serves as the tag */ + if (argc == 0) { + tag = rb_obj_alloc(rb_cObject); + } + else { + rb_scan_args(argc, argv, "01", &tag); + } + PUSH_TAG(); + + th->tag->tag = tag; +/* a TAG_THROW aimed at this tag ends here: restore the saved control frame and return the value stored by throw; any other non-zero state is re-thrown below */ + if ((state = EXEC_TAG()) == 0) { + val = rb_yield_0(1, &tag); + } + else if (state == TAG_THROW && RNODE(th->errinfo)->u1.value == tag) { + th->cfp = saved_cfp; + val = th->tag->retval; + th->errinfo = Qnil; + state = 0; + } + POP_TAG(); + if (state) + JUMP_TAG(state); + + return val; +} +/* catch_null_i: rb_iterate() callback that calls catch without a tag argument; dmy is ignored. */ +static VALUE +catch_null_i(VALUE dmy) +{ + return rb_funcall(Qnil, rb_intern("catch"), 0, 0); +} +/* catch_i: rb_iterate() callback that calls catch with the given tag. */ +static VALUE +catch_i(VALUE tag) +{ + return rb_funcall(Qnil, rb_intern("catch"), 1, tag); +} +/* rb_catch: run func as the block of a catch; a NULL tag means catch without a tag argument, otherwise the tag is the Symbol named by the C string. data is passed to rb_iterate() as the block data. */ +VALUE +rb_catch(const char *tag, VALUE (*func)(), VALUE data) +{ + if (!tag) { + return rb_iterate(catch_null_i, 0, func, data); + } + return rb_iterate(catch_i, ID2SYM(rb_intern(tag)), func, data); +} +/* rb_catch_obj: same as rb_catch() but the tag is an arbitrary VALUE. */ +VALUE +rb_catch_obj(VALUE tag, VALUE (*func)(), VALUE data) +{ + return rb_iterate((VALUE (*)_((VALUE)))catch_i, tag, func, data); +} + +/* + * call-seq: + * caller(start=1) => array + * + * Returns the current execution stack---an array containing strings in + * the form ``file:line'' or ``file:line: in + * `method'''. The optional _start_ parameter + * determines the number of initial stack entries to omit from the + * result. 
+ * + * def a(skip) + * caller(skip) + * end + * def b(skip) + * a(skip) + * end + * def c(skip) + * b(skip) + * end + * c(0) #=> ["prog:2:in `a'", "prog:5:in `b'", "prog:8:in `c'", "prog:10"] + * c(1) #=> ["prog:5:in `b'", "prog:8:in `c'", "prog:11"] + * c(2) #=> ["prog:8:in `c'", "prog:12"] + * c(3) #=> ["prog:13"] + */ + +static VALUE +rb_f_caller(int argc, VALUE *argv) +{ + VALUE level; + int lev; +/* the level defaults to 1 when omitted or nil; negative levels are rejected with ArgumentError */ + rb_scan_args(argc, argv, "01", &level); + + if (NIL_P(level)) + lev = 1; + else + lev = NUM2INT(level); + if (lev < 0) + rb_raise(rb_eArgError, "negative level (%d)", lev); + + return vm_backtrace(GET_THREAD(), lev); +} +/* rb_backtrace: print every entry of vm_backtrace(th, -1) with printf, one line per entry prefixed with a tab and the word from. */ +void +rb_backtrace(void) +{ + long i; + VALUE ary; + + ary = vm_backtrace(GET_THREAD(), -1); + for (i = 0; i < RARRAY_LEN(ary); i++) { + printf("\tfrom %s\n", RSTRING_PTR(RARRAY_PTR(ary)[i])); + } +} +/* rb_make_backtrace: return the array produced by vm_backtrace(th, -1) for the current thread. */ +VALUE +rb_make_backtrace(void) +{ + return vm_backtrace(GET_THREAD(), -1); +} + +/* + * call-seq: + * local_variables => array + * + * Returns the names of the current local variables as symbols. + * + * fred = 1 + * for i in 1..10 + * # ... + * end + * local_variables #=> [:fred, :i] + */ + +static VALUE +rb_f_local_variables(void) +{ + VALUE ary = rb_ary_new(); + rb_thread_t *th = GET_THREAD(); + rb_control_frame_t *cfp = + vm_get_ruby_level_caller_cfp(th, RUBY_VM_PREVIOUS_CONTROL_FRAME(th->cfp)); + int i; +/* collect the named entries of the local table of each frame; for a block frame (lfp != dfp) either stop when vm_collect_local_variables_in_heap() returns non-zero or move on to the frame whose dfp is the parent dfp */ + while (cfp) { + if (cfp->iseq) { + for (i = 0; i < cfp->iseq->local_table_size; i++) { + ID lid = cfp->iseq->local_table[i]; + if (lid) { + const char *vname = rb_id2name(lid); + /* should skip temporary variable */ + if (vname) { + rb_ary_push(ary, ID2SYM(lid)); + } + } + } + } + if (cfp->lfp != cfp->dfp) { + /* block */ + VALUE *dfp = GC_GUARDED_PTR_REF(cfp->dfp[0]); + + if (vm_collect_local_variables_in_heap(th, dfp, ary)) { + break; + } + else { + while (cfp->dfp != dfp) { + cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp); + } + } + } + else { + break; + } + } + return ary; +} + +/* + * call-seq: + * block_given? => true or false + * iterator? 
=> true or false + * + * Returns true if yield would execute a + * block in the current context. The iterator? form + * is mildly deprecated. + * + * def try + * if block_given? + * yield + * else + * "no block" + * end + * end + * try #=> "no block" + * try { "hello" } #=> "hello" + * try do "hello" end #=> "hello" + */ + + +VALUE +rb_f_block_given_p(void) +{ + rb_thread_t *th = GET_THREAD(); + rb_control_frame_t *cfp = th->cfp; + cfp = vm_get_ruby_level_caller_cfp(th, RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp)); +/* a block is given when a caller frame exists, bit 0x02 of its lfp[0] is clear and lfp[0] holds a non-null guarded pointer */ + if (cfp != 0 && + (cfp->lfp[0] & 0x02) == 0 && + GC_GUARDED_PTR_REF(cfp->lfp[0])) { + return Qtrue; + } + else { + return Qfalse; + } +} +/* Init_vm_eval: register the Ruby-visible methods defined in this file (global functions plus methods on BasicObject, Kernel and Module). */ +void +Init_vm_eval(void) +{ + rb_define_global_function("eval", rb_f_eval, -1); + rb_define_global_function("local_variables", rb_f_local_variables, 0); + rb_define_global_function("iterator?", rb_f_block_given_p, 0); + rb_define_global_function("block_given?", rb_f_block_given_p, 0); + + rb_define_global_function("catch", rb_f_catch, -1); + rb_define_global_function("throw", rb_f_throw, -1); + + rb_define_global_function("loop", rb_f_loop, 0); + + rb_define_method(rb_cBasicObject, "instance_eval", rb_obj_instance_eval, -1); + rb_define_method(rb_cBasicObject, "instance_exec", rb_obj_instance_exec, -1); + rb_define_private_method(rb_cBasicObject, "method_missing", rb_method_missing, -1); + + rb_define_method(rb_cBasicObject, "__send__", rb_f_send, -1); + rb_define_method(rb_mKernel, "send", rb_f_send, -1); + rb_define_method(rb_mKernel, "public_send", rb_f_public_send, -1); + + rb_define_method(rb_cModule, "module_exec", rb_mod_module_exec, -1); + rb_define_method(rb_cModule, "class_exec", rb_mod_module_exec, -1); + rb_define_method(rb_cModule, "module_eval", rb_mod_module_eval, -1); + rb_define_method(rb_cModule, "class_eval", rb_mod_module_eval, -1); + + rb_define_global_function("caller", rb_f_caller, -1); +} + diff --git a/vm_exec.c b/vm_exec.c new file mode 100644 index 0000000..28d8065 --- /dev/null +++ b/vm_exec.c 
@@ -0,0 +1,156 @@ +/* -*-c-*- */ +/********************************************************************** + + vm_exec.c - + + $Author: yugui $ + + Copyright (C) 2004-2007 Koichi Sasada + +**********************************************************************/ + +#include + +#if VMDEBUG > 0 +#define DECL_SC_REG(type, r, reg) register type reg_##r + +#elif __GNUC__ && __x86_64__ +#define DECL_SC_REG(type, r, reg) register type reg_##r __asm__("r" reg) + +#elif __GNUC__ && __i386__ +#define DECL_SC_REG(type, r, reg) register type reg_##r __asm__("e" reg) + +#else +#define DECL_SC_REG(type, r, reg) register type reg_##r +#endif +/* #define DECL_SC_REG(r, reg) VALUE reg_##r */ + +#if OPT_STACK_CACHING +static VALUE finish_insn_seq[1] = { BIN(finish_SC_ax_ax) }; +#elif OPT_CALL_THREADED_CODE +static VALUE const finish_insn_seq[1] = { 0 }; +#else +static VALUE finish_insn_seq[1] = { BIN(finish) }; +#endif + +#if !OPT_CALL_THREADED_CODE +static VALUE +vm_exec_core(rb_thread_t *th, VALUE initial) +{ + +#if OPT_STACK_CACHING +#if 0 +#elif __GNUC__ && __x86_64 + DECL_SC_REG(VALUE, a, "12"); + DECL_SC_REG(VALUE, b, "13"); +#else + register VALUE reg_a; + register VALUE reg_b; +#endif +#endif + +#if __GNUC__ && __i386__ + DECL_SC_REG(VALUE *, pc, "di"); + DECL_SC_REG(rb_control_frame_t *, cfp, "si"); +#define USE_MACHINE_REGS 1 + +#elif __GNUC__ && __x86_64__ + DECL_SC_REG(VALUE *, pc, "14"); + DECL_SC_REG(rb_control_frame_t *, cfp, "15"); +#define USE_MACHINE_REGS 1 + +#else + register rb_control_frame_t *reg_cfp; + VALUE *reg_pc; +#endif + +#if USE_MACHINE_REGS + +#undef RESTORE_REGS +#define RESTORE_REGS() \ +{ \ + REG_CFP = th->cfp; \ + reg_pc = reg_cfp->pc; \ +} + +#undef REG_PC +#define REG_PC reg_pc +#undef GET_PC +#define GET_PC() (reg_pc) +#undef SET_PC +#define SET_PC(x) (reg_cfp->pc = REG_PC = (x)) +#endif + +#if OPT_TOKEN_THREADED_CODE || OPT_DIRECT_THREADED_CODE +#include "vmtc.inc" + if (UNLIKELY(th == 0)) { +#if OPT_STACK_CACHING + finish_insn_seq[0] = 
(VALUE)&&LABEL (finish_SC_ax_ax); +#else + finish_insn_seq[0] = (VALUE)&&LABEL (finish); +#endif + return (VALUE)insns_address_table; + } +#endif + reg_cfp = th->cfp; + reg_pc = reg_cfp->pc; + +#if OPT_STACK_CACHING + reg_a = initial; + reg_b = 0; +#endif + + first: + INSN_DISPATCH(); +/*****************/ + #include "vm.inc" +/*****************/ + END_INSNS_DISPATCH(); + + /* unreachable */ + rb_bug("vm_eval: unreachable"); + goto first; +} + +const void ** +rb_vm_get_insns_address_table(void) +{ + return (const void **)vm_exec_core(0, 0); +} + +#else + +#include "vm.inc" +#include "vmtc.inc" + +const void *const * +rb_vm_get_insns_address_table(void) +{ + return insns_address_table; +} + +static VALUE +vm_exec_core(rb_thread_t *th, VALUE initial) +{ + register rb_control_frame_t *reg_cfp = th->cfp; + VALUE ret; + + while (*GET_PC()) { + reg_cfp = ((rb_insn_func_t) (*GET_PC()))(th, reg_cfp); + + if (reg_cfp == 0) { + VALUE err = th->errinfo; + th->errinfo = Qnil; + return err; + } + } + + if (VM_FRAME_TYPE(th->cfp) != VM_FRAME_MAGIC_FINISH) { + rb_bug("cfp consistency error"); + } + + ret = *(th->cfp->sp-1); /* pop */ + th->cfp++; /* pop cf */ + return ret; +} +#endif diff --git a/vm_exec.h b/vm_exec.h new file mode 100644 index 0000000..8b8b7f4 --- /dev/null +++ b/vm_exec.h @@ -0,0 +1,184 @@ +/********************************************************************** + + vm.h - + + $Author: yugui $ + created at: 04/01/01 16:56:59 JST + + Copyright (C) 2004-2007 Koichi Sasada + +**********************************************************************/ + +#ifndef RUBY_VM_EXEC_H +#define RUBY_VM_EXEC_H + +typedef long OFFSET; +typedef unsigned long lindex_t; +typedef unsigned long dindex_t; +typedef rb_num_t GENTRY; +typedef rb_iseq_t *ISEQ; + +#ifdef COLLECT_USAGE_ANALYSIS +#define USAGE_ANALYSIS_INSN(insn) vm_analysis_insn(insn) +#define USAGE_ANALYSIS_OPERAND(insn, n, op) vm_analysis_operand(insn, n, (VALUE)op) +#define USAGE_ANALYSIS_REGISTER(reg, s) 
vm_analysis_register(reg, s) +#else +#define USAGE_ANALYSIS_INSN(insn) /* none */ +#define USAGE_ANALYSIS_OPERAND(insn, n, op) /* none */ +#define USAGE_ANALYSIS_REGISTER(reg, s) /* none */ +#endif + +#ifdef __GCC__ +/* TODO: machine dependent prefetch instruction */ +#define PREFETCH(pc) +#else +#define PREFETCH(pc) +#endif + +#if VMDEBUG > 0 +#define debugs printf +#define DEBUG_ENTER_INSN(insn) \ + debug_print_pre(th, GET_CFP()); + +#if OPT_STACK_CACHING +#define SC_REGS() , reg_a, reg_b +#else +#define SC_REGS() +#endif + +#define DEBUG_END_INSN() \ + debug_print_post(th, GET_CFP() SC_REGS()); + +#else + +#define debugs +#define DEBUG_ENTER_INSN(insn) +#define DEBUG_END_INSN() +#endif + +#define throwdebug if(0)printf +/* #define throwdebug printf */ + +/************************************************/ +#if DISPATCH_XXX +error ! +/************************************************/ +#elif OPT_CALL_THREADED_CODE + +#define LABEL(x) insn_func_##x +#define ELABEL(x) +#define LABEL_PTR(x) &LABEL(x) + +#define INSN_ENTRY(insn) \ + static rb_control_frame_t * \ + FUNC_FASTCALL(LABEL(insn))(rb_thread_t *th, rb_control_frame_t *reg_cfp) { + +#define END_INSN(insn) return reg_cfp;} + +#define NEXT_INSN() return reg_cfp; + +/************************************************/ +#elif OPT_TOKEN_THREADED_CODE || OPT_DIRECT_THREADED_CODE +/* threaded code with gcc */ + +#define LABEL(x) INSN_LABEL_##x +#define ELABEL(x) INSN_ELABEL_##x +#define LABEL_PTR(x) &&LABEL(x) + +#define INSN_ENTRY_SIG(insn) + + +#define INSN_DISPATCH_SIG(insn) + +#define INSN_ENTRY(insn) \ + LABEL(insn): \ + INSN_ENTRY_SIG(insn); \ + +/* dispather */ +#if __GNUC__ && (__i386__ || __x86_64__) && __GNUC__ == 3 +#define DISPATCH_ARCH_DEPEND_WAY(addr) \ + asm volatile("jmp *%0;\t# -- inseted by vm.h\t[length = 2]" : : "r" (addr)) + +#else +#define DISPATCH_ARCH_DEPEND_WAY(addr) \ + /* do nothing */ + +#endif + + +/**********************************/ +#if OPT_DIRECT_THREADED_CODE + +/* for GCC 3.4.x */ 
+#define TC_DISPATCH(insn) \ + INSN_DISPATCH_SIG(insn); \ + goto *GET_CURRENT_INSN(); \ + ; + +#else +/* token threade code */ + +#define TC_DISPATCH(insn) \ + DISPATCH_ARCH_DEPEND_WAY(insns_address_table[GET_CURRENT_INSN()]); \ + INSN_DISPATCH_SIG(insn); \ + goto *insns_address_table[GET_CURRENT_INSN()]; \ + rb_bug("tc error"); + + +#endif /* DISPATCH_DIRECT_THREADED_CODE */ + +#define END_INSN(insn) \ + DEBUG_END_INSN(); \ + TC_DISPATCH(insn); \ + +#define INSN_DISPATCH() \ + TC_DISPATCH(__START__) \ + { + +#define END_INSNS_DISPATCH() \ + rb_bug("unknown insn: %ld", GET_CURRENT_INSN()); \ + } /* end of while loop */ \ + +#define NEXT_INSN() TC_DISPATCH(__NEXT_INSN__) + +/************************************************/ +#else /* no threaded code */ +/* most common method */ + +#define INSN_ENTRY(insn) \ +case BIN(insn): + +#define END_INSN(insn) \ + DEBUG_END_INSN(); \ + break; + + +#define INSN_DISPATCH() \ + while(1){ \ + switch(GET_CURRENT_INSN()){ + +#define END_INSNS_DISPATCH() \ +default: \ + SDR(); \ + rb_bug("unknown insn: %ld", GET_CURRENT_INSN()); \ + } /* end of switch */ \ + } /* end of while loop */ \ + +#define NEXT_INSN() goto first + +#endif + +#define VM_SP_CNT(th, sp) ((sp) - (th)->stack) + +#if OPT_CALL_THREADED_CODE +#define THROW_EXCEPTION(exc) do { \ + th->errinfo = (VALUE)(exc); \ + return 0; \ +} while (0) +#else +#define THROW_EXCEPTION(exc) return (VALUE)(exc) +#endif + +#define SCREG(r) (reg_##r) + +#endif /* RUBY_VM_EXEC_H */ diff --git a/vm_insnhelper.c b/vm_insnhelper.c new file mode 100644 index 0000000..de92bf9 --- /dev/null +++ b/vm_insnhelper.c @@ -0,0 +1,1556 @@ +/********************************************************************** + + vm_insnhelper.c - instruction helper functions. 
+ + $Author: yugui $ + + Copyright (C) 2007 Koichi Sasada + +**********************************************************************/ + +/* finish iseq array */ +#include "insns.inc" +#include + +/* control stack frame */ + +#ifndef INLINE +#define INLINE inline +#endif + +static rb_control_frame_t *vm_get_ruby_level_caller_cfp(rb_thread_t *th, rb_control_frame_t *cfp); + +static inline rb_control_frame_t * +vm_push_frame(rb_thread_t * th, const rb_iseq_t * iseq, + VALUE type, VALUE self, VALUE specval, + const VALUE *pc, VALUE *sp, VALUE *lfp, + int local_size) +{ + rb_control_frame_t * const cfp = th->cfp = th->cfp - 1; + int i; + + /* setup vm value stack */ + + /* nil initialize */ + for (i=0; i < local_size; i++) { + *sp = Qnil; + sp++; + } + + /* set special val */ + *sp = GC_GUARDED_PTR(specval); + + if (lfp == 0) { + lfp = sp; + } + + /* setup vm control frame stack */ + + cfp->pc = (VALUE *)pc; + cfp->sp = sp + 1; + cfp->bp = sp + 1; + cfp->iseq = (rb_iseq_t *) iseq; + cfp->flag = type; + cfp->self = self; + cfp->lfp = lfp; + cfp->dfp = sp; + cfp->proc = 0; + +#define COLLECT_PROFILE 0 +#if COLLECT_PROFILE + cfp->prof_time_self = clock(); + cfp->prof_time_chld = 0; +#endif + + if (VMDEBUG == 2) { + SDR(); + } + + return cfp; +} + +static inline void +vm_pop_frame(rb_thread_t *th) +{ +#if COLLECT_PROFILE + rb_control_frame_t *cfp = th->cfp; + + if (RUBY_VM_NORMAL_ISEQ_P(cfp->iseq)) { + VALUE current_time = clock(); + rb_control_frame_t *cfp = th->cfp; + cfp->prof_time_self = current_time - cfp->prof_time_self; + (cfp+1)->prof_time_chld += cfp->prof_time_self; + + cfp->iseq->profile.count++; + cfp->iseq->profile.time_cumu = cfp->prof_time_self; + cfp->iseq->profile.time_self = cfp->prof_time_self - cfp->prof_time_chld; + } + else if (0 /* c method? 
*/) {
+
+    }
+#endif
+    th->cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(th->cfp);
+
+    if (VMDEBUG == 2) {
+        SDR();
+    }
+}
+
+/* method dispatch */
+
+#define VM_CALLEE_SETUP_ARG(ret, th, iseq, orig_argc, orig_argv, block) \
+    if (LIKELY(iseq->arg_simple & 0x01)) { \
+        /* simple check */ \
+        if (orig_argc != iseq->argc) { \
+            rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", orig_argc, iseq->argc); \
+        } \
+        ret = 0; \
+    } \
+    else { \
+        ret = vm_callee_setup_arg_complex(th, iseq, orig_argc, orig_argv, block); \
+    }
+
+static inline int
+vm_callee_setup_arg_complex(rb_thread_t *th, const rb_iseq_t * iseq,
+                            int orig_argc, VALUE * orig_argv,
+                            const rb_block_t **block)
+{
+    const int m = iseq->argc;
+    int argc = orig_argc;
+    VALUE *argv = orig_argv;
+    int opt_pc = 0;     /* returned to the caller, which adds it to iseq->iseq_encoded as the entry pc */
+
+    th->mark_stack_len = argc + iseq->arg_size;
+
+    /* mandatory */
+    if (argc < (m + iseq->arg_post_len)) { /* check with post arg */
+        rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)",
+                 argc, m + iseq->arg_post_len);
+    }
+
+    argv += m;
+    argc -= m;
+
+    /* post arguments */
+    if (iseq->arg_post_len) {
+        if (!(orig_argc < iseq->arg_post_start)) {
+            VALUE *new_argv = ALLOCA_N(VALUE, argc);
+            MEMCPY(new_argv, argv, VALUE, argc);
+            argv = new_argv;
+        }
+
+        MEMCPY(&orig_argv[iseq->arg_post_start], &argv[argc -= iseq->arg_post_len],
+               VALUE, iseq->arg_post_len);
+    }
+
+    /* opt arguments */
+    if (iseq->arg_opts) {
+        const int opts = iseq->arg_opts - 1 /* no opt */;
+
+        if (iseq->arg_rest == -1 && argc > opts) {
+            rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)",
+                     orig_argc, m + opts + iseq->arg_post_len);
+        }
+
+        if (argc > opts) {
+            argc -= opts;
+            argv += opts;
+            opt_pc = iseq->arg_opt_table[opts]; /* no opt */
+        }
+        else {
+            int i;
+            for (i = argc; i<opts; i++) {       /* nil-fill the optional slots the caller did not supply */
+                orig_argv[i + m] = Qnil;
+            }
+            opt_pc = iseq->arg_opt_table[argc]; /* table entry chosen by the number of optionals supplied */
+            argc = 0;
+        }
+    }
+
+    /* rest arguments */
+    if (iseq->arg_rest != -1) {
+        orig_argv[iseq->arg_rest] = rb_ary_new4(argc, argv);
+        argc = 0;
+    }
+
+    /* block arguments */
+    if (block && iseq->arg_block !=
-1) { + VALUE blockval = Qnil; + const rb_block_t *blockptr = *block; + + if (argc != 0) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", + orig_argc, m + iseq->arg_post_len); + } + + if (blockptr) { + /* make Proc object */ + if (blockptr->proc == 0) { + rb_proc_t *proc; + blockval = rb_vm_make_proc(th, blockptr, rb_cProc); + GetProcPtr(blockval, proc); + *block = &proc->block; + } + else { + blockval = blockptr->proc; + } + } + + orig_argv[iseq->arg_block] = blockval; /* Proc or nil */ + } + + th->mark_stack_len = 0; + return opt_pc; +} + +static inline int +caller_setup_args(const rb_thread_t *th, rb_control_frame_t *cfp, VALUE flag, + int argc, rb_iseq_t *blockiseq, rb_block_t **block) +{ + rb_block_t *blockptr = 0; + + if (block) { + if (flag & VM_CALL_ARGS_BLOCKARG_BIT) { + rb_proc_t *po; + VALUE proc; + + proc = *(--cfp->sp); + + if (proc != Qnil) { + if (!rb_obj_is_proc(proc)) { + VALUE b = rb_check_convert_type(proc, T_DATA, "Proc", "to_proc"); + if (NIL_P(b) || !rb_obj_is_proc(b)) { + rb_raise(rb_eTypeError, + "wrong argument type %s (expected Proc)", + rb_obj_classname(proc)); + } + proc = b; + } + GetProcPtr(proc, po); + blockptr = &po->block; + RUBY_VM_GET_BLOCK_PTR_IN_CFP(cfp)->proc = proc; + *block = blockptr; + } + } + else if (blockiseq) { + blockptr = RUBY_VM_GET_BLOCK_PTR_IN_CFP(cfp); + blockptr->iseq = blockiseq; + blockptr->proc = 0; + *block = blockptr; + } + } + + /* expand top of stack? 
*/ + if (flag & VM_CALL_ARGS_SPLAT_BIT) { + VALUE ary = *(cfp->sp - 1); + VALUE *ptr; + int i; + VALUE tmp = rb_check_convert_type(ary, T_ARRAY, "Array", "to_a"); + + if (NIL_P(tmp)) { + /* do nothing */ + } + else { + int len = RARRAY_LEN(tmp); + ptr = RARRAY_PTR(tmp); + cfp->sp -= 1; + + CHECK_STACK_OVERFLOW(cfp, len); + + for (i = 0; i < len; i++) { + *cfp->sp++ = ptr[i]; + } + argc += i-1; + } + } + + return argc; +} + +static inline VALUE +call_cfunc(VALUE (*func)(), VALUE recv, + int len, int argc, const VALUE *argv) +{ + /* printf("len: %d, argc: %d\n", len, argc); */ + + if (len >= 0 && argc != len) { + rb_raise(rb_eArgError, "wrong number of arguments(%d for %d)", + argc, len); + } + + switch (len) { + case -2: + return (*func) (recv, rb_ary_new4(argc, argv)); + break; + case -1: + return (*func) (argc, argv, recv); + break; + case 0: + return (*func) (recv); + break; + case 1: + return (*func) (recv, argv[0]); + break; + case 2: + return (*func) (recv, argv[0], argv[1]); + break; + case 3: + return (*func) (recv, argv[0], argv[1], argv[2]); + break; + case 4: + return (*func) (recv, argv[0], argv[1], argv[2], argv[3]); + break; + case 5: + return (*func) (recv, argv[0], argv[1], argv[2], argv[3], argv[4]); + break; + case 6: + return (*func) (recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5]); + break; + case 7: + return (*func) (recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5], argv[6]); + break; + case 8: + return (*func) (recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5], argv[6], argv[7]); + break; + case 9: + return (*func) (recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5], argv[6], argv[7], argv[8]); + break; + case 10: + return (*func) (recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5], argv[6], argv[7], argv[8], argv[9]); + break; + case 11: + return (*func) (recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5], argv[6], argv[7], argv[8], argv[9], + argv[10]); + break; + case 12: + 
return (*func) (recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5], argv[6], argv[7], argv[8], argv[9], + argv[10], argv[11]); + break; + case 13: + return (*func) (recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5], argv[6], argv[7], argv[8], argv[9], argv[10], + argv[11], argv[12]); + break; + case 14: + return (*func) (recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5], argv[6], argv[7], argv[8], argv[9], argv[10], + argv[11], argv[12], argv[13]); + break; + case 15: + return (*func) (recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5], argv[6], argv[7], argv[8], argv[9], argv[10], + argv[11], argv[12], argv[13], argv[14]); + break; + default: + rb_raise(rb_eArgError, "too many arguments(%d)", len); + break; + } + return Qnil; /* not reached */ +} + +static inline VALUE +vm_call_cfunc(rb_thread_t *th, rb_control_frame_t *reg_cfp, + int num, ID id, ID oid, VALUE recv, VALUE klass, + VALUE flag, const NODE *mn, const rb_block_t *blockptr) +{ + VALUE val = 0; + int state = 0; + + EXEC_EVENT_HOOK(th, RUBY_EVENT_C_CALL, recv, id, klass); + TH_PUSH_TAG(th); + if (th->event_flags & (RUBY_EVENT_C_RETURN | RUBY_EVENT_VM)) { + state = TH_EXEC_TAG(); + } + else { + _th->tag = _tag.prev; + } + if (state == 0) { + rb_control_frame_t *cfp = + vm_push_frame(th, 0, VM_FRAME_MAGIC_CFUNC, + recv, (VALUE) blockptr, 0, reg_cfp->sp, 0, 1); + + cfp->method_id = oid; + cfp->method_class = klass; + + reg_cfp->sp -= num + 1; + + val = call_cfunc(mn->nd_cfnc, recv, mn->nd_argc, num, reg_cfp->sp + 1); + + if (reg_cfp != th->cfp + 1) { + rb_bug("cfp consistency error - send"); + } + + vm_pop_frame(th); + } + TH_POP_TAG(); + EXEC_EVENT_HOOK(th, RUBY_EVENT_C_RETURN, recv, id, klass); + if (state) TH_JUMP_TAG(th, state); + + return val; +} + +static inline VALUE +vm_call_bmethod(rb_thread_t *th, ID id, VALUE procval, VALUE recv, + VALUE klass, int argc, VALUE *argv, rb_block_t *blockptr) +{ + rb_control_frame_t *cfp = th->cfp; + rb_proc_t *proc; + VALUE 
val; + + /* control block frame */ + (cfp-2)->method_id = id; + (cfp-2)->method_class = klass; + + GetProcPtr(procval, proc); + val = rb_vm_invoke_proc(th, proc, recv, argc, argv, blockptr); + return val; +} + +static inline void +vm_method_missing_args(rb_thread_t *th, VALUE *argv, + int num, rb_block_t *blockptr, int opt) +{ + rb_control_frame_t * const reg_cfp = th->cfp; + MEMCPY(argv, STACK_ADDR_FROM_TOP(num + 1), VALUE, num + 1); + th->method_missing_reason = opt; + th->passed_block = blockptr; + POPN(num + 1); +} + +static inline VALUE +vm_method_missing(rb_thread_t *th, ID id, VALUE recv, + int num, rb_block_t *blockptr, int opt) +{ + VALUE *argv = ALLOCA_N(VALUE, num + 1); + vm_method_missing_args(th, argv, num, blockptr, opt); + argv[0] = ID2SYM(id); + return rb_funcall2(recv, idMethodMissing, num + 1, argv); +} + +static inline void +vm_setup_method(rb_thread_t *th, rb_control_frame_t *cfp, + const int argc, const rb_block_t *blockptr, const VALUE flag, + const VALUE iseqval, const VALUE recv) +{ + rb_iseq_t *iseq; + int opt_pc, i; + VALUE *sp, *rsp = cfp->sp - argc; + + /* TODO: eliminate it */ + GetISeqPtr(iseqval, iseq); + VM_CALLEE_SETUP_ARG(opt_pc, th, iseq, argc, rsp, &blockptr); + + /* stack overflow check */ + CHECK_STACK_OVERFLOW(cfp, iseq->stack_max); + + sp = rsp + iseq->arg_size; + + if (LIKELY(!(flag & VM_CALL_TAILCALL_BIT))) { + if (0) printf("local_size: %d, arg_size: %d\n", + iseq->local_size, iseq->arg_size); + + /* clear local variables */ + for (i = 0; i < iseq->local_size - iseq->arg_size; i++) { + *sp++ = Qnil; + } + + vm_push_frame(th, iseq, + VM_FRAME_MAGIC_METHOD, recv, (VALUE) blockptr, + iseq->iseq_encoded + opt_pc, sp, 0, 0); + + cfp->sp = rsp - 1 /* recv */; + } + else { + VALUE *p_rsp; + th->cfp++; /* pop cf */ + p_rsp = th->cfp->sp; + + /* copy arguments */ + for (i=0; i < (sp - rsp); i++) { + p_rsp[i] = rsp[i]; + } + + sp -= rsp - p_rsp; + + /* clear local variables */ + for (i = 0; i < iseq->local_size - iseq->arg_size; 
i++) { + *sp++ = Qnil; + } + + vm_push_frame(th, iseq, + VM_FRAME_MAGIC_METHOD, recv, (VALUE) blockptr, + iseq->iseq_encoded + opt_pc, sp, 0, 0); + } +} + +static inline VALUE +vm_call_method(rb_thread_t * const th, rb_control_frame_t * const cfp, + const int num, rb_block_t * const blockptr, const VALUE flag, + const ID id, const NODE * mn, const VALUE recv) +{ + VALUE val; + + start_method_dispatch: + + if (mn != 0) { + if ((mn->nd_noex == 0)) { + /* dispatch method */ + NODE *node; + + normal_method_dispatch: + + node = mn->nd_body; + + switch (nd_type(node)) { + case RUBY_VM_METHOD_NODE:{ + vm_setup_method(th, cfp, num, blockptr, flag, (VALUE)node->nd_body, recv); + return Qundef; + } + case NODE_CFUNC:{ + val = vm_call_cfunc(th, cfp, num, id, (ID)mn->nd_file, recv, mn->nd_clss, flag, node, blockptr); + break; + } + case NODE_ATTRSET:{ + val = rb_ivar_set(recv, node->nd_vid, *(cfp->sp - 1)); + cfp->sp -= 2; + break; + } + case NODE_IVAR:{ + if (num != 0) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", + num); + } + val = rb_attr_get(recv, node->nd_vid); + cfp->sp -= 1; + break; + } + case NODE_BMETHOD:{ + VALUE *argv = ALLOCA_N(VALUE, num); + MEMCPY(argv, cfp->sp - num, VALUE, num); + cfp->sp += - num - 1; + val = vm_call_bmethod(th, (ID)mn->nd_file, node->nd_cval, recv, mn->nd_clss, num, argv, blockptr); + break; + } + case NODE_ZSUPER:{ + VALUE klass; + klass = RCLASS_SUPER(mn->nd_clss); + mn = rb_method_node(klass, id); + + if (mn != 0) { + goto normal_method_dispatch; + } + else { + goto start_method_dispatch; + } + } + default:{ + printf("node: %s\n", ruby_node_name(nd_type(node))); + rb_bug("eval_invoke_method: unreachable"); + /* unreachable */ + break; + } + } + } + else { + int noex_safe; + + if (!(flag & VM_CALL_FCALL_BIT) && + (mn->nd_noex & NOEX_MASK) & NOEX_PRIVATE) { + int stat = NOEX_PRIVATE; + + if (flag & VM_CALL_VCALL_BIT) { + stat |= NOEX_VCALL; + } + val = vm_method_missing(th, id, recv, num, blockptr, stat); + } + else if 
(((mn->nd_noex & NOEX_MASK) & NOEX_PROTECTED) && + !(flag & VM_CALL_SEND_BIT)) { + VALUE defined_class = mn->nd_clss; + + if (TYPE(defined_class) == T_ICLASS) { + defined_class = RBASIC(defined_class)->klass; + } + + if (!rb_obj_is_kind_of(cfp->self, rb_class_real(defined_class))) { + val = vm_method_missing(th, id, recv, num, blockptr, NOEX_PROTECTED); + } + else { + goto normal_method_dispatch; + } + } + else if ((noex_safe = NOEX_SAFE(mn->nd_noex)) > th->safe_level && + (noex_safe > 2)) { + rb_raise(rb_eSecurityError, "calling insecure method: %s", rb_id2name(id)); + } + else { + goto normal_method_dispatch; + } + } + } + else { + /* method missing */ + int stat = 0; + if (flag & VM_CALL_VCALL_BIT) { + stat |= NOEX_VCALL; + } + if (flag & VM_CALL_SUPER_BIT) { + stat |= NOEX_SUPER; + } + if (id == idMethodMissing) { + VALUE *argv = ALLOCA_N(VALUE, num); + vm_method_missing_args(th, argv, num - 1, 0, stat); + rb_raise_method_missing(th, num, argv, recv, stat); + } + else { + val = vm_method_missing(th, id, recv, num, blockptr, stat); + } + } + + RUBY_VM_CHECK_INTS(); + return val; +} + +static inline void +vm_send_optimize(rb_control_frame_t * const reg_cfp, NODE ** const mn, + rb_num_t * const flag, rb_num_t * const num, + ID * const id, const VALUE klass) +{ + if (*mn && nd_type((*mn)->nd_body) == NODE_CFUNC) { + NODE *node = (*mn)->nd_body; + extern VALUE rb_f_send(int argc, VALUE *argv, VALUE recv); + + if (node->nd_cfnc == rb_f_send) { + int i = *num - 1; + VALUE sym = TOPN(i); + *id = SYMBOL_P(sym) ? 
SYM2ID(sym) : rb_to_id(sym); + + /* shift arguments */ + if (i > 0) { + MEMMOVE(&TOPN(i), &TOPN(i-1), VALUE, i); + } + + *mn = rb_method_node(klass, *id); + *num -= 1; + DEC_SP(1); + *flag |= VM_CALL_FCALL_BIT; + } + } +} + +/* yield */ + +static inline int +block_proc_is_lambda(const VALUE procval) +{ + rb_proc_t *proc; + + if (procval) { + GetProcPtr(procval, proc); + return proc->is_lambda; + } + else { + return 0; + } +} + +static inline VALUE +vm_yield_with_cfunc(rb_thread_t *th, const rb_block_t *block, + VALUE self, int argc, const VALUE *argv, + const rb_block_t *blockargptr) +{ + NODE *ifunc = (NODE *) block->iseq; + VALUE val, arg, blockarg; + int lambda = block_proc_is_lambda(block->proc); + + if (lambda) { + arg = rb_ary_new4(argc, argv); + } + else if (argc == 0) { + arg = Qnil; + } + else { + arg = argv[0]; + } + + if (blockargptr) { + if (blockargptr->proc) { + blockarg = blockargptr->proc; + } + else { + blockarg = rb_vm_make_proc(th, blockargptr, rb_cProc); + } + } + else { + blockarg = Qnil; + } + + vm_push_frame(th, 0, VM_FRAME_MAGIC_IFUNC, + self, (VALUE)block->dfp, + 0, th->cfp->sp, block->lfp, 1); + + val = (*ifunc->nd_cfnc) (arg, ifunc->nd_tval, argc, argv, blockarg); + + th->cfp++; + return val; +} + + +/*-- + * @brief on supplied all of optional, rest and post parameters. + * @pre iseq is block style (not lambda style) + */ +static inline int +vm_yield_setup_block_args_complex(rb_thread_t *th, const rb_iseq_t * iseq, + int argc, VALUE * argv) +{ + int opt_pc = 0; + int i; + const int m = iseq->argc; + const int r = iseq->arg_rest; + int len = iseq->arg_post_len; + int start = iseq->arg_post_start; + int rsize = argc > m ? argc - m : 0; /* # of arguments which did not consumed yet */ + int psize = rsize > len ? 
len : rsize; /* # of post arguments */ + int osize = 0; /* # of opt arguments */ + VALUE ary; + + /* reserves arguments for post parameters */ + rsize -= psize; + + if (iseq->arg_opts) { + const int opts = iseq->arg_opts - 1; + if (rsize > opts) { + osize = opts; + opt_pc = iseq->arg_opt_table[opts]; + } + else { + osize = rsize; + opt_pc = iseq->arg_opt_table[rsize]; + } + } + rsize -= osize; + + if (0) { + printf(" argc: %d\n", argc); + printf(" len: %d\n", len); + printf("start: %d\n", start); + printf("rsize: %d\n", rsize); + } + + if (r == -1) { + /* copy post argument */ + MEMMOVE(&argv[start], &argv[m+osize], VALUE, psize); + } + else { + ary = rb_ary_new4(rsize, &argv[r]); + + /* copy post argument */ + MEMMOVE(&argv[start], &argv[m+rsize+osize], VALUE, psize); + argv[r] = ary; + } + + for (i=psize; iargc; + VALUE ary, arg0; + int opt_pc = 0; + + th->mark_stack_len = argc; + + /* + * yield [1, 2] + * => {|a|} => a = [1, 2] + * => {|a, b|} => a, b = [1, 2] + */ + arg0 = argv[0]; + if (!(iseq->arg_simple & 0x02) && /* exclude {|a|} */ + (m + iseq->arg_post_len) > 0 && /* this process is meaningful */ + argc == 1 && !NIL_P(ary = rb_check_array_type(arg0))) { /* rhs is only an array */ + th->mark_stack_len = argc = RARRAY_LEN(ary); + + CHECK_STACK_OVERFLOW(th->cfp, argc); + + MEMCPY(argv, RARRAY_PTR(ary), VALUE, argc); + } + else { + argv[0] = arg0; + } + + for (i=argc; iarg_rest == -1 && iseq->arg_opts == 0) { + const int arg_size = iseq->arg_size; + if (arg_size < argc) { + /* + * yield 1, 2 + * => {|a|} # truncate + */ + th->mark_stack_len = argc = arg_size; + } + } + else { + int r = iseq->arg_rest; + + if (iseq->arg_post_len || + iseq->arg_opts) { /* TODO: implement simple version for (iseq->arg_post_len==0 && iseq->arg_opts > 0) */ + opt_pc = vm_yield_setup_block_args_complex(th, iseq, argc, argv); + } + else { + if (argc < r) { + /* yield 1 + * => {|a, b, *r|} + */ + for (i=argc; imark_stack_len = iseq->arg_size; + } + + /* {|&b|} */ + if 
(iseq->arg_block != -1) { + VALUE procval = Qnil; + + if (blockptr) { + procval = blockptr->proc; + } + + argv[iseq->arg_block] = procval; + } + + th->mark_stack_len = 0; + return opt_pc; +} + +static inline int +vm_yield_setup_args(rb_thread_t * const th, const rb_iseq_t *iseq, + int argc, VALUE *argv, + const rb_block_t *blockptr, int lambda) +{ + if (0) { /* for debug */ + printf(" argc: %d\n", argc); + printf("iseq argc: %d\n", iseq->argc); + printf("iseq opts: %d\n", iseq->arg_opts); + printf("iseq rest: %d\n", iseq->arg_rest); + printf("iseq post: %d\n", iseq->arg_post_len); + printf("iseq blck: %d\n", iseq->arg_block); + printf("iseq smpl: %d\n", iseq->arg_simple); + printf(" lambda: %s\n", lambda ? "true" : "false"); + } + + if (lambda) { + /* call as method */ + int opt_pc; + VM_CALLEE_SETUP_ARG(opt_pc, th, iseq, argc, argv, &blockptr); + return opt_pc; + } + else { + return vm_yield_setup_block_args(th, iseq, argc, argv, blockptr); + } +} + +static VALUE +vm_invoke_block(rb_thread_t *th, rb_control_frame_t *reg_cfp, rb_num_t num, rb_num_t flag) +{ + rb_block_t * const block = GET_BLOCK_PTR(); + rb_iseq_t *iseq; + int argc = num; + + if (GET_ISEQ()->local_iseq->type != ISEQ_TYPE_METHOD || block == 0) { + rb_vm_localjump_error("no block given (yield)", Qnil, 0); + } + iseq = block->iseq; + + argc = caller_setup_args(th, GET_CFP(), flag, argc, 0, 0); + + if (BUILTIN_TYPE(iseq) != T_NODE) { + int opt_pc; + const int arg_size = iseq->arg_size; + VALUE * const rsp = GET_SP() - argc; + SET_SP(rsp); + + CHECK_STACK_OVERFLOW(GET_CFP(), iseq->stack_max); + opt_pc = vm_yield_setup_args(th, iseq, argc, rsp, 0, + block_proc_is_lambda(block->proc)); + + vm_push_frame(th, iseq, + VM_FRAME_MAGIC_BLOCK, block->self, (VALUE) block->dfp, + iseq->iseq_encoded + opt_pc, rsp + arg_size, block->lfp, + iseq->local_size - arg_size); + + return Qundef; + } + else { + VALUE val = vm_yield_with_cfunc(th, block, block->self, argc, STACK_ADDR_FROM_TOP(argc), 0); + POPN(argc); /* TODO: 
should put before C/yield? */ + return val; + } +} + +/* svar */ + +static inline NODE * +lfp_svar_place(rb_thread_t *th, VALUE *lfp) +{ + VALUE *svar; + + if (lfp && th->local_lfp != lfp) { + svar = &lfp[-1]; + } + else { + svar = &th->local_svar; + } + if (NIL_P(*svar)) { + *svar = (VALUE)NEW_IF(Qnil, Qnil, Qnil); + } + return (NODE *)*svar; +} + +static VALUE +lfp_svar_get(rb_thread_t *th, VALUE *lfp, VALUE key) +{ + NODE *svar = lfp_svar_place(th, lfp); + + switch (key) { + case 0: + return svar->u1.value; + case 1: + return svar->u2.value; + default: { + const VALUE hash = svar->u3.value; + + if (hash == Qnil) { + return Qnil; + } + else { + return rb_hash_lookup(hash, key); + } + } + } +} + +static void +lfp_svar_set(rb_thread_t *th, VALUE *lfp, VALUE key, VALUE val) +{ + NODE *svar = lfp_svar_place(th, lfp); + + switch (key) { + case 0: + svar->u1.value = val; + return; + case 1: + svar->u2.value = val; + return; + default: { + VALUE hash = svar->u3.value; + + if (hash == Qnil) { + svar->u3.value = hash = rb_hash_new(); + } + rb_hash_aset(hash, key, val); + } + } +} + +static inline VALUE +vm_getspecial(rb_thread_t *th, VALUE *lfp, VALUE key, rb_num_t type) +{ + VALUE val; + + if (type == 0) { + VALUE k = key; + if (FIXNUM_P(key)) { + k = FIX2INT(key); + } + val = lfp_svar_get(th, lfp, k); + } + else { + VALUE backref = lfp_svar_get(th, lfp, 1); + + if (type & 0x01) { + switch (type >> 1) { + case '&': + val = rb_reg_last_match(backref); + break; + case '`': + val = rb_reg_match_pre(backref); + break; + case '\'': + val = rb_reg_match_post(backref); + break; + case '+': + val = rb_reg_match_last(backref); + break; + default: + rb_bug("unexpected back-ref"); + } + } + else { + val = rb_reg_nth_match(type >> 1, backref); + } + } + return val; +} + +static NODE * +vm_get_cref(const rb_iseq_t *iseq, const VALUE *lfp, const VALUE *dfp) +{ + NODE *cref = 0; + + while (1) { + if (lfp == dfp) { + cref = iseq->cref_stack; + break; + } + else if (dfp[-1] != Qnil) { + 
cref = (NODE *)dfp[-1];
+            break;
+        }
+        dfp = GET_PREV_DFP(dfp);
+    }
+
+    if (cref == 0) {
+        rb_bug("vm_get_cref: unreachable");
+    }
+    return cref;
+}
+/* Allocate a cref node for klass with visibility noex; link it to the Ruby-level caller's cref when one exists. */
+static NODE *
+vm_cref_push(rb_thread_t *th, VALUE klass, int noex)
+{
+    rb_control_frame_t *cfp = vm_get_ruby_level_caller_cfp(th, th->cfp);
+    NODE *cref = NEW_BLOCK(klass);
+    cref->nd_file = 0;
+    cref->nd_visi = noex;
+
+    if (cfp) {
+        cref->nd_next = vm_get_cref(cfp->iseq, cfp->lfp, cfp->dfp);
+    }
+
+    return cref;
+}
+/* Walk the frame's cref chain and return the first non-zero nd_clss encountered. */
+static inline VALUE
+vm_get_cbase(const rb_iseq_t *iseq, const VALUE *lfp, const VALUE *dfp)
+{
+    NODE *cref = vm_get_cref(iseq, lfp, dfp);
+    VALUE klass = Qundef;
+
+    while (cref) {
+        if ((klass = cref->nd_clss) != 0) {
+            break;
+        }
+        cref = cref->nd_next;
+    }
+
+    return klass;
+}
+/* Raise TypeError unless klass is a T_CLASS or T_MODULE object. */
+static inline void
+vm_check_if_namespace(VALUE klass)
+{
+    switch (TYPE(klass)) {
+      case T_CLASS:
+      case T_MODULE:
+        break;
+      default:
+        rb_raise(rb_eTypeError, "%s is not a class/module",
+                 RSTRING_PTR(rb_inspect(klass)));
+    }
+}
+/* Resolve constant id lexically (orig_klass nil) or under orig_klass; with is_defined set, report definedness instead of the value. */
+static inline VALUE
+vm_get_ev_const(rb_thread_t *th, const rb_iseq_t *iseq,
+                VALUE orig_klass, ID id, int is_defined)
+{
+    VALUE val;
+
+    if (orig_klass == Qnil) {
+        /* in current lexical scope */
+        const NODE *root_cref = vm_get_cref(iseq, th->cfp->lfp, th->cfp->dfp);
+        const NODE *cref = root_cref;
+        VALUE klass = orig_klass;
+
+        while (cref && cref->nd_next) { /* the last cref of the chain is never searched here */
+            klass = cref->nd_clss;
+            cref = cref->nd_next;
+
+            if (!NIL_P(klass)) {
+                VALUE am = 0; /* class for which an autoload was already attempted */
+              search_continue:
+                if (RCLASS_IV_TBL(klass) &&
+                    st_lookup(RCLASS_IV_TBL(klass), id, &val)) {
+                    if (val == Qundef) { /* Qundef entry: run the autoload, then look again */
+                        if (am == klass) break; /* autoload already tried for this class: stop searching */
+                        am = klass;
+                        rb_autoload_load(klass, id);
+                        goto search_continue;
+                    }
+                    else {
+                        if (is_defined) {
+                            return 1;
+                        }
+                        else {
+                            return val;
+                        }
+                    }
+                }
+            }
+        }
+
+        /* search self */
+        klass = root_cref->nd_clss;
+        if (NIL_P(klass)) {
+            klass = CLASS_OF(th->cfp->self);
+        }
+
+        if (is_defined) {
+            return rb_const_defined(klass, id);
+        }
+        else {
+            return rb_const_get(klass, id);
+        }
+    }
+    else {
+        vm_check_if_namespace(orig_klass);
+        if (is_defined) {
+            return rb_const_defined_from(orig_klass, id);
+        }
+        else {
+            return rb_const_get_from(orig_klass, id);
+        }
+    }
+}
+/* Return the class used for class-variable access from cref, skipping nil and singleton-class entries; raises TypeError if none. */
+static inline VALUE
+vm_get_cvar_base(NODE *cref)
+{
+    VALUE klass;
+
+    while (cref && cref->nd_next &&
+           (NIL_P(cref->nd_clss) || FL_TEST(cref->nd_clss, FL_SINGLETON))) {
+        cref = cref->nd_next;
+
+        if (!cref->nd_next) { /* reached the end of the chain */
+            rb_warn("class variable access from toplevel");
+        }
+    }
+
+    klass = cref->nd_clss;
+
+    if (NIL_P(klass)) {
+        rb_raise(rb_eTypeError, "no class variables available");
+    }
+    return klass;
+}
+/* Look up the method node for id on klass, going through inline cache ic when OPT_INLINE_METHOD_CACHE is enabled. */
+static inline NODE *
+vm_method_search(VALUE id, VALUE klass, IC ic)
+{
+    NODE *mn;
+
+#if OPT_INLINE_METHOD_CACHE
+    {
+        if (LIKELY(klass == ic->ic_class) &&
+            LIKELY(GET_VM_STATE_VERSION() == ic->ic_vmstat)) { /* hit: same class and unchanged VM state version */
+            mn = ic->ic_method;
+        }
+        else { /* miss: do the full lookup and refill the cache entry */
+            mn = rb_method_node(klass, id);
+            ic->ic_class = klass;
+            ic->ic_method = mn;
+            ic->ic_vmstat = GET_VM_STATE_VERSION();
+        }
+    }
+#else
+    mn = rb_method_node(klass, id);
+#endif
+    return mn;
+}
+/* Class where a super lookup starts: RCLASS_SUPER(klass) for a class; for a module, the super of its T_ICLASS in recv's ancestry (rb_cObject if absent). */
+static inline VALUE
+vm_search_normal_superclass(VALUE klass, VALUE recv)
+{
+    if (BUILTIN_TYPE(klass) == T_CLASS) {
+        return RCLASS_SUPER(klass);
+    }
+    else if (BUILTIN_TYPE(klass) == T_MODULE) {
+        VALUE k = CLASS_OF(recv);
+        while (k) {
+            if (BUILTIN_TYPE(k) == T_ICLASS && RBASIC(k)->klass == klass) {
+                return RCLASS_SUPER(k);
+            }
+            k = RCLASS_SUPER(k);
+        }
+        return rb_cObject;
+    }
+    else {
+        rb_bug("vm_search_normal_superclass: should not be reach here");
+    }
+}
+/* Work out the method id and the start class for a super call issued from iseq ip; results are stored through idp and klassp. */
+static void
+vm_search_superclass(rb_control_frame_t *reg_cfp, rb_iseq_t *ip,
+                     VALUE recv, VALUE sigval,
+                     ID *idp, VALUE *klassp)
+{
+    ID id;
+    VALUE klass;
+
+    while (ip && !ip->klass) { /* climb parent iseqs until one that has a klass */
+        ip = ip->parent_iseq;
+    }
+
+    if (ip == 0) {
+        rb_raise(rb_eNoMethodError, "super called outside of method");
+    }
+
+    id = ip->defined_method_id;
+
+    if (ip != ip->local_iseq) {
+        /* defined by Module#define_method() */
+        rb_control_frame_t *lcfp = GET_CFP();
+
+        if (!sigval) {
+            /* zsuper */
+
rb_raise(rb_eRuntimeError, "implicit argument passing of super from method defined by define_method() is not supported. Specify all arguments explicitly."); + } + + while (lcfp->iseq != ip) { + VALUE *tdfp = GET_PREV_DFP(lcfp->dfp); + while (1) { + lcfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(lcfp); + if (lcfp->dfp == tdfp) { + break; + } + } + } + + id = lcfp->method_id; + klass = vm_search_normal_superclass(lcfp->method_class, recv); + } + else { + klass = vm_search_normal_superclass(ip->klass, recv); + } + + *idp = id; + *klassp = klass; +} + +static VALUE +vm_throw(rb_thread_t *th, rb_control_frame_t *reg_cfp, + rb_num_t throw_state, VALUE throwobj) +{ + rb_num_t state = throw_state & 0xff; + rb_num_t flag = throw_state & 0x8000; + rb_num_t level = throw_state >> 16; + + if (state != 0) { + VALUE *pt = 0; + int i; + if (flag != 0) { + pt = (void *) 1; + } + else { + if (state == TAG_BREAK) { + rb_control_frame_t *cfp = GET_CFP(); + VALUE *dfp = GET_DFP(); + int is_orphan = 1; + rb_iseq_t *base_iseq = GET_ISEQ(); + + search_parent: + if (cfp->iseq->type != ISEQ_TYPE_BLOCK) { + dfp = GC_GUARDED_PTR_REF((VALUE *) *dfp); + base_iseq = base_iseq->parent_iseq; + + while ((VALUE *) cfp < th->stack + th->stack_size) { + if (cfp->dfp == dfp) { + goto search_parent; + } + cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp); + } + rb_bug("VM (throw): can't find break base."); + } + + if (VM_FRAME_TYPE(cfp) == VM_FRAME_MAGIC_LAMBDA) { + /* lambda{... 
break ...} */ + is_orphan = 0; + pt = cfp->dfp; + state = TAG_RETURN; + } + else { + dfp = GC_GUARDED_PTR_REF((VALUE *) *dfp); + + while ((VALUE *)cfp < th->stack + th->stack_size) { + if (cfp->dfp == dfp) { + VALUE epc = epc = cfp->pc - cfp->iseq->iseq_encoded; + rb_iseq_t *iseq = cfp->iseq; + int i; + + for (i=0; icatch_table_size; i++) { + struct iseq_catch_table_entry *entry = &iseq->catch_table[i]; + + if (entry->type == CATCH_TYPE_BREAK && + entry->start < epc && entry->end >= epc) { + if (entry->cont == epc) { + goto found; + } + else { + break; + } + } + } + break; + + found: + pt = dfp; + is_orphan = 0; + break; + } + cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp); + } + } + + if (is_orphan) { + rb_vm_localjump_error("break from proc-closure", throwobj, TAG_BREAK); + } + } + else if (state == TAG_RETRY) { + pt = GC_GUARDED_PTR_REF((VALUE *) * GET_DFP()); + for (i = 0; i < level; i++) { + pt = GC_GUARDED_PTR_REF((VALUE *) * pt); + } + } + else if (state == TAG_RETURN) { + rb_control_frame_t *cfp = GET_CFP(); + VALUE *dfp = GET_DFP(); + VALUE * const lfp = GET_LFP(); + + /* check orphan and get dfp */ + while ((VALUE *) cfp < th->stack + th->stack_size) { + if (cfp->lfp == lfp) { + if (VM_FRAME_TYPE(cfp) == VM_FRAME_MAGIC_LAMBDA) { + VALUE *tdfp = dfp; + + while (lfp != tdfp) { + if (cfp->dfp == tdfp) { + /* in lambda */ + dfp = cfp->dfp; + goto valid_return; + } + tdfp = GC_GUARDED_PTR_REF((VALUE *)*dfp); + } + } + } + + if (cfp->dfp == lfp && cfp->iseq->type == ISEQ_TYPE_METHOD) { + dfp = lfp; + goto valid_return; + } + + cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp); + } + + rb_vm_localjump_error("unexpected return", throwobj, TAG_RETURN); + + valid_return: + pt = dfp; + } + else { + rb_bug("isns(throw): unsupport throw type"); + } + } + th->state = state; + return (VALUE)NEW_THROW_OBJECT(throwobj, (VALUE) pt, state); + } + else { + /* continue throw */ + VALUE err = throwobj; + + if (FIXNUM_P(err)) { + th->state = FIX2INT(err); + } + else if (SYMBOL_P(err)) { + 
th->state = TAG_THROW; + } + else if (BUILTIN_TYPE(err) == T_NODE) { + th->state = GET_THROWOBJ_STATE(err); + } + else { + th->state = TAG_RAISE; + /*th->state = FIX2INT(rb_ivar_get(err, idThrowState));*/ + } + return err; + } +} + +static inline void +vm_expandarray(rb_control_frame_t *cfp, VALUE ary, int num, int flag) +{ + int is_splat = flag & 0x01; + int space_size = num + is_splat; + VALUE *base = cfp->sp, *ptr; + volatile VALUE tmp_ary; + int len; + + if (TYPE(ary) != T_ARRAY) { + ary = rb_ary_to_ary(ary); + } + + cfp->sp += space_size; + + tmp_ary = ary; + ptr = RARRAY_PTR(ary); + len = RARRAY_LEN(ary); + + if (flag & 0x02) { + /* post: ..., nil ,ary[-1], ..., ary[0..-num] # top */ + int i = 0, j; + + if (len < num) { + for (i=0; i len) { + *bptr = rb_ary_new(); + } + else { + *bptr = rb_ary_new4(len - num, ptr + num); + } + } + } +} + +static inline int +check_cfunc(const NODE *mn, VALUE (*func)()) +{ + if (mn && nd_type(mn->nd_body) == NODE_CFUNC && + mn->nd_body->nd_cfnc == func) { + return 1; + } + else { + return 0; + } +} + +static +#ifndef NO_BIG_INLINE +inline +#endif +VALUE +opt_eq_func(VALUE recv, VALUE obj, IC ic) +{ + VALUE val = Qundef; + + if (FIXNUM_2_P(recv, obj) && + BASIC_OP_UNREDEFINED_P(BOP_EQ)) { + if (recv == obj) { + val = Qtrue; + } + else { + val = Qfalse; + } + } + else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) { + if (HEAP_CLASS_OF(recv) == rb_cFloat && + HEAP_CLASS_OF(obj) == rb_cFloat && + BASIC_OP_UNREDEFINED_P(BOP_EQ)) { + double a = RFLOAT_VALUE(recv); + double b = RFLOAT_VALUE(obj); + + if (isnan(a) || isnan(b)) { + val = Qfalse; + } + else if (a == b) { + val = Qtrue; + } + else { + val = Qfalse; + } + } + else if (HEAP_CLASS_OF(recv) == rb_cString && + HEAP_CLASS_OF(obj) == rb_cString && + BASIC_OP_UNREDEFINED_P(BOP_EQ)) { + val = rb_str_equal(recv, obj); + } + else { + NODE *mn = vm_method_search(idEq, CLASS_OF(recv), ic); + extern VALUE rb_obj_equal(VALUE obj1, VALUE obj2); + + if (check_cfunc(mn, 
rb_obj_equal)) {
+                return recv == obj ? Qtrue : Qfalse;
+            }
+        }
+    }
+
+    return val;
+}
+/* Iteration argument for opt_case_dispatch_i: obj is the value being matched, label receives the data of the matching key. */
+struct opt_case_dispatch_i_arg {
+    VALUE obj;
+    int label;
+};
+/* st iterator callback: call idEqq on key with arg->obj; on a true result store the entry's data in arg->label and stop. */
+static int
+opt_case_dispatch_i(st_data_t key, st_data_t data, struct opt_case_dispatch_i_arg *arg)
+{
+    if (RTEST(rb_funcall((VALUE)key, idEqq, 1, arg->obj))) {
+        arg->label = FIX2INT((VALUE)data);
+        return ST_STOP;
+    }
+    else {
+        return ST_CONTINUE;
+    }
+}
+
diff --git a/vm_insnhelper.h b/vm_insnhelper.h
new file mode 100644
index 0000000..6f31ce4
--- /dev/null
+++ b/vm_insnhelper.h
@@ -0,0 +1,196 @@
+/**********************************************************************
+
+  insnhelper.h - helper macros to implement each instructions
+
+  $Author: yugui $
+  created at: 04/01/01 15:50:34 JST
+
+  Copyright (C) 2004-2007 Koichi Sasada
+
+**********************************************************************/
+
+#ifndef RUBY_INSNHELPER_H
+#define RUBY_INSNHELPER_H
+
+/**
+ * VM Debug Level
+ *
+ * debug level:
+ *  0: no debug output
+ *  1: show instruction name
+ *  2: show stack frame when control stack frame is changed
+ *  3: show stack status
+ *  4: show register
+ *  5:
+ * 10: gc check
+ */
+
+#ifndef VMDEBUG
+#define VMDEBUG 0
+#endif
+
+#if 0
+#undef VMDEBUG
+#define VMDEBUG 3
+#endif
+/* Indexes into ruby_vm_redefined_flag[], one per operator with an optimized instruction; BOP_LAST_ is the entry count. */
+enum {
+    BOP_PLUS,
+    BOP_MINUS,
+    BOP_MULT,
+    BOP_DIV,
+    BOP_MOD,
+    BOP_EQ,
+    BOP_EQQ,
+    BOP_LT,
+    BOP_LE,
+    BOP_LTLT,
+    BOP_AREF,
+    BOP_ASET,
+    BOP_LENGTH,
+    BOP_SUCC,
+    BOP_GT,
+    BOP_GE,
+    BOP_NOT,
+    BOP_NEQ,
+
+    BOP_LAST_
+};
+
+extern char ruby_vm_redefined_flag[BOP_LAST_];
+extern VALUE ruby_vm_global_state_version;
+extern VALUE ruby_vm_const_missing_count;
+/* The state version is what vm_method_search() compares against ic_vmstat to validate an inline method cache entry. */
+#define GET_VM_STATE_VERSION() (ruby_vm_global_state_version)
+#define INC_VM_STATE_VERSION() \
+  (ruby_vm_global_state_version = (ruby_vm_global_state_version+1) & 0x8fffffff)
+/* NOTE(review): mask 0x8fffffff clears bits 28-30, so the counter wraps to 0 after 0x0fffffff - confirm this is intended. */
+
+/**********************************************************/
+/* deal with stack                                        */
+/**********************************************************/
+
+#define PUSH(x) (SET_SV(x), INC_SP(1)) /* store x at *sp, then advance sp */
+#define TOPN(n) (*(GET_SP()-(n)-1)) /* value n slots below the top; TOPN(0) is the top */
+#define POPN(n) (DEC_SP(n))
+#define POP()   (DEC_SP(1))
+#define STACK_ADDR_FROM_TOP(n) (GET_SP()-(n))
+
+#define GET_TOS()  (tos)	/* dummy */
+
+/**********************************************************/
+/* deal with registers                                    */
+/**********************************************************/
+
+#define REG_CFP (reg_cfp)
+#define REG_PC  (REG_CFP->pc)
+#define REG_SP  (REG_CFP->sp)
+#define REG_LFP (REG_CFP->lfp)
+#define REG_DFP (REG_CFP->dfp)
+/* Reload the control frame register from th->cfp; the pc/sp/lfp/dfp macros above all read through REG_CFP. */
+#define RESTORE_REGS() do { \
+  REG_CFP = th->cfp; \
+} while (0)
+
+#define REG_A   reg_a
+#define REG_B   reg_b
+/* With COLLECT_USAGE_ANALYSIS the helper records register (a, b) before yielding v; otherwise it is just v. */
+#ifdef COLLECT_USAGE_ANALYSIS
+#define USAGE_ANALYSIS_REGISTER_HELPER(a, b, v) \
+  (USAGE_ANALYSIS_REGISTER(a, b), (v))
+#else
+#define USAGE_ANALYSIS_REGISTER_HELPER(a, b, v) (v)
+#endif
+
+/* PC */
+#define GET_PC()           (USAGE_ANALYSIS_REGISTER_HELPER(0, 0, REG_PC))
+#define SET_PC(x)          (REG_PC = (USAGE_ANALYSIS_REGISTER_HELPER(0, 1, x)))
+#define GET_CURRENT_INSN() (*GET_PC())
+#define GET_OPERAND(n)     (GET_PC()[(n)])
+#define ADD_PC(n)          (SET_PC(REG_PC + (n)))
+
+#define GET_PC_COUNT()     (REG_PC - GET_ISEQ()->iseq_encoded)
+#define JUMP(dst)          (REG_PC += (dst))
+
+/* FP */
+#define GET_CFP()  (USAGE_ANALYSIS_REGISTER_HELPER(2, 0, REG_CFP))
+#define GET_LFP()  (USAGE_ANALYSIS_REGISTER_HELPER(3, 0, REG_LFP))
+#define SET_LFP(x) (REG_LFP = (USAGE_ANALYSIS_REGISTER_HELPER(3, 1, (x))))
+#define GET_DFP()  (USAGE_ANALYSIS_REGISTER_HELPER(4, 0, REG_DFP))
+#define SET_DFP(x) (REG_DFP = (USAGE_ANALYSIS_REGISTER_HELPER(4, 1, (x))))
+
+/* SP */
+#define GET_SP()   (USAGE_ANALYSIS_REGISTER_HELPER(1, 0, REG_SP))
+#define SET_SP(x)  (REG_SP  = (USAGE_ANALYSIS_REGISTER_HELPER(1, 1, (x))))
+#define INC_SP(x)  (REG_SP += (USAGE_ANALYSIS_REGISTER_HELPER(1, 1, (x))))
+#define DEC_SP(x)  (REG_SP -= (USAGE_ANALYSIS_REGISTER_HELPER(1, 1, (x))))
+#define SET_SV(x)  (*GET_SP() = (x))
+  /* set current stack value as x */
+
+#define GET_SP_COUNT() (REG_SP - th->stack)
+
+/* instruction sequence C struct */
+#define GET_ISEQ() (GET_CFP()->iseq)
+
+/**********************************************************/
+/* deal with variables                                    */
+/**********************************************************/
+/* Previous dynamic frame pointer: the word at dfp[0] with its two low bits masked off. */
+#define GET_PREV_DFP(dfp)                ((VALUE *)((dfp)[0] & ~0x03))
+
+#define GET_GLOBAL(entry)       rb_gvar_get((struct global_entry*)entry)
+#define SET_GLOBAL(entry, val)  rb_gvar_set((struct global_entry*)entry, val)
+
+#define GET_CONST_INLINE_CACHE(dst) ((IC) * (GET_PC() + (dst) + 1))
+
+/**********************************************************/
+/* deal with values                                       */
+/**********************************************************/
+
+#define GET_SELF() (USAGE_ANALYSIS_REGISTER_HELPER(5, 0, GET_CFP()->self))
+
+/**********************************************************/
+/* deal with control flow 2: method/iterator              */
+/**********************************************************/
+/* Copy nd_clss, nd_visi and nd_next from cref c2 into c1; c2 is evaluated once. */
+#define COPY_CREF(c1, c2) do {  \
+  NODE *__tmp_c2 = (c2); \
+  c1->nd_clss = __tmp_c2->nd_clss; \
+  c1->nd_visi = __tmp_c2->nd_visi; \
+  c1->nd_next = __tmp_c2->nd_next; \
+} while (0)
+/* Call through vm_call_method(); on Qundef reload registers and go to the next insn, else store the result in `val' (expects th and val in scope). */
+#define CALL_METHOD(num, blockptr, flag, id, mn, recv) do { \
+    VALUE v = vm_call_method(th, GET_CFP(), num, blockptr, flag, id, mn, recv); \
+    if (v == Qundef) { \
+	RESTORE_REGS(); \
+	NEXT_INSN(); \
+    } \
+    else { \
+	val = v; \
+    } \
+} while (0)
+
+#define GET_BLOCK_PTR() \
+  ((rb_block_t *)(GC_GUARDED_PTR_REF(GET_LFP()[0])))
+
+/**********************************************************/
+/* deal with control flow 3: exception                    */
+/**********************************************************/
+
+
+/**********************************************************/
+/* others                                                 */
+/**********************************************************/
+
+/* optimize insn */
+#define FIXNUM_2_P(a, b) ((a) & (b) & 1) /* non-zero only when bit 0 is set in both a and b */
+#define BASIC_OP_UNREDEFINED_P(op) (LIKELY(ruby_vm_redefined_flag[op] == 0))
+#define HEAP_CLASS_OF(obj) RBASIC(obj)->klass
+/* CALL_METHOD with no block and no flags, resolving the method node from recv's class. */
+#define CALL_SIMPLE_METHOD(num, id, recv) do { \
+    VALUE klass = CLASS_OF(recv); \
+    CALL_METHOD(num, 0, 0, id, rb_method_node(klass, id), recv); \
+} while (0)
+
+#endif /* RUBY_INSNHELPER_H */
diff --git a/vm_method.c b/vm_method.c
new file mode 100644
index 0000000..815de87
--- /dev/null
+++ b/vm_method.c
@@ -0,0 +1,1146 @@
+/*
+ * This file is included by vm.h
+ */
+
+#define CACHE_SIZE 0x800
+#define CACHE_MASK 0x7ff
+#define EXPR1(c,m) ((((c)>>3)^(m))&CACHE_MASK) /* slot index in cache[] for class c and method id m */
+
+static void rb_vm_check_redefinition_opt_method(const NODE *node);
+
+static ID object_id;
+static ID removed, singleton_removed, undefined, singleton_undefined;
+static ID added, singleton_added;
+
+struct cache_entry {		/* method hash table. */
+    ID mid;			/* method's id */
+    ID mid0;			/* method's original id */
+    VALUE klass;		/* receiver's class */
+    VALUE oklass;		/* original's class */
+    NODE *method;
+};
+/* Global method cache; a slot whose mid is 0 is treated as empty by the clearing functions below. */
+static struct cache_entry cache[CACHE_SIZE];
+#define ruby_running (GET_VM()->running)
+/* int ruby_running = 0; */
+/* Call rb_vm_change_state() and, if the VM is running, empty every slot of the method cache. */
+void
+rb_clear_cache(void)
+{
+    struct cache_entry *ent, *end;
+
+    rb_vm_change_state();
+
+    if (!ruby_running)
+	return;
+    ent = cache;
+    end = ent + CACHE_SIZE;
+    while (ent < end) {
+	ent->mid = 0;
+	ent++;
+    }
+}
+/* Like rb_clear_cache(), but only empties slots whose original class is klass and whose method id is id. */
+static void
+rb_clear_cache_for_undef(VALUE klass, ID id)
+{
+    struct cache_entry *ent, *end;
+
+    rb_vm_change_state();
+
+    if (!ruby_running)
+	return;
+    ent = cache;
+    end = ent + CACHE_SIZE;
+    while (ent < end) {
+	if (ent->oklass == klass && ent->mid == id) {
+	    ent->mid = 0;
+	}
+	ent++;
+    }
+}
+/* Like rb_clear_cache(), but only empties slots cached under method id id. */
+static void
+rb_clear_cache_by_id(ID id)
+{
+    struct cache_entry *ent, *end;
+
+    rb_vm_change_state();
+
+    if (!ruby_running)
+	return;
+    ent = cache;
+    end = ent + CACHE_SIZE;
+    while (ent < end) {
+	if (ent->mid == id) {
+	    ent->mid = 0;
+	}
+	ent++;
+    }
+}
+/* Like rb_clear_cache(), but only empties slots whose receiver class or original class is klass. */
+void
+rb_clear_cache_by_class(VALUE klass)
+{
+    struct cache_entry *ent, *end;
+
+    rb_vm_change_state();
+
+    if (!ruby_running)
+	return;
+    ent = cache;
+    end = ent + CACHE_SIZE;
+    while (ent < end) {
+	if (ent->klass == klass || ent->oklass == klass) {
+	    ent->mid = 0;
+	}
+	ent++;
+    }
+}
+
+void
+rb_add_method(VALUE klass, ID mid, NODE * node, int noex)
+{
+    NODE *body;
+    /* Register node as method mid of klass with visibility noex; a null node stores an empty entry (used by rb_undef). */
+    if (NIL_P(klass)) {
+	klass = rb_cObject;
+    }
+    if (rb_safe_level() >= 4 &&
+	(klass == rb_cObject || !OBJ_UNTRUSTED(klass))) {
+	rb_raise(rb_eSecurityError, "Insecure: can't define method");
+    }
+    if (!FL_TEST(klass, FL_SINGLETON) &&
+	node && nd_type(node) != NODE_ZSUPER &&
+	(mid == rb_intern("initialize") || mid == rb_intern("initialize_copy"))) {
+	noex = NOEX_PRIVATE | noex; /* initialize / initialize_copy are forced private */
+    }
+    else if (FL_TEST(klass, FL_SINGLETON) && node
+	     && nd_type(node) == NODE_CFUNC && mid == rb_intern("allocate")) {
+	rb_warn
+	    ("defining %s.allocate is deprecated; use rb_define_alloc_func()",
+	     rb_class2name(rb_iv_get(klass, "__attached__")));
+	mid = ID_ALLOCATOR;
+    }
+    if (OBJ_FROZEN(klass)) {
+	rb_error_frozen("class/module");
+    }
+    rb_clear_cache_by_id(mid);
+
+    /*
+     * NODE_METHOD (NEW_METHOD(body, klass, vis)):
+     *   nd_file : original id   // RBASIC()->klass (TODO: dirty hack)
+     *   nd_body : method body   // (2) // mark
+     *   nd_clss : klass         // (1) // mark
+     *   nd_noex : visibility    // (3)
+     *
+     * NODE_FBODY (NEW_FBODY(method, alias)):
+     *   nd_body : method (NODE_METHOD)  // (2) // mark
+     *   nd_oid  : original id           // (1)
+     *   nd_cnt  : alias count           // (3)
+     */
+    if (node) {
+	NODE *method = NEW_METHOD(node, klass, NOEX_WITH_SAFE(noex));
+	method->nd_file = (void *)mid;
+	body = NEW_FBODY(method, mid);
+    }
+    else {
+	body = 0;
+    }
+
+    {
+	/* check re-definition */
+	st_data_t data;
+	NODE *old_node;
+
+	if (st_lookup(RCLASS_M_TBL(klass), mid, &data)) {
+	    old_node = (NODE *)data;
+	    if (old_node) {
+		if (nd_type(old_node->nd_body->nd_body) == NODE_CFUNC) {
+		    rb_vm_check_redefinition_opt_method(old_node);
+		}
+		if (RTEST(ruby_verbose) && node && old_node->nd_cnt == 0 && old_node->nd_body) {
+		    rb_warning("method redefined; discarding old %s", rb_id2name(mid));
+		}
+	    }
+	}
+	if (klass == rb_cObject && node && mid == idInitialize) {
+	    rb_warn("redefining Object#initialize may cause infinite loop");
+	}
+
+	if (mid == object_id || mid == id__send__) {
+	    if (node && nd_type(node) == RUBY_VM_METHOD_NODE) {
+		rb_warn("redefining `%s' may cause serious problem",
+			rb_id2name(mid));
+	    }
+	}
+    }
+
+    st_insert(RCLASS_M_TBL(klass), mid, (st_data_t) body);
+    /* notify through the `added' / `singleton_added' ids, but only once the VM is running */
+    if (node && mid != ID_ALLOCATOR && ruby_running) {
+	if (FL_TEST(klass, FL_SINGLETON)) {
+	    rb_funcall(rb_iv_get(klass, "__attached__"), singleton_added, 1,
+		       ID2SYM(mid));
+	}
+	else {
+	    rb_funcall(klass, added, 1, ID2SYM(mid));
+	}
+    }
+}
+/* Install func as the private allocator (ID_ALLOCATOR) on klass's singleton class; klass must be a T_CLASS. */
+void
+rb_define_alloc_func(VALUE klass, VALUE (*func)(VALUE))
+{
+    Check_Type(klass, T_CLASS);
+    rb_add_method(rb_singleton_class(klass), ID_ALLOCATOR, NEW_CFUNC(func, 0),
+		  NOEX_PRIVATE);
+}
+/* Store an empty ID_ALLOCATOR entry on klass's singleton class, undefining its allocator. */
+void
+rb_undef_alloc_func(VALUE klass)
+{
+    Check_Type(klass, T_CLASS);
+    rb_add_method(rb_singleton_class(klass), ID_ALLOCATOR, 0, NOEX_UNDEF);
+}
+/* Return klass's allocator function, or 0 when none is found or it is not a C function. */
+rb_alloc_func_t
+rb_get_alloc_func(VALUE klass)
+{
+    NODE *n;
+    Check_Type(klass, T_CLASS);
+    n = rb_method_node(CLASS_OF(klass), ID_ALLOCATOR);
+    if (!n) return 0;
+    if (nd_type(n) != NODE_METHOD) return 0;
+    n = n->nd_body;
+    if (nd_type(n) != NODE_CFUNC) return 0;
+    return (rb_alloc_func_t)n->nd_cfnc;
+}
+/* Find the method table entry for id in klass or its superclasses; *klassp (if given) gets the class where it was found. */
+static NODE *
+search_method(VALUE klass, ID id, VALUE *klassp)
+{
+    st_data_t body;
+
+    if (!klass) {
+	return 0;
+    }
+
+    while (!st_lookup(RCLASS_M_TBL(klass), id, &body)) {
+	klass = RCLASS_SUPER(klass);
+	if (!klass)
+	    return 0;
+    }
+
+    if (klassp) {
+	*klassp = klass;
+    }
+
+    return (NODE *)body;
+}
+
+/*
+ * search method body (NODE_METHOD)
+ *   with    : klass and id
+ *   without : method cache
+ *
+ * if you need method node with method cache, use
+ * rb_method_node()
+ */
+NODE *
+rb_get_method_body(VALUE klass, ID id, ID *idp)
+{
+    NODE *volatile fbody, *body;
+    NODE *method;
+
+    if ((fbody = search_method(klass, id, 0)) == 0 || !fbody->nd_body) {
+	/* store empty info in cache */
+	struct cache_entry *ent;
+	ent = cache + EXPR1(klass, id);
+	ent->klass = klass;
+	ent->mid = ent->mid0 = id;
+	ent->method = 0;
+	ent->oklass = 0;
+	return 0;
+    }
+
+    method = fbody->nd_body;
+
+    if (ruby_running) {
+	/* store in cache */
+	struct cache_entry *ent;
+	ent = cache + EXPR1(klass, id);
+	ent->klass = klass;
+	ent->mid = id;
+	ent->mid0 = fbody->nd_oid;
+	ent->method = body = method;
+	ent->oklass = method->nd_clss;
+    }
+    else {
+	body = method;
+    }
+
+    if (idp) {
+	*idp = fbody->nd_oid; /* report the original (pre-alias) id */
+    }
+
+    return body;
+}
+/* Cached lookup: answer from the method cache when the slot matches (klass, id), else fall back to rb_get_method_body(). */
+NODE *
+rb_method_node(VALUE klass, ID id)
+{
+    struct cache_entry *ent;
+
+    ent = cache + EXPR1(klass, id);
+    if (ent->mid == id && ent->klass == klass) {
+	if (ent->method) return ent->method;
+	return 0; /* cached "no such method" */
+    }
+
+    return rb_get_method_body(klass, id, 0);
+}
+/* Delete method mid from klass's own method table; raises NameError if klass itself has no such method body. */
+void
+rb_remove_method_id(VALUE klass, ID mid)
+{
+    st_data_t data;
+    NODE *body = 0;
+
+    if (klass == rb_cObject) {
+	rb_secure(4);
+    }
+    if (rb_safe_level() >= 4 && !OBJ_UNTRUSTED(klass)) {
+	rb_raise(rb_eSecurityError, "Insecure: can't remove method");
+    }
+    if (OBJ_FROZEN(klass))
+	rb_error_frozen("class/module");
+    if (mid == object_id || mid == id__send__ || mid == idInitialize) {
+	rb_warn("removing `%s' may cause serious problem", rb_id2name(mid));
+    }
+    if (st_lookup(RCLASS_M_TBL(klass), mid, &data)) {
+	body = (NODE *)data;
+	if (!body || !body->nd_body) body = 0; /* an empty entry counts as "not defined" and is left in the table */
+	else {
+	    st_delete(RCLASS_M_TBL(klass), &mid, &data);
+	}
+    }
+    if (!body) {
+	rb_name_error(mid, "method `%s' not defined in %s",
+		      rb_id2name(mid), rb_class2name(klass));
+    }
+
+    if (nd_type(body->nd_body->nd_body) == NODE_CFUNC) {
+	rb_vm_check_redefinition_opt_method(body);
+    }
+
+    rb_clear_cache_for_undef(klass, mid);
+    if (FL_TEST(klass, FL_SINGLETON)) {
+	rb_funcall(rb_iv_get(klass, "__attached__"), singleton_removed, 1,
+		   ID2SYM(mid));
+    }
+    else {
+	rb_funcall(klass, removed, 1, ID2SYM(mid));
+    }
+}
+
+#define remove_method(klass, mid) rb_remove_method_id(klass, mid)
+/* C-string convenience wrapper around rb_remove_method_id(). */
+void
+rb_remove_method(VALUE klass, const char *name)
+{
+    remove_method(klass, rb_intern(name));
+}
+
+/*
+ *  call-seq:
+ *     remove_method(symbol)   => self
+ *
+ *  Removes the method identified by
_symbol_ from the current
+ *  class. For an example, see Module.undef_method.
+ */
+
+static VALUE
+rb_mod_remove_method(int argc, VALUE *argv, VALUE mod)
+{
+    int i;
+
+    for (i = 0; i < argc; i++) {
+	remove_method(mod, rb_to_id(argv[i]));
+    }
+    return mod;
+}
+
+#undef rb_disable_super
+#undef rb_enable_super
+/* Kept only as a no-op. */
+void
+rb_disable_super(VALUE klass, const char *name)
+{
+    /* obsolete - no use */
+}
+/* Does nothing except emit an obsolescence warning. */
+void
+rb_enable_super(VALUE klass, const char *name)
+{
+    rb_warning("rb_enable_super() is obsolete");
+}
+/* Give method name of klass visibility noex: patch the entry when klass defines it, else add a ZSUPER stub to klass. */
+static void
+rb_export_method(VALUE klass, ID name, ID noex)
+{
+    NODE *fbody;
+    VALUE origin;
+
+    if (klass == rb_cObject) {
+	rb_secure(4);
+    }
+    fbody = search_method(klass, name, &origin);
+    if (!fbody && TYPE(klass) == T_MODULE) {
+	fbody = search_method(rb_cObject, name, &origin); /* modules also see Object's methods */
+    }
+    if (!fbody || !fbody->nd_body) {
+	rb_print_undef(klass, name, 0);
+    }
+    if (fbody->nd_body->nd_noex != noex) {
+	if (nd_type(fbody->nd_body->nd_body) == NODE_CFUNC) {
+	    rb_vm_check_redefinition_opt_method(fbody);
+	}
+	if (klass == origin) {
+	    fbody->nd_body->nd_noex = noex;
+	}
+	else {
+	    rb_add_method(klass, name, NEW_ZSUPER(), noex);
+	}
+    }
+}
+/* Qtrue if klass responds to id, Qfalse otherwise; with ex non-zero a private method counts as not bound. */
+int
+rb_method_boundp(VALUE klass, ID id, int ex)
+{
+    NODE *method;
+
+    if ((method = rb_method_node(klass, id)) != 0) {
+	if (ex && (method->nd_noex & NOEX_PRIVATE)) {
+	    return Qfalse;
+	}
+	return Qtrue;
+    }
+    return Qfalse;
+}
+/* Define a reader (read != 0) and/or writer (write != 0) on klass for ivar "@<id>"; ex != 0 takes visibility from the current scope. */
+void
+rb_attr(VALUE klass, ID id, int read, int write, int ex)
+{
+    const char *name;
+    ID attriv;
+    VALUE aname;
+    int noex;
+
+    if (!ex) {
+	noex = NOEX_PUBLIC;
+    }
+    else {
+	if (SCOPE_TEST(NOEX_PRIVATE)) {
+	    noex = NOEX_PRIVATE;
+	    rb_warning((SCOPE_CHECK(NOEX_MODFUNC)) ?
+		       "attribute accessor as module_function" :
+		       "private attribute?");
+	}
+	else if (SCOPE_TEST(NOEX_PROTECTED)) {
+	    noex = NOEX_PROTECTED;
+	}
+	else {
+	    noex = NOEX_PUBLIC;
+	}
+    }
+
+    if (!rb_is_local_id(id) && !rb_is_const_id(id)) {
+	rb_name_error(id, "invalid attribute name `%s'", rb_id2name(id));
+    }
+    name = rb_id2name(id);
+    if (!name) {
+	rb_raise(rb_eArgError, "argument needs to be symbol or string");
+    }
+    aname = rb_sprintf("@%s", name);
+    rb_enc_copy(aname, rb_id2str(id));
+    attriv = rb_intern_str(aname);
+    if (read) {
+	rb_add_method(klass, id, NEW_IVAR(attriv), noex);
+    }
+    if (write) {
+	rb_add_method(klass, rb_id_attrset(id), NEW_ATTRSET(attriv), noex);
+    }
+}
+/* Undefine id on klass by storing an empty entry in its method table; raises NameError when the method cannot be found. */
+void
+rb_undef(VALUE klass, ID id)
+{
+    VALUE origin;
+    NODE *body;
+
+    if (rb_vm_cbase() == rb_cObject && klass == rb_cObject) {
+	rb_secure(4);
+    }
+    if (rb_safe_level() >= 4 && !OBJ_UNTRUSTED(klass)) {
+	rb_raise(rb_eSecurityError, "Insecure: can't undef `%s'",
+		 rb_id2name(id));
+    }
+    rb_frozen_class_p(klass);
+    if (id == object_id || id == id__send__ || id == idInitialize) {
+	rb_warn("undefining `%s' may cause serious problem", rb_id2name(id));
+    }
+    body = search_method(klass, id, &origin);
+    if (!body || !body->nd_body) {
+	const char *s0 = " class"; /* word placed before the name in the error message */
+	VALUE c = klass;
+
+	if (FL_TEST(c, FL_SINGLETON)) {
+	    VALUE obj = rb_iv_get(klass, "__attached__");
+
+	    switch (TYPE(obj)) {
+	      case T_MODULE:
+	      case T_CLASS:
+		c = obj;
+		s0 = "";
+	    }
+	}
+	else if (TYPE(c) == T_MODULE) {
+	    s0 = " module";
+	}
+	rb_name_error(id, "undefined method `%s' for%s `%s'",
+		      rb_id2name(id), s0, rb_class2name(c));
+    }
+
+    rb_add_method(klass, id, 0, NOEX_PUBLIC);
+
+    if (FL_TEST(klass, FL_SINGLETON)) {
+	rb_funcall(rb_iv_get(klass, "__attached__"),
+		   singleton_undefined, 1, ID2SYM(id));
+    }
+    else {
+	rb_funcall(klass, undefined, 1, ID2SYM(id));
+    }
+}
+
+/*
+ *  call-seq:
+ *     undef_method(symbol)    => self
+ *
+ *  Prevents the current class from responding to calls to the named
+ *  method. Contrast this with remove_method, which deletes
+ *  the method from the particular class; Ruby will still search
+ *  superclasses and mixed-in modules for a possible receiver.
+ *
+ *     class Parent
+ *       def hello
+ *         puts "In parent"
+ *       end
+ *     end
+ *     class Child < Parent
+ *       def hello
+ *         puts "In child"
+ *       end
+ *     end
+ *
+ *
+ *     c = Child.new
+ *     c.hello
+ *
+ *
+ *     class Child
+ *       remove_method :hello  # remove from child, still in parent
+ *     end
+ *     c.hello
+ *
+ *
+ *     class Child
+ *       undef_method :hello   # prevent any calls to 'hello'
+ *     end
+ *     c.hello
+ *
+ *  produces:
+ *
+ *     In child
+ *     In parent
+ *     prog.rb:23: undefined method `hello' for #<Child:0x401b3bb4> (NoMethodError)
+ */
+
+static VALUE
+rb_mod_undef_method(int argc, VALUE *argv, VALUE mod)
+{
+    int i;
+    for (i = 0; i < argc; i++) {
+	rb_undef(mod, rb_to_id(argv[i]));
+    }
+    return mod;
+}
+
+/*
+ *  call-seq:
+ *     mod.method_defined?(symbol)    => true or false
+ *
+ *  Returns +true+ if the named method is defined by
+ *  _mod_ (or its included modules and, if _mod_ is a class,
+ *  its ancestors). Public and protected methods are matched.
+ *
+ *     module A
+ *       def method1()  end
+ *     end
+ *     class B
+ *       def method2()  end
+ *     end
+ *     class C < B
+ *       include A
+ *       def method3()  end
+ *     end
+ *
+ *     A.method_defined? :method1    #=> true
+ *     C.method_defined? "method1"   #=> true
+ *     C.method_defined? "method2"   #=> true
+ *     C.method_defined? "method3"   #=> true
+ *     C.method_defined? "method4"   #=> false
+ */
+
+static VALUE
+rb_mod_method_defined(VALUE mod, VALUE mid)
+{
+    return rb_method_boundp(mod, rb_to_id(mid), 1);
+}
+/* True when the visibility bits of x (under NOEX_MASK) equal f exactly. */
+#define VISI_CHECK(x,f) (((x)&NOEX_MASK) == (f))
+
+/*
+ *  call-seq:
+ *     mod.public_method_defined?(symbol)   => true or false
+ *
+ *  Returns +true+ if the named public method is defined by
+ *  _mod_ (or its included modules and, if _mod_ is a class,
+ *  its ancestors).
+ * + * module A + * def method1() end + * end + * class B + * protected + * def method2() end + * end + * class C < B + * include A + * def method3() end + * end + * + * A.method_defined? :method1 #=> true + * C.public_method_defined? "method1" #=> true + * C.public_method_defined? "method2" #=> false + * C.method_defined? "method2" #=> true + */ + +static VALUE +rb_mod_public_method_defined(VALUE mod, VALUE mid) +{ + ID id = rb_to_id(mid); + NODE *method; + + method = rb_method_node(mod, id); + if (method) { + if (VISI_CHECK(method->nd_noex, NOEX_PUBLIC)) + return Qtrue; + } + return Qfalse; +} + +/* + * call-seq: + * mod.private_method_defined?(symbol) => true or false + * + * Returns +true+ if the named private method is defined by + * _ mod_ (or its included modules and, if _mod_ is a class, + * its ancestors). + * + * module A + * def method1() end + * end + * class B + * private + * def method2() end + * end + * class C < B + * include A + * def method3() end + * end + * + * A.method_defined? :method1 #=> true + * C.private_method_defined? "method1" #=> false + * C.private_method_defined? "method2" #=> true + * C.method_defined? "method2" #=> false + */ + +static VALUE +rb_mod_private_method_defined(VALUE mod, VALUE mid) +{ + ID id = rb_to_id(mid); + NODE *method; + + method = rb_method_node(mod, id); + if (method) { + if (VISI_CHECK(method->nd_noex, NOEX_PRIVATE)) + return Qtrue; + } + return Qfalse; +} + +/* + * call-seq: + * mod.protected_method_defined?(symbol) => true or false + * + * Returns +true+ if the named protected method is defined + * by _mod_ (or its included modules and, if _mod_ is a + * class, its ancestors). + * + * module A + * def method1() end + * end + * class B + * protected + * def method2() end + * end + * class C < B + * include A + * def method3() end + * end + * + * A.method_defined? :method1 #=> true + * C.protected_method_defined? "method1" #=> false + * C.protected_method_defined? "method2" #=> true + * C.method_defined? 
"method2" #=> true + */ + +static VALUE +rb_mod_protected_method_defined(VALUE mod, VALUE mid) +{ + ID id = rb_to_id(mid); + NODE *method; + + method = rb_method_node(mod, id); + if (method) { + if (VISI_CHECK(method->nd_noex, NOEX_PROTECTED)) + return Qtrue; + } + return Qfalse; +} + +void +rb_alias(VALUE klass, ID name, ID def) +{ + NODE *orig_fbody, *node, *method; + VALUE singleton = 0; + st_data_t data; + + rb_frozen_class_p(klass); + if (klass == rb_cObject) { + rb_secure(4); + } + orig_fbody = search_method(klass, def, 0); + if (!orig_fbody || !orig_fbody->nd_body) { + if (TYPE(klass) == T_MODULE) { + orig_fbody = search_method(rb_cObject, def, 0); + } + } + if (!orig_fbody || !orig_fbody->nd_body) { + rb_print_undef(klass, def, 0); + } + if (FL_TEST(klass, FL_SINGLETON)) { + singleton = rb_iv_get(klass, "__attached__"); + } + + orig_fbody->nd_cnt++; + + if (st_lookup(RCLASS_M_TBL(klass), name, &data)) { + node = (NODE *)data; + if (node) { + if (RTEST(ruby_verbose) && node->nd_cnt == 0 && node->nd_body) { + rb_warning("discarding old %s", rb_id2name(name)); + } + if (nd_type(node->nd_body->nd_body) == NODE_CFUNC) { + rb_vm_check_redefinition_opt_method(node); + } + } + } + + st_insert(RCLASS_M_TBL(klass), name, + (st_data_t) NEW_FBODY( + method = NEW_METHOD(orig_fbody->nd_body->nd_body, + orig_fbody->nd_body->nd_clss, + NOEX_WITH_SAFE(orig_fbody->nd_body->nd_noex)), def)); + method->nd_file = (void *)def; + + rb_clear_cache_by_id(name); + + if (!ruby_running) return; + + if (singleton) { + rb_funcall(singleton, singleton_added, 1, ID2SYM(name)); + } + else { + rb_funcall(klass, added, 1, ID2SYM(name)); + } +} + +/* + * call-seq: + * alias_method(new_name, old_name) => self + * + * Makes new_name a new copy of the method old_name. This can + * be used to retain access to methods that are overridden. 
+ * + * module Mod + * alias_method :orig_exit, :exit + * def exit(code=0) + * puts "Exiting with code #{code}" + * orig_exit(code) + * end + * end + * include Mod + * exit(99) + * + * produces: + * + * Exiting with code 99 + */ + +static VALUE +rb_mod_alias_method(VALUE mod, VALUE newname, VALUE oldname) +{ + rb_alias(mod, rb_to_id(newname), rb_to_id(oldname)); + return mod; +} + +static void +secure_visibility(VALUE self) +{ + if (rb_safe_level() >= 4 && !OBJ_UNTRUSTED(self)) { + rb_raise(rb_eSecurityError, + "Insecure: can't change method visibility"); + } +} + +static void +set_method_visibility(VALUE self, int argc, VALUE *argv, ID ex) +{ + int i; + secure_visibility(self); + for (i = 0; i < argc; i++) { + rb_export_method(self, rb_to_id(argv[i]), ex); + } + rb_clear_cache_by_class(self); +} + +/* + * call-seq: + * public => self + * public(symbol, ...) => self + * + * With no arguments, sets the default visibility for subsequently + * defined methods to public. With arguments, sets the named methods to + * have public visibility. + */ + +static VALUE +rb_mod_public(int argc, VALUE *argv, VALUE module) +{ + secure_visibility(module); + if (argc == 0) { + SCOPE_SET(NOEX_PUBLIC); + } + else { + set_method_visibility(module, argc, argv, NOEX_PUBLIC); + } + return module; +} + +/* + * call-seq: + * protected => self + * protected(symbol, ...) => self + * + * With no arguments, sets the default visibility for subsequently + * defined methods to protected. With arguments, sets the named methods + * to have protected visibility. + */ + +static VALUE +rb_mod_protected(int argc, VALUE *argv, VALUE module) +{ + secure_visibility(module); + if (argc == 0) { + SCOPE_SET(NOEX_PROTECTED); + } + else { + set_method_visibility(module, argc, argv, NOEX_PROTECTED); + } + return module; +} + +/* + * call-seq: + * private => self + * private(symbol, ...) => self + * + * With no arguments, sets the default visibility for subsequently + * defined methods to private. 
With arguments, sets the named methods
+ *  to have private visibility.
+ *
+ *     module Mod
+ *       def a()  end
+ *       def b()  end
+ *     private
+ *       def c()  end
+ *       private :a
+ *     end
+ *     Mod.private_instance_methods   #=> [:a, :c]
+ */
+
+static VALUE
+rb_mod_private(int argc, VALUE *argv, VALUE module)
+{
+    secure_visibility(module);
+    if (argc == 0) {
+        SCOPE_SET(NOEX_PRIVATE);
+    }
+    else {
+        set_method_visibility(module, argc, argv, NOEX_PRIVATE);
+    }
+    return module;
+}
+
+/*
+ *  call-seq:
+ *     mod.public_class_method(symbol, ...)    => mod
+ *
+ *  Makes a list of existing class methods public.
+ */
+
+static VALUE
+rb_mod_public_method(int argc, VALUE *argv, VALUE obj)
+{
+    /* class methods live on CLASS_OF(obj), so change visibility there */
+    set_method_visibility(CLASS_OF(obj), argc, argv, NOEX_PUBLIC);
+    return obj;
+}
+
+/*
+ *  call-seq:
+ *     mod.private_class_method(symbol, ...)   => mod
+ *
+ *  Makes existing class methods private. Often used to hide the default
+ *  constructor new.
+ *
+ *     class SimpleSingleton  # Not thread safe
+ *       private_class_method :new
+ *       def SimpleSingleton.create(*args, &block)
+ *         @me = new(*args, &block) if ! @me
+ *         @me
+ *       end
+ *     end
+ */
+
+static VALUE
+rb_mod_private_method(int argc, VALUE *argv, VALUE obj)
+{
+    /* class methods live on CLASS_OF(obj), so change visibility there */
+    set_method_visibility(CLASS_OF(obj), argc, argv, NOEX_PRIVATE);
+    return obj;
+}
+
+/*
+ *  call-seq:
+ *     public
+ *     public(symbol, ...)
+ *
+ *  With no arguments, sets the default visibility for subsequently
+ *  defined methods to public. With arguments, sets the named methods to
+ *  have public visibility.
+ */
+
+/*
+ * Registered with argc -1, so it is called as (argc, argv, receiver)
+ * like the other variadic callbacks in this file. The receiver must be
+ * declared even though it is unused: calling a function with more
+ * arguments than it declares is undefined behaviour in C. The request
+ * is always applied to rb_cObject.
+ */
+static VALUE
+top_public(int argc, VALUE *argv, VALUE self)
+{
+    return rb_mod_public(argc, argv, rb_cObject);
+}
+
+/*
+ *  call-seq:
+ *     private
+ *     private(symbol, ...)
+ *
+ *  With no arguments, sets the default visibility for subsequently
+ *  defined methods to private. With arguments, sets the named methods to
+ *  have private visibility.
+ */
+
+/* Same calling convention as top_public(); the receiver is unused. */
+static VALUE
+top_private(int argc, VALUE *argv, VALUE self)
+{
+    return rb_mod_private(argc, argv, rb_cObject);
+}
+
+/*
+ *  call-seq:
+ *     module_function(symbol, ...)    => self
+ *
+ *  Creates module functions for the named methods. These functions may
+ *  be called with the module as a receiver, and also become available
+ *  as instance methods to classes that mix in the module.
Module
+ *  functions are copies of the original, and so may be changed
+ *  independently. The instance-method versions are made private. If
+ *  used with no arguments, subsequently defined methods become module
+ *  functions.
+ *
+ *     module Mod
+ *       def one
+ *         "This is one"
+ *       end
+ *       module_function :one
+ *     end
+ *     class Cls
+ *       include Mod
+ *       def callOne
+ *         one
+ *       end
+ *     end
+ *     Mod.one     #=> "This is one"
+ *     c = Cls.new
+ *     c.callOne   #=> "This is one"
+ *     module Mod
+ *       def one
+ *         "This is the new one"
+ *       end
+ *     end
+ *     Mod.one     #=> "This is one"
+ *     c.callOne   #=> "This is the new one"
+ */
+
+static VALUE
+rb_mod_modfunc(int argc, VALUE *argv, VALUE module)
+{
+    int i;
+    ID id;
+    NODE *fbody;
+
+    if (TYPE(module) != T_MODULE) {
+        rb_raise(rb_eTypeError, "module_function must be called for modules");
+    }
+
+    secure_visibility(module);
+    if (argc == 0) {
+        /* bare call: later definitions become module functions */
+        SCOPE_SET(NOEX_MODFUNC);
+        return module;
+    }
+
+    /* the instance-method versions of the named methods become private */
+    set_method_visibility(module, argc, argv, NOEX_PRIVATE);
+
+    for (i = 0; i < argc; i++) {
+        VALUE m = module;
+
+        id = rb_to_id(argv[i]);
+        /* resolve the body to copy, walking up past ZSUPER entries */
+        for (;;) {
+            fbody = search_method(m, id, &m);
+            if (fbody == 0) {
+                fbody = search_method(rb_cObject, id, &m);
+            }
+            if (fbody == 0 || fbody->nd_body == 0) {
+                rb_print_undef(module, id, 0);
+            }
+            if (nd_type(fbody->nd_body->nd_body) != NODE_ZSUPER) {
+                break;          /* normal case: no need to follow the 'super' link */
+            }
+            m = RCLASS_SUPER(m);
+            if (!m)
+                break;
+        }
+        /* publish the resolved body on the module's singleton class */
+        rb_add_method(rb_singleton_class(module), id, fbody->nd_body->nd_body,
+                      NOEX_PUBLIC);
+    }
+    return module;
+}
+
+/*
+ * Returns 1 when the method +id+ found for klass carries the NOEX_BASIC
+ * flag, 0 when there is no such method or the flag is not set.
+ */
+int
+rb_method_basic_definition_p(VALUE klass, ID id)
+{
+    NODE *node = rb_method_node(klass, id);
+    if (node && (node->nd_noex & NOEX_BASIC))
+        return 1;
+    return 0;
+}
+
+/*
+ * Tells whether obj responds to +id+; a non-zero +priv+ asks for private
+ * methods to be considered too.
+ *
+ * When respond_to? is still the NOEX_BASIC definition for obj's class,
+ * the answer comes straight from rb_method_boundp(). Otherwise the
+ * object's own respond_to? is called, with +true+ as a second argument
+ * only when +priv+ is set, and its result is reduced with RTEST().
+ */
+int
+rb_obj_respond_to(VALUE obj, ID id, int priv)
+{
+    VALUE klass = CLASS_OF(obj);
+
+    if (rb_method_basic_definition_p(klass, idRespond_to)) {
+        return rb_method_boundp(klass, id, !priv);
+    }
+    else {
+        VALUE args[2];
+        int n = 0;
+        args[n++] = ID2SYM(id);
+        if (priv)
+            args[n++] = Qtrue;
+        return RTEST(rb_funcall2(obj, idRespond_to, n, args));
+    }
+}
+
+/*
+ * Shorthand for rb_obj_respond_to() with priv set to 0.
+ */
+int
+rb_respond_to(VALUE obj, ID id)
+{
+    /* priv is a C int flag, so pass 0 rather than the VALUE constant Qfalse */
+    return rb_obj_respond_to(obj, id, 0);
+}
+
+/*
+ *  call-seq:
+ *     obj.respond_to?(symbol, include_private=false) => true or false
+ *
+ *  Returns +true+ if _obj_ responds to the given
+ *  method. Private methods are included in the search only if the
+ *  optional second parameter evaluates to +true+.
+ */
+
+static VALUE
+obj_respond_to(int argc, VALUE *argv, VALUE obj)
+{
+    VALUE mid, priv;
+    ID id;
+
+    /* "11": one required argument (mid) and one optional (priv) */
+    rb_scan_args(argc, argv, "11", &mid, &priv);
+    id = rb_to_id(mid);
+    if (rb_method_boundp(CLASS_OF(obj), id, !RTEST(priv))) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+
+/*
+ * Registers the methods implemented in this file on Kernel, Module and
+ * the top-level self object, then interns the IDs of the
+ * method_added / method_removed / method_undefined hooks (and their
+ * singleton_ variants) together with object_id.
+ */
+void
+Init_eval_method(void)
+{
+    /* from here on, rb_intern() expands to rb_intern_const() */
+#undef rb_intern
+#define rb_intern(str) rb_intern_const(str)
+
+    rb_define_method(rb_mKernel, "respond_to?", obj_respond_to, -1);
+
+    rb_define_private_method(rb_cModule, "remove_method", rb_mod_remove_method, -1);
+    rb_define_private_method(rb_cModule, "undef_method", rb_mod_undef_method, -1);
+    rb_define_private_method(rb_cModule, "alias_method", rb_mod_alias_method, 2);
+    rb_define_private_method(rb_cModule, "public", rb_mod_public, -1);
+    rb_define_private_method(rb_cModule, "protected", rb_mod_protected, -1);
+    rb_define_private_method(rb_cModule, "private", rb_mod_private, -1);
+    rb_define_private_method(rb_cModule, "module_function", rb_mod_modfunc, -1);
+
+    rb_define_method(rb_cModule, "method_defined?", rb_mod_method_defined, 1);
+    rb_define_method(rb_cModule, "public_method_defined?", rb_mod_public_method_defined, 1);
+    rb_define_method(rb_cModule, "private_method_defined?", rb_mod_private_method_defined, 1);
+    rb_define_method(rb_cModule, "protected_method_defined?", rb_mod_protected_method_defined, 1);
+    rb_define_method(rb_cModule, "public_class_method", rb_mod_public_method, -1);
+    rb_define_method(rb_cModule, "private_class_method", rb_mod_private_method, -1);
+
+    rb_define_singleton_method(rb_vm_top_self(), "public", top_public, -1);
+    rb_define_singleton_method(rb_vm_top_self(), "private", top_private, -1);
+
+    object_id = rb_intern("object_id");
+    added = rb_intern("method_added");
+    singleton_added = rb_intern("singleton_method_added");
+    removed = rb_intern("method_removed");
+    singleton_removed = rb_intern("singleton_method_removed");
+    undefined = rb_intern("method_undefined");
+    singleton_undefined = rb_intern("singleton_method_undefined");
+}
+
diff --git a/vm_opts.h b/vm_opts.h
new file mode 100644
index 0000000..13baea0
--- /dev/null
+++ b/vm_opts.h
@@ -0,0 +1,51 @@
+/*-*-c-*-*/
+/**********************************************************************
+
+  vm_opts.h - VM optimize option
+
+  $Author: ko1 $
+
+  Copyright (C) 2004-2007 Koichi Sasada
+
+**********************************************************************/
+
+
+#ifndef RUBY_VM_OPTS_H
+#define RUBY_VM_OPTS_H
+
+/* Compile options.
+ * You can change these options at runtime by VM::CompileOption.
+ * Following definitions are default values.
+ */
+
+#define OPT_TRACE_INSTRUCTION        1
+#define OPT_TAILCALL_OPTIMIZATION    0
+#define OPT_PEEPHOLE_OPTIMIZATION    1
+#define OPT_SPECIALISED_INSTRUCTION  1
+#define OPT_INLINE_CONST_CACHE       1
+
+
+/* Build Options.
+ * You can't change these options at runtime.
+ */
+
+/* C compiler depend */
+#define OPT_DIRECT_THREADED_CODE     1
+#define OPT_TOKEN_THREADED_CODE      0
+#define OPT_CALL_THREADED_CODE       0
+
+/* VM running option */
+#define OPT_CHECKED_RUN              1
+#define OPT_INLINE_METHOD_CACHE      1
+#define OPT_BLOCKINLINING            0
+
+/* architecture independent, affects generated code */
+#define OPT_OPERANDS_UNIFICATION     0
+#define OPT_INSTRUCTIONS_UNIFICATION 0
+#define OPT_UNIFY_ALL_COMBINATION    0
+#define OPT_STACK_CACHING            0
+
+/* misc */
+#define SUPPORT_JOKE                 0
+
+#endif /* RUBY_VM_OPTS_H */