From 509f9c4fe3a4e7a0b9e6293ab539a3ea3330b3e5 Mon Sep 17 00:00:00 2001 From: Shunsuke Shibayama Date: Wed, 6 Sep 2023 11:59:24 +0900 Subject: [PATCH] fix: pyc execution --- crates/erg_common/python_util.rs | 181 +++++++++++++----------- crates/erg_compiler/ty/codeobj.rs | 19 ++- doc/EN/python/bytecode_specification.md | 3 +- doc/JA/python/bytecode_specification.md | 3 +- src/dummy.rs | 2 +- tests/repl.rs | 7 +- 6 files changed, 121 insertions(+), 94 deletions(-) diff --git a/crates/erg_common/python_util.rs b/crates/erg_common/python_util.rs index 0ac7c147f..22396f71d 100644 --- a/crates/erg_common/python_util.rs +++ b/crates/erg_common/python_util.rs @@ -1,8 +1,8 @@ //! utilities for calling CPython. //! //! CPythonを呼び出すためのユーティリティー -use std::env; -use std::fs::{self, File}; +use std::env::{current_dir, set_current_dir, temp_dir}; +use std::fs::{canonicalize, remove_file, File}; use std::io::Write; use std::path::{Path, PathBuf}; use std::process::{Command, ExitStatus, Stdio}; @@ -10,6 +10,7 @@ use std::process::{Command, ExitStatus, Stdio}; use crate::fn_name_full; use crate::io::Output; use crate::pathutil::remove_verbatim; +use crate::random::random; use crate::serialize::get_magic_num_from_bytes; #[cfg(unix)] @@ -552,6 +553,10 @@ pub const EXT_COMMON_ALIAS: [&str; 7] = [ "urllib3", ]; +fn escape_py_code(code: &str) -> String { + code.replace('"', "\\\"").replace('`', "\\`") +} + pub fn opt_which_python() -> Result { let (cmd, python) = if cfg!(windows) { ("where", "python") @@ -719,7 +724,7 @@ pub fn env_python_version() -> PythonVersion { } pub fn get_sys_path(working_dir: Option<&Path>) -> Result, std::io::Error> { - let working_dir = fs::canonicalize(working_dir.unwrap_or(Path::new(""))).unwrap_or_default(); + let working_dir = canonicalize(working_dir.unwrap_or(Path::new(""))).unwrap_or_default(); let working_dir = remove_verbatim(&working_dir); let py_command = opt_which_python().map_err(|e| { std::io::Error::new( @@ -750,13 +755,61 @@ pub fn get_sys_path(working_dir: Option<&Path>) -> Result, std::io: Ok(res) } +fn exec_pyc_in( + file: impl AsRef, + py_command: Option<&str>, + working_dir: impl AsRef, + args: &[&str], + stdout: impl Into, +) -> std::io::Result { + let current_dir = current_dir()?; + set_current_dir(working_dir.as_ref())?; + let code = format!( + "import marshal; exec(marshal.loads(open(r\"{}\", \"rb\").read()[16:]))", + file.as_ref().display() + ); + let command = py_command + .map(ToString::to_string) + .unwrap_or(which_python()); + let mut out = if cfg!(windows) { + Command::new("cmd") + .arg("/C") + .arg(command) + .arg("-c") + .arg(code) + .args(args) + .stdout(stdout) + .spawn() + .expect("cannot execute python") + } else { + let exec_command = format!( + "{command} -c \"{}\" {}", + escape_py_code(&code), + args.join(" ") + ); + Command::new("sh") + .arg("-c") + .arg(exec_command) + .stdout(stdout) + .spawn() + .expect("cannot execute python") + }; + let res = out.wait(); + set_current_dir(current_dir)?; + res +} + /// executes over a shell, cause `python` may not exist as an executable file (like pyenv) -pub fn exec_pyc, T: Into>( - file: S, +pub fn exec_pyc( + file: impl AsRef, py_command: Option<&str>, - argv: &[&'static str], - stdout: T, -) -> Option { + working_dir: Option>, + args: &[&str], + stdout: impl Into, +) -> std::io::Result { + if let Some(working_dir) = working_dir { + return exec_pyc_in(file, py_command, working_dir, args, stdout); + } let command = py_command .map(ToString::to_string) .unwrap_or_else(which_python); @@ -764,13 +817,13 @@ pub fn exec_pyc, T: Into>( Command::new("cmd") .arg("/C") .arg(command) - .arg(&file.into()) - .args(argv) + .arg(file.as_ref()) + .args(args) .stdout(stdout) .spawn() .expect("cannot execute python") } else { - let exec_command = format!("{command} {} {}", file.into(), argv.join(" ")); + let exec_command = format!("{command} {} {}", file.as_ref().display(), args.join(" ")); Command::new("sh") .arg("-c") .arg(exec_command) @@ -778,7 +831,7 @@ pub fn exec_pyc, T: Into>( .spawn() .expect("cannot execute python") }; - out.wait().expect("python doesn't work").code() + out.wait() } /// evaluates over a shell, cause `python` may not exist as an executable file (like pyenv) @@ -805,21 +858,21 @@ pub fn _eval_pyc>(file: S, py_command: Option<&str>) -> String { String::from_utf8_lossy(&out.stdout).to_string() } -pub fn exec_py(file: &str) -> Option { +pub fn exec_py(file: impl AsRef) -> std::io::Result { let mut child = if cfg!(windows) { Command::new(which_python()) - .arg(file) + .arg(file.as_ref()) .spawn() .expect("cannot execute python") } else { - let exec_command = format!("{} {file}", which_python()); + let exec_command = format!("{} {}", which_python(), file.as_ref().display()); Command::new("sh") .arg("-c") .arg(exec_command) .spawn() .expect("cannot execute python") }; - child.wait().expect("python doesn't work").code() + child.wait() } pub fn env_spawn_py(code: &str) { @@ -830,7 +883,7 @@ pub fn env_spawn_py(code: &str) { .spawn() .expect("cannot execute python"); } else { - let exec_command = format!("{} -c \"{}\"", which_python(), code); + let exec_command = format!("{} -c \"{}\"", which_python(), escape_py_code(code)); Command::new("sh") .arg("-c") .arg(exec_command) @@ -847,7 +900,11 @@ pub fn spawn_py(py_command: Option<&str>, code: &str) { .spawn() .expect("cannot execute python"); } else { - let exec_command = format!("{} -c \"{}\"", py_command.unwrap_or(&which_python()), code); + let exec_command = format!( + "{} -c \"{}\"", + py_command.unwrap_or(&which_python()), + escape_py_code(code) + ); Command::new("sh") .arg("-c") .arg(exec_command) @@ -856,83 +913,37 @@ pub fn spawn_py(py_command: Option<&str>, code: &str) { } } -pub fn exec_py_code(code: &str, args: &[&str], output: Output) -> std::io::Result { - let mut out = if cfg!(windows) { - let fallback = |err: std::io::Error| { - // if the filename or extension is too long - // create a temporary file and execute it - if err.raw_os_error() == Some(206) { - let tmp_dir = env::temp_dir(); - let tmp_file = tmp_dir.join("tmp.py"); - File::create(&tmp_file) - .unwrap() - .write_all(code.as_bytes()) - .unwrap(); - Command::new(which_python()) - .arg(tmp_file) - .args(args) - .stdout(output.clone()) - .spawn() - } else { - Err(err) - } - }; - Command::new(which_python()) - .arg("-c") - .arg(code) - .args(args) - .stdout(output.clone()) - .spawn() - .or_else(fallback) - .expect("cannot execute python") - } else { - let code = code.replace('"', "\\\"").replace('`', "\\`"); - let exec_command = format!("{} -c \"{code}\" {}", which_python(), args.join(" ")); - Command::new("sh") - .arg("-c") - .arg(exec_command) - .stdout(output) - .spawn() - .expect("cannot execute python") - }; - out.wait() +pub fn exec_pyc_code(code: &[u8], args: &[&str], output: Output) -> std::io::Result { + let tmp_dir = temp_dir(); + let tmp_file = tmp_dir.join(format!("{}.pyc", random())); + File::create(&tmp_file).unwrap().write_all(code).unwrap(); + let res = exec_pyc(&tmp_file, None, current_dir().ok(), args, output); + remove_file(tmp_file)?; + res } pub fn exec_py_code_with_output( code: &str, args: &[&str], ) -> std::io::Result { + let tmp_dir = temp_dir(); + let tmp_file = tmp_dir.join(format!("{}.py", random())); + File::create(&tmp_file) + .unwrap() + .write_all(code.as_bytes()) + .unwrap(); + let command = which_python(); let out = if cfg!(windows) { - let fallback = |err: std::io::Error| { - // if the filename or extension is too long - // create a temporary file and execute it - if err.raw_os_error() == Some(206) { - let tmp_dir = env::temp_dir(); - let tmp_file = tmp_dir.join("tmp.py"); - File::create(&tmp_file) - .unwrap() - .write_all(code.as_bytes()) - .unwrap(); - Command::new(which_python()) - .arg(tmp_file) - .args(args) - .stdout(Stdio::piped()) - .spawn() - } else { - Err(err) - } - }; - Command::new(which_python()) - .arg("-c") - .arg(code) + Command::new("cmd") + .arg("/C") + .arg(command) + .arg(&tmp_file) .args(args) .stdout(Stdio::piped()) .spawn() - .or_else(fallback) .expect("cannot execute python") } else { - let code = code.replace('"', "\\\"").replace('`', "\\`"); - let exec_command = format!("{} -c \"{code}\" {}", which_python(), args.join(" ")); + let exec_command = format!("{command} {} {}", tmp_file.display(), args.join(" ")); Command::new("sh") .arg("-c") .arg(exec_command) @@ -940,5 +951,7 @@ pub fn exec_py_code_with_output( .spawn() .expect("cannot execute python") }; - out.wait_with_output() + let res = out.wait_with_output(); + remove_file(tmp_file)?; + res } diff --git a/crates/erg_compiler/ty/codeobj.rs b/crates/erg_compiler/ty/codeobj.rs index f47feccb7..e4a8917e0 100644 --- a/crates/erg_compiler/ty/codeobj.rs +++ b/crates/erg_compiler/ty/codeobj.rs @@ -14,7 +14,7 @@ use erg_common::opcode308::Opcode308; use erg_common::opcode309::Opcode309; use erg_common::opcode310::Opcode310; use erg_common::opcode311::{BinOpCode, Opcode311}; -use erg_common::python_util::{env_magic_number, exec_py_code, PythonVersion}; +use erg_common::python_util::{env_magic_number, exec_pyc_code, PythonVersion}; use erg_common::serialize::*; use erg_common::Str; @@ -443,6 +443,12 @@ impl CodeObj { py_magic_num: Option, ) -> std::io::Result<()> { let mut file = File::create(path)?; + let bytes = self.into_bytecode(py_magic_num); + file.write_all(&bytes[..])?; + Ok(()) + } + + pub fn into_bytecode(self, py_magic_num: Option) -> Vec { let mut bytes = Vec::with_capacity(16); let py_magic_num = py_magic_num.unwrap_or_else(env_magic_number); let python_ver = get_ver_from_magic_num(py_magic_num); @@ -451,11 +457,12 @@ impl CodeObj { bytes.append(&mut get_timestamp_bytes().to_vec()); bytes.append(&mut vec![0; 4]); // padding bytes.append(&mut self.into_bytes(python_ver)); - file.write_all(&bytes[..])?; - Ok(()) + bytes } - pub fn executable_code(self, py_magic_num: Option) -> String { + /// Embed bytecode in a Python script. + /// This may generate a huge script, so don't pass it to `python -c`, but dump the bytecode and exec it. + pub fn into_script(self, py_magic_num: Option) -> String { let mut bytes = Vec::with_capacity(16); let py_magic_num = py_magic_num.unwrap_or_else(env_magic_number); let python_ver = get_ver_from_magic_num(py_magic_num); @@ -468,8 +475,8 @@ impl CodeObj { } pub fn exec(self, cfg: &ErgConfig) -> std::io::Result { - exec_py_code( - &self.executable_code(cfg.py_magic_num), + exec_pyc_code( + &self.into_bytecode(cfg.py_magic_num), &cfg.runtime_args, cfg.output.clone(), ) diff --git a/doc/EN/python/bytecode_specification.md b/doc/EN/python/bytecode_specification.md index 9cf67b88a..8f25a7bed 100644 --- a/doc/EN/python/bytecode_specification.md +++ b/doc/EN/python/bytecode_specification.md @@ -5,7 +5,8 @@ * 0~3 byte(u32): magic number (see common/bytecode.rs for details) * 4~7 byte(u32): 0 padding * 8~12 byte(u32): timestamp -* 13~ byte(PyCodeObject): code object +* 13~16 byte(u32): 0 padding +* 17~ byte(PyCodeObject): code object ## PyCodeObject diff --git a/doc/JA/python/bytecode_specification.md b/doc/JA/python/bytecode_specification.md index b4048535e..89d931b20 100644 --- a/doc/JA/python/bytecode_specification.md +++ b/doc/JA/python/bytecode_specification.md @@ -7,7 +7,8 @@ * 0~3 byte(u32): magic number (see common/bytecode.rs for details) * 4~7 byte(u32): 0 padding * 8~12 byte(u32): timestamp -* 13~ byte(PyCodeObject): code object +* 13~16 byte(u32): 0 padding +* 17~ byte(PyCodeObject): code object ## PyCodeObject diff --git a/src/dummy.rs b/src/dummy.rs index badff97d0..f5a90d0bb 100644 --- a/src/dummy.rs +++ b/src/dummy.rs @@ -318,7 +318,7 @@ impl Runnable for DummyVM { if let Err(err) = self.stream.as_mut().unwrap().send_msg(&Message::new( Inst::Execute, Some( - code.executable_code(self.compiler.cfg.py_magic_num) + code.into_script(self.compiler.cfg.py_magic_num) .into_bytes(), ), )) { diff --git a/tests/repl.rs b/tests/repl.rs index d674df803..2f310647c 100644 --- a/tests/repl.rs +++ b/tests/repl.rs @@ -198,6 +198,11 @@ fn exec_repl_invalid_def_after_the_at_sign() -> Result<(), ()> { #[test] #[ignore] fn exec_repl_server_mock_test() -> Result<(), ()> { - assert_eq!(exec_py("src/scripts/repl_server_test.py"), Some(0)); + assert_eq!( + exec_py("src/scripts/repl_server_test.py") + .ok() + .and_then(|s| s.code()), + Some(0) + ); Ok(()) }