From 40dc70d2f9c4c60fffce97566ea5c75e9836a522 Mon Sep 17 00:00:00 2001 From: Charles Samborski Date: Fri, 21 Feb 2020 02:28:48 +0100 Subject: [PATCH] Update to avm1-types 0.10 This commit updates the AVM1 libraries to the version `0.10`. With this version, the general structure of these libraries should be complete. Initially, AVM1 support was included in the SWF libraries (`swf-types`, `swf-parser`). It used the model defined by Adobe's SWF spec, where you can parse actions into a vec by simply reading them sequentially. It quickly became apparent that this model was too simple and did not reflect how the player interprets bytecode. Adobe's interpreter supports jumps to arbitrary offsets; it treats the bytecode as an opaque buffer and reads only one action at a time. Because of this, parsing AVM1 actions ahead of time became much more complicated and AVM1 support was moved to its own libraries. Initially, there was only support for reading a single low-level action at a time. This was the only API provided by the version used until now by Flashback. And Flashback used it to read the actions sequentially (as in Adobe's spec). The issue with this version was that you were on your own to perform static analysis of AVM1 bytecode. It also meant that control-flow actions such as `Jump` or `If` were tightly dependent on the actual encoding because they included byte offsets. The various versions of the AVM1 libraries were focused on bringing back support for static analysis of AVM1 bytecode. The latest versions solve this by providing two "modes" to view actions: `raw` or `cfg`. The raw mode corresponds to how the interpreter reads bytecode: a single action at a time, using byte offsets for control flow. The Control Flow Graph (CFG) mode represents the code as a graph where nodes correspond to linear sections of code (where you can safely advance through the sequence of actions) and edges are jumps in the code. The graph itself is represented as a non-empty vector of blocks. 
Each block has a unique label, a list of simple actions (with no impact on control flow) and a flow action. The flow action describes the outgoing edges and how they are chosen. The target of the jump is identified by its label, the value `None` means the end of the current function. The two main variants are `CfgFlow::Simple` for unconditional jumps and `CfgFlow::If` for jumps based on truthiness of the top of the stack. In the case of Flashback, AVM1 support was minimal. Thanks to this, updating the AVM1 libraries to their latest version was fairly easy. For AVM1 bytecode that does not use any form of control flow (`Jump`, `If`, `WaitForFrame`, `Throw`, etc.) the behavior should be the same. For AVM1 bytecode _with_ control flow, this commit introduces a difference. The previous implementation ignored any form of control flow and just ran everything (e.g. both branches of an `If` were executed). The new CFG representation forces consumers to handle it properly. In this commit the compiler simply stops when it hits its first control-flow action. Support for control-flow may require larger changes that are best left for some future commit in my opinion. --- Cargo.toml | 4 +-- src/avm1.rs | 84 ++++++++++++++++++++++++++++------------------------- 2 files changed, 46 insertions(+), 42 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d12476d..45094e4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,8 +10,8 @@ readme = "README.md" description = "Adobe Flash / SWF preservation tools." 
[dependencies] -avm1-parser = "0.2.0" -avm1-tree = "0.2.1" +avm1-parser = "0.10.0" +avm1-types = "0.10.0" swf-parser = "0.11.0" swf-types = "0.11.0" svg = "0.5.12" diff --git a/src/avm1.rs b/src/avm1.rs index 12f03ff..54b8218 100644 --- a/src/avm1.rs +++ b/src/avm1.rs @@ -1,4 +1,7 @@ use crate::timeline::Frame; +use avm1_parser::parse_cfg; +use avm1_types::cfg::{Action, Cfg, CfgBlock, CfgFlow}; +use avm1_types::PushValue; #[derive(Clone, Debug)] pub enum Value { @@ -54,19 +57,13 @@ pub struct Code { } impl Code { - pub fn parse_and_compile(mut data: &[u8]) -> Self { - let mut actions = vec![]; - while data[0] != 0 { - let (rest, action) = avm1_parser::parse_action(data).unwrap(); - data = rest; - actions.push(action); - } - assert_eq!(data, [0]); + pub fn parse_and_compile(data: &[u8]) -> Self { + let cfg = parse_cfg(data); - Code::compile(actions) + Code::compile(cfg) } - pub fn compile(actions: Vec) -> Self { + pub fn compile(cfg: Cfg) -> Self { let mut consts = vec![]; let mut regs = vec![]; let mut stack = vec![]; @@ -77,48 +74,44 @@ impl Code { regs.push(Value::Undefined); regs.pop(); - for action in actions { + // FIXME(demurgos) Handle control flow, we're currently only compiling the first block + let block: CfgBlock = cfg.blocks.into_vec().remove(0); + + for action in block.actions { match action { - avm1_tree::Action::Play => ops.push(Op::Play), - avm1_tree::Action::Stop => ops.push(Op::Stop), - avm1_tree::Action::GotoFrame(goto) => { + Action::Play => ops.push(Op::Play), + Action::Stop => ops.push(Op::Stop), + Action::GotoFrame(goto) => { ops.push(Op::GotoFrame(Frame(goto.frame as u16))); } - avm1_tree::Action::GotoLabel(goto) => { + Action::GotoLabel(goto) => { ops.push(Op::GotoLabel(goto.label)); } - avm1_tree::Action::GetUrl(get_url) => { + Action::GetUrl(get_url) => { ops.push(Op::GetUrl(get_url.url, get_url.target)); } - - // All of frames are loaded ahead of time, no waiting needed. 
- avm1_tree::Action::WaitForFrame(_) => {} - avm1_tree::Action::WaitForFrame2(_) => { - stack.pop(); - } - - avm1_tree::Action::ConstantPool(pool) => { - consts = pool.constant_pool; + Action::ConstantPool(pool) => { + consts = pool.pool; } - avm1_tree::Action::Push(push) => { + Action::Push(push) => { stack.extend(push.values.into_iter().map(|value| match value { - avm1_tree::Value::Undefined => Value::Undefined, - avm1_tree::Value::Null => Value::Null, - avm1_tree::Value::Boolean(x) => Value::Bool(x), - avm1_tree::Value::Sint32(x) => Value::I32(x), - avm1_tree::Value::Float32(x) => Value::F32(x), - avm1_tree::Value::Float64(x) => Value::F64(x), - avm1_tree::Value::String(s) => Value::Str(s), + PushValue::Undefined => Value::Undefined, + PushValue::Null => Value::Null, + PushValue::Boolean(x) => Value::Bool(x), + PushValue::Sint32(x) => Value::I32(x), + PushValue::Float32(x) => Value::F32(x), + PushValue::Float64(x) => Value::F64(x), + PushValue::String(s) => Value::Str(s), // FIXME(eddyb) avoid per-use cloning. 
- avm1_tree::Value::Constant(i) => Value::Str(consts[i as usize].to_string()), - avm1_tree::Value::Register(i) => regs[i as usize].clone(), + PushValue::Constant(i) => Value::Str(consts[i as usize].to_string()), + PushValue::Register(i) => regs[i as usize].clone(), })); } - avm1_tree::Action::Pop => { + Action::Pop => { stack.pop(); } - avm1_tree::Action::GetVariable => match stack.pop().unwrap() { + Action::GetVariable => match stack.pop().unwrap() { Value::Str(name) => { ops.push(Op::GetVar(name)); stack.push(Value::OpRes(ops.len() - 1)); @@ -128,7 +121,7 @@ impl Code { break; } }, - avm1_tree::Action::SetVariable => { + Action::SetVariable => { let value = stack.pop().unwrap(); match stack.pop().unwrap() { Value::Str(name) => { @@ -141,7 +134,7 @@ impl Code { } } } - avm1_tree::Action::CallFunction => { + Action::CallFunction => { let name = stack.pop().unwrap(); let arg_count = stack.pop().unwrap(); match (name, arg_count.as_i32()) { @@ -160,7 +153,7 @@ impl Code { } } } - avm1_tree::Action::CallMethod => { + Action::CallMethod => { let mut name = stack.pop().unwrap(); let this = stack.pop().unwrap(); let arg_count = stack.pop().unwrap(); @@ -195,6 +188,17 @@ impl Code { } } + match block.flow { + // All of frames are loaded ahead of time, no waiting needed. + CfgFlow::WaitForFrame(_) => {} + CfgFlow::WaitForFrame2(_) => { + stack.pop(); + } + _ => { + eprintln!("unknown flow: {:?}", block.flow); + } + } + Code { ops } } }