slang_backend/
bytecode.rs

1use crate::value::Value;
2use slang_derive::NumericEnum;
3pub use std::io::{Read, Write};
4
5/// Operation codes for the bytecode interpreter
6#[derive(Debug, PartialEq, NumericEnum)]
7pub enum OpCode {
8    /// Push a constant onto the stack
9    Constant,
10    /// Add the top two stack values
11    Add,
12    /// Subtract the top stack value from the second stack value
13    Subtract,
14    /// Multiply the top two stack values
15    Multiply,
16    /// Divide the second stack value by the top stack value
17    Divide,
18    /// Negate the top stack value
19    Negate,
20    /// Return from the current function
21    Return,
22    /// Print the top stack value
23    Print,
24    /// Push the value of a variable onto the stack
25    GetVariable,
26    /// Set a variable to the top stack value
27    SetVariable,
28    /// Remove the top stack value
29    Pop,
30    /// Define a function
31    DefineFunction,
32    /// Call a function
33    Call,
34    /// Jump if the top stack value is false
35    JumpIfFalse,
36    /// Jump unconditionally
37    Jump,
38    /// Negate a boolean value (logical NOT)
39    BoolNot,
40    /// Boolean AND operation
41    BoolAnd,
42    /// Boolean OR operation
43    BoolOr,
44    /// Greater than comparison
45    Greater,
46    /// Less than comparison
47    Less,
48    /// Greater than or equal comparison
49    GreaterEqual,
50    /// Less than or equal comparison
51    LessEqual,
52    /// Equal comparison
53    Equal,
54    /// Not equal comparison
55    NotEqual,
56    /// Begin a new scope (save variable state)
57    BeginScope,
58    /// End the current scope (restore variable state)
59    EndScope,
60}
61
/// A user-defined function as it is stored in compiled bytecode.
///
/// The struct only carries metadata; the instructions themselves live in the
/// owning chunk's code, starting at `code_offset`.
#[derive(Debug, Clone)]
pub struct Function {
    /// The function's name
    pub name: String,
    /// How many parameters the function takes
    pub arity: u8,
    /// Position in the chunk's code at which the function body starts
    pub code_offset: usize,
    /// Names of the local variables the function uses
    pub locals: Vec<String>,
}
74
75/// Type for native function implementations
76pub type NativeFn = fn(&[Value]) -> Result<Value, String>;
77
78/// Native (built-in) function representation
79#[derive(Clone)]
80pub struct NativeFunction {
81    /// Name of the native function
82    pub name: String,
83    /// Number of parameters
84    pub arity: u8,
85    /// The Rust function that implements this native function
86    pub function: NativeFn,
87}
88
89impl std::fmt::Debug for NativeFunction {
90    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
91        write!(f, "<native fn {}>", self.name)
92    }
93}
94
95/// A chunk of bytecode representing a compiled program
96#[derive(Debug)]
97pub struct Chunk {
98    /// The actual bytecode instructions
99    pub code: Vec<u8>,
100    /// Constant values used by the program
101    pub constants: Vec<Value>,
102    /// Source code line numbers for debugging
103    pub lines: Vec<usize>,
104    /// Variable and function names used in the program
105    pub identifiers: Vec<String>,
106}
107
108impl Default for Chunk {
109    fn default() -> Self {
110        Chunk::new()
111    }
112}
113
114impl Chunk {
115    /// Creates a new, empty bytecode chunk
116    pub fn new() -> Self {
117        Chunk {
118            code: Vec::new(),
119            constants: Vec::new(),
120            lines: Vec::new(),
121            identifiers: Vec::new(),
122        }
123    }
124
125    /// Writes a byte to the chunk
126    ///
127    /// ### Arguments
128    ///
129    /// * `byte` - The byte to write
130    /// * `line` - The source code line number
131    pub fn write_byte(&mut self, byte: u8, line: usize) {
132        self.code.push(byte);
133        self.lines.push(line);
134    }
135
136    /// Writes an opcode to the chunk
137    ///
138    /// ### Arguments
139    ///
140    /// * `op` - The opcode to write
141    /// * `line` - The source code line number
142    pub fn write_op(&mut self, op: OpCode, line: usize) {
143        self.write_byte(op as u8, line);
144    }
145
146    /// Adds a constant to the chunk's constant pool
147    ///
148    /// ### Arguments
149    ///
150    /// * `value` - The constant value to add
151    ///
152    /// ### Returns
153    ///
154    /// The index of the constant in the constant pool
155    pub fn add_constant(&mut self, value: Value) -> usize {
156        self.constants.push(value);
157        self.constants.len() - 1
158    }
159
160    /// Adds an identifier to the chunk's identifier pool
161    ///
162    /// ### Arguments
163    ///
164    /// * `name` - The identifier name to add
165    ///
166    /// ### Returns
167    ///
168    /// The index of the identifier in the identifier pool
169    pub fn add_identifier(&mut self, name: String) -> usize {
170        for (i, id) in self.identifiers.iter().enumerate() {
171            if id == &name {
172                return i;
173            }
174        }
175        self.identifiers.push(name);
176        self.identifiers.len() - 1
177    }
178
179    /// Serializes the chunk to binary data
180    ///
181    /// ### Arguments
182    ///
183    /// * `writer` - The writer to write the binary data to
184    ///
185    /// ### Returns
186    ///
187    /// IO result indicating success or failure
188    pub fn serialize(&self, writer: &mut dyn Write) -> std::io::Result<()> {
189        let code_len = self.code.len() as u32;
190        writer.write_all(&code_len.to_le_bytes())?;
191        writer.write_all(&self.code)?;
192
193        let constants_len = self.constants.len() as u32;
194        writer.write_all(&constants_len.to_le_bytes())?;
195
196        for value in &self.constants {
197            writer.write_all(&[value.type_tag()])?;
198            match value {
199                Value::I32(i) => {
200                    writer.write_all(&i.to_le_bytes())?;
201                }
202                Value::I64(i) => {
203                    writer.write_all(&i.to_le_bytes())?;
204                }
205                Value::U32(i) => {
206                    writer.write_all(&i.to_le_bytes())?;
207                }
208                Value::U64(i) => {
209                    writer.write_all(&i.to_le_bytes())?;
210                }
211                Value::String(s) => {
212                    let bytes = s.as_bytes();
213                    let len = bytes.len() as u32;
214                    writer.write_all(&len.to_le_bytes())?;
215                    writer.write_all(bytes)?;
216                }
217                Value::F32(f) => {
218                    writer.write_all(&f.to_le_bytes())?;
219                }
220                Value::F64(f) => {
221                    writer.write_all(&f.to_le_bytes())?;
222                }
223                Value::Function(func) => {
224                    let name_bytes = func.name.as_bytes();
225                    let name_len = name_bytes.len() as u32;
226                    writer.write_all(&name_len.to_le_bytes())?;
227                    writer.write_all(name_bytes)?;
228
229                    writer.write_all(&[func.arity])?;
230                    writer.write_all(&(func.code_offset as u32).to_le_bytes())?;
231
232                    let locals_len = func.locals.len() as u32;
233                    writer.write_all(&locals_len.to_le_bytes())?;
234                    for local in &func.locals {
235                        let local_bytes = local.as_bytes();
236                        let local_len = local_bytes.len() as u32;
237                        writer.write_all(&local_len.to_le_bytes())?;
238                        writer.write_all(local_bytes)?;
239                    }
240                }
241                Value::NativeFunction(func) => {
242                    let name_bytes = func.name.as_bytes();
243                    let name_len = name_bytes.len() as u32;
244                    writer.write_all(&name_len.to_le_bytes())?;
245                    writer.write_all(name_bytes)?;
246
247                    writer.write_all(&[func.arity])?;
248                }
249                Value::Boolean(b) => {
250                    writer.write_all(&[*b as u8])?;
251                }
252                Value::Unit(_) => {}
253            }
254        }
255
256        let identifiers_len = self.identifiers.len() as u32;
257        writer.write_all(&identifiers_len.to_le_bytes())?;
258
259        for id in &self.identifiers {
260            let bytes = id.as_bytes();
261            let len = bytes.len() as u32;
262            writer.write_all(&len.to_le_bytes())?;
263            writer.write_all(bytes)?;
264        }
265
266        Ok(())
267    }
268
269    /// Deserializes a chunk from binary data
270    ///
271    /// ### Arguments
272    ///
273    /// * `reader` - The reader to read the binary data from
274    ///
275    /// ### Returns
276    ///
277    /// The deserialized chunk or an IO error
278    pub fn deserialize(reader: &mut dyn Read) -> std::io::Result<Self> {
279        let mut chunk = Chunk::new();
280
281        let mut code_len_bytes = [0u8; 4];
282        reader.read_exact(&mut code_len_bytes)?;
283        let code_len = u32::from_le_bytes(code_len_bytes) as usize;
284
285        let mut code = vec![0u8; code_len];
286        reader.read_exact(&mut code)?;
287        chunk.code = code;
288
289        chunk.lines = vec![0; code_len];
290
291        let mut constants_len_bytes = [0u8; 4];
292        reader.read_exact(&mut constants_len_bytes)?;
293        let constants_len = u32::from_le_bytes(constants_len_bytes) as usize;
294
295        for _ in 0..constants_len {
296            let mut type_tag = [0u8; 1];
297            reader.read_exact(&mut type_tag)?;
298
299            let value = Value::deserialize_from_type_tag(type_tag[0], reader)?;
300            chunk.constants.push(value);
301        }
302
303        let mut identifiers_len_bytes = [0u8; 4];
304        reader.read_exact(&mut identifiers_len_bytes)?;
305        let identifiers_len = u32::from_le_bytes(identifiers_len_bytes) as usize;
306
307        for _ in 0..identifiers_len {
308            let mut len_bytes = [0u8; 4];
309            reader.read_exact(&mut len_bytes)?;
310            let len = u32::from_le_bytes(len_bytes) as usize;
311
312            let mut string_bytes = vec![0u8; len];
313            reader.read_exact(&mut string_bytes)?;
314            let string = String::from_utf8(string_bytes).map_err(|_| {
315                std::io::Error::new(std::io::ErrorKind::InvalidData, "Invalid UTF-8")
316            })?;
317
318            chunk.identifiers.push(string);
319        }
320
321        Ok(chunk)
322    }
323
324    /// Debugging function to print the chunk's bytecode
325    ///
326    /// ### Arguments
327    /// * `name` - The name of the chunk (for debugging purposes)
328    #[cfg(feature = "print-byte_code")]
329    pub fn disassemble(&self, name: &str) {
330        println!("== {} ==", name);
331
332        let mut offset = 0;
333        while offset < self.code.len() {
334            offset = self.disassemble_instruction(offset);
335        }
336    }
337
338    /// Disassembles a single instruction for debugging
339    #[cfg(feature = "print-byte_code")]
340    pub fn disassemble_instruction(&self, offset: usize) -> usize {
341        print!("{:04} ", offset);
342
343        // Add line info
344        if offset > 0 && self.lines[offset] == self.lines[offset - 1] {
345            print!("   | ");
346        } else {
347            print!("{:4} ", self.lines[offset]);
348        }
349
350        let instruction = self.code[offset];
351        match OpCode::from_int(instruction) {
352            Some(OpCode::Constant) => self.simple_instruction_with_operand("CONSTANT", offset),
353            Some(OpCode::Add) => self.simple_instruction("ADD", offset),
354            Some(OpCode::Subtract) => self.simple_instruction("SUBTRACT", offset),
355            Some(OpCode::Multiply) => self.simple_instruction("MULTIPLY", offset),
356            Some(OpCode::Divide) => self.simple_instruction("DIVIDE", offset),
357            Some(OpCode::Negate) => self.simple_instruction("NEGATE", offset),
358            Some(OpCode::Return) => self.simple_instruction("RETURN", offset),
359            Some(OpCode::Print) => self.simple_instruction("PRINT", offset),
360            Some(OpCode::GetVariable) => self.variable_instruction("GET_VARIABLE", offset),
361            Some(OpCode::SetVariable) => self.variable_instruction("SET_VARIABLE", offset),
362            Some(OpCode::Pop) => self.simple_instruction("POP", offset),
363            Some(OpCode::DefineFunction) => self.variable_instruction("DEFINE_FUNCTION", offset),
364            Some(OpCode::Call) => {
365                let arg_count = self.code[offset + 1];
366                println!("{:<16} {:4} args", "CALL", arg_count);
367                offset + 2
368            }
369            Some(OpCode::JumpIfFalse) => {
370                let jump_offset =
371                    ((self.code[offset + 1] as usize) << 8) | (self.code[offset + 2] as usize);
372                println!(
373                    "{:<16} {:4} -> {}",
374                    "JUMP_IF_FALSE",
375                    offset,
376                    offset + 3 + jump_offset
377                );
378                offset + 3
379            }
380            Some(OpCode::Jump) => {
381                let jump_offset =
382                    ((self.code[offset + 1] as usize) << 8) | (self.code[offset + 2] as usize);
383                println!(
384                    "{:<16} {:4} -> {}",
385                    "JUMP",
386                    offset,
387                    offset + 3 + jump_offset
388                );
389                offset + 3
390            }
391            Some(OpCode::BoolNot) => self.simple_instruction("BOOL_NOT", offset),
392            Some(OpCode::BoolAnd) => self.simple_instruction("BOOL_AND", offset),
393            Some(OpCode::BoolOr) => self.simple_instruction("BOOL_OR", offset),
394            Some(OpCode::Greater) => self.simple_instruction("GREATER", offset),
395            Some(OpCode::Less) => self.simple_instruction("LESS", offset),
396            Some(OpCode::GreaterEqual) => self.simple_instruction("GREATER_EQUAL", offset),
397            Some(OpCode::LessEqual) => self.simple_instruction("LESS_EQUAL", offset),
398            Some(OpCode::Equal) => self.simple_instruction("EQUAL", offset),
399            Some(OpCode::NotEqual) => self.simple_instruction("NOT_EQUAL", offset),
400            Some(OpCode::BeginScope) => self.simple_instruction("BEGIN_SCOPE", offset),
401            Some(OpCode::EndScope) => self.simple_instruction("END_SCOPE", offset),
402            None => {
403                println!("Unknown opcode: {}", instruction);
404                offset + 1
405            }
406        }
407    }
408
409    /// Helper for disassembling simple instructions
410    ///
411    /// ### Arguments
412    /// * `name` - The name of the instruction
413    /// * `offset` - The offset in the bytecode
414    ///
415    /// ### Returns
416    /// The new offset after disassembling the instruction
417    #[cfg(feature = "print-byte_code")]
418    fn simple_instruction(&self, name: &str, offset: usize) -> usize {
419        println!("{}", name);
420        offset + 1
421    }
422
423    /// Helper for disassembling instructions with constant operands
424    ///
425    /// ### Arguments
426    /// * `name` - The name of the instruction
427    /// * `offset` - The offset in the bytecode
428    ///
429    /// ### Returns
430    /// The new offset after disassembling the instruction
431    #[cfg(feature = "print-byte_code")]
432    fn simple_instruction_with_operand(&self, name: &str, offset: usize) -> usize {
433        let constant_index = self.code[offset + 1];
434        println!(
435            "{:<16} {:4} '{}'",
436            name, constant_index, self.constants[constant_index as usize]
437        );
438        offset + 2
439    }
440
441    /// Helper for disassembling instructions with variable operands
442    ///
443    /// ### Arguments
444    /// * `name` - The name of the instruction
445    /// * `offset` - The offset in the bytecode
446    ///
447    /// ### Returns
448    /// The new offset after disassembling the instruction
449    #[cfg(feature = "print-byte_code")]
450    fn variable_instruction(&self, name: &str, offset: usize) -> usize {
451        let var_index = self.code[offset + 1];
452        println!(
453            "{:<16} {:4} '{}'",
454            name, var_index, self.identifiers[var_index as usize]
455        );
456        offset + 2
457    }
458}