This commit is contained in:
hkau 2024-01-25 20:00:23 -05:00
commit 7854d28102
7 changed files with 419 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
/target
*.py
Cargo.lock

9
Cargo.toml Normal file
View File

@ -0,0 +1,9 @@
[package]
name = "amethystine"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
regex = "1.10.3"

21
LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 hkau
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

13
README.md Normal file
View File

@ -0,0 +1,13 @@
# Amethystine
Amethystine (named after the "Amethystine Python") is a lightweight Python interpreter written in Rust. It uses AST (abstract syntax tree) interpretation. The tree is generated manually by Amethystine.
## Timeline
- **2024-01-25**: (initial) Ability to parse function calls, function declarations, floats, strings, and variable declarations into an AST
## (final project information)
Amethystine was written for my final project. While not technically written in Python, the output shows a deep understanding of Python and its syntax. Amethystine at v0.1.0 implements only the functions required for me to pass the project.
You can pass the `--ast` flag to the interpreter to see the AST in the console.

303
src/ast.rs Normal file
View File

@ -0,0 +1,303 @@
use std::collections::HashMap;
/// Root of the tree produced by `to_tree`.
///
/// The program is stored as a flat, ordered list of blocks; the order is the
/// order in which the parser emitted them while scanning the source.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct AMProgram {
    /// Every block found in the source, in emission order.
    pub blocks: Vec<AMBlock>,
}
/// A single node of the tree produced by `to_tree`.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct AMBlock {
    /// Kind of block. `to_tree` emits "String", "FnCall", "FnDec", "VarDec",
    /// "Float" and "Raw".
    pub btype: String,
    /// Index at which the block starts. `to_tree` only records a real start
    /// for "String", "Float" and "Raw" blocks; the other kinds get `0`.
    pub start: usize,
    /// Index of the character at which the block was emitted.
    pub end: usize,
    /// Text captured for the block (whitespace-trimmed by `to_tree`), if any.
    pub value: Option<String>,
    /// Nested blocks. `to_tree` always leaves this empty.
    pub children: Vec<AMBlock>,
    /// Role tags attached by the parser, e.g. "arg_of_previous_FnCall",
    /// "arg_of_previous_VarDec" or "return_value".
    pub arguments: Vec<String>,
}
// ...
/// Scans `input` one character at a time and builds a flat [`AMProgram`].
///
/// The scanner accumulates characters in a text buffer and emits an
/// [`AMBlock`] whenever it recognises the end of a construct:
///
/// - `"` opens/closes a "String" block (quotes are not part of the value)
/// - `(` emits an "FnCall" block, or an "FnDec" block when the buffer starts with "def"
/// - `=` emits a "VarDec" block and flags the next block as its value
/// - ASCII digits start a "Float" block, which ends at the next non-digit character
/// - whitespace emits a "Raw" block for whatever word characters are buffered
///
/// Emitted blocks may be tagged (through `AMBlock::arguments`) as
/// "arg_of_previous_FnCall", "arg_of_previous_VarDec" or "return_value",
/// depending on the markers queued in `arguments_state`.
///
/// `children` is never populated here; every block is pushed to the top-level list.
///
/// # Panics
///
/// Panics if either hard-coded regular expression fails to build.
// NOTE(review): the loop index `i` counts characters, but it is also used as a byte
// offset (`input.get(i..i + 5)`) and mixed with `buffer.len()` (bytes) in
// `i - buffer.len()`. These agree for ASCII input only - confirm behaviour for
// non-ASCII sources.
pub fn to_tree(input: String) -> AMProgram {
// get chars
let chars = input.chars();
// characters that open a new block (they are never appended to the buffer)
let mut opening_marks: Vec<&str> = Vec::new(); // we're going to store the opening marks that we use to detect a new block here
opening_marks.push("\""); // String
opening_marks.push("("); // FnCall
opening_marks.push("="); // VarDec
let mut forbidden_raw_buffers: Vec<&str> = Vec::new(); // we're going to store things that shouldn't be used as the value of "Raw" in here
forbidden_raw_buffers.push("def");
forbidden_raw_buffers.push("for");
// create out
let mut out: AMProgram = AMProgram { blocks: Vec::new() };
// parser state
let mut state: HashMap<String, String> = HashMap::new(); // store state values (k,v store); uses the keys "btype" and "start"
let mut arguments_state: Vec<String> = Vec::new(); // store state arguments (queued markers: "next_is_arg", "next_is_val", "return_next_block")
let mut buffer = String::new(); // the "buffer" holds the current string being inspected,
// it's not stored in the state HashMap because it would be too expensive to update the map every char
for (i, char) in chars.enumerate() {
let char_str = &char.to_string();
// snapshot of the state taken at the start of this iteration;
// `current_btype` does not reflect changes made to `state` further down
let state_c = state.clone();
let current_btype = state_c.get("btype");
let mut add_block: Option<AMBlock> = Option::None; // stores the block that we want to add to the tree
// append to buffer if char_str isn't an opening mark (`"`, `(` or `=`)
// this way String blocks don't include the quotation marks
if opening_marks.contains(&char_str.as_str()) == false {
// technically, state.start COULD be defined here *if* buffer.is_empty() is true
// this would allow blocks like FnCall and VarDec to actually have a start value, but ending only is okay
buffer += char_str;
}
// string literal
// a '"' either opens or closes a string (stored as AMBlock type "String")
if char_str == opening_marks.get(0).unwrap() {
// if no block type is active we're opening a string
if current_btype.is_none() {
state.insert(String::from("btype"), String::from("String"));
state.insert(String::from("start"), i.to_string());
// clear buffer (remove straggling characters)
buffer = String::new();
} else {
// otherwise, we're terminating a string
// (this branch is taken for ANY active block type, not only "String")
state.remove("btype");
// push block
add_block = Option::Some(AMBlock {
btype: String::from("String"),
start: state.get("start").unwrap().parse::<usize>().unwrap(), // get start from "start" value in HashMap
end: i, // we're ending here
value: Option::Some(buffer.clone()), // String contains a value
children: Vec::new(), // String contains no children
arguments: Vec::new(),
});
}
}
// function call
else if char_str == opening_marks.get(1).unwrap() {
if current_btype.is_none() {
// if buffer contains \n, the function call is everything after the first line
// (the remaining lines are concatenated without their newlines)
if buffer.contains("\n") {
let split = buffer.split("\n");
let new_split = split.skip(1); // remove first
buffer = new_split.map(|s| s.to_string()).collect::<String>()
}
// push block
add_block = Option::Some(AMBlock {
// if the buffer starts with "def", we're looking at a FnDec NOT an FnCall!
btype: if buffer.starts_with("def") {
buffer = buffer.get(3..buffer.len()).unwrap().to_string(); // remove "def" from buffer
// ...a side effect of this is function names cannot start with "def"
String::from("FnDec")
} else {
String::from("FnCall")
},
start: 0,
end: i,
value: Option::Some(buffer.clone()),
children: Vec::new(),
arguments: Vec::new(),
});
}
} else if char_str == ")" {
// a closing parenthesis consumes one queued "next_is_arg" marker
// make sure that argument exists before trying to remove it!
let index = arguments_state.iter().position(|a| a == "next_is_arg");
if index.is_some() {
arguments_state.remove(index.unwrap());
}
}
// variable declaration
else if char_str == opening_marks.get(2).unwrap() {
if current_btype.is_none() {
arguments_state.push(String::from("next_is_val")); // this will tell us that the next block is the value
// push block (the buffered text before "=" becomes the value of the VarDec)
add_block = Option::Some(AMBlock {
btype: String::from("VarDec"),
start: 0,
end: i,
value: Option::Some(buffer.clone()),
children: Vec::new(),
arguments: Vec::new(),
});
}
}
// numbers
// python doesn't have number types like i32, f32, usize, etc... python just basically uses float for everything
else if char.is_ascii_digit() {
// this is the first demonstration of ending with whitespace instead of a termination character!
let next_chars = input.get(i..i + 5); // we're going to look ahead 5 characters,
// if this look ahead includes a FnCall start ("("), don't start float
// NOTE(review): `get` returns None when `i + 5` is past the end of `input`, so a
// number that begins within the last four bytes of the input never starts a Float
if current_btype.is_none() && next_chars.is_some() && !next_chars.unwrap().contains("(")
{
// denote that we're handling a number (append-only state)
state.insert(String::from("btype"), String::from("Float"));
state.insert(String::from("start"), i.to_string());
// clear buffer (remove straggling characters)
buffer = String::from(char_str);
}
}
// return statement
else if buffer.trim().ends_with("return") {
if current_btype.is_none() {
// the next emitted block will be tagged "return_value"
arguments_state.push(String::from("return_next_block"));
}
}
// others (terminates all active append-only states)
else if current_btype.is_some() {
let unwrap: &String = current_btype.unwrap();
if unwrap == "Float" && !char.is_ascii_digit() && char_str != "(" {
// float is terminated by all characters that aren't a number (and aren't opening a function)
// making sure we're not beginning FnCall means we can have numbers in function names!
state.remove("btype");
// remove everything from buffer that isn't a number (regex)
let regex: regex::Regex = regex::RegexBuilder::new(r"[^\d]").build().unwrap();
for capture in regex.captures_iter(&buffer.clone()) {
buffer = buffer.replace(capture.get(0).unwrap().as_str(), "");
}
// push block
add_block = Option::Some(AMBlock {
btype: String::from("Float"),
start: state.get("start").unwrap().parse::<usize>().unwrap(),
end: i,
value: Option::Some(buffer.clone()),
children: Vec::new(),
arguments: Vec::new(),
});
}
} else if char.is_whitespace() {
// clear buffer on whitespace where we aren't in a block
// buffer.clear();
// save as raw block (variable mentions, function arguments in FnDec, etc)
// the actual usage of these will be determined by the interpreter
if current_btype.is_none() && buffer.len() > 0 {
// if buffer includes ", ", ready the parser to accept a function parameter!
if buffer.contains(", ") {
// (must be pushed three times)
arguments_state.push(String::from("next_is_arg"));
arguments_state.push(String::from("next_is_arg"));
arguments_state.push(String::from("next_is_arg"));
}
// remove everything from buffer that isn't a word character (regex)
let regex: regex::Regex = regex::RegexBuilder::new(r"[^\w]").build().unwrap();
for capture in regex.captures_iter(&buffer.clone()) {
buffer = buffer.replace(capture.get(0).unwrap().as_str(), "");
}
// push block
// (a forbidden keyword such as "def" is NOT emitted and stays in the buffer,
// which is what lets the "(" branch recognise a FnDec later on)
if buffer.len() > 0 && !forbidden_raw_buffers.contains(&buffer.as_str()) {
add_block = Option::Some(AMBlock {
btype: String::from("Raw"),
start: i - buffer.len(),
end: i,
value: Option::Some(buffer.clone()),
children: Vec::new(),
arguments: Vec::new(),
});
}
}
}
// if the most recent block was an FnCall argument and the character is a comma, enable "next_is_arg" (again)
let blocks_len = out.blocks.iter().len();
if blocks_len > 1 {
let previous_block = out.blocks.get(blocks_len - 1);
if previous_block.is_some()
&& previous_block
.unwrap()
.arguments
.contains(&String::from("arg_of_previous_FnCall"))
&& char_str == ","
{
arguments_state.push(String::from("next_is_arg"));
arguments_state.push(String::from("next_is_arg")); // (insert it twice so this AND the next block are arguments)
// fixes bug that I didn't include in the errors sheet
}
}
// finalize: tag the block according to the queued markers and append it to the tree
if add_block.is_some() {
let mut block = add_block.unwrap();
// if this block is supposed to be an argument ("next_is_arg" is some),
// mention it's an argument in its attributes
if arguments_state.contains(&String::from("next_is_arg"))
&& block.btype != String::from("FnCall")
&& block.btype != String::from("FnDec")
{
// consume exactly one queued marker
arguments_state.remove(
arguments_state
.iter()
.position(|a| a == "next_is_arg")
.unwrap(),
);
// we're going to denote that this is an argument of the previous function call
// this value can later be used when interpreting to collect all arguments of an FnCall
block.arguments.push(String::from("arg_of_previous_FnCall"));
}
// if this block is supposed to be a value for a variable ("next_is_val" is some),
// mention it's a variable value in its attributes
else if arguments_state.contains(&String::from("next_is_val"))
&& block.btype != String::from("VarDec")
{
arguments_state.remove(
arguments_state
.iter()
.position(|a| a == "next_is_val")
.unwrap(),
);
block.arguments.push(String::from("arg_of_previous_VarDec"));
}
// return statement
else if arguments_state.contains(&String::from("return_next_block")) {
arguments_state.remove(
arguments_state
.iter()
.position(|a| a == "return_next_block")
.unwrap(),
);
block.arguments.push(String::from("return_value"));
}
// append block to tree
if block.value.is_some() {
// trim whitespace
block.value = Option::Some(block.value.unwrap().trim().to_string())
}
out.blocks.push(block);
// clear buffer
buffer.clear();
// clear state stores
// state.clear();
// arguments_state.clear();
}
}
// return
return out;
}

36
src/config.rs Normal file
View File

@ -0,0 +1,36 @@
use std::{env, ops::Index};
/// Returns the arguments the process was started with, in order.
///
/// The list comes straight from `env::args()`, so it contains whatever that
/// iterator yields.
pub fn collect_arguments() -> Vec<String> {
    let argv: Vec<String> = env::args().collect();
    argv
}
/// Looks up the value that follows the flag `--<name>` in `args`.
///
/// Only the first occurrence of the flag is considered.
///
/// Returns `Some(value)` when the flag is present and followed by a non-empty
/// argument. Returns `None` when the flag is absent, when it is the last
/// argument (there is no value after it), or when the value is an empty string.
pub fn get_named_argument(args: &Vec<String>, name: &str) -> Option<String> {
    let flag = format!("--{}", name);

    // locate the first occurrence of the flag; no flag means no value
    let flag_index = args.iter().position(|arg| *arg == flag)?;

    // checked lookup: a trailing flag yields `None` instead of an
    // out-of-bounds panic
    args.get(flag_index + 1)
        .filter(|value| !value.is_empty())
        .cloned()
}
/// Reads the environment variable `var`.
///
/// Returns `Some(value)` when `env::var` succeeds and `None` on any error
/// reported by it.
pub fn get_var(var: &str) -> Option<String> {
    env::var(var).ok()
}

34
src/main.rs Normal file
View File

@ -0,0 +1,34 @@
// amethystine entry
mod ast;
mod config;
// (main)
/// Entry point: reads the file named by the first command-line argument,
/// turns its contents into a tree with `ast::to_tree`, and dumps that tree
/// with `dbg!` when `--ast` is among the arguments.
///
/// Panics when no path argument is given or the file cannot be read.
fn main() {
    let args: Vec<String> = config::collect_arguments();

    // the first argument after the program name is the file path (required)
    let file_path = args.get(1).expect("Failed to read argument");

    // the underlying I/O error is intentionally discarded, only the fixed message is shown
    let source = std::fs::read_to_string(file_path)
        .unwrap_or_else(|_| panic!("Failed to read passed file! (did you specify a file?)"));

    // options
    let do_log_ast = args.iter().any(|arg| arg == "--ast");

    // build the tree, printing it only on request
    let tree: ast::AMProgram = ast::to_tree(source);
    if do_log_ast {
        dbg!(tree);
    }
}