// Source listing metadata: C++, 542 lines, 20 KiB.
#include "tree_structurer.hpp"
|
||
#include <map>
|
||
#include <algorithm>
|
||
#include <filesystem>
|
||
#include <fstream>
|
||
#include <sstream>
|
||
#include <stdexcept>
|
||
#include <functional>
|
||
#include <iostream>
|
||
|
||
namespace fs = std::filesystem;
|
||
|
||
// Decides whether a path component should be left out of the directory tree.
//
// A name is ignored when it is one of the well-known build/tooling directories
// listed below, or when it starts with '.' or '_' (hidden/private entries).
// Exceptions that are never ignored:
//   - the empty string,
//   - the navigation components "." and ".." (they are not hidden entries; a
//     caller that checks every component of a path such as "./src" or
//     "../project" would otherwise discard everything beneath it),
//   - "__main__.py" and "__init__.py", which callers may pass when they check
//     the last component of a file path.
//
// @param dirname  A single path component (not a full path).
// @return true if the component should be skipped.
bool TreeStructurer::should_ignore_dir(const std::string& dirname) {
    // The garbled duplicate "**pycache**" was dropped; "__pycache__" covers it.
    static const std::vector<std::string> ignore_list = {
        "build", "venv", "myenv", "dist", "node_modules", "CMakeFiles",
        ".git", ".idea", ".vscode", "__pycache__"
    };

    if (dirname.empty()) {
        return false;
    }

    // Navigation components start with '.' but are not hidden directories.
    if (dirname == "." || dirname == "..") {
        return false;
    }

    // Allow these Python files even though their names start with underscores.
    if (dirname == "__main__.py" || dirname == "__init__.py") {
        return false;
    }

    if (std::find(ignore_list.begin(), ignore_list.end(), dirname) != ignore_list.end()) {
        return true;
    }

    // Any remaining name with a leading '.' or '_' is treated as hidden. This
    // also covers dunder names such as "__foo__", so no separate check is needed.
    return dirname[0] == '.' || dirname[0] == '_';
}
|
||
|
||
// Decides whether a file name should be left out of the directory tree.
//
// "__main__.py" and "__init__.py" are always kept. Otherwise a file is skipped
// when its name starts with '.' or '_', or when its extension is one of the
// compiled/binary artifact extensions listed below.
//
// @param filename  The file's name (last path component).
// @return true if the file should be skipped.
bool TreeStructurer::should_ignore_file(const std::string& filename) {
    static const std::vector<std::string> ignore_extensions = {
        ".pyc", ".pyo", ".pyd", ".so", ".dll", ".dylib",
        ".o", ".obj", ".a", ".lib"
    };

    const bool is_python_entry_point =
        filename == "__main__.py" || filename == "__init__.py";
    if (is_python_entry_point) {
        return false;
    }

    if (!filename.empty()) {
        const char first = filename.front();
        if (first == '.' || first == '_') {
            return true;
        }
    }

    const std::string suffix = fs::path(filename).extension().string();
    return std::any_of(ignore_extensions.begin(), ignore_extensions.end(),
                       [&suffix](const std::string& candidate) {
                           return candidate == suffix;
                       });
}
|
||
|
||
// Returns `path` expressed relative to `base`, as a string.
// Delegates to std::filesystem::relative, so any filesystem_error it raises
// propagates to the caller.
std::string TreeStructurer::get_relative_path(const fs::path& path, const fs::path& base) {
    return fs::relative(path, base).string();
}
|
||
|
||
|
||
// Collects `start` plus every non-ignored directory and file beneath it.
//
// Filtering is applied only to entries found *below* `start`: the components of
// `start` itself are never tested, so a root that lives under (or is named
// like) an ignored directory - "build/project", "/home/me/.config/x", ".." -
// is still listed. Ignored directories are pruned from the walk, so their
// contents are neither visited nor returned.
//
// An entry is skipped when should_ignore_dir() rejects its name (this applies
// to files as well as directories), and a file is additionally skipped when
// should_ignore_file() rejects it.
//
// @param start  Directory to walk.
// @return `start` followed by the accepted paths, sorted with fs::path's
//         ordering.
// @throws std::runtime_error if `start` does not exist, is not a directory, is
//         empty, or if the filesystem reports an error during the walk.
std::vector<fs::path> TreeStructurer::get_filtered_paths(const fs::path& start) {
    std::vector<fs::path> paths;
    const fs::directory_options options = fs::directory_options::skip_permission_denied;

    try {
        if (!fs::exists(start)) {
            throw std::runtime_error("Directory does not exist: " + start.string());
        }

        if (!fs::is_directory(start)) {
            throw std::runtime_error("Path is not a directory: " + start.string());
        }

        paths.push_back(start);

        const bool is_empty = fs::directory_iterator(start) == fs::directory_iterator();
        if (is_empty) {
            throw std::runtime_error("Directory is empty: " + start.string());
        }

        // An explicit iterator is needed so ignored directories can be pruned.
        for (fs::recursive_directory_iterator it(start, options), end; it != end; ++it) {
            const fs::path& path = it->path();
            const std::string name = path.filename().string();
            const bool is_dir = it->is_directory();

            if (should_ignore_dir(name)) {
                if (is_dir) {
                    // Do not descend: nothing below an ignored directory is listed.
                    it.disable_recursion_pending();
                }
                continue;
            }

            if (is_dir || !should_ignore_file(name)) {
                paths.push_back(path);
            }
        }
    } catch (const fs::filesystem_error& e) {
        throw std::runtime_error("Error accessing path: " + std::string(e.what()));
    }

    std::sort(paths.begin(), paths.end());
    return paths;
}
|
||
|
||
|
||
// -----------------------------------------------------------------------------
|
||
// Directory Structure Generation (tree printing)
|
||
// -----------------------------------------------------------------------------
|
||
|
||
std::vector<std::string> TreeStructurer::get_directory_structure(const std::string& startpath) {
|
||
std::vector<std::string> result;
|
||
std::string normalized_path = startpath;
|
||
if (normalized_path.size() >= 2 &&
|
||
(normalized_path.substr(0, 2) == ".\\" || normalized_path.substr(0, 2) == "./")) {
|
||
normalized_path = normalized_path.substr(2);
|
||
}
|
||
fs::path start = normalized_path.empty() ? fs::current_path() : fs::path(normalized_path);
|
||
|
||
try {
|
||
if (!fs::exists(start)) {
|
||
throw std::runtime_error("Directory does not exist: " + start.string());
|
||
}
|
||
|
||
if (!fs::is_directory(start)) {
|
||
throw std::runtime_error("Path is not a directory: " + start.string());
|
||
}
|
||
|
||
std::vector<fs::path> paths = get_filtered_paths(start);
|
||
std::vector<bool> is_last_per_level;
|
||
|
||
// Skip the first path as it's the root
|
||
for (size_t i = 1; i < paths.size(); ++i) {
|
||
fs::path relative = fs::relative(paths[i], start);
|
||
std::vector<std::string> components;
|
||
for (const auto& comp : relative) {
|
||
components.push_back(comp.string());
|
||
}
|
||
|
||
// Calculate the current level
|
||
size_t level = components.size() - 1;
|
||
|
||
// Adjust is_last_per_level vector size
|
||
while (is_last_per_level.size() <= level) {
|
||
is_last_per_level.push_back(false);
|
||
}
|
||
|
||
// Build the line prefix
|
||
std::string line;
|
||
for (size_t j = 0; j <= level; ++j) {
|
||
if (j == level) {
|
||
// This is the connector for the current item
|
||
bool is_last_sibling = true;
|
||
|
||
// Look ahead to find next sibling at the same level
|
||
for (size_t k = i + 1; k < paths.size(); ++k) {
|
||
fs::path next_relative = fs::relative(paths[k], start);
|
||
std::vector<std::string> next_components;
|
||
for (const auto& comp : next_relative) {
|
||
next_components.push_back(comp.string());
|
||
}
|
||
|
||
// Check if it's a sibling (same parent, same level)
|
||
if (next_components.size() == components.size()) {
|
||
bool same_parent = true;
|
||
for (size_t l = 0; l < level; ++l) {
|
||
if (l >= next_components.size() || components[l] != next_components[l]) {
|
||
same_parent = false;
|
||
break;
|
||
}
|
||
}
|
||
if (same_parent) {
|
||
is_last_sibling = false;
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
line += is_last_sibling ? "└── " : "├── ";
|
||
} else {
|
||
// This is a vertical line for parent levels
|
||
bool needs_vertical_line = false;
|
||
|
||
// Check if there are future items that share this ancestor
|
||
for (size_t k = i + 1; k < paths.size(); ++k) {
|
||
fs::path next_relative = fs::relative(paths[k], start);
|
||
std::vector<std::string> next_components;
|
||
for (const auto& comp : next_relative) {
|
||
next_components.push_back(comp.string());
|
||
}
|
||
|
||
// If next item shares the same path up to level j
|
||
if (next_components.size() > j) {
|
||
bool shares_ancestor = true;
|
||
for (size_t l = 0; l <= j; ++l) {
|
||
if (l >= next_components.size() || l >= components.size() ||
|
||
components[l] != next_components[l]) {
|
||
shares_ancestor = false;
|
||
break;
|
||
}
|
||
}
|
||
if (shares_ancestor) {
|
||
needs_vertical_line = true;
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
line += needs_vertical_line ? "│ " : " ";
|
||
}
|
||
}
|
||
|
||
// Add the file/directory name
|
||
line += components.back();
|
||
if (fs::is_directory(paths[i])) {
|
||
line += "/";
|
||
}
|
||
|
||
result.push_back(line);
|
||
}
|
||
|
||
} catch (const fs::filesystem_error& e) {
|
||
throw std::runtime_error("Failed to access directory: " + std::string(e.what()));
|
||
}
|
||
|
||
return result;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------------
|
||
// Structure Creation from a Tree-like File or String
|
||
// -----------------------------------------------------------------------------
|
||
void TreeStructurer::create_structure_from_file(const std::string& filepath,
|
||
const std::string& target_path) {
|
||
std::vector<std::string> lines = read_structure_file(filepath);
|
||
validate_structure(lines);
|
||
TreeNode root = build_tree_from_lines(lines);
|
||
|
||
fs::path target(target_path);
|
||
|
||
// Check if the root directory name matches the target directory name
|
||
if (root.name == target.filename().string()) {
|
||
// If names match, create the children directly in the target directory
|
||
for (const auto& child : root.children) {
|
||
create_node(child, target);
|
||
}
|
||
} else {
|
||
// If names don't match, create the full structure including root
|
||
create_node(root, target);
|
||
}
|
||
}
|
||
|
||
// -----------------------------------------------------------------------------
|
||
// Private Helper Functions (no duplicates)
|
||
// -----------------------------------------------------------------------------
|
||
|
||
// Returns a string consisting of (level * 4) spaces.
|
||
std::string TreeStructurer::create_indent(size_t level) {
|
||
return std::string(level * 4, ' ');
|
||
}
|
||
|
||
// Determines the "indent level" of a line by scanning it in 4‑character groups.
|
||
// Recognizes either a blank indent (" " or "│ ") or a branch marker ("├── " or "└── ").
|
||
|
||
size_t TreeStructurer::get_indent_level(const std::string& line) {
|
||
size_t indent = 0;
|
||
size_t pos = 0;
|
||
|
||
while (pos < line.length()) {
|
||
if (pos >= line.length()) break;
|
||
|
||
// Check for basic space indent (4 spaces)
|
||
if (pos + 3 < line.length() && line.substr(pos, 4) == " ") {
|
||
indent++;
|
||
pos += 4;
|
||
continue;
|
||
}
|
||
|
||
// Check for │ (vertical line) followed by 3 spaces
|
||
// Bytes: E2 94 82 20 20 20
|
||
if (pos + 5 < line.length() &&
|
||
static_cast<unsigned char>(line[pos]) == 0xE2 &&
|
||
static_cast<unsigned char>(line[pos + 1]) == 0x94 &&
|
||
static_cast<unsigned char>(line[pos + 2]) == 0x82 &&
|
||
line[pos + 3] == ' ' &&
|
||
line[pos + 4] == ' ' &&
|
||
line[pos + 5] == ' ') {
|
||
indent++;
|
||
pos += 6;
|
||
continue;
|
||
}
|
||
|
||
// Check for ├── or └── (branch or corner followed by dashes and space)
|
||
// ├ = E2 94 9C
|
||
// └ = E2 94 94
|
||
// ─ = E2 94 80
|
||
if (pos + 8 < line.length() &&
|
||
static_cast<unsigned char>(line[pos]) == 0xE2 &&
|
||
static_cast<unsigned char>(line[pos + 1]) == 0x94 &&
|
||
(static_cast<unsigned char>(line[pos + 2]) == 0x9C || // ├
|
||
static_cast<unsigned char>(line[pos + 2]) == 0x94) && // └
|
||
static_cast<unsigned char>(line[pos + 3]) == 0xE2 &&
|
||
static_cast<unsigned char>(line[pos + 4]) == 0x94 &&
|
||
static_cast<unsigned char>(line[pos + 5]) == 0x80 && // ─
|
||
static_cast<unsigned char>(line[pos + 6]) == 0xE2 &&
|
||
static_cast<unsigned char>(line[pos + 7]) == 0x94 &&
|
||
static_cast<unsigned char>(line[pos + 8]) == 0x80 && // ─
|
||
(pos + 9 >= line.length() || line[pos + 9] == ' ')) {
|
||
indent++;
|
||
break; // We've found our indent marker, stop here
|
||
}
|
||
|
||
// If we get here without finding a valid indent pattern, we're done
|
||
break;
|
||
}
|
||
|
||
return indent;
|
||
}
|
||
// Parses a single line of the structure (after knowing its indent level) and returns a TreeNode.
|
||
// The function "consumes" indent groups until the branch marker.
|
||
TreeStructurer::TreeNode TreeStructurer::parse_structure_line(const std::string& line, size_t indent_level) {
|
||
size_t pos = 0;
|
||
size_t current_indent = 0;
|
||
|
||
// Skip through indentation patterns
|
||
while (current_indent < indent_level && pos < line.length()) {
|
||
// Check for basic space indent
|
||
if (pos + 3 < line.length() && line.substr(pos, 4) == " ") {
|
||
pos += 4;
|
||
current_indent++;
|
||
continue;
|
||
}
|
||
|
||
// Check for │ followed by spaces
|
||
if (pos + 5 < line.length() &&
|
||
static_cast<unsigned char>(line[pos]) == 0xE2 &&
|
||
static_cast<unsigned char>(line[pos + 1]) == 0x94 &&
|
||
static_cast<unsigned char>(line[pos + 2]) == 0x82 &&
|
||
line[pos + 3] == ' ' &&
|
||
line[pos + 4] == ' ' &&
|
||
line[pos + 5] == ' ') {
|
||
pos += 6;
|
||
current_indent++;
|
||
continue;
|
||
}
|
||
|
||
// Check for ├── or └── pattern
|
||
if (pos + 9 < line.length() &&
|
||
static_cast<unsigned char>(line[pos]) == 0xE2 &&
|
||
static_cast<unsigned char>(line[pos + 1]) == 0x94 &&
|
||
(static_cast<unsigned char>(line[pos + 2]) == 0x9C ||
|
||
static_cast<unsigned char>(line[pos + 2]) == 0x94)) {
|
||
pos += 10; // Skip the entire pattern including space
|
||
current_indent++;
|
||
break;
|
||
}
|
||
|
||
pos++;
|
||
}
|
||
|
||
// Extract the name (everything after the indentation)
|
||
std::string name = line.substr(pos);
|
||
name = sanitize_path(name);
|
||
|
||
bool is_file = true;
|
||
if (!name.empty() && name.back() == '/') {
|
||
is_file = false;
|
||
name.pop_back();
|
||
}
|
||
|
||
return TreeNode{name, is_file, {}};
|
||
}
|
||
// Builds a tree (with TreeNode nodes) from the vector of structure lines.
|
||
// The first line is assumed to be the root.
|
||
TreeStructurer::TreeNode TreeStructurer::build_tree_from_lines(const std::vector<std::string>& lines) {
|
||
if (lines.empty()) {
|
||
throw std::runtime_error("Empty structure provided");
|
||
}
|
||
// Process the first line as the root.
|
||
TreeNode root = parse_structure_line(lines[0], 0);
|
||
if (root.is_file) {
|
||
throw std::runtime_error("Root must be a directory");
|
||
}
|
||
std::vector<TreeNode*> stack;
|
||
stack.push_back(&root);
|
||
|
||
// Process each subsequent line.
|
||
for (size_t i = 1; i < lines.size(); ++i) {
|
||
size_t indent = get_indent_level(lines[i]);
|
||
TreeNode node = parse_structure_line(lines[i], indent);
|
||
if (indent > stack.size()) {
|
||
throw std::runtime_error("Invalid indentation structure in the file");
|
||
}
|
||
while (stack.size() > indent) {
|
||
stack.pop_back();
|
||
}
|
||
if (stack.empty()) {
|
||
throw std::runtime_error("Invalid indentation structure in the file");
|
||
}
|
||
stack.back()->children.push_back(node);
|
||
if (!node.is_file) {
|
||
// Push a pointer to the newly added child.
|
||
stack.push_back(&stack.back()->children.back());
|
||
}
|
||
}
|
||
return root;
|
||
}
|
||
|
||
// Recursively creates directories and files on disk according to the tree.
|
||
void TreeStructurer::create_node(const TreeNode& node, const fs::path& current_path) {
|
||
fs::path new_path = current_path / node.name;
|
||
try {
|
||
if (node.is_file) {
|
||
// Ensure the parent directory exists.
|
||
fs::path parent = new_path.parent_path();
|
||
if (!fs::exists(parent)) {
|
||
fs::create_directories(parent);
|
||
}
|
||
create_file(new_path);
|
||
} else {
|
||
create_directory(new_path);
|
||
for (const auto& child : node.children) {
|
||
create_node(child, new_path);
|
||
}
|
||
}
|
||
} catch (const fs::filesystem_error& e) {
|
||
throw std::runtime_error("Failed to create path '" + new_path.string() + "': " + e.what());
|
||
}
|
||
}
|
||
|
||
// Returns true if the given line’s last character is a directory marker.
|
||
bool TreeStructurer::is_directory_marker(const std::string& line) {
|
||
return (!line.empty() && line.back() == static_cast<char>(DIRECTORY_MARKER));
|
||
}
|
||
|
||
// Creates a directory (and any necessary parent directories).
|
||
void TreeStructurer::create_directory(const fs::path& path) {
|
||
if (!fs::exists(path)) {
|
||
fs::create_directories(path);
|
||
}
|
||
}
|
||
|
||
// Creates an empty file.
|
||
void TreeStructurer::create_file(const fs::path& path) {
|
||
if (!fs::exists(path)) {
|
||
std::ofstream ofs(path);
|
||
if (!ofs.is_open()) {
|
||
throw std::runtime_error("Failed to create file: " + path.string());
|
||
}
|
||
ofs.close();
|
||
}
|
||
}
|
||
|
||
// Reads a structure file into a vector of non-empty lines, ignoring comments.
|
||
std::vector<std::string> TreeStructurer::read_structure_file(const std::string& filepath) {
|
||
std::vector<std::string> lines;
|
||
// Open file in binary mode to avoid Windows CRLF conversion
|
||
std::ifstream file(filepath, std::ios::binary);
|
||
if (!file.is_open()) {
|
||
throw std::runtime_error("Failed to open file: " + filepath);
|
||
}
|
||
|
||
std::string line;
|
||
|
||
while (std::getline(file, line)) {
|
||
// Remove carriage return if present (Windows files)
|
||
if (!line.empty() && line.back() == '\r') {
|
||
line.pop_back();
|
||
}
|
||
|
||
size_t hash_pos = line.find('#');
|
||
size_t single_line_comment_pos = line.find("//");
|
||
size_t multi_line_comment_start_pos = line.find("/*");
|
||
|
||
if (hash_pos != std::string::npos) {
|
||
// Trim the line at the hash comment
|
||
line = line.substr(0, hash_pos);
|
||
} else if (single_line_comment_pos != std::string::npos) {
|
||
// Trim the line at the single-line comment
|
||
line = line.substr(0, single_line_comment_pos);
|
||
} else if (multi_line_comment_start_pos != std::string::npos) {
|
||
// Trim the line at the multi-line comment start
|
||
line = line.substr(0, multi_line_comment_start_pos);
|
||
}
|
||
|
||
// Remove leading and trailing whitespace
|
||
line.erase(0, line.find_first_not_of(" \t\n\r\f\v"));
|
||
line.erase(line.find_last_not_of(" \t\n\r\f\v") + 1);
|
||
|
||
if (!line.empty()) {
|
||
lines.push_back(line);
|
||
} else {
|
||
}
|
||
}
|
||
|
||
return lines;
|
||
}
|
||
// Checks the structure for obvious mistakes (e.g. a jump in indentation).
|
||
void TreeStructurer::validate_structure(const std::vector<std::string>& lines) {
|
||
if (lines.empty()) {
|
||
throw std::runtime_error("Empty structure provided");
|
||
}
|
||
size_t prev_indent = 0;
|
||
for (size_t i = 0; i < lines.size(); i++) {
|
||
const auto& line = lines[i];
|
||
if (line.empty()) continue;
|
||
|
||
size_t indent = get_indent_level(line);
|
||
if (indent > prev_indent + 1) {
|
||
throw std::runtime_error(
|
||
"Invalid indentation jump at line " + std::to_string(i + 1) +
|
||
": from level " + std::to_string(prev_indent) +
|
||
" to " + std::to_string(indent)
|
||
);
|
||
}
|
||
prev_indent = indent;
|
||
}
|
||
}
|
||
|
||
// Removes any disallowed characters from a node name (here we allow printable ASCII and '/').
|
||
std::string TreeStructurer::sanitize_path(const std::string& path) {
|
||
std::string result;
|
||
for (char c : path) {
|
||
if ((c >= 32 && c <= 126) || c == '/') {
|
||
result.push_back(c);
|
||
}
|
||
}
|
||
return result;
|
||
}
|