prodir/src/prodir/cpp/tree_structurer.cpp

542 lines
20 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "tree_structurer.hpp"
#include <map>
#include <algorithm>
#include <filesystem>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <functional>
#include <iostream>
namespace fs = std::filesystem;
// Decides, from the name alone, whether a path component should be excluded
// from directory listings.
//
// @param dirname  A single path component (no separators).
// @return true when the component should be skipped:
//         - it is one of the well-known build/tooling directories below, or
//         - it starts with '.' or '_' (hidden / private entries);
//         false for "__main__.py" and "__init__.py", which are always allowed,
//         and for everything else (including the empty string).
bool TreeStructurer::should_ignore_dir(const std::string& dirname) {
    // NOTE(review): "**pycache**" looks like a markdown-mangled copy of
    // "__pycache__"; it is kept so that behaviour is unchanged.
    static const std::vector<std::string> ignore_list = {
        "build", "venv", "myenv", "dist", "node_modules", "CMakeFiles",
        ".git", ".idea", ".vscode", "__pycache__", "**pycache**"
    };

    // Allow these Python files even though their names start with underscores.
    if (dirname == "__main__.py" || dirname == "__init__.py") {
        return false;
    }

    if (std::find(ignore_list.begin(), ignore_list.end(), dirname) != ignore_list.end()) {
        return true;
    }

    // Any leading '.' or '_' hides the entry. This already covers dunder names
    // such as "__pycache__", so no separate "__...__" check is needed.
    return !dirname.empty() && (dirname[0] == '.' || dirname[0] == '_');
}
// Decides, from the name alone, whether a file should be left out of listings.
//
// @param filename  File name without any directory part.
// @return false for "__main__.py" / "__init__.py"; true for names beginning
//         with '.' or '_' and for names whose extension is one of the
//         compiled/binary artefact extensions below (exact, case-sensitive
//         match); false otherwise.
bool TreeStructurer::should_ignore_file(const std::string& filename) {
    static const std::vector<std::string> ignore_extensions = {
        ".pyc", ".pyo", ".pyd", ".so", ".dll", ".dylib",
        ".o", ".obj", ".a", ".lib"
    };

    const bool is_python_entry_point =
        (filename == "__main__.py" || filename == "__init__.py");
    if (is_python_entry_point) {
        return false;
    }

    const bool hidden_or_private =
        !filename.empty() && (filename.front() == '.' || filename.front() == '_');
    if (hidden_or_private) {
        return true;
    }

    const std::string suffix = fs::path(filename).extension().string();
    for (const std::string& ignored : ignore_extensions) {
        if (suffix == ignored) {
            return true;
        }
    }
    return false;
}
// Returns `path` expressed relative to `base`, as a string.
// Thin wrapper around std::filesystem::relative; any exception it raises
// propagates to the caller.
std::string TreeStructurer::get_relative_path(const fs::path& path, const fs::path& base) {
    return fs::relative(path, base).string();
}
// Collects `start` plus every non-ignored entry beneath it, sorted by path.
//
// Filtering is applied only to entries *inside* the tree: the name of each
// visited entry is tested with should_ignore_dir() (and should_ignore_file()
// for non-directories). The components of `start` itself are deliberately not
// tested, so a start such as ".", "..", or a directory living under a hidden
// parent still lists its contents. Ignored directories are pruned, i.e. the
// walk never descends into them.
//
// @param start  Directory to walk.
// @return `start` followed by all retained descendants, sorted with
//         fs::path's operator<.
// @throws std::runtime_error if `start` does not exist, is not a directory,
//         is empty, or if the filesystem reports an error during the walk.
std::vector<fs::path> TreeStructurer::get_filtered_paths(const fs::path& start) {
    std::vector<fs::path> paths;
    const fs::directory_options options = fs::directory_options::skip_permission_denied;

    try {
        if (!fs::exists(start)) {
            throw std::runtime_error("Directory does not exist: " + start.string());
        }
        if (!fs::is_directory(start)) {
            throw std::runtime_error("Path is not a directory: " + start.string());
        }
        paths.push_back(start);

        if (fs::directory_iterator(start) == fs::directory_iterator()) {
            throw std::runtime_error("Directory is empty: " + start.string());
        }

        // An explicit iterator (rather than range-for) is needed so that
        // recursion into ignored directories can be switched off.
        for (auto it = fs::recursive_directory_iterator(start, options);
             it != fs::recursive_directory_iterator(); ++it) {
            const fs::path& path = it->path();
            const std::string name = path.filename().string();
            const bool is_dir = it->is_directory();

            // The directory rule is applied to every entry name (files too).
            // Ancestors inside the tree have already passed this test,
            // otherwise the walk would not have descended into them.
            if (should_ignore_dir(name)) {
                if (is_dir) {
                    it.disable_recursion_pending();
                }
                continue;
            }

            if (is_dir || !should_ignore_file(name)) {
                paths.push_back(path);
            }
        }
    } catch (const fs::filesystem_error& e) {
        throw std::runtime_error("Error accessing path: " + std::string(e.what()));
    }

    std::sort(paths.begin(), paths.end());
    return paths;
}
// -----------------------------------------------------------------------------
// Directory Structure Generation (tree printing)
// -----------------------------------------------------------------------------
std::vector<std::string> TreeStructurer::get_directory_structure(const std::string& startpath) {
std::vector<std::string> result;
std::string normalized_path = startpath;
if (normalized_path.size() >= 2 &&
(normalized_path.substr(0, 2) == ".\\" || normalized_path.substr(0, 2) == "./")) {
normalized_path = normalized_path.substr(2);
}
fs::path start = normalized_path.empty() ? fs::current_path() : fs::path(normalized_path);
try {
if (!fs::exists(start)) {
throw std::runtime_error("Directory does not exist: " + start.string());
}
if (!fs::is_directory(start)) {
throw std::runtime_error("Path is not a directory: " + start.string());
}
std::vector<fs::path> paths = get_filtered_paths(start);
std::vector<bool> is_last_per_level;
// Skip the first path as it's the root
for (size_t i = 1; i < paths.size(); ++i) {
fs::path relative = fs::relative(paths[i], start);
std::vector<std::string> components;
for (const auto& comp : relative) {
components.push_back(comp.string());
}
// Calculate the current level
size_t level = components.size() - 1;
// Adjust is_last_per_level vector size
while (is_last_per_level.size() <= level) {
is_last_per_level.push_back(false);
}
// Build the line prefix
std::string line;
for (size_t j = 0; j <= level; ++j) {
if (j == level) {
// This is the connector for the current item
bool is_last_sibling = true;
// Look ahead to find next sibling at the same level
for (size_t k = i + 1; k < paths.size(); ++k) {
fs::path next_relative = fs::relative(paths[k], start);
std::vector<std::string> next_components;
for (const auto& comp : next_relative) {
next_components.push_back(comp.string());
}
// Check if it's a sibling (same parent, same level)
if (next_components.size() == components.size()) {
bool same_parent = true;
for (size_t l = 0; l < level; ++l) {
if (l >= next_components.size() || components[l] != next_components[l]) {
same_parent = false;
break;
}
}
if (same_parent) {
is_last_sibling = false;
break;
}
}
}
line += is_last_sibling ? "└── " : "├── ";
} else {
// This is a vertical line for parent levels
bool needs_vertical_line = false;
// Check if there are future items that share this ancestor
for (size_t k = i + 1; k < paths.size(); ++k) {
fs::path next_relative = fs::relative(paths[k], start);
std::vector<std::string> next_components;
for (const auto& comp : next_relative) {
next_components.push_back(comp.string());
}
// If next item shares the same path up to level j
if (next_components.size() > j) {
bool shares_ancestor = true;
for (size_t l = 0; l <= j; ++l) {
if (l >= next_components.size() || l >= components.size() ||
components[l] != next_components[l]) {
shares_ancestor = false;
break;
}
}
if (shares_ancestor) {
needs_vertical_line = true;
break;
}
}
}
line += needs_vertical_line ? "" : " ";
}
}
// Add the file/directory name
line += components.back();
if (fs::is_directory(paths[i])) {
line += "/";
}
result.push_back(line);
}
} catch (const fs::filesystem_error& e) {
throw std::runtime_error("Failed to access directory: " + std::string(e.what()));
}
return result;
}
// -----------------------------------------------------------------------------
// Structure Creation from a Tree-like File or String
// -----------------------------------------------------------------------------
void TreeStructurer::create_structure_from_file(const std::string& filepath,
const std::string& target_path) {
std::vector<std::string> lines = read_structure_file(filepath);
validate_structure(lines);
TreeNode root = build_tree_from_lines(lines);
fs::path target(target_path);
// Check if the root directory name matches the target directory name
if (root.name == target.filename().string()) {
// If names match, create the children directly in the target directory
for (const auto& child : root.children) {
create_node(child, target);
}
} else {
// If names don't match, create the full structure including root
create_node(root, target);
}
}
// -----------------------------------------------------------------------------
// Private Helper Functions (no duplicates)
// -----------------------------------------------------------------------------
// Builds the blank indentation for a given depth: four spaces per level.
std::string TreeStructurer::create_indent(size_t level) {
    const size_t width = level * 4;
    return std::string(width, ' ');
}
// Determines the indent level of a structure line by scanning it in
// 4-column groups from the left.
//
// Recognised groups (UTF-8 encoded):
//   - four ASCII spaces                                   -> +1, keep scanning
//   - "│" (E2 94 82) followed by three spaces             -> +1, keep scanning
//   - "├──" / "└──" (E2 94 9C|94, E2 94 80, E2 94 80),
//     followed by a space or the end of the line          -> +1, stop
// Scanning stops at the first position that matches none of these.
//
// @param line  One line of a tree description.
// @return Number of groups recognised (0 for a line with no indent/marker).
size_t TreeStructurer::get_indent_level(const std::string& line) {
    const auto byte_at = [&line](size_t i) {
        return static_cast<unsigned char>(line[i]);
    };

    const size_t len = line.length();
    size_t indent = 0;
    size_t pos = 0;

    while (pos < len) {
        const size_t remaining = len - pos;

        // Blank indent: exactly four spaces.
        if (remaining >= 4 && line.compare(pos, 4, "    ") == 0) {
            ++indent;
            pos += 4;
            continue;
        }

        // Vertical bar followed by three spaces (3 + 3 = 6 bytes).
        if (remaining >= 6 &&
            byte_at(pos) == 0xE2 && byte_at(pos + 1) == 0x94 && byte_at(pos + 2) == 0x82 &&
            line.compare(pos + 3, 3, "   ") == 0) {
            ++indent;
            pos += 6;
            continue;
        }

        // Branch marker: (├ | └) ─ ─, then a space or end of line.
        if (remaining >= 9 &&
            byte_at(pos) == 0xE2 && byte_at(pos + 1) == 0x94 &&
            (byte_at(pos + 2) == 0x9C ||                      // ├
             byte_at(pos + 2) == 0x94) &&                     // └
            byte_at(pos + 3) == 0xE2 && byte_at(pos + 4) == 0x94 && byte_at(pos + 5) == 0x80 &&  // ─
            byte_at(pos + 6) == 0xE2 && byte_at(pos + 7) == 0x94 && byte_at(pos + 8) == 0x80 &&  // ─
            (pos + 9 >= len || line[pos + 9] == ' ')) {
            ++indent;  // the marker is the last indent group of a line
        }

        // Either the marker was just counted or nothing matched: done.
        break;
    }

    return indent;
}
// Parses one line of a tree description into a TreeNode.
//
// Up to `indent_level` indent groups are consumed from the start of the line
// (four spaces, "│" + three spaces, or a "├──"/"└──" branch marker, which
// ends the indentation). Bytes that match no group are skipped one at a time.
// Whatever follows is passed through sanitize_path() and becomes the node
// name; a trailing '/' marks a directory and is removed from the name.
//
// @param line          The raw structure line.
// @param indent_level  Number of indent groups to consume, normally the value
//                      get_indent_level() returned for this line (0 for the
//                      root line).
// @return A TreeNode with the parsed name, its file/directory flag and no
//         children.
TreeStructurer::TreeNode TreeStructurer::parse_structure_line(const std::string& line, size_t indent_level) {
    const auto byte_at = [&line](size_t i) {
        return static_cast<unsigned char>(line[i]);
    };

    const size_t len = line.length();
    size_t pos = 0;
    size_t consumed = 0;

    while (consumed < indent_level && pos < len) {
        const size_t remaining = len - pos;

        // Blank indent: exactly four spaces.
        if (remaining >= 4 && line.compare(pos, 4, "    ") == 0) {
            pos += 4;
            ++consumed;
            continue;
        }

        // "│" (E2 94 82) followed by three spaces.
        if (remaining >= 6 &&
            byte_at(pos) == 0xE2 && byte_at(pos + 1) == 0x94 && byte_at(pos + 2) == 0x82 &&
            line.compare(pos + 3, 3, "   ") == 0) {
            pos += 6;
            ++consumed;
            continue;
        }

        // "├" (E2 94 9C) or "└" (E2 94 94): start of the branch marker.
        // Skip the three 3-byte glyphs and the separating space, if present;
        // a marker sitting at the very end of the line yields an empty name.
        if (remaining >= 9 &&
            byte_at(pos) == 0xE2 && byte_at(pos + 1) == 0x94 &&
            (byte_at(pos + 2) == 0x9C || byte_at(pos + 2) == 0x94)) {
            pos += 9;
            if (pos < len && line[pos] == ' ') {
                ++pos;
            }
            ++consumed;
            break;
        }

        // Unrecognised byte inside the indentation: skip it.
        ++pos;
    }

    // Everything after the indentation is the entry name.
    std::string name = sanitize_path(line.substr(pos));

    bool is_file = true;
    if (!name.empty() && name.back() == '/') {
        is_file = false;
        name.pop_back();
    }
    return TreeNode{name, is_file, {}};
}
// Converts the lines of a tree description into a TreeNode hierarchy.
//
// The first line is the root and must denote a directory. Every following
// line is attached to the innermost open directory at its indent level.
//
// @throws std::runtime_error if `lines` is empty, the root is a file, or a
//         line's indentation does not fit the directories currently open.
TreeStructurer::TreeNode TreeStructurer::build_tree_from_lines(const std::vector<std::string>& lines) {
    if (lines.empty()) {
        throw std::runtime_error("Empty structure provided");
    }

    TreeNode root = parse_structure_line(lines.front(), 0);
    if (root.is_file) {
        throw std::runtime_error("Root must be a directory");
    }

    // Directories on the path from the root to the current insertion point.
    // Pointers stay valid because a children vector is only appended to after
    // every pointer into it has been dropped from this list.
    std::vector<TreeNode*> open_dirs;
    open_dirs.push_back(&root);

    for (auto it = lines.begin() + 1; it != lines.end(); ++it) {
        const size_t depth = get_indent_level(*it);
        TreeNode entry = parse_structure_line(*it, depth);

        if (depth > open_dirs.size()) {
            throw std::runtime_error("Invalid indentation structure in the file");
        }
        // Close directories deeper than this line's level.
        if (depth < open_dirs.size()) {
            open_dirs.resize(depth);
        }
        if (open_dirs.empty()) {
            throw std::runtime_error("Invalid indentation structure in the file");
        }

        TreeNode* parent = open_dirs.back();
        parent->children.push_back(entry);
        if (!entry.is_file) {
            // Track the stored copy, not the local `entry`.
            open_dirs.push_back(&parent->children.back());
        }
    }

    return root;
}
// Creates `node` (and, for directories, everything below it) on disk inside
// `current_path`.
//
// Files get their parent directory created first if it is missing.
//
// @throws std::runtime_error wrapping any fs::filesystem_error, with the
//         offending path in the message.
void TreeStructurer::create_node(const TreeNode& node, const fs::path& current_path) {
    const fs::path destination = current_path / node.name;

    try {
        if (!node.is_file) {
            create_directory(destination);
            for (const auto& entry : node.children) {
                create_node(entry, destination);
            }
            return;
        }

        // Ensure the containing directory exists before creating the file.
        const fs::path containing_dir = destination.parent_path();
        if (!fs::exists(containing_dir)) {
            fs::create_directories(containing_dir);
        }
        create_file(destination);
    } catch (const fs::filesystem_error& err) {
        throw std::runtime_error("Failed to create path '" + destination.string() + "': " + err.what());
    }
}
// Returns true if the given line's last character is the directory marker
// (DIRECTORY_MARKER); an empty line never matches.
bool TreeStructurer::is_directory_marker(const std::string& line) {
    if (line.empty()) {
        return false;
    }
    return line.back() == static_cast<char>(DIRECTORY_MARKER);
}
// Creates `path` as a directory, including any missing parents.
// Does nothing when something already exists at `path`.
void TreeStructurer::create_directory(const fs::path& path) {
    if (fs::exists(path)) {
        return;
    }
    fs::create_directories(path);
}
// Creates an empty file at `path`. An existing entry is left untouched.
//
// @throws std::runtime_error if the file cannot be opened for writing.
void TreeStructurer::create_file(const fs::path& path) {
    if (fs::exists(path)) {
        return;
    }

    std::ofstream out(path);
    if (!out.is_open()) {
        throw std::runtime_error("Failed to create file: " + path.string());
    }
    out.close();
}
// Reads a structure file into a vector of non-blank lines with comments
// removed.
//
// Per line:
//   - a trailing '\r' (Windows line ending) is dropped;
//   - everything from the earliest of '#', "//" or "/*" onwards is discarded
//     (a "/*" comment is only cut on the line where it starts; following
//     lines are not tracked as part of the comment);
//   - trailing whitespace is removed, and lines that end up empty are skipped;
//   - leading whitespace is preserved, because four-space groups encode the
//     depth of an entry. Only the indentation of the first retained line (the
//     root) is stripped, and at most that same amount is removed from every
//     later line, so a uniformly indented file still parses.
//
// @param filepath  Path of the structure file.
// @return The retained lines in file order.
// @throws std::runtime_error if the file cannot be opened.
std::vector<std::string> TreeStructurer::read_structure_file(const std::string& filepath) {
    static const char* const kWhitespace = " \t\n\r\f\v";

    std::vector<std::string> lines;

    // Open in binary mode to avoid Windows CRLF conversion.
    std::ifstream file(filepath, std::ios::binary);
    if (!file.is_open()) {
        throw std::runtime_error("Failed to open file: " + filepath);
    }

    size_t base_indent = 0;  // leading whitespace width of the root line
    std::string line;
    while (std::getline(file, line)) {
        // Remove carriage return if present (Windows files).
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }

        // Cut at whichever comment marker appears first; npos is the largest
        // size_t, so it only wins when no marker is present.
        const size_t comment_pos = std::min({line.find('#'), line.find("//"), line.find("/*")});
        if (comment_pos != std::string::npos) {
            line.erase(comment_pos);
        }

        // Strip trailing whitespace; an all-whitespace line becomes empty.
        line.erase(line.find_last_not_of(kWhitespace) + 1);
        if (line.empty()) {
            continue;
        }

        // Dedent relative to the root line, keeping structural indentation.
        const size_t leading = line.find_first_not_of(kWhitespace);
        if (lines.empty()) {
            base_indent = leading;
        }
        line.erase(0, std::min(leading, base_indent));

        lines.push_back(line);
    }

    return lines;
}
// Sanity-checks a tree description before it is parsed.
//
// Each non-empty line may be at most one level deeper than the previous
// non-empty line.
//
// @throws std::runtime_error if `lines` is empty or a line jumps more than one
//         indent level; the message carries the 1-based line index within
//         `lines`.
void TreeStructurer::validate_structure(const std::vector<std::string>& lines) {
    if (lines.empty()) {
        throw std::runtime_error("Empty structure provided");
    }

    size_t previous_level = 0;
    size_t line_number = 0;
    for (const std::string& entry : lines) {
        ++line_number;
        if (entry.empty()) {
            continue;
        }

        const size_t level = get_indent_level(entry);
        if (level > previous_level + 1) {
            std::string message = "Invalid indentation jump at line ";
            message += std::to_string(line_number);
            message += ": from level ";
            message += std::to_string(previous_level);
            message += " to ";
            message += std::to_string(level);
            throw std::runtime_error(message);
        }
        previous_level = level;
    }
}
// Strips every character outside printable ASCII (codes 32..126) from a node
// name. '/' lies inside that range and is therefore always kept.
std::string TreeStructurer::sanitize_path(const std::string& path) {
    std::string cleaned;
    for (const char ch : path) {
        const bool printable_ascii = (ch >= ' ' && ch <= '~');
        if (!printable_ascii) {
            continue;
        }
        cleaned += ch;
    }
    return cleaned;
}