Debug prints to find out at which indent level we are failing; the UTF-8 error is back
Gitea Actions For Tree-Structurer / Explore-Gitea-Actions (push) Failing after 23s Details

This commit is contained in:
Falko Victor Habel 2025-02-08 18:02:50 +01:00
parent 0d01f188e7
commit 2a2c13bddf
2 changed files with 246 additions and 252 deletions

View File

@ -1,9 +1,12 @@
#include "tree_structurer.hpp"
#include <algorithm>
#include <filesystem>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <functional>
#include <iostream>
namespace fs = std::filesystem;
@ -13,6 +16,7 @@ bool TreeStructurer::should_ignore_dir(const std::string& dirname) {
".git", ".idea", ".vscode", "__pycache__", "**pycache**"
};
// Allow these Python files even if their names start with underscores.
if (!dirname.empty() && (dirname == "__main__.py" || dirname == "__init__.py")) {
return false;
}
@ -111,289 +115,278 @@ std::vector<fs::path> TreeStructurer::get_filtered_paths(const fs::path& start)
return paths;
}
// -----------------------------------------------------------------------------
// Directory Structure Generation (tree printing)
// -----------------------------------------------------------------------------
std::vector<std::string> TreeStructurer::get_directory_structure(const std::string& startpath) {
std::vector<std::string> result;
std::string normalized_path = startpath;
if (startpath.substr(0, 2) == ".\\" || startpath.substr(0, 2) == "./") {
normalized_path = startpath.substr(2);
}
fs::path start = normalized_path.empty() ? fs::current_path() : fs::path(normalized_path);
try {
auto paths = get_filtered_paths(start);
if (paths.empty()) {
throw std::runtime_error("No valid files or directories found in: " + start.string());
}
std::vector<bool> is_last_at_level(256, false);
for (size_t i = 1; i < paths.size(); ++i) {
const auto& path = paths[i];
std::string rel_path = get_relative_path(path, start);
int level = std::count(rel_path.begin(), rel_path.end(), fs::path::preferred_separator);
bool is_last = true;
for (size_t j = i + 1; j < paths.size(); ++j) {
std::string next_rel_path = get_relative_path(paths[j], start);
int next_level = std::count(next_rel_path.begin(), next_rel_path.end(), fs::path::preferred_separator);
if (next_level == level) {
is_last = false;
break;
}
if (next_level < level) {
break;
}
}
is_last_at_level[level] = is_last;
std::string line;
for (int j = 0; j < level; ++j) {
if (j == level - 1) {
line += is_last ? "└── " : "├── ";
} else {
line += is_last_at_level[j] ? " " : "";
}
}
line += path.filename().string();
if (fs::is_directory(path)) {
line += "/";
}
result.push_back(line);
}
} catch (const fs::filesystem_error& e) {
throw std::runtime_error("Failed to access directory: " + std::string(e.what()));
std::vector<std::string> lines;
fs::path start(startpath);
if (!fs::exists(start)) {
throw std::runtime_error("Path does not exist: " + startpath);
}
return result;
// Recursive lambda that traverses the filesystem.
std::function<void(const fs::path&, const std::string&, bool)> traverse;
traverse = [&](const fs::path& path, const std::string& indent, bool isLast) {
std::string line;
if (indent.empty()) {
// For the root we simply output the name (appending a directory marker if needed)
line = path.filename().string();
} else {
// If not at the root, prepend a branch marker.
std::string branch = isLast ?
(std::string(1, static_cast<char>(TREE_CORNER)) + "── ") :
(std::string(1, static_cast<char>(TREE_BRANCH)) + "── ");
line = indent + branch + path.filename().string();
}
if (fs::is_directory(path)) {
line.push_back(static_cast<char>(DIRECTORY_MARKER));
}
lines.push_back(line);
if (fs::is_directory(path)) {
// Collect and sort the children.
std::vector<fs::path> children;
for (auto& entry : fs::directory_iterator(path)) {
std::string filename = entry.path().filename().string();
if (entry.is_directory() && should_ignore_dir(filename))
continue;
if (!entry.is_directory() && should_ignore_file(filename))
continue;
children.push_back(entry.path());
}
std::sort(children.begin(), children.end(), [](const fs::path& a,
const fs::path& b) {
return a.filename().string() < b.filename().string();
});
// Recurse into children.
for (size_t i = 0; i < children.size(); i++) {
bool last = (i == children.size() - 1);
// New indent: if we are not at the root then extend the current indent.
std::string newIndent = indent;
if (!indent.empty()) {
newIndent += isLast ? " " : u8"";
}
traverse(children[i], newIndent, last);
}
}
};
// Start the traversal with an empty indent.
traverse(start, "", true);
return lines;
}
//
// UPDATED: Instead of processing the lines “on the fly,” we now build the tree and then create the project.
// This also allows the same parsing logic to be reused by create_structure_from_string().
//
void TreeStructurer::create_structure_from_file(const std::string& filepath, const std::string& target_path) {
// -----------------------------------------------------------------------------
// Structure Creation from a Tree-like File or String
// -----------------------------------------------------------------------------
void TreeStructurer::create_structure_from_file(const std::string& filepath,
const std::string& target_path) {
std::vector<std::string> lines = read_structure_file(filepath);
validate_structure(lines);
TreeNode root = build_tree_from_lines(lines);
create_node(root, target_path);
create_node(root, fs::path(target_path));
}
void TreeStructurer::create_structure_from_string(const std::string& structure, const std::string& target_path) {
std::istringstream iss(structure);
void TreeStructurer::create_structure_from_string(const std::string& structure,
const std::string& target_path) {
std::vector<std::string> lines;
std::istringstream iss(structure);
std::string line;
while (std::getline(iss, line)) {
if (!line.empty()) {
lines.push_back(line);
}
}
validate_structure(lines);
TreeNode root = build_tree_from_lines(lines);
create_node(root, target_path);
create_node(root, fs::path(target_path));
}
//
// UPDATED: Now using a Unicode-aware (token-based) indent calculation.
//
// Rejects structures that are empty or whose indentation jumps by more than
// one level from one line to the next. Because the running level starts at -1,
// the first line must be at level 0.
// Throws std::runtime_error on violation.
void TreeStructurer::validate_structure(const std::vector<std::string>& lines) {
    if (lines.empty()) {
        throw std::runtime_error("Empty structure provided");
    }

    int previous_level = -1;
    for (const std::string& entry : lines) {
        const int level = static_cast<int>(get_indent_level(entry));
        const int deepest_allowed = previous_level + 1;
        if (level > deepest_allowed) {
            throw std::runtime_error("Invalid indentation level in structure");
        }
        previous_level = level;
    }
}
//
// UPDATED: Parse the indent level by looking for 4-character tokens.
//
// Returns the nesting depth encoded by the tree-drawing prefix of `line`.
//
// Every leading indent token ("    " or "│   ") counts as one level, and an
// optional branch token ("├── " or "└── ") directly after them counts as one
// more. A line without any prefix is at level 0.
size_t TreeStructurer::get_indent_level(const std::string& line) {
    // Tokens are four *characters* wide but not four *bytes*: each box-drawing
    // glyph is three bytes in UTF-8. They are therefore matched by their real
    // byte length instead of through a fixed 4-byte window.
    static const std::string indent_tokens[2] = {
        "    ",
        "\xE2\x94\x82   ",  // "│   "
    };
    static const std::string branch_tokens[2] = {
        "\xE2\x94\x9C\xE2\x94\x80\xE2\x94\x80 ",  // "├── "
        "\xE2\x94\x94\xE2\x94\x80\xE2\x94\x80 ",  // "└── "
    };

    // Byte length of the token found at `pos`, or 0 when none matches.
    const auto match_any = [&line](size_t pos, const std::string (&tokens)[2]) -> size_t {
        for (const std::string& token : tokens) {
            if (line.compare(pos, token.size(), token) == 0) {
                return token.size();
            }
        }
        return 0;
    };

    size_t level = 0;
    size_t pos = 0;
    while (const size_t consumed = match_any(pos, indent_tokens)) {
        ++level;
        pos += consumed;
    }
    if (match_any(pos, branch_tokens) != 0) {
        ++level;
    }
    return level;
}
//
// UPDATED: Build a tree of nodes from the structure lines using the same token-based parsing.
// The first (root) line is expected to be a directory (with a trailing '/').
// Builds a TreeNode hierarchy from the textual structure lines.
// lines[0] is parsed as the root and must end with DIRECTORY_MARKER; each
// following line is attached to the node on top of `stack`, after the stack
// has been trimmed to the line's computed indent level.
// Throws std::runtime_error for empty input, for a root that is not a
// directory, and when the stack runs empty while trimming.
//
// NOTE(review): every prefix check below compares a 4-byte slice of the line
// against its literal. As written, the first literal in each `while` is a
// single space and the second is empty, so neither can equal a 4-byte slice.
// If this file is UTF-8 encoded, "└── " and "├── " are 10 bytes each and
// cannot equal a 4-byte slice either, and `pos += 4` would skip only part of
// such a token. The indent literals look like they lost characters -- confirm
// against the repository before relying on this code.
TreeStructurer::TreeNode TreeStructurer::build_tree_from_lines(const std::vector<std::string>& lines) {
if (lines.empty()) {
throw std::runtime_error("Empty structure provided");
}
// Process the first line as the root.
std::string first_line = lines[0];
size_t pos = 0;
// Skip any leading indent tokens on the root line (none are expected).
while (pos + 4 <= first_line.size() && (first_line.compare(pos, 4, " ") == 0 || first_line.compare(pos, 4, "") == 0)) {
pos += 4;
}
// Skip a branch token on the root line, if present.
if (pos + 4 <= first_line.size() && (first_line.compare(pos, 4, "└── ") == 0 || first_line.compare(pos, 4, "├── ") == 0)) {
pos += 4;
}
std::string root_name = first_line.substr(pos);
if (root_name.empty() || root_name.back() != DIRECTORY_MARKER) {
throw std::runtime_error("Root must be a directory (ending with '" + std::string(1, DIRECTORY_MARKER) + "')");
}
root_name.pop_back(); // Remove trailing '/'
TreeNode root{root_name, false, {}};
// `stack` holds the chain of currently open directories, starting at the root.
std::vector<TreeNode*> stack;
stack.push_back(&root);
// Process remaining lines.
for (size_t i = 1; i < lines.size(); ++i) {
const std::string& line = lines[i];
if (line.empty()) continue;
size_t pos = 0;
size_t current_level = 0;
// Count the leading indent tokens, 4 bytes per step.
while (pos + 4 <= line.size() && (line.compare(pos, 4, " ") == 0 || line.compare(pos, 4, "") == 0)) {
current_level++;
pos += 4;
}
// A branch token adds one more level.
if (pos + 4 <= line.size() && (line.compare(pos, 4, "└── ") == 0 || line.compare(pos, 4, "├── ") == 0)) {
current_level++;
pos += 4;
}
std::string name = line.substr(pos);
if (name.empty()) continue;
// A trailing DIRECTORY_MARKER marks a directory; it is stripped before sanitising.
bool is_file = true;
if (name.back() == DIRECTORY_MARKER) {
is_file = false;
name.pop_back();
}
name = sanitize_path(name);
// Entries whose name sanitises to nothing are skipped.
if (name.empty()) continue;
// Trim the stack so that its top is the parent at `current_level`.
while (stack.size() > current_level) {
stack.pop_back();
}
// A computed level of 0 pops the root as well and is rejected here.
if (stack.empty()) {
throw std::runtime_error("Invalid indentation structure in the file.");
}
TreeNode new_node{name, is_file, {}};
stack.back()->children.push_back(new_node);
if (!is_file) {
// For a directory, push a pointer to the child just stored in the parent.
stack.push_back(&stack.back()->children.back());
}
}
return root;
}
// Materialises `node` (and, for directories, all of its descendants) on disk
// underneath `current_path`.
void TreeStructurer::create_node(const TreeNode& node, const std::filesystem::path& current_path) {
    const std::filesystem::path target = current_path / node.name;

    if (!node.is_file) {
        // Directory: create it first, then descend into the children.
        create_directory(target);
        for (const auto& child : node.children) {
            create_node(child, target);
        }
        return;
    }

    // File: make sure the containing directory exists before creating it.
    const std::filesystem::path containing_dir = target.parent_path();
    if (!std::filesystem::exists(containing_dir)) {
        std::filesystem::create_directories(containing_dir);
    }
    create_file(target);
}
// Creates `path` together with any missing parent directories; does nothing
// when the path already exists.
void TreeStructurer::create_directory(const std::filesystem::path& path) {
    if (std::filesystem::exists(path)) {
        return;
    }
    std::filesystem::create_directories(path);
}
// Creates an empty file at `path` unless something already exists there.
// Throws std::runtime_error when the file cannot be opened for writing.
void TreeStructurer::create_file(const std::filesystem::path& path) {
    if (std::filesystem::exists(path)) {
        return;
    }

    std::ofstream out(path);
    if (!out.is_open()) {
        throw std::runtime_error("Failed to create file: " + path.string());
    }
    out.close();
}
// Returns a copy of `path` that keeps only alphanumerics, the characters
// '.', '_', '-', '/', and the TREE_* marker characters; everything else is
// dropped.
std::string TreeStructurer::sanitize_path(const std::string& path) {
    std::string cleaned;
    for (const char ch : path) {
        const bool is_alnum = std::isalnum(static_cast<unsigned char>(ch)) != 0;
        const bool is_punctuation = ch == '.' || ch == '_' || ch == '-' || ch == '/';
        const bool is_tree_marker =
            ch == TREE_PIPE || ch == TREE_BRANCH || ch == TREE_CORNER || ch == TREE_DASH;

        if (is_alnum || is_punctuation || is_tree_marker) {
            cleaned += ch;
        }
    }
    return cleaned;
}
// Extracts the entry name from one structure line and classifies it as a file
// or a directory (trailing DIRECTORY_MARKER).
//
// Leading spaces and box-drawing bytes are skipped. `indent_level` is accepted
// for interface compatibility but is not used by this implementation.
// A line without a usable name yields an empty-named file node.
TreeStructurer::TreeNode TreeStructurer::parse_structure_line(const std::string& line, size_t indent_level) {
    (void)indent_level;

    const size_t name_start = line.find_first_not_of(" │├└─");
    if (name_start == std::string::npos) {
        return TreeNode{"", true, {}};
    }

    std::string name = sanitize_path(line.substr(name_start));
    if (name.empty()) {
        // Sanitising can remove every character. Calling pop_back() on the
        // empty result would be undefined behaviour, so treat it like a line
        // without a name.
        return TreeNode{"", true, {}};
    }

    const bool is_file = name.back() != DIRECTORY_MARKER;
    if (!is_file) {
        name.pop_back();  // Drop the directory marker.
    }
    return TreeNode{name, is_file, {}};
}
// Reports whether `line` is non-empty and ends with DIRECTORY_MARKER.
bool TreeStructurer::is_directory_marker(const std::string& line) {
    if (line.empty()) {
        return false;
    }
    return line.back() == DIRECTORY_MARKER;
}
// -----------------------------------------------------------------------------
// Private Helper Functions (no duplicates)
// -----------------------------------------------------------------------------
// Produces the whitespace for `level` indentation steps, four spaces per step.
std::string TreeStructurer::create_indent(size_t level) {
    const size_t width = level * 4;
    return std::string(width, ' ');
}
// Determines the indent level of a line from its tree-drawing prefix.
//
// Each leading indent token ("    " or "│   ") adds one level. Scanning stops
// at the first branch token ("├── " or "└── "), which adds one final level, or
// at the first position where no token matches.
//
// The tokens are four characters wide, but the box-drawing glyphs occupy three
// bytes each in UTF-8 ("│   " is 6 bytes, "├── " is 10). Scanning the
// std::string in fixed 4-byte groups therefore never recognises them, so each
// token is matched by its actual byte length.
size_t TreeStructurer::get_indent_level(const std::string& line) {
    static const std::string kBlank = "    ";
    static const std::string kPipe = "\xE2\x94\x82   ";                          // "│   "
    static const std::string kTee = "\xE2\x94\x9C\xE2\x94\x80\xE2\x94\x80 ";     // "├── "
    static const std::string kCorner = "\xE2\x94\x94\xE2\x94\x80\xE2\x94\x80 ";  // "└── "

    // True when `token` occurs in `line` starting exactly at byte `pos`.
    const auto token_at = [&line](size_t pos, const std::string& token) {
        return line.compare(pos, token.size(), token) == 0;
    };

    size_t indent = 0;
    size_t pos = 0;
    while (pos < line.size()) {
        if (token_at(pos, kBlank)) {
            ++indent;
            pos += kBlank.size();
            continue;
        }
        if (token_at(pos, kPipe)) {
            ++indent;
            pos += kPipe.size();
            continue;
        }
        if (token_at(pos, kTee) || token_at(pos, kCorner)) {
            ++indent;  // The branch marker closes the prefix.
        }
        break;
    }
    return indent;
}
// Parses one structure line into a TreeNode.
//
// Up to `indent_level` prefix tokens are consumed from the front of the line:
// indent tokens ("    " or "│   ") one by one, then at most one branch token
// ("├── " or "└── "), which ends the prefix. The rest of the line is sanitised
// and becomes the node name. A trailing DIRECTORY_MARKER marks a directory and
// is stripped from the name.
//
// Tokens are matched by their UTF-8 byte length (4, 6 or 10 bytes). A fixed
// 4-byte window cannot match the box-drawing tokens and would leave their
// bytes in front of the name.
TreeStructurer::TreeNode TreeStructurer::parse_structure_line(const std::string& line,
                                                              size_t indent_level) {
    static const std::string kBlank = "    ";
    static const std::string kPipe = "\xE2\x94\x82   ";                          // "│   "
    static const std::string kTee = "\xE2\x94\x9C\xE2\x94\x80\xE2\x94\x80 ";     // "├── "
    static const std::string kCorner = "\xE2\x94\x94\xE2\x94\x80\xE2\x94\x80 ";  // "└── "

    // True when `token` occurs in `line` starting exactly at byte `pos`.
    const auto token_at = [&line](size_t pos, const std::string& token) {
        return line.compare(pos, token.size(), token) == 0;
    };

    size_t pos = 0;
    size_t consumed = 0;
    while (consumed < indent_level && pos < line.size()) {
        if (token_at(pos, kBlank)) {
            pos += kBlank.size();
            ++consumed;
            continue;
        }
        if (token_at(pos, kPipe)) {
            pos += kPipe.size();
            ++consumed;
            continue;
        }
        if (token_at(pos, kTee)) {
            pos += kTee.size();
        } else if (token_at(pos, kCorner)) {
            pos += kCorner.size();
        }
        break;  // Either the branch marker was consumed or the prefix ended.
    }

    std::string name = sanitize_path(line.substr(pos));

    bool is_file = true;
    if (!name.empty() && name.back() == static_cast<char>(DIRECTORY_MARKER)) {
        is_file = false;
        name.pop_back();  // Remove the directory marker.
    }
    return TreeNode{name, is_file, {}};
}
// Converts the structure lines into a TreeNode hierarchy.
// lines[0] becomes the root and must parse as a directory. Every later line is
// attached to the directory that is open at its indent level.
// Throws std::runtime_error for empty input, a non-directory root, or a line
// whose indent level is 0 or deeper than the currently open directory chain.
TreeStructurer::TreeNode TreeStructurer::build_tree_from_lines(const std::vector<std::string>& lines) {
    if (lines.empty()) {
        throw std::runtime_error("Empty structure provided");
    }

    TreeNode root = parse_structure_line(lines[0], 0);
    if (root.is_file) {
        throw std::runtime_error("Root must be a directory");
    }

    // open_dirs[k] is the directory currently open at depth k (root at 0).
    std::vector<TreeNode*> open_dirs;
    open_dirs.push_back(&root);

    for (size_t idx = 1; idx < lines.size(); ++idx) {
        const std::string& text = lines[idx];
        const size_t depth = get_indent_level(text);
        TreeNode node = parse_structure_line(text, depth);

        // Depth 0 would close the root itself; a depth beyond the open chain
        // skips a level. Both are structural errors.
        if (depth > open_dirs.size() || depth == 0) {
            throw std::runtime_error("Invalid indentation structure in the file");
        }

        // Close every directory deeper than this line's parent.
        open_dirs.resize(depth);

        TreeNode* parent = open_dirs.back();
        parent->children.push_back(node);
        if (!node.is_file) {
            // Track the copy stored inside the parent, not the local `node`.
            open_dirs.push_back(&parent->children.back());
        }
    }
    return root;
}
// Creates `node` below `current_path`: a file is created empty (after making
// sure its parent directory exists), a directory is created and then populated
// recursively with its children.
// A fs::filesystem_error raised here is rethrown as std::runtime_error carrying
// the offending path.
void TreeStructurer::create_node(const TreeNode& node, const fs::path& current_path) {
    const fs::path target = current_path / node.name;
    try {
        if (!node.is_file) {
            create_directory(target);
            for (const auto& child : node.children) {
                create_node(child, target);
            }
            return;
        }

        const fs::path containing_dir = target.parent_path();
        if (!fs::exists(containing_dir)) {
            fs::create_directories(containing_dir);
        }
        create_file(target);
    } catch (const fs::filesystem_error& err) {
        throw std::runtime_error("Failed to create path '" + target.string() + "': " + err.what());
    }
}
// Returns true if the given line's last character is the directory marker.
bool TreeStructurer::is_directory_marker(const std::string& line) {
    if (line.empty()) {
        return false;
    }
    const char last = line.back();
    return last == static_cast<char>(DIRECTORY_MARKER);
}
// Ensures that `path` exists as a directory, creating missing parents as
// needed. An already existing path is left untouched.
void TreeStructurer::create_directory(const fs::path& path) {
    if (fs::exists(path)) {
        return;
    }
    fs::create_directories(path);
}
// Creates an empty file at `path`; an existing path is left untouched.
// Throws std::runtime_error when the file cannot be opened for writing.
void TreeStructurer::create_file(const fs::path& path) {
    if (fs::exists(path)) {
        return;
    }

    std::ofstream out(path);
    if (!out.is_open()) {
        throw std::runtime_error("Failed to create file: " + path.string());
    }
    out.close();
}
// Loads the structure description stored at `filepath` and returns its
// non-empty lines in file order.
// Throws std::runtime_error when the file cannot be opened.
std::vector<std::string> TreeStructurer::read_structure_file(const std::string& filepath) {
    std::ifstream input(filepath);
    if (!input.is_open()) {
        throw std::runtime_error("Failed to open file: " + filepath);
    }

    std::vector<std::string> lines;
    for (std::string row; std::getline(input, row);) {
        if (row.empty()) {
            continue;
        }
        lines.push_back(row);
    }
    return lines;
}
// Checks the structure for obvious mistakes before it is parsed:
//   * the structure must not be empty,
//   * the first non-empty line (the root) must be at indent level 0,
//   * no line may be indented more than one level deeper than its predecessor.
// Throws std::runtime_error on violation.
//
// The per-line trace is written to std::cerr so that diagnostics do not mix
// with regular program output on stdout.
void TreeStructurer::validate_structure(const std::vector<std::string>& lines) {
    if (lines.empty()) {
        throw std::runtime_error("Empty structure provided");
    }

    bool root_seen = false;
    size_t prev_indent = 0;
    for (const auto& line : lines) {
        if (line.empty()) continue;

        const size_t indent = get_indent_level(line);
        std::cerr << "Line: \"" << line << "\" Indent level: " << indent << '\n';

        if (!root_seen) {
            // With an unsigned running level starting at 0, a root at level 1
            // would pass the jump check below, so it is rejected explicitly.
            if (indent != 0) {
                throw std::runtime_error("Invalid indentation structure in the file");
            }
            root_seen = true;
        } else if (indent > prev_indent + 1) {
            throw std::runtime_error("Invalid indentation structure in the file");
        }
        prev_indent = indent;
    }
}
// Strips every character from `path` that is neither printable ASCII
// (codes 32 through 126) nor '/', and returns the filtered copy.
std::string TreeStructurer::sanitize_path(const std::string& path) {
    std::string filtered = path;
    const auto is_disallowed = [](char ch) {
        return (ch < 32 || ch > 126) && ch != '/';
    };
    filtered.erase(std::remove_if(filtered.begin(), filtered.end(), is_disallowed),
                   filtered.end());
    return filtered;
}

View File

@ -38,9 +38,10 @@ private:
void validate_structure(const std::vector<std::string>& lines);
std::string sanitize_path(const std::string& path);
static const char DIRECTORY_MARKER = '/';
static const char TREE_PIPE = '';
static const char TREE_BRANCH = '';
static const char TREE_CORNER = '';
static const char TREE_DASH = '';
};
// Update the constants to use wide characters
static const wchar_t DIRECTORY_MARKER = L'/';
static const wchar_t TREE_PIPE = L'';
static const wchar_t TREE_BRANCH = L'';
static const wchar_t TREE_CORNER = L'';
static const wchar_t TREE_DASH = L'';
};