diff --git a/src/prodir/cpp/tree_structurer.cpp b/src/prodir/cpp/tree_structurer.cpp index baae534..0558e06 100644 --- a/src/prodir/cpp/tree_structurer.cpp +++ b/src/prodir/cpp/tree_structurer.cpp @@ -1,9 +1,12 @@ #include "tree_structurer.hpp" + #include #include #include #include #include +#include +#include namespace fs = std::filesystem; @@ -13,6 +16,7 @@ bool TreeStructurer::should_ignore_dir(const std::string& dirname) { ".git", ".idea", ".vscode", "__pycache__", "**pycache**" }; + // Allow these Python files even if their names start with underscores. if (!dirname.empty() && (dirname == "__main__.py" || dirname == "__init__.py")) { return false; } @@ -111,289 +115,278 @@ std::vector TreeStructurer::get_filtered_paths(const fs::path& start) return paths; } +// ----------------------------------------------------------------------------- +// Directory Structure Generation (tree printing) +// ----------------------------------------------------------------------------- + std::vector TreeStructurer::get_directory_structure(const std::string& startpath) { - std::vector result; - - std::string normalized_path = startpath; - if (startpath.substr(0, 2) == ".\\" || startpath.substr(0, 2) == "./") { - normalized_path = startpath.substr(2); - } - - fs::path start = normalized_path.empty() ? fs::current_path() : fs::path(normalized_path); - - try { - auto paths = get_filtered_paths(start); - if (paths.empty()) { - throw std::runtime_error("No valid files or directories found in: " + start.string()); - } - std::vector is_last_at_level(256, false); - - for (size_t i = 1; i < paths.size(); ++i) { - const auto& path = paths[i]; - std::string rel_path = get_relative_path(path, start); - - int level = std::count(rel_path.begin(), rel_path.end(), fs::path::preferred_separator); - - bool is_last = true; - for (size_t j = i + 1; j < paths.size(); ++j) { - std::string next_rel_path = get_relative_path(paths[j], start); - int next_level = std::count(next_rel_path.begin(), next_rel_path.end(), fs::path::preferred_separator); - if (next_level == level) { - is_last = false; - break; - } - if (next_level < level) { - break; - } - } - - is_last_at_level[level] = is_last; - - std::string line; - for (int j = 0; j < level; ++j) { - if (j == level - 1) { - line += is_last ? "└── " : "├── "; - } else { - line += is_last_at_level[j] ? " " : "│ "; - } - } - - line += path.filename().string(); - if (fs::is_directory(path)) { - line += "/"; - } - result.push_back(line); - } - } catch (const fs::filesystem_error& e) { - throw std::runtime_error("Failed to access directory: " + std::string(e.what())); + std::vector lines; + fs::path start(startpath); + if (!fs::exists(start)) { + throw std::runtime_error("Path does not exist: " + startpath); } - return result; + // Recursive lambda that traverses the filesystem. + std::function traverse; + traverse = [&](const fs::path& path, const std::string& indent, bool isLast) { + std::string line; + if (indent.empty()) { + // For the root we simply output the name (appending a directory marker if needed) + line = path.filename().string(); + } else { + // If not at the root, prepend a branch marker. + std::string branch = isLast ? + (std::string(1, static_cast(TREE_CORNER)) + "── ") : + (std::string(1, static_cast(TREE_BRANCH)) + "── "); + line = indent + branch + path.filename().string(); + } + if (fs::is_directory(path)) { + line.push_back(static_cast(DIRECTORY_MARKER)); + } + lines.push_back(line); + + if (fs::is_directory(path)) { + // Collect and sort the children. + std::vector children; + for (auto& entry : fs::directory_iterator(path)) { + std::string filename = entry.path().filename().string(); + if (entry.is_directory() && should_ignore_dir(filename)) + continue; + if (!entry.is_directory() && should_ignore_file(filename)) + continue; + children.push_back(entry.path()); + } + std::sort(children.begin(), children.end(), [](const fs::path& a, + const fs::path& b) { + return a.filename().string() < b.filename().string(); + }); + + // Recurse into children. + for (size_t i = 0; i < children.size(); i++) { + bool last = (i == children.size() - 1); + // New indent: if we are not at the root then extend the current indent. + std::string newIndent = indent; + if (!indent.empty()) { + newIndent += isLast ? " " : u8"│ "; + } + traverse(children[i], newIndent, last); + } + } + }; + + // Start the traversal with an empty indent. + traverse(start, "", true); + return lines; } -// -// UPDATED: Instead of processing the lines “on the fly,” we now build the tree and then create the project. -// This also allows the same parsing logic to be reused by create_structure_from_string(). -// -void TreeStructurer::create_structure_from_file(const std::string& filepath, const std::string& target_path) { +// ----------------------------------------------------------------------------- +// Structure Creation from a Tree-like File or String +// ----------------------------------------------------------------------------- + +void TreeStructurer::create_structure_from_file(const std::string& filepath, + const std::string& target_path) { std::vector lines = read_structure_file(filepath); validate_structure(lines); TreeNode root = build_tree_from_lines(lines); - create_node(root, target_path); + create_node(root, fs::path(target_path)); } -void TreeStructurer::create_structure_from_string(const std::string& structure, const std::string& target_path) { - std::istringstream iss(structure); +void TreeStructurer::create_structure_from_string(const std::string& structure, + const std::string& target_path) { std::vector lines; + std::istringstream iss(structure); std::string line; - while (std::getline(iss, line)) { if (!line.empty()) { lines.push_back(line); } } - validate_structure(lines); TreeNode root = build_tree_from_lines(lines); - create_node(root, target_path); + create_node(root, fs::path(target_path)); } -// -// UPDATED: Now using a Unicode‑aware (token‑based) indent calculation. -// -void TreeStructurer::validate_structure(const std::vector& lines) { - if (lines.empty()) { - throw std::runtime_error("Empty structure provided"); - } - - int prev_indent = -1; - for (const auto& line : lines) { - int current_indent = static_cast(get_indent_level(line)); - if (current_indent > prev_indent + 1) { - throw std::runtime_error("Invalid indentation level in structure"); - } - prev_indent = current_indent; - } -} - -// -// UPDATED: Parse the indent level by looking for 4‑character tokens. -// -size_t TreeStructurer::get_indent_level(const std::string& line) { - size_t level = 0; - size_t pos = 0; - // Look for “blank” or vertical bar segments (each 4 characters) - while (pos + 4 <= line.size() && (line.compare(pos, 4, " ") == 0 || line.compare(pos, 4, "│ ") == 0)) { - level++; - pos += 4; - } - // Then look for the branch indicator at this level - if (pos + 4 <= line.size() && (line.compare(pos, 4, "└── ") == 0 || line.compare(pos, 4, "├── ") == 0)) { - level++; - } - return level; -} - -// -// UPDATED: Build a tree of nodes from the structure lines using the same token‐based parsing. -// The first (root) line is expected to be a directory (with a trailing '/'). -TreeStructurer::TreeNode TreeStructurer::build_tree_from_lines(const std::vector& lines) { - if (lines.empty()) { - throw std::runtime_error("Empty structure provided"); - } - - // Process the first line as the root. - std::string first_line = lines[0]; - size_t pos = 0; - // Skip any indent tokens (if any) - while (pos + 4 <= first_line.size() && (first_line.compare(pos, 4, " ") == 0 || first_line.compare(pos, 4, "│ ") == 0)) { - pos += 4; - } - if (pos + 4 <= first_line.size() && (first_line.compare(pos, 4, "└── ") == 0 || first_line.compare(pos, 4, "├── ") == 0)) { - pos += 4; - } - std::string root_name = first_line.substr(pos); - if (root_name.empty() || root_name.back() != DIRECTORY_MARKER) { - throw std::runtime_error("Root must be a directory (ending with '" + std::string(1, DIRECTORY_MARKER) + "')"); - } - root_name.pop_back(); // Remove trailing '/' - - TreeNode root{root_name, false, {}}; - std::vector stack; - stack.push_back(&root); - - // Process remaining lines. - for (size_t i = 1; i < lines.size(); ++i) { - const std::string& line = lines[i]; - if (line.empty()) continue; - size_t pos = 0; - size_t current_level = 0; - // Process any “blank” or vertical indent segments (4 characters each) - while (pos + 4 <= line.size() && (line.compare(pos, 4, " ") == 0 || line.compare(pos, 4, "│ ") == 0)) { - current_level++; - pos += 4; - } - // Process the branch token if present - if (pos + 4 <= line.size() && (line.compare(pos, 4, "└── ") == 0 || line.compare(pos, 4, "├── ") == 0)) { - current_level++; - pos += 4; - } - std::string name = line.substr(pos); - if (name.empty()) continue; - bool is_file = true; - if (name.back() == DIRECTORY_MARKER) { - is_file = false; - name.pop_back(); - } - name = sanitize_path(name); - if (name.empty()) continue; - - // Adjust the stack to the proper level. - while (stack.size() > current_level) { - stack.pop_back(); - } - if (stack.empty()) { - throw std::runtime_error("Invalid indentation structure in the file."); - } - TreeNode new_node{name, is_file, {}}; - stack.back()->children.push_back(new_node); - if (!is_file) { - // For a directory, push the new node onto the stack. - stack.push_back(&stack.back()->children.back()); - } - } - - return root; -} - -void TreeStructurer::create_node(const TreeNode& node, const std::filesystem::path& current_path) { - std::filesystem::path new_path = current_path / node.name; - - if (node.is_file) { - // Create file - std::filesystem::path parent_path = new_path.parent_path(); - if (!std::filesystem::exists(parent_path)) { - std::filesystem::create_directories(parent_path); - } - create_file(new_path); - } else { - // Create directory - create_directory(new_path); - // Recursively create children - for (const auto& child : node.children) { - create_node(child, new_path); - } - } -} - -void TreeStructurer::create_directory(const std::filesystem::path& path) { - if (!std::filesystem::exists(path)) { - std::filesystem::create_directories(path); - } -} - -void TreeStructurer::create_file(const std::filesystem::path& path) { - if (!std::filesystem::exists(path)) { - std::ofstream file(path); - if (!file.is_open()) { - throw std::runtime_error("Failed to create file: " + path.string()); - } - file.close(); - } -} - -std::string TreeStructurer::sanitize_path(const std::string& path) { - std::string result; - for (char c : path) { - // Allow alphanumeric, common punctuation, the directory separator, and our tree tokens. - if (std::isalnum(static_cast(c)) || - c == '.' || c == '_' || c == '-' || c == '/' || - c == TREE_PIPE || c == TREE_BRANCH || c == TREE_CORNER || c == TREE_DASH) { - result += c; - } - } - return result; -} - -TreeStructurer::TreeNode TreeStructurer::parse_structure_line(const std::string& line, size_t indent_level) { - size_t name_start = line.find_first_not_of(" │├└─"); - if (name_start == std::string::npos) { - return TreeNode{"", true, {}}; - } - - std::string name = line.substr(name_start); - name = sanitize_path(name); - - bool is_file = !name.empty() && name.back() != DIRECTORY_MARKER; - if (!is_file) { - name.pop_back(); - } - - return TreeNode{name, is_file, {}}; -} - -bool TreeStructurer::is_directory_marker(const std::string& line) { - return !line.empty() && line.back() == DIRECTORY_MARKER; -} +// ----------------------------------------------------------------------------- +// Private Helper Functions (no duplicates) +// ----------------------------------------------------------------------------- +// Returns a string consisting of (level * 4) spaces. std::string TreeStructurer::create_indent(size_t level) { return std::string(level * 4, ' '); } +// Determines the "indent level" of a line by scanning it in 4‑character groups. +// Recognizes either a blank indent (" " or "│ ") or a branch marker ("├── " or "└── "). +size_t TreeStructurer::get_indent_level(const std::string& line) { + size_t indent = 0; + size_t pos = 0; + while (pos + 4 <= line.size()) { + std::string group = line.substr(pos, 4); + if (group == " " || group == u8"│ ") { + indent++; + pos += 4; + continue; + } + if (group == u8"├── " || group == u8"└── ") { + indent++; + break; + } + break; + } + return indent; +} + +// Parses a single line of the structure (after knowing its indent level) and returns a TreeNode. +// The function "consumes" indent groups until the branch marker. +TreeStructurer::TreeNode TreeStructurer::parse_structure_line(const std::string& line, + size_t indent_level) { + size_t pos = 0; + size_t groups = 0; + while (pos + 4 <= line.size() && groups < indent_level) { + std::string group = line.substr(pos, 4); + if (group == " " || group == u8"│ ") { + pos += 4; + groups++; + continue; + } + if (group == u8"├── " || group == u8"└── ") { + pos += 4; + groups++; + break; + } + break; + } + std::string name = line.substr(pos); + name = sanitize_path(name); + bool is_file = true; + if (!name.empty() && name.back() == static_cast(DIRECTORY_MARKER)) { + is_file = false; + name.pop_back(); // Remove the directory marker. + } + return TreeNode{name, is_file, {}}; +} + +// Builds a tree (with TreeNode nodes) from the vector of structure lines. +// The first line is assumed to be the root. +TreeStructurer::TreeNode TreeStructurer::build_tree_from_lines(const std::vector& lines) { + if (lines.empty()) { + throw std::runtime_error("Empty structure provided"); + } + // Process the first line as the root. + TreeNode root = parse_structure_line(lines[0], 0); + if (root.is_file) { + throw std::runtime_error("Root must be a directory"); + } + std::vector stack; + stack.push_back(&root); + + // Process each subsequent line. + for (size_t i = 1; i < lines.size(); ++i) { + size_t indent = get_indent_level(lines[i]); + TreeNode node = parse_structure_line(lines[i], indent); + if (indent > stack.size()) { + throw std::runtime_error("Invalid indentation structure in the file"); + } + while (stack.size() > indent) { + stack.pop_back(); + } + if (stack.empty()) { + throw std::runtime_error("Invalid indentation structure in the file"); + } + stack.back()->children.push_back(node); + if (!node.is_file) { + // Push a pointer to the newly added child. + stack.push_back(&stack.back()->children.back()); + } + } + return root; +} + +// Recursively creates directories and files on disk according to the tree. +void TreeStructurer::create_node(const TreeNode& node, const fs::path& current_path) { + fs::path new_path = current_path / node.name; + try { + if (node.is_file) { + // Ensure the parent directory exists. + fs::path parent = new_path.parent_path(); + if (!fs::exists(parent)) { + fs::create_directories(parent); + } + create_file(new_path); + } else { + create_directory(new_path); + for (const auto& child : node.children) { + create_node(child, new_path); + } + } + } catch (const fs::filesystem_error& e) { + throw std::runtime_error("Failed to create path '" + new_path.string() + "': " + e.what()); + } +} + +// Returns true if the given line’s last character is a directory marker. +bool TreeStructurer::is_directory_marker(const std::string& line) { + return (!line.empty() && line.back() == static_cast(DIRECTORY_MARKER)); +} + +// Creates a directory (and any necessary parent directories). +void TreeStructurer::create_directory(const fs::path& path) { + if (!fs::exists(path)) { + fs::create_directories(path); + } +} + +// Creates an empty file. +void TreeStructurer::create_file(const fs::path& path) { + if (!fs::exists(path)) { + std::ofstream ofs(path); + if (!ofs.is_open()) { + throw std::runtime_error("Failed to create file: " + path.string()); + } + ofs.close(); + } +} + +// Reads a structure file into a vector of non-empty lines. std::vector TreeStructurer::read_structure_file(const std::string& filepath) { std::vector lines; std::ifstream file(filepath); - if (!file.is_open()) { throw std::runtime_error("Failed to open file: " + filepath); } - std::string line; while (std::getline(file, line)) { if (!line.empty()) { lines.push_back(line); } } - return lines; } + +// Checks the structure for obvious mistakes (e.g. a jump in indentation). +void TreeStructurer::validate_structure(const std::vector& lines) { + if (lines.empty()) { + throw std::runtime_error("Empty structure provided"); + } + size_t prev_indent = 0; + for (const auto& line : lines) { + if (line.empty()) continue; + size_t indent = get_indent_level(line); + std::cout << "Line: \"" << line << "\" Indent level: " << indent << std::endl; + if (indent > prev_indent + 1) { + throw std::runtime_error("Invalid indentation structure in the file"); + } + prev_indent = indent; + } +} + +// Removes any disallowed characters from a node name (here we allow printable ASCII and '/'). +std::string TreeStructurer::sanitize_path(const std::string& path) { + std::string result; + for (char c : path) { + if ((c >= 32 && c <= 126) || c == '/') { + result.push_back(c); + } + } + return result; +} diff --git a/src/prodir/cpp/tree_structurer.hpp b/src/prodir/cpp/tree_structurer.hpp index c9e29c0..95ec860 100644 --- a/src/prodir/cpp/tree_structurer.hpp +++ b/src/prodir/cpp/tree_structurer.hpp @@ -38,9 +38,10 @@ private: void validate_structure(const std::vector& lines); std::string sanitize_path(const std::string& path); - static const char DIRECTORY_MARKER = '/'; - static const char TREE_PIPE = '│'; - static const char TREE_BRANCH = '├'; - static const char TREE_CORNER = '└'; - static const char TREE_DASH = '─'; -}; \ No newline at end of file + // Update the constants to use wide characters + static const wchar_t DIRECTORY_MARKER = L'/'; + static const wchar_t TREE_PIPE = L'│'; + static const wchar_t TREE_BRANCH = L'├'; + static const wchar_t TREE_CORNER = L'└'; + static const wchar_t TREE_DASH = L'─'; +};