About Python: using os.walk() to recursively traverse directories in Python
Question Detail
I want to navigate from the root directory to all other directories within and print the same.
Here’s my code:
#!/usr/bin/env python3
"""Print every directory below the current one, followed by its files."""
import os
import fnmatch

# os.walk yields (directory path, sub-directory names, file names) for each
# directory it visits, starting at "." and descending top-down.
for root, dirs, files in os.walk("."):
    print(root)
    print("")
    # The "*" pattern matches every name, so no file is filtered out here.
    for items in fnmatch.filter(files, "*"):
        print("…" + items)
    print("")
And here’s my O/P:
.
…Python_Notes
…pypy.py
…pypy.py.save
…classdemo.py
….goutputstream-J9ZUXW
…latest.py
…pack.py
…classdemo.pyc
…Python_Notes~
…module-demo.py
…filetype.py
./packagedemo
…classdemo.py
…__init__.pyc
…__init__.py
…classdemo.pyc
Above, . and ./packagedemo are directories.
However, I need to print the O/P in the following manner:
A
—a.txt
—b.txt
—B
——c.out
Above, A and B are directories and the rest are files.
Question Answer
This will give you the desired result
#!/usr/bin/python
import os

# Traverse the tree rooted at "."; os.walk yields each directory path together
# with the names of its sub-directories (dirs) and its files (files).
for root, dirs, files in os.walk("."):
    # One path component per nesting level: "." -> 1, "./A" -> 2, ...
    path = root.split(os.sep)
    # The directory itself is printed one level shallower than its files.
    print((len(path) - 1) * '—', os.path.basename(root))
    for file in files:
        print(len(path) * '—', file)
……………………………………………………
try this:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""FileTreeMaker.py: …"""
__author__ = "legendmohe"
import os
import argparse
import time
class FileTreeMaker(object):
    """Render a directory hierarchy as a text tree drawn with box characters.

    Directories are shown as ``[name]`` and listed before files; siblings are
    ordered by name so the output is reproducible. The settings read from the
    ``args`` object passed to :meth:`make` are stored on the instance
    (``root``, ``exf``, ``exn``, ``max_level``).
    """

    # Connector placed in front of an entry: "last" closes the branch.
    _BRANCH = "┣━"
    _LAST = "┗━"
    # Prefix added for the children of a directory. Both variants have the
    # same width so that entries of one level stay in the same column.
    _PIPE = "┃  "
    _BLANK = "   "

    def _visible_entries(self, parent_path, names):
        """Return the entries of ``parent_path`` that will be printed.

        An entry is dropped when any string in ``self.exn`` occurs in its
        name, when it is a directory listed in ``self.exf``, or when it is
        neither a directory nor a regular file.

        Returns:
            A list of ``(name, full_path, is_dir)`` tuples, directories first,
            each group sorted by name.
        """
        entries = []
        for name in names:
            if any(exclude_name in name for exclude_name in self.exn):
                continue
            full_path = os.path.join(parent_path, name)
            if os.path.isdir(full_path):
                if name not in self.exf:
                    entries.append((name, full_path, True))
            elif os.path.isfile(full_path):
                entries.append((name, full_path, False))
        entries.sort(key=lambda entry: (not entry[2], entry[0]))
        return entries

    def _recurse(self, parent_path, file_list, prefix, output_buf, level):
        """Append the tree lines for the contents of ``parent_path``.

        Args:
            parent_path: Directory whose entries are being rendered.
            file_list: Entry names found in ``parent_path``; not modified.
            prefix: Text placed in front of every line of this level.
            output_buf: List that receives one string per rendered line.
            level: Depth of ``parent_path`` below the root (root is 0).
        """
        if not file_list or (self.max_level != -1 and self.max_level <= level):
            return

        # Filter first, so the closing connector lands on the last entry that
        # is actually printed rather than on the last raw directory entry.
        entries = self._visible_entries(parent_path, file_list)
        last_idx = len(entries) - 1
        for idx, (sub_path, full_path, is_dir) in enumerate(entries):
            is_last = idx == last_idx
            idc = self._LAST if is_last else self._BRANCH
            if not is_dir:
                output_buf.append("%s%s%s" % (prefix, idc, sub_path))
                continue

            output_buf.append("%s%s[%s]" % (prefix, idc, sub_path))
            try:
                children = os.listdir(full_path)
            except OSError:
                # Unreadable directory: keep its own line, show no children.
                children = []
            tmp_prefix = prefix + (self._BLANK if is_last else self._PIPE)
            self._recurse(full_path, children, tmp_prefix, output_buf, level + 1)

    def make(self, args):
        """Build the tree for ``args.root`` and return it as one string.

        Args:
            args: Object with the attributes ``root`` (directory to render),
                ``exclude_folder`` (directory names to skip), ``exclude_name``
                (substrings; any entry whose name contains one is skipped),
                ``max_level`` (maximum depth, ``-1`` for unlimited) and
                ``output`` (file to write the tree to, empty for none).

        Returns:
            The rendered tree, lines joined with newlines.

        Raises:
            OSError: If ``args.root`` cannot be listed or ``args.output``
                cannot be written.
        """
        self.root = args.root
        self.exf = args.exclude_folder
        self.exn = args.exclude_name
        self.max_level = args.max_level

        print("root:%s" % self.root)

        # normpath drops a trailing separator, which would otherwise leave an
        # empty root label; fall back to the raw value for roots like "/".
        root_name = os.path.basename(os.path.normpath(self.root)) or self.root
        buf = ["[%s]" % (root_name,)]
        self._recurse(self.root, os.listdir(self.root), "", buf, 0)

        output_str = "\n".join(buf)
        if args.output:
            # The tree contains non-ASCII box characters, so the encoding is
            # fixed instead of depending on the platform default.
            with open(args.output, "w", encoding="utf-8") as out_file:
                out_file.write(output_str)
        return output_str
if __name__ == "__main__":
    # Command-line entry point: parse the options, render the tree for the
    # chosen root and print it (make() also writes it to --output if given).
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--root", help="root of file tree", default=".")
    parser.add_argument("-o", "--output", help="output file name", default="")
    parser.add_argument("-xf", "--exclude_folder", nargs='*', help="exclude folder", default=[])
    parser.add_argument("-xn", "--exclude_name", nargs='*', help="exclude name", default=[])
    parser.add_argument("-m", "--max_level", help="max level",
                        type=int, default=-1)
    args = parser.parse_args()
    print(FileTreeMaker().make(args))
you will get this:
root:.
[.]
┣━[.idea]
┃ ┣━[scopes]
┃ ┃ ┗━scope_settings.xml
┃ ┣━.name
┃ ┣━Demo.iml
┃ ┣━encodings.xml
┃ ┣━misc.xml
┃ ┣━modules.xml
┃ ┣━vcs.xml
┃ ┗━workspace.xml
┣━[test1]
┃ ┗━test1.txt
┣━[test2]
┃ ┣━[test2-2]
┃ ┃ ┗━[test2-3]
┃ ┃ ┣━test2
┃ ┃ ┗━test2-3-1
┃ ┗━test2
┣━folder_tree_maker.py
┗━tree.py
……………………………………………………
A recursive walk through a directory, in which you get ALL files from all directories under the current directory and ALL directories under the current directory – offered because the code samples above are not simple enough (IMHO):
# Visit every directory below rootFolderPath. os.walk descends into the
# sub-directories by itself, so no explicit recursion is needed.
for root, dirs, files in os.walk(rootFolderPath):
    for filename in files:
        doSomethingWithFile(os.path.join(root, filename))
    for dirname in dirs:
        doSomethingWithDir(os.path.join(root, dirname))
……………………………………………………
There are more suitable functions for this in os package. But if you have to use os.walk, here is what I come up with
def walkdir(dirname):
    """Print the tree below *dirname*, one name per line.

    Every directory is printed by its base name, and its files one level
    deeper; each nesting level is rendered as one '—' marker. Depth is
    measured relative to *dirname*, so the starting directory is always
    printed without indentation, for relative and absolute paths alike.

    Args:
        dirname: Path of the directory to walk.
    """
    for cur, _dirs, files in os.walk(dirname):
        # Counting levels from the start directory (instead of splitting the
        # path until it is empty) also terminates for absolute paths, whose
        # head never becomes empty.
        rel = os.path.relpath(cur, dirname)
        depth = 0 if rel == os.curdir else rel.count(os.sep) + 1
        pref = '—' * depth
        # normpath drops a trailing separator that would leave an empty name.
        tail = os.path.basename(os.path.normpath(cur)) or cur
        print(pref + tail)
        for f in files:
            print(pref + '—' + f)
output:
>>> walkdir(‘.’)
.
—file3
—file2
—my.py
—file1
—A
——file2
——file1
—B
——file3
——file2
——file4
——file1
—__pycache__
——my.cpython-33.pyc
……………………………………………………
You could also recursively walk through a folder and list all its contents using pathlib.Path():
from pathlib import Path
def check_out_path(target_path, level=0):
    """Recursively print the contents of a directory, indented by depth.

    Args:
        target_path: Directory to list, as a ``pathlib.Path`` or path string.
        level: Nesting depth of ``target_path``; each level adds one tab.
    """
    target_path = Path(target_path)

    def print_indented(name, depth):
        print('\t' * depth + name)

    # Path('.').name is an empty string, so fall back to the path text.
    print_indented(target_path.name or str(target_path), level)
    for entry in target_path.iterdir():
        if entry.is_dir():
            check_out_path(entry, level + 1)
        else:
            print_indented(entry.name, level + 1)


if __name__ == "__main__":
    # Example run; guarded so that importing this module does not try to
    # list a folder that may not exist.
    my_path = Path(r'C:\example folder')
    check_out_path(my_path)
Output:
example folder
folder
textfile3.txt
textfile1.txt
textfile2.txt
……………………………………………………
You can use os.walk, and that is probably the easiest solution, but here is another idea to explore:
import sys, os
FILES = False
def main():
    """Command-line entry point: ``<script> <directory> [/F]``.

    Sets the module-level FILES flag when the second argument is ``/F``
    (case-insensitive), then prints the tree for the directory named by the
    first argument. The usage line is printed when the directory argument is
    missing or the directory cannot be read.
    """
    global FILES
    if len(sys.argv) > 2 and sys.argv[2].upper() == '/F':
        FILES = True
    usage = 'Usage: {} <directory> [/F]'.format(os.path.basename(sys.argv[0]))
    if len(sys.argv) < 2:
        print(usage)
        return
    try:
        tree(sys.argv[1])
    except OSError as error:
        # Only file-system failures are reported here; programming errors
        # are no longer hidden behind the usage message.
        print(error)
        print(usage)
def tree(path):
    """Print the absolute form of *path*, then the folder tree below it.

    A closing note is printed when *path* has no sub-directories.

    Args:
        path: Directory to display.
    """
    path = os.path.abspath(path)
    dirs, files = listdir(path)[:2]
    print(path)
    walk(path, dirs, files)
    if not dirs:
        print('No subfolders exist')
def walk(root, dirs, files, prefix=''):
    """Print one directory level and recurse into its sub-directories.

    The connector strings follow the ASCII layout of the Windows TREE
    command ('+---', '\\---', '|   ').

    Args:
        root: Path of the directory being printed.
        dirs: Names of the sub-directories of ``root``.
        files: Names of the files in ``root``; printed only when the
            module-level FILES flag is set.
        prefix: Text placed in front of every line of this level.
    """
    if FILES and files:
        # Keep the vertical bar going when sub-directories follow the files.
        file_prefix = prefix + ('|' if dirs else ' ') + '   '
        for name in files:
            print(file_prefix + name)
        print(file_prefix)
    for _pos, neg, name in enumerate2(dirs):
        if neg == -1:
            # Last sub-directory: close the branch and stop drawing the bar.
            dir_prefix, walk_prefix = prefix + '\\---', prefix + '    '
        else:
            dir_prefix, walk_prefix = prefix + '+---', prefix + '|   '
        print(dir_prefix + name)
        path = os.path.join(root, name)
        try:
            sub_dirs, sub_files = listdir(path)[:2]
        except OSError:
            # Unreadable directory: its name is shown, its contents skipped.
            continue
        walk(path, sub_dirs, sub_files, walk_prefix)
def listdir(path):
    """Split the entries of *path* into directories, files and other links.

    Args:
        path: Directory to list.

    Returns:
        A tuple ``(dirs, files, links)`` of name lists. ``links`` receives
        only symbolic links that resolve to neither a directory nor a
        regular file (os.path.isdir / isfile follow links).

    Raises:
        OSError: If *path* cannot be listed.
    """
    dirs, files, links = [], [], []
    for name in os.listdir(path):
        path_name = os.path.join(path, name)
        if os.path.isdir(path_name):
            dirs.append(name)
        elif os.path.isfile(path_name):
            files.append(name)
        elif os.path.islink(path_name):
            links.append(name)
    return dirs, files, links
def enumerate2(sequence):
    """Yield ``(index, negative_index, value)`` for every item of *sequence*.

    ``negative_index`` is the position counted from the end, so the last
    item is reported with ``-1``.

    Args:
        sequence: A sized iterable (``len()`` must be supported).
    """
    length = len(sequence)
    for count, value in enumerate(sequence):
        yield count, count - length, value
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()
You might recognize the following documentation from the TREE command in the Windows terminal:
Graphically displays the folder structure of a drive or path.
TREE [drive:][path] [/F] [/A]
/F Display the names of the files in each folder.
/A Use ASCII instead of extended characters.
……………………………………………………
This does it for folder names:
def printFolderName(init_indent, rootFolder):
    """Print the name of every folder in the tree rooted at *rootFolder*.

    Each name is preceded by *init_indent* plus two spaces per level below
    *rootFolder*.

    Args:
        init_indent: Text placed in front of every printed line.
        rootFolder: Directory at which the walk starts.
    """
    # Drop a trailing separator so it is not counted as an extra level.
    rootFolder = os.path.normpath(rootFolder)
    root_levels = rootFolder.count(os.sep)
    # os.walk is top-down: a directory is reported before its sub-directories.
    for root, dirs, files in os.walk(rootFolder):
        levels = root.count(os.sep) - root_levels
        indent = ' ' * (levels * 2)
        print(init_indent + indent + os.path.basename(root))
……………………………………………………
#!/usr/bin/env python3
import os

# Marker printed once per nesting level in front of every entry name.
i = "—"


def tracing(a, indent=None):
    """Print every entry below directory *a*, indented by nesting depth.

    Args:
        a: Directory to list.
        indent: Text printed in front of the entries of *a*; defaults to one
            marker (the module-level ``i``). Each deeper level adds one more.
    """
    if indent is None:
        indent = i
    for item in os.listdir(a):
        print(indent + item)
        # Entry names are relative to *a*, so they must be joined with it
        # before being tested or descended into.
        item_path = os.path.join(a, item)
        if os.path.isdir(item_path):
            tracing(item_path, indent + i)


if __name__ == "__main__":
    tracing(".")
……………………………………………………
Given a folder name, walk through its entire hierarchy recursively.
#! /usr/local/bin/python3
# findLargeFiles.py – given a folder name, walk through its entire hierarchy
# – print folders and files within each folder
import os
def recursive_walk(folder):
    """Print every folder below *folder* together with the files it holds.

    os.walk already descends into all sub-folders, so no explicit recursion
    is needed; calling this function again on the bare sub-folder names would
    resolve them against the current working directory instead of their
    parent.

    Args:
        folder: Directory at which the walk starts.
    """
    for folderName, subfolders, filenames in os.walk(folder):
        print('\nFolder: ' + folderName + '\n')
        for filename in filenames:
            print(filename + '\n')


if __name__ == "__main__":
    recursive_walk('/name/of/folder')
……………………………………………………
This would be the best way:
def traverse_dir_recur(dir):
    """Print the path of every non-directory entry below *dir*.

    Args:
        dir: Directory to traverse; a trailing path separator is optional.
    """
    import os

    for name in os.listdir(dir):
        # os.path.join inserts the separator only when it is missing, so the
        # argument no longer has to end with "/".
        entry_path = os.path.join(dir, name)
        if os.path.isdir(entry_path):
            traverse_dir_recur(entry_path)
        else:
            print(entry_path)
……………………………………………………
Try this:
import os
# The first item produced by os.walk describes the start directory itself:
# its path, the names of its sub-directories and the names of its files.
# One walk is enough; unpack the tuple instead of walking three times.
root_name, dir_names, file_names = next(os.walk("."))
Here I’m assuming your path is “.”, the directory that holds root_file and the other directories.
Basically, os.walk is a generator function, so each next() call returns one (root, dirs, files) tuple. The first call returns the listing of the top-level directory only; further next() calls on the same generator would move on to the sub-directories.
By doing this we can save the top-level directory and file names in dir_names and file_names respectively.
……………………………………………………
Do try this; easy one
#!/usr/bin/env python3
import os

# Retained so that existing references to this module-level name keep
# working; direct() no longer reads or updates it.
donePaths = []


def direct(path, _depth=None):
    """Print the tree below *path*: files first, then each sub-directory
    followed by its own contents.

    Every line starts with one '———' marker per path level, a space, then
    the entry name. The level of the entries directly in *path* equals the
    number of separators in the normalised *path*; each nesting level adds
    one. Nothing is printed when *path* cannot be walked.

    Args:
        path: Directory to display.
        _depth: Internal; level of *path*, passed down by the recursion.
    """
    if _depth is None:
        # normpath keeps a trailing separator from counting as a level.
        _depth = os.path.normpath(path).count(os.sep)

    # Only the first os.walk item (the listing of *path* itself) is needed;
    # the sub-directories are handled by the recursion below, so no global
    # bookkeeping of visited paths is required and repeated calls print the
    # same result.
    listing = next(os.walk(path), None)
    if listing is None:
        return
    _, dirs, files = listing

    prefix = '———' * _depth
    for ele1 in files:
        print(prefix, ele1)
    for ele2 in dirs:
        print(prefix, ele2)
        direct(os.path.join(path, ele2), _depth + 1)


if __name__ == "__main__":
    path = input("Enter any path to get the following Dir Tree …\n")
    direct(path)
========OUTPUT below========
/home/test
—————— b.txt
—————— a.txt
—————— a
————————— a1.txt
—————— b
————————— b1.txt
————————— b2.txt
————————— cde
———————————— cde.txt
———————————— cdeDir
——————————————— cdeDir.txt
—————— c
————————— c.txt
————————— c1
———————————— c1.txt
———————————— c2.txt
……………………………………………………
Let’s say you have an arbitrary parent directory with subdirectories as such:
/home/parent_dir
├── 0_N
├── 1_M
├── 2_P
├── 3_R
└── 4_T
And here is what you can do to estimate the approximate percentage of files in each subdirectory relative to the total number of files in the parent:
from os import listdir as osl
from os import walk as osw
from os.path import join as osj
def _count_files(directory):
    """Return the number of files anywhere below *directory*."""
    return sum(len(files) for _, _, files in osw(directory))


def subdir_summary(parent_dir):
    """Print how the files below *parent_dir* are spread over its
    immediate sub-directories.

    The first line reports the total number of files in the whole tree.
    Then, for every immediate sub-directory in name order, one line gives
    its name, the number of files below it and that number as a whole
    percentage of the total (rounded down; 0% when the tree has no files).
    Plain files directly in *parent_dir* count towards the total only.

    Args:
        parent_dir: Directory to summarise.

    Raises:
        OSError: If *parent_dir* cannot be listed.
    """
    from os.path import isdir

    parent_dir_len = _count_files(parent_dir)
    print(f"Total files in parent: {parent_dir_len}")
    for subdir in sorted(osl(parent_dir)):
        subdir_path = osj(parent_dir, subdir)
        if not isdir(subdir_path):
            continue
        # Counted recursively, the same way as the total, so the two numbers
        # are comparable.
        subdir_files_len = _count_files(subdir_path)
        percent = 100 * subdir_files_len // parent_dir_len if parent_dir_len else 0
        print(subdir, subdir_files_len, f"{percent}%")


if __name__ == "__main__":
    subdir_summary("/home/parent_dir")
It will print in terminal as follows:
Total files in parent: 5876
0_N 3254 55%
1_M 509 8%
2_P 1187 20%
3_R 594 10%
4_T 332 5%
……………………………………………………
import os
os.chdir('/your/working/path/')
working_dir = os.getcwd()

# Two-level listing: every entry of the working directory is printed without
# indentation, and the entries of each sub-directory one marker deeper. The
# marker is fixed per level, so later siblings are not pushed further right.
for entry in sorted(os.listdir(working_dir)):
    print(entry)
    entry_path = os.path.join(working_dir, entry)
    if not os.path.isdir(entry_path):
        # Plain files have no children to list.
        continue
    for child in sorted(os.listdir(entry_path)):
        print("—" + child)