This is a neat way to generate the names of files under a directory that match a specific pattern. I use it to build a list of files exported out of Hive to load into S3.
1 2 3 4 5 6 |
def find_files(directory, pattern):
    """Yield paths of files under *directory* whose base name matches *pattern*.

    The tree is walked top-down with ``os.walk``; *pattern* is a shell-style
    wildcard (for example ``'*.gz'``) tested against each file's base name
    with ``fnmatch.fnmatch``. Each yielded path is the walked directory
    joined with the file name.

    Both the subdirectories and the files within each directory are visited
    in sorted order, so the sequence of results is the same on every run.
    """
    for root, dirs, files in os.walk(directory):
        # Sorting in place tells os.walk which order to descend in; without
        # this, subdirectory order is whatever the filesystem returns.
        dirs.sort()
        for basename in sorted(files):
            if fnmatch.fnmatch(basename, pattern):
                yield os.path.join(root, basename)
1 2 3 4 5 6 7 8 9 10 |
local_dir = '/mnt/share/etl/date/'

# Upload every .gz file found under local_dir to S3, logging each outcome.
for path in find_files(local_dir, '*.gz'):
    # Drop the leading '/' so the S3 key is the local path without the root slash.
    key = path[1:]
    try:
        awss3.upload(key, path)
        log_msg = 'uploading file: [{0}] to S3'.format(path)
        log.write(log_msg)
    except Exception as e:
        # A failure on one file is logged and the loop moves on to the next.
        # {0} is the exception and {1} the file; the original used {0} twice,
        # so the file name never appeared in the error message.
        log_msg = 'ERROR: {0} uploading file: [{1}] to S3'.format(e, path)
        log.write(log_msg, 'error')