python文件信息排序_python文件排序的方法总结

在python环境中提供两种排序方案：用内置函数sorted()排序，它返回一个新的列表，对字符串默认按字符顺序逐个比较；用列表方法sort()原地排序，配合key参数可以按数字大小排序。如果要对读取到的文件名按数字排序，需要先进行处理（把文件后缀名“屏蔽”掉）。

(1)首先：我测试的文件夹是/img/，里面的文件都是图片，如下图所示：

(2)测试库函数sorted()，直接贴出代码：

import numpy as np

import os

img_path = './img/'

# sorted() returns a new list; plain strings are compared character by
# character, so the names come back in alphabetical order.
img_list = sorted(os.listdir(img_path))

img_nums = len(img_list)

for i, file_name in enumerate(img_list):
    img_name = img_path + file_name
    print(img_name)

运行效果如下：

从图片可以清晰的看出，文件名是按字符排序的。

(3)测试函数sort()，代码：

import numpy as np

import os

img_path = './img/'

img_list = os.listdir(img_path)

# First pass: plain in-place alphabetical sort.
img_list.sort()

# Second pass: order the file names numerically.  The key drops the last
# four characters of each name and converts the rest to an int
# (assumes names look like '<digits>' plus a 4-character suffix such as
# '.jpg'; any other name makes int() raise ValueError).
img_list.sort(key=lambda x: int(x[:-4]))

img_nums = len(img_list)

for i, file_name in enumerate(img_list):
    img_name = img_path + file_name
    print(img_name)

运行效果如下：

可以看出，文件名是按数字排序的；顺便提下，sort函数中用到了匿名函数(key = lambda x: int(x[:-4]))，其作用是将后缀名'.jpg'“屏蔽”(因为'.jpg'是4个字符，所以[:-4]的含义是取文件名从开头到倒数第4个字符之前的部分，也就是去掉最后4个字符)，具体可参考python的匿名函数和切片(slice)取值方式。

实例扩展：

import gzip

import os

from multiprocessing import Process, Queue, Pipe, current_process, freeze_support

from datetime import datetime

def sort_worker(input, output):
    """Sort and de-duplicate bulks of text until a ``'STOP'`` line arrives.

    Every item taken from ``input`` is a block of newline-separated text.
    For each line only the text before the first space is kept; the distinct
    keys of the block are sorted and put on ``output`` as one newline-joined
    string.

    Args:
        input: Queue-like object whose ``get()`` returns a string.
        output: Queue-like object whose ``put()`` receives one sorted string
            per bulk.

    Returns:
        None.  The function returns as soon as it meets a line that equals
        ``'STOP'`` after stripping; the bulk containing that line is not
        forwarded.
    """
    while True:
        elements = set()
        for line in input.get().splitlines():
            if line.strip() == 'STOP':
                return
            # The sort key is the first space-separated field of the line.
            elements.add(line.split(' ')[0])
        output.put('\n'.join(sorted(elements)))

def write_worker(input, pre):
    """Write every bulk received from ``input`` to its own file under ``pre``.

    Files are named ``pre/0``, ``pre/1``, ... in the order the bulks arrive.

    Args:
        input: Queue-like object whose ``get()`` returns a string.
        pre: Directory that receives the numbered files; it is created when
            it does not exist yet.

    Returns:
        None.  The function returns when an item equal to ``'STOP'`` (after
        stripping) is received.
    """
    # Create the directory directly instead of through a shell command, so
    # that paths containing spaces or shell metacharacters work.
    if not os.path.isdir(pre):
        os.mkdir(pre)
    i = 0
    while True:
        content = input.get()
        if content.strip() == 'STOP':
            return
        write_sorted_bulk(content, '%s/%s' % (pre, i))
        i += 1

def write_sorted_bulk(content, filename):
    """Write ``content`` to ``filename``, replacing any existing file.

    Args:
        content: Text to store.
        filename: Path of the file to create or overwrite.
    """
    # ``open`` replaces the Python-2-only ``file`` builtin; the ``with``
    # block closes the handle even when the write fails.
    with open(filename, 'w') as f:
        f.write(content)

def _iter_line_bulks(stream, buf_size):
    """Yield pieces of ``stream`` that always end on a line boundary."""
    pending = ''
    while True:
        chunk = stream.read(buf_size)
        if not chunk:
            break
        pending += chunk
        end_line = pending.rfind('\n')
        if end_line == -1:
            # A single line longer than the buffer: keep reading instead of
            # throwing the partial line away.
            continue
        yield pending[:end_line + 1]
        pending = pending[end_line + 1:]
    if pending:
        # Last line of the input has no trailing newline.
        yield pending


def split_sort_file(filename, num_sort = 3, buf_size = 65536*64*4):
    """Split ``filename`` into sorted bulk files using worker processes.

    The input is read in pieces of roughly ``buf_size`` characters, each cut
    at a line boundary.  Every piece goes through ``sort_worker`` and is then
    stored by ``write_worker`` in the directory named after ``filename``
    without its extension.

    Args:
        filename: Input file; a ``.gz`` extension selects gzip decoding.
        num_sort: Number of sorting processes to start.
        buf_size: Number of characters requested per read.

    Returns:
        The number of bulks handed to the sorting processes.
    """
    t = datetime.now()
    pre, ext = os.path.splitext(filename)
    if ext == '.gz':
        # Text mode, so both branches hand ``str`` data to the workers.
        file_file = gzip.open(filename, 'rt')
    else:
        file_file = open(filename)

    bulk_queue = Queue(10)
    sorted_queue = Queue(10)

    sort_worker_pool = [
        Process(target=sort_worker, args=(bulk_queue, sorted_queue))
        for _ in range(num_sort)
    ]
    for worker in sort_worker_pool:
        worker.start()

    # A single writer numbers the output files consecutively.
    write_worker_pool = [
        Process(target=write_worker, args=(sorted_queue, pre))
    ]
    for worker in write_worker_pool:
        worker.start()

    sorted_count = 0
    try:
        for bulk in _iter_line_bulks(file_file, buf_size):
            bulk_queue.put(bulk)
            sorted_count += 1
    finally:
        file_file.close()

    # One 'STOP' per sorter; the writer is stopped only after every sorter
    # has finished, so no sorted bulk can arrive behind the writer's 'STOP'.
    for _ in sort_worker_pool:
        bulk_queue.put('STOP')
    for worker in sort_worker_pool:
        worker.join()
    for _ in write_worker_pool:
        sorted_queue.put('STOP')
    for worker in write_worker_pool:
        worker.join()

    print('elasped  %s' % (datetime.now() - t))
    return sorted_count

from heapq import heappush, heappop

from datetime import datetime

from multiprocessing import Process, Queue, Pipe, current_process, freeze_support

import os

class file_heap(object):
    """Merge the lines of the text files of one directory through a heap.

    Each selected file contributes its next unread line to a min-heap, so
    ``poppush`` returns the smallest pending line.  The overall output is
    only globally ordered when every file is itself sorted (presumably the
    bulk files written by ``split_sort_file`` - confirm against the caller).
    """

    # Refill a file's buffer when fewer than this many characters remain.
    LOW_WATER = 256
    # Number of characters requested from a file per refill.
    READ_SIZE = 65536

    def __init__(self, dir, idx = 0, count = 1):
        """Open the files of ``dir`` that belong to partition ``idx``.

        Args:
            dir: Directory holding the files to merge.
            idx: Partition handled by this heap, ``0 <= idx < count``.
            count: Total number of partitions the directory is split into.
        """
        # Sorted so that every process listing the directory numbers the
        # files identically.
        files = sorted(os.listdir(dir))
        self.heap = []
        self.files = {}
        self.bulks = {}
        self.pre_element = None
        for i, name in enumerate(files):
            # Partition by position, not by hash(name): string hashes are
            # randomised per interpreter on Python 3, so separately started
            # processes could disagree on which partition owns a file.
            if i % count != idx:
                continue
            self.files[i] = open(os.path.join(dir, name))
            self.bulks[i] = ''
            first = self._next_element(i)
            if first is not None:
                heappush(self.heap, (first, i))

    def _next_element(self, i):
        """Return the next line of file ``i``, or None once it is exhausted."""
        # Refill while the buffer is running low, and keep refilling until it
        # holds a complete line, so long lines are never cut in two.
        while self.files[i] is not None and (
                len(self.bulks[i]) < self.LOW_WATER
                or '\n' not in self.bulks[i]):
            buf = self.files[i].read(self.READ_SIZE)
            if buf:
                self.bulks[i] += buf
            else:
                self.files[i].close()
                self.files[i] = None
        bulk = self.bulks[i]
        if not bulk:
            # The loop above only leaves an empty buffer at end of file.
            return None
        end_line = bulk.find('\n')
        if end_line == -1:
            # Final line without a trailing newline.
            end_line = len(bulk)
        self.bulks[i] = bulk[end_line + 1:]
        return bulk[:end_line]

    def get_next_element_buffered(self, i):
        """Return the next line of file ``i`` ('' once it is exhausted)."""
        element = self._next_element(i)
        return '' if element is None else element

    def poppush_uniq(self):
        """Return the next line that differs from the previously returned one.

        Returns:
            The line, or None when every file is exhausted.
        """
        while True:
            element = self.poppush()
            if element is None:
                return None
            if element != self.pre_element:
                self.pre_element = element
                return element

    def poppush(self):
        """Pop the smallest pending line and queue its file's next line.

        Returns:
            The popped line, or None when the heap is empty.
        """
        try:
            element, index = heappop(self.heap)
        except IndexError:
            return None
        new_element = self._next_element(index)
        # Compare against None so that an empty line is not mistaken for the
        # end of the file.
        if new_element is not None:
            heappush(self.heap, (new_element, index))
        return element

def heappoppush(dir, queue, idx = 0, count = 1):
    """Stream the distinct merged lines of one partition of ``dir``.

    Builds a ``file_heap`` for partition ``idx`` of ``count`` and puts every
    value returned by ``poppush_uniq`` on ``queue``, ending with the None
    that marks the end of the stream.
    """
    merger = file_heap(dir, idx, count)
    # iter() with a sentinel stops right before the terminating None ...
    for element in iter(merger.poppush_uniq, None):
        queue.put(element)
    # ... which is forwarded explicitly so the consumer knows when to stop.
    queue.put(None)

def heappoppush2(dir, queue, count = 1):
    """Merge ``count`` partition streams of ``dir`` into ``queue``.

    One ``heappoppush`` process is started per partition.  Their outputs are
    merged through a heap, consecutive duplicates are dropped, and the
    result is put on ``queue`` followed by a terminating None.

    Args:
        dir: Directory holding the files to merge.
        queue: Object with a ``put`` method that receives the merged lines.
        count: Number of partitions, i.e. worker processes to start.
    """
    procs = []
    queues = []
    for i in range(count):
        q_buf = queue_buffer(Queue(1024))
        queues.append(q_buf)
        p = Process(target=heappoppush, args=(dir, q_buf, i, count))
        procs.append(p)
        p.start()

    # Seed the heap with the first line of every stream.  The end-of-stream
    # None is never pushed: mixing None with strings in the heap raises
    # TypeError on Python 3.
    heap = []
    for i, q_buf in enumerate(queues):
        head = q_buf.get()
        if head is not None:
            heappush(heap, (head, i))

    pre_element = None
    while heap:
        d, i = heappop(heap)
        # Replace the popped line with the next one from the same stream.
        following = queues[i].get()
        if following is not None:
            heappush(heap, (following, i))
        if d != pre_element:
            pre_element = d
            queue.put(d)

    queue.put(None)
    for p in procs:
        p.join()

def merge_file(dir):
    """Merge the files of ``dir`` into the single file ``dir + '.merge'``.

    Each distinct line returned by the ``file_heap`` of ``dir`` is written
    followed by a newline.  An existing merge file is overwritten.

    Args:
        dir: Directory holding the files to merge.
    """
    heap = file_heap(dir)
    # Mode 'w' truncates a previous result, so no shell 'rm' is needed, and
    # the with-block closes the file.
    with open(dir + '.merge', 'w') as fmerge:
        # Stop at the terminating None rather than trying to write it.
        for element in iter(heap.poppush_uniq, None):
            fmerge.write(element + '\n')

class queue_buffer(object):
    """Batching wrapper that moves items through a queue in lists.

    ``put`` collects items locally and sends them to the wrapped queue as
    one list; ``get`` receives such a list and hands its items out one at a
    time in their original order.
    """

    def __init__(self, queue):
        """Wrap ``queue``, an object offering ``get()`` and ``put(item)``."""
        self.q = queue
        # Pending received items, stored in reverse order so the next item
        # to return is always the last one.
        self.rbuf = []
        # Items collected by put() that have not been sent yet.
        self.wbuf = []

    def get(self):
        """Return the next item, fetching a new batch when none is pending."""
        if not self.rbuf:
            # Reverse once per batch: list.pop() from the tail is O(1),
            # whereas deleting index 0 shifts the whole list on every call.
            self.rbuf = list(reversed(self.q.get()))
        return self.rbuf.pop()

    def put(self, d):
        """Buffer ``d``; send the batch when ``d`` is None or it is full.

        A batch is sent once it holds more than 1024 items, or immediately
        when ``d`` is None (the None is part of the batch that is sent).
        """
        self.wbuf.append(d)
        if d is None or len(self.wbuf) > 1024:
            self.q.put(self.wbuf)
            self.wbuf = []

def diff_file(file_old, file_new, file_diff, buf = 268435456):
    """Write the line-level difference of two files to ``file_diff``.

    Both inputs are first split into sorted bulk files with
    ``split_sort_file``; the two merged, de-duplicated streams are then
    compared.  Lines found only in ``file_new`` are written with a ``'< '``
    prefix, lines found only in ``file_old`` with a ``'> '`` prefix.

    Args:
        file_old: Path of the old file.
        file_new: Path of the new file.
        file_diff: Path of the report; an existing file is overwritten.
        buf: Read size passed to ``split_sort_file``.
    """
    import shutil
    from file_split import split_sort_file

    print('buffer size %s' % buf)

    dir_old = os.path.splitext(file_old)[0]
    dir_new = os.path.splitext(file_new)[0]

    # Remove the bulk directories of an earlier run without going through a
    # shell; a missing directory is not an error.
    shutil.rmtree(dir_old, ignore_errors=True)
    shutil.rmtree(dir_new, ignore_errors=True)

    t = datetime.now()
    split_sort_file(file_old, 5, buf)
    split_sort_file(file_new, 5, buf)
    print('split elasped  %s' % (datetime.now() - t))

    # Diagnostic output only: line totals of the generated bulk files.
    # NOTE(review): the paths are interpolated into a shell command
    # unquoted - confirm callers never pass untrusted file names.
    os.system('cat %s/* | wc -l' % dir_old)
    os.system('cat %s/* | wc -l' % dir_new)

    t = datetime.now()
    old_queue = queue_buffer(Queue(1024))
    new_queue = queue_buffer(Queue(1024))
    h1 = Process(target=heappoppush2, args=(dir_old, old_queue, 3))
    h2 = Process(target=heappoppush2, args=(dir_new, new_queue, 3))
    h1.start()
    h2.start()

    old_count, new_count = 0, 0
    # Mode 'w' truncates a previous report; the with-block closes the file.
    with open(file_diff, 'w') as zdiff:
        old = old_queue.get()
        new = new_queue.get()
        while old is not None or new is not None:
            # None marks an exhausted stream and must be tested before any
            # ordering comparison, which is undefined between None and str.
            if old is None or (new is not None and new < old):
                zdiff.write('< ' + new + '\n')
                new = new_queue.get()
                new_count += 1
            elif new is None or old < new:
                zdiff.write('> ' + old + '\n')
                old = old_queue.get()
                old_count += 1
            else:
                # Same line on both sides: skip it.
                old = old_queue.get()
                new = new_queue.get()

    print('new_count: %s' % new_count)
    print('old_count: %s' % old_count)
    print('diff elasped  %s' % (datetime.now() - t))

    h1.join()
    h2.join()

到此这篇关于python文件排序的方法总结的文章就介绍到这了，更多与python文件排序方法相关的内容，请搜索我们以前的文章或继续浏览下面的相关文章，希望大家以后多多支持我们！

本文标题: python文件排序的方法总结

本文地址: http://www.cppcns.com/jiaoben/python/344867.html

python文件信息排序_python文件排序的方法总结相关推荐

python爬虫如何从一个页面进入另一个页面-Python爬虫信息输入及页面的切换方法...
实现网页的键盘输入操作 from selenium.webdriver.common.keys import Keys 动态网页有时需要将鼠标悬停在某个元素上,相应的列表选项才能显示出来. 而爬虫在工 ...
python数字排序_python数字排序
广告关闭腾讯云11.11云上盛惠 ,精选热门产品助力上云,云服务器首年88元起,买的越多返的越多,最高返5000元! 例如,你想要按照名字来对一列团队成员名单进行排序,或者想按照优先级来对一列项目进 ...
python数组排序返回索引_python列表排序返回索引
广告关闭腾讯云11.11云上盛惠 ,精选热门产品助力上云,云服务器首年88元起,买的越多返的越多,最高返5000元! 我需要对列表进行排序,然后返回一个列表,其中包含列表中排序项的索引... 我有一 ...
python文件的用法_Python文件读写常见用法总结
1. 读取文件 # !/usr/bin/env python # -*- coding:utf-8 -*- """ 文件读取三步骤: 1.打开文件 f=open(file ...
python文件输入符_python文件IO与file操作
1 标准输入输出IO - (1) 打印到屏幕 print() print(self, *args, sep=' ', end='\n', file=None): 把传递的表达式转换成一个字符串表达式 ...
python 文档操作_Python 文件操作
一. Python 读写创建文件 Python中对文件,文件夹(文件操作函数)的操作需要涉及到OS 模块和 shutil 模块 . 一) . OS模块的基本操作方法功能 os.getcwd() ...
python生成word目录_Python 文件与目录操作方法总结
Python 有很多内置的模块和函数可用于文件的操作处理,这些函数都分布在几个模块上:如 os,os.path,shutil 和pathlib 等等.本文收集了许多您需要知道的函数,以便在 Pytho ...
python打开文件的句柄_python文件操作
一.文件操作打开文件时,需要指定文件路径和以何等方式打开文件,打开后,即可获取该文件句柄,日后通过此文件句柄对该文件操作. 打开文件的模式有: r ,只读模式[默认模式,文件必须存在,不存在则抛出异 ...
python文本处理实例_Python 文件处理的简单示例
这篇文章主要为大家详细介绍了Python 文件处理的简单示例,具有一定的参考价值,可以用来参考一下. 对python这个高级语言感兴趣的小伙伴,下面一起跟随512笔记的小编两巴掌来看看吧! 相关的AP ...

python文件信息排序_python文件排序的方法总结

python文件信息排序_python文件排序的方法总结相关推荐

最新文章

热门文章