shamashii
2017-11-21 22:36:24 +08:00
生成耗时 110s,比较耗时 120s。实验时感觉坑点竟然在于生成随机字符串的效率,求改进。
```
import timeit
def main():
    """Generate random byte strings and write them to ``h5.h5``.

    A pool of 100000 character lists is built from the printable ASCII
    range (code points 33 to 126).  5000000 byte strings are then produced
    by picking a pool entry, shuffling it and joining it.  The resulting
    chunk is written ten times, as datasets named ``'0'`` to ``'9'``; each
    dataset gets one extra trailing entry holding its own index so the
    datasets differ from one another.

    The file is opened in ``'w'`` mode, so an existing ``h5.h5`` is
    overwritten.
    """
    import h5py
    import cyrandom

    total_rows = 50000000
    chunk_rows = 5000000

    alphabet = "".join(map(chr, range(33, 127)))
    pool = [
        [cyrandom.choice(alphabet) for _ in range(cyrandom.randint(10, 100))]
        for _ in range(100000)
    ]

    chunk = []
    for _ in range(chunk_rows):
        chars = cyrandom.choice(pool)
        # The shuffle is applied to the pool entry itself (no copy), so
        # repeated picks of the same entry yield different orderings.
        cyrandom.shuffle(chars)
        chunk.append(''.join(chars).encode('utf-8'))

    # The context manager closes the file even if a dataset write fails.
    with h5py.File('h5.h5', 'w') as f:
        for k in range(total_rows // chunk_rows):
            tag = [str(k).encode('utf-8')]
            print(k)
            f.create_dataset(str(k), data=chunk + tag)
    
def query():
    """Print every entry of ``h5.h5`` that is absent from dataset ``'0'``.

    Dataset ``'0'`` is loaded into a set.  Every dataset in the file
    (including ``'0'`` itself) is then read in full, its name is printed,
    and each entry not found in that set is collected.  The collected
    entries are printed as one list at the end.
    """
    import h5py

    # NOTE(review): mode 'a' is kept from the original; this function only
    # reads, so 'r' would likely suffice -- confirm before changing.
    with h5py.File('h5.h5', 'a') as f:
        # ``dataset[()]`` reads the whole dataset; it replaces the
        # ``Dataset.value`` property, which h5py 3.0 removed.
        baseline = set(f['0'][()])
        missing = []
        for name in f.keys():
            print(name)
            missing.extend(x for x in f[name][()] if x not in baseline)
        print(missing)
    
# Time one run of each stage and print the elapsed seconds: first the
# generation/writing stage, then the query stage that reads the file back.
print(timeit.Timer(main).timeit(number=1))
print(timeit.Timer(query).timeit(number=1))
```