AlexNet/fix-big-imgnet.py
Laurent El Shafey 9fdd561586 Initial commit
2024-12-10 08:56:11 -08:00

40 lines
1.2 KiB
Python
Executable file

import os
import sys
from PIL import Image
from StringIO import StringIO
from util import *
# Directory holding the source batches; the main loop unpickles files named
# '0' through '48' from here. Machine-specific absolute path.
src = '/ais/gobi3/u/ilya/jpg_valid_2010_85/'
# Directory that receives the re-packed 'data_batch_<n>' files written by
# save_batch(). Machine-specific absolute path.
dst = '/ais/gobi3/u/kriz/lsvrc-2010-jpg/'
# Number of images accumulated before an output batch is written; the final
# batch written at the end of the run may hold fewer.
BATCH_SIZE = 1024
def save_batch(c_strings, c_labels, c_wnids, out_b):
    """Write one output batch under `dst` and return the next batch index.

    The three lists are stored together as a single
    `(c_strings, c_labels, c_wnids)` tuple in the file
    `data_batch_<out_b>` inside the module-level `dst` directory.

    Note: `pickle` here is called as a `pickle(path, obj)` function, not the
    standard-library module; it is presumably supplied by
    `from util import *` -- confirm against util.

    Returns:
        `out_b + 1`, i.e. the index the caller should use for the next batch.
    """
    batch_path = os.path.join(dst, 'data_batch_%d' % out_b)
    payload = (c_strings, c_labels, c_wnids)
    pickle(batch_path, payload)
    next_index = out_b + 1
    return next_index
if __name__ == "__main__":
    # Re-pack the source batches under `src` into output batches of
    # BATCH_SIZE images under `dst`, dropping every image whose bytes cannot
    # be decoded.
    #
    # NOTE(review): the indentation of this script was lost in the copy being
    # reviewed. The nesting below (one failure report per source batch, one
    # final flush after all source batches) is the reading consistent with
    # the printed message -- confirm against the original file.
    num_src_batches = 49  # source files are named '0' .. '48'
    out_b = 2000          # index used for the first output batch

    c_strings = []
    c_labels = []
    c_wnids = []

    for b in range(num_src_batches):
        failed = 0
        # The middle element (sizes) is unpacked but not needed here.
        strings, _sizes, labels = unpickle(os.path.join(src, '%s' % b))
        for s, l in zip(strings, labels):
            # Only the decode lives inside the try. Previously the whole loop
            # body was guarded, so an IOError raised while *writing* a batch
            # was miscounted as a bad image and left the buffers un-reset,
            # after which the `== BATCH_SIZE` check could never fire again.
            try:
                # Decoding is done purely as a validity check; the result is
                # discarded and the original encoded bytes are what get saved.
                Image.open(StringIO(s)).convert('RGB')
            except IOError:
                failed += 1
                continue

            c_strings.append(s)
            # Each label is indexed as l[0] -> c_wnids, l[1] -> c_labels.
            c_labels.append(l[1])
            c_wnids.append(l[0])

            if len(c_strings) == BATCH_SIZE:
                # Write errors now propagate instead of being swallowed.
                out_b = save_batch(c_strings, c_labels, c_wnids, out_b)
                # Start fresh lists rather than clearing in place, so the
                # lists just handed to save_batch are never mutated.
                c_strings = []
                c_labels = []
                c_wnids = []

        # Single-argument form prints identically on Python 2 and Python 3.
        print("Batch %d failed: %d" % (b, failed))

    # Flush the final, possibly short, batch.
    if c_strings:
        save_batch(c_strings, c_labels, c_wnids, out_b)