diff --git a/.gitignore b/.gitignore index 7526d0eee..0cb954dae 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,6 @@ ckpt/* #pytest cache .cache/ + +#dataset dir +data diff --git a/darkflow/cython_utils/.gitignore b/darkflow/cython_utils/.gitignore new file mode 100644 index 000000000..ddaacd81f --- /dev/null +++ b/darkflow/cython_utils/.gitignore @@ -0,0 +1,2 @@ +*.so + diff --git a/darkflow/defaults.py b/darkflow/defaults.py index a54b2ec27..0cd4a79ea 100644 --- a/darkflow/defaults.py +++ b/darkflow/defaults.py @@ -35,6 +35,7 @@ def setDefaults(self): self.define('saveVideo', False, 'Records video from input video or camera') self.define('pbLoad', '', 'path to .pb protobuf file (metaLoad must also be specified)') self.define('metaLoad', '', 'path to .meta file generated during --savepb that corresponds to .pb file') + self.define('annotationformat','xml','format of annotation. xml or json available. default is xml(pascal_voc style)') def define(self, argName, default, description): self[argName] = default diff --git a/darkflow/net/yolo/data.py b/darkflow/net/yolo/data.py index f604bc1bf..1b05e2f27 100644 --- a/darkflow/net/yolo/data.py +++ b/darkflow/net/yolo/data.py @@ -1,4 +1,5 @@ from ...utils.pascal_voc_clean_xml import pascal_voc_clean_xml +from ...utils.annotation_json_parser import annotation_json_parser from numpy.random import permutation as perm from .predict import preprocess # from .misc import show @@ -15,7 +16,14 @@ def parse(self, exclusive = False): msg = 'Annotation directory not found {} .' 
def _pp(l):  # pretty printing
    """Print every ``key: value`` pair of the mapping ``l`` on its own line."""
    for i in l:
        print('{}: {}'.format(i, l[i]))


def _show_progress(index, total, label):
    """Redraw the one-line text progress bar for item ``index`` (0-based) of ``total``."""
    percentage = 1. * (index + 1) / total
    progress = int(percentage * 20)
    bar_arg = [progress * '=', ' ' * (19 - progress), percentage * 100, label]
    # '\r' returns to the start of the line so the bar is redrawn in place.
    sys.stdout.write('\r')
    sys.stdout.write('[{}>{}]{:.0f}% {}'.format(*bar_arg))
    sys.stdout.flush()


def _parse_annotation_file(path, pick):
    """Parse one json annotation file into ``[imgfile, [w, h, boxes]]``.

    Only the first line of the file is read; NUL bytes in it are dropped
    before decoding because the files may be zero padded.  Objects whose
    ``name`` is not in ``pick`` are reported and skipped.
    """
    # 'with' guarantees the handle is closed even when the json is malformed.
    with open(path) as in_file:
        firstline = in_file.readline()
    root = json.loads(firstline.replace('\0', ''))

    imgfile = str(root['imgfile'])
    boxes = []
    for obj in root['objects']:
        name = str(obj['name'])
        if name not in pick:
            print("{} not in pick".format(name))
            continue

        rect = obj['rect']
        xn, xx = rect['x1'], rect['x2']
        yn, yx = rect['y1'], rect['y2']
        # The two corners may come in either order; make (xn, yn) the
        # minimum corner and (xx, yx) the maximum corner.
        if xn > xx:
            xn, xx = xx, xn
        if yn > yx:
            yn, yx = yx, yn

        boxes.append([name, xn, yn, xx, yx])

    return [imgfile, [root['w'], root['h'], boxes]]


def annotation_json_parser(ANN, pick, exclusive=False):
    """Parse every ``*.json`` annotation file found directly inside ``ANN``.

    Each file must hold one json object on its first line, for example::

        {"imgfile": "0313.png", "w": 640, "h": 480,
         "objects": [{"rect": {"y1": 4, "y2": 144, "x1": 385, "x2": 587},
                      "name": "face"}]}

    (shown wrapped here; in the file it has to be a single line).

    Args:
        ANN: path of the annotation directory.
        pick: collection of label names to keep; other objects are skipped.
        exclusive: accepted so the call signature matches the xml parser's;
            this parser does not use it.

    Returns:
        A list with one ``[imgfile, [w, h, boxes]]`` entry per annotation
        file, where ``boxes`` is a list of ``[name, xn, yn, xx, yx]`` with
        ``xn <= xx`` and ``yn <= yx``.

    Raises:
        OSError: if ``ANN`` cannot be listed or a file cannot be opened.
        ValueError: if the first line of a file is not valid json.
        KeyError: if a required key is missing from the json object.
    """
    # List the directory directly instead of chdir-ing into it: the working
    # directory of the process is never touched, so nothing has to be
    # restored when a file fails to parse.  Filtering on the suffix replaces
    # the old glob pattern built from str(os.listdir('.')), which only worked
    # by accident and missed file names containing ']'.
    names = sorted(
        name for name in os.listdir(ANN)
        if name.endswith('.json')
        and not name.startswith('.')
        and os.path.isfile(os.path.join(ANN, name))
    )
    size = len(names)

    dumps = []
    for i, name in enumerate(names):
        _show_progress(i, size, name)
        print("opening file {}".format(name))
        dumps.append(_parse_annotation_file(os.path.join(ANN, name), pick))

    # gather all stats: number of kept boxes per label
    stat = dict()
    for dump in dumps:
        for current in dump[1][2]:
            stat[current[0]] = stat.get(current[0], 0) + 1

    print('\nStatistics:')
    _pp(stat)
    print('Dataset size: {}'.format(len(dumps)))

    return dumps
If it is not specified or the user gives `xml` as a value, then it will use the pascal-voc xml parser. + +On the other hand, if the user gives `json` as a value, then it will utilize the json parser. + +## json format + +The json parser will parse the files inside the specified annotation dir according to the following format. + +- `imgfile`: the name of the image file which should be inside the specified `--images` dir +- `w`: the width of the image +- `h`: the height of the image +- `objects`: json array of the object info. Each object should have the following key-values: + - `rect`: json object which contains the 4 coordinates that specify the bounding box + - `y1`: one of the box's two y (vertical) coordinates + - `y2`: the box's other y (vertical) coordinate + - `x1`: one of the box's two x (horizontal) coordinates + - `x2`: the box's other x (horizontal) coordinate + - `name`: the label for this box + +Here is an example (the whole json object must be on the first line of the file, because the parser reads only that line): +``` +{"imgfile": "0313.png", "w": 640, "h": 480, "objects": [{"rect": {"y1": 4, "y2": 144, "x1": 385, "x2": 587}, "name": "face"}]} +``` + +# Notice + +The json parser compares x1 with x2 and y1 with y2 and swaps a pair when its first value is larger than its second, so the two corners of a box may be given in either order. \ No newline at end of file diff --git a/flow b/flow index e0239897d..6b053d227 100755 --- a/flow +++ b/flow @@ -1,4 +1,4 @@ -#! /usr/bin/env python +#! /usr/bin/env python3 import sys from darkflow.cli import cliHandler