#!/usr/bin/env bash

# ner-build.sh - one script to rule them all
#
# Drive the full NER pipeline: extract sentences from carrels, find and
# disambiguate values, convert TSV -> CSV -> JSON, split into training
# and testing sets, build spaCy objects, and train the model.
#
# Requires the helper scripts in ./bin/ and writes ./etc/annotations.json.

# abort immediately if any step fails; otherwise a failed step would be
# silently ignored and later steps would run on bad/empty data
set -euo pipefail

# create a list of sentences
echo "Step #1 of 7: creating a list of all sentences"
./bin/ner-carrels2sentences.sh

# create a set of value/sentence pairs
echo "Step #2 of 7: finding sentences containing values"
./bin/ner-find-and-disambiguate-values.sh

# convert the TSV stream to CSV
echo "Step #3 of 7: converting tsv to csv"
./bin/ner-tsv2csv.sh

# convert csv to json; under set -e a failure here stops the build before
# the redirection can leave a truncated annotations.json behind
echo "Step #4 of 7: converting CSV to JSON"
./bin/ner-csv2json.py > ./etc/annotations.json

# split the data into training and testing versions
echo "Step #5 of 7: splitting data set into training and testing"
./bin/ner-split.py

# convert splits into spacy files
echo "Step #6 of 7: converting JSON into spaCy objects"
./bin/ner-json2spacy.py

# finally, train
echo "Step #7 of 7: training; please be patient"
./bin/ner-train.py

# done
exit 0