#!/usr/bin/env python

# augment-articles.py - given the documents file created from topic modeling, update articles.csv with topic labels

# Eric Lease Morgan <emorgan@nd.edu>
# (c) University of Notre Dame; distributed under a GNU Public License

# April 3, 2025 - first investigations

# configure; input files are given relative to the current working directory
PROVISIONS   = './etc/articles.csv'
DOCUMENTS    = './etc/topic-model/documents.txt'

# stop processing documents once the line index exceeds this value
SHORTCIRCUIT = 2048

# topic keys (strings, as read from the first field of each documents line) and their labels
MAP = {
	'0' : 'land',
	'1' : 'traditional law',
	'2' : 'rights',
	'3' : 'customs',
	'4' : 'higher law',
	'5' : 'indigenous',
}

# require
from pandas import read_csv

# initialize; read the articles as a data frame and the documents file as a list of lines
provisions = read_csv( PROVISIONS )
with open( DOCUMENTS ) as handle : documents = handle.read().splitlines()

# add the columns to be filled in; rows never matched by a document keep None
provisions[ 'topic' ]      = None
provisions[ 'proportion' ] = None

# index the rows by file name once, instead of scanning the whole column for
# every document; setdefault keeps the first row when a file name is repeated
rows = {}
for row, name in zip( provisions.index, provisions[ 'file' ] ) : rows.setdefault( name, row )

# process each document; update provisions with topic labels
for index, document in enumerate( documents ) :

	# we don't want the first line/document
	if index == 0 : continue

	# parse; the line is space-delimited: field 0 is the topic key, field 3 is
	# the proportion, and the file name is the seventh '/'-delimited component
	# of field 2 (NOTE: that depth is hard-coded; verify against documents.txt)
	fields     = document.split( ' ' )
	key        = fields[ 0 ]
	file       = fields[ 2 ].split( '/' )[ 6 ]
	proportion = fields[ 3 ]

	# map; an unknown key raises KeyError
	label = MAP[ key ]

	# find the row; fail with a readable message (still an IndexError) when no row matches
	if file not in rows :
		raise IndexError( "no row in {} has file '{}' (line {} of {})".format( PROVISIONS, file, index + 1, DOCUMENTS ) )
	row = rows[ file ]

	# update; proportion is stored as the string read from the documents file
	provisions.at[ row, 'topic' ]      = label
	provisions.at[ row, 'proportion' ] = proportion

	# continue, conditionally; the test follows the update, so at most
	# SHORTCIRCUIT + 1 documents are processed
	if index > SHORTCIRCUIT : break

# output the augmented articles as CSV
print( provisions.to_csv( index=False ) )

# done; raise SystemExit directly because exit() is an interactive helper
# supplied by the site module and is not guaranteed to exist
raise SystemExit
