#!/usr/bin/env python

# paragraphs2graph.py - given a CSV file, output a GML file

# Eric Lease Morgan <emorgan@nd.edu>
# (c) University of Notre Dame; distributed under a GNU Public License

# December 17, 2024 - first investigations


# configure
# path to the input CSV file; being relative, it is resolved against the
# current working directory; the script reads the "title", "category",
# and "type" columns from it
CSV = './etc/paragraphs-refined.csv'

# require
from pandas     import read_csv
from networkx   import DiGraph, write_gml
from sys        import stdout

# initialize
paragraphs = read_csv( CSV )
graph      = DiGraph()

# process each paragraph in the given CSV file; only three columns are
# needed, so walk them in lockstep rather than building a whole Series
# per row with iterrows()
rows = zip( paragraphs[ 'title' ], paragraphs[ 'category' ], paragraphs[ 'type' ] )
for item, category, kind in rows :

	# parse; the country is whatever precedes the first hyphen in the title
	country = item.split( '-' )[ 0 ]

	# update; add a node for the paragraph itself plus one node for each of
	# its facets; the local is named "kind" so the built-in type() is not
	# shadowed, but the node attribute itself is still called "type"
	graph.add_node( item )
	graph.add_node( country, type='country' )
	graph.add_node( category, type='category' )
	graph.add_node( kind, type='type' )

	# link the paragraph to each of its facets
	graph.add_edge( item, country )
	graph.add_edge( item, category )
	graph.add_edge( item, kind )

# output and done; the graph is written to the binary buffer of standard
# output, and SystemExit is raised directly because the exit() helper is
# supplied by the site module and is not guaranteed to exist in every run
write_gml( graph, stdout.buffer )
raise SystemExit()
