#!/usr/bin/env python

# types2graph.py - given a CSV file, output a GML file

# Eric Lease Morgan <emorgan@nd.edu>
# (c) University of Notre Dame; distributed under a GNU Public License

# December 17, 2024 - first investigations


# configure
# CATEGORY - only rows whose 'category' column equals this value are graphed
# CSV      - path of the CSV file read as input
CATEGORY = 'international'
CSV      = './etc/articles.csv'

# require
from networkx   import DiGraph, write_gml
from pandas     import read_csv
from re         import sub
from sys        import stdout

# normalize; given an author value, return a readable country name
def normalize( country ) :
	"""Return the given author value as a country name.

	Digits are removed, underscores become spaces, and any whitespace left
	on either edge by those removals is stripped.
	"""

	# a raw string keeps \d a regex class instead of an invalid string escape
	country = sub( r'\d', '', country )
	country = country.replace( '_', ' ' )

	# strip every stray edge space, not merely a single trailing one
	return country.strip()


# main; read the CSV file, build the graph, and output it as GML
def main() :
	"""Write a country-to-type directed graph to standard output as GML.

	Rows of CSV whose 'category' column equals CATEGORY are kept. For each
	kept row, the normalized 'author' value becomes a node whose type
	attribute is 'country', the 'type' value becomes a node whose type
	attribute is 'type', and an edge of weight 1 links the former to the
	latter.
	"""

	# initialize
	paragraphs = read_csv( CSV )
	graph      = DiGraph()

	# filter; select the desired rows once instead of testing each row
	selected = paragraphs[ paragraphs[ 'category' ] == CATEGORY ]

	# process each selected paragraph; tolist() yields plain Python values
	authors = selected[ 'author' ].tolist()
	kinds   = selected[ 'type' ].tolist()
	for author, kind in zip( authors, kinds ) :

		# normalize
		country = normalize( author )

		# update; adding an existing node or edge again changes nothing new
		graph.add_node( country, type='country' )
		graph.add_node( kind, type='type' )
		graph.add_edge( country, kind, weight=1 )

	# output; GML is written as bytes, hence the binary buffer
	write_gml( graph, stdout.buffer )


# do the work only when run as a script, and done
if __name__ == '__main__' : main()
