#!/usr/bin/env python

# paragraphs2graph.py - given a CSV file, output a GML file
#
# Reads the CSV named by CSV (columns used: title, category, type), builds a
# directed graph in which every title points at its country, its category,
# and its type, and writes the graph as GML to standard output.

# Eric Lease Morgan
# (c) University of Notre Dame; distributed under a GNU Public License

# December 17, 2024 - first investigations


# configure
CSV = './etc/paragraphs-refined.csv'

# require
from pandas import read_csv
from networkx import DiGraph, write_gml
from sys import stdout

# initialize
paragraphs = read_csv(CSV)
graph = DiGraph()

# process each paragraph in the given CSV file; the row index is not needed
for _, paragraph in paragraphs.iterrows():

    # parse; the country is whatever precedes the first hyphen in the title
    # NOTE(review): presumably titles look like "<country>-<rest>"; a title
    # without a hyphen yields country == item, i.e. a self-loop -- confirm
    # NOTE(review): a blank title cell is read by pandas as NaN (a float), on
    # which .split() raises AttributeError -- confirm the CSV has no blanks
    item = paragraph['title']
    country = item.split('-')[0]
    category = paragraph['category']
    kind = paragraph['type']  # named "kind" so the builtin type() is not shadowed

    # update; item nodes carry no attributes, the other nodes are labeled
    # with a "type" attribute naming the role they play
    graph.add_node(item)
    graph.add_node(country, type='country')
    graph.add_node(category, type='category')
    graph.add_node(kind, type='type')

    # link each item to its three facets
    graph.add_edge(item, country)
    graph.add_edge(item, category)
    graph.add_edge(item, kind)

# output and done; write_gml() is handed the binary stream underlying stdout,
# and simply reaching the end of the script exits with status 0, so no
# explicit (site-provided) exit() call is needed
write_gml(graph, stdout.buffer)