#!/usr/bin/env python

# categories2edgess.py - given a CSV file, output an edges file for network analysis

# Eric Lease Morgan
# (c) University of Notre Dame; distributed under a GNU Public License

# December 17, 2024 - first investigations
# April     3, 2025 - modified for constitutions


# require
from re import sub
from sys import stdout, argv, exit

from pandas import read_csv, DataFrame

# configure
CSV = './etc/articles.csv'
COLUMNS = ['source', 'target', 'weight']


def normalize_country(name):
    """Return *name* with digits removed and underscores turned into spaces.

    All digit characters are deleted, every underscore becomes a space, and
    a single trailing space (e.g. one left behind by a removed numeric
    suffix) is dropped.
    """
    # raw strings keep the regex escapes from being read as (invalid)
    # string escapes, which newer Python versions warn about
    name = sub(r'\d', '', name)
    name = name.replace('_', ' ')
    return sub(r' $', '', name)


def build_edges(paragraphs, category):
    """Build the edge list for every row whose 'category' equals *category*.

    Args:
        paragraphs: DataFrame with at least the columns 'author',
            'category', and 'type'.
        category: the category value to keep; rows with any other value
            are skipped.

    Returns:
        A DataFrame with the columns 'source', 'target', and 'weight':
        one row per matching input row, where source is the normalized
        'author' value, target is the 'type' value, and weight is always 1.
        Duplicate edges are not merged.
    """
    selected = paragraphs[paragraphs['category'] == category]
    edges = [
        [normalize_country(author), kind, 1]
        for author, kind in zip(selected['author'], selected['type'])
    ]
    return DataFrame(edges, columns=COLUMNS)


def main(args=None):
    """Read the configured CSV file and print its edges as tab-delimited text.

    Args:
        args: command-line style list (program name followed by the category
            to filter on); defaults to sys.argv.

    Exits with a usage message when exactly one argument is not supplied.
    """
    args = argv if args is None else args

    # get input
    if len(args) != 2:
        exit("Usage: " + args[0] + " <category>")
    category = args[1]

    # initialize, process, and output; print() deliberately keeps the
    # newline it has always appended after the CSV text
    paragraphs = read_csv(CSV)
    edges = build_edges(paragraphs, category)
    print(edges.to_csv(sep='\t', index=False))


# do the work only when run as a script, so the functions can be imported
if __name__ == '__main__':
    main()
    exit()