#!/usr/bin/env python

# ris2csv.py - given a list of RIS files, output CSV for the Reader

# Eric Lease Morgan
# (c) Infomotions, LLC; distributed under a GNU Public License

# October 10, 2025 - first investigation; while at Sainte-Geneviève Library (Paris)


# configure
FILES    = './etc/ariadne.txt'
CACHE    = './www.ariadne.ac.uk/cache'
COLUMNS  = [ 'author', 'title', 'date', 'url', 'file' ]
METADATA = 'metadata.csv'

# require
from pandas  import DataFrame
from pathlib import Path
from shutil  import copyfile
from sys     import exit, stderr
import string

# initialize
cache = Path( CACHE )


# given an item's index and the name of a RIS file, return a list of
# bibliographics: [ author, title, date, url, source, destination ]
def ris2tsv( index, file ) :

    # read the whole file; parse each line into a tag/value dictionary;
    # a repeated tag (e.g. multiple AU lines) keeps only the last value seen
    bibliographics = {}
    with open( file, encoding='utf-8' ) as handle :
        data = handle.read()
    for line in data.splitlines() :

        # skip blank or too-short (malformed) lines
        if len( line ) < 6 : continue

        # the RIS tag is the first two characters; the value follows the
        # "  - " separator; strip() guards against the separator's trailing
        # space (line[5:] otherwise keeps a leading blank, corrupting URLs)
        tag    = line[ 0:2 ]
        value  = line[ 5: ].strip()
        bibliographics[ tag ] = value

    # parse; a missing tag raises KeyError, which flags the broken file
    author = bibliographics[ 'AU' ]
    title  = bibliographics[ 'T1' ]
    date   = bibliographics[ 'PY' ]
    url    = bibliographics[ 'L2' ]

    # create a source; the harvested HTML sits beside its citation.ris
    source = file.replace( '/citation.ris', '.html' )

    # create a normalized destination name of the form
    # lastname-firstword_index-date.html, lowercased, punctuation removed
    name      = author.split()[ -1 ]
    firstWord = title.split()[ 0 ].lower()
    firstWord = firstWord.translate( firstWord.maketrans( '', '', string.punctuation ) )
    destination = str( cache/( '-'.join( [ name, firstWord + '_' + str( index ), date ] ) + '.html' ) ).lower()

    # done
    return [ author, title, date, url, source, destination ]


# open and process each of the given files; create metadata
def main() :

    # read the list of RIS file names
    metadata = []
    with open( FILES, encoding='utf-8' ) as handle :
        files = handle.read().splitlines()

    # process each file
    for index, filename in enumerate( files ) :

        # debug
        stderr.write( f'        item: {index}\n' )
        stderr.write( f'         ris: {filename}\n' )

        # get the bibliographics
        [ author, title, date, url, source, destination ] = ris2tsv( index, filename )

        # the cached file's basename becomes the Reader's file column
        file = Path( destination ).name

        # debug some more
        stderr.write( f'      author: {author}\n' )
        stderr.write( f'       title: {title}\n' )
        stderr.write( f'        date: {date}\n' )
        stderr.write( f'      source: {source}\n' )
        stderr.write( f' destination: {destination}\n' )
        stderr.write( f'        file: {file}\n' )
        stderr.write( f'         url: {url}\n' )
        stderr.write( '\n' )

        # cache; copy the harvested HTML to its new, normalized name
        copyfile( source, destination )

        # update
        metadata.append( [ author, title, str( date ), url, file ] )

    # create dataframe, output CSV, and done
    metadata = DataFrame( metadata, columns=COLUMNS )
    metadata.to_csv( cache/METADATA, index=False )
    exit()


if __name__ == '__main__' : main()