#!/usr/bin/env python

# ris2csv.py - given a list of RIS files, output CSV for the Reader

# Eric Lease Morgan
# (c) Infomotions, LLC; distributed under a GNU Public License

# October 10, 2025 - first investigation; while at Sainte-Geneviève Library (Paris)


# configure
FILES    = './etc/ariadne.txt'
CACHE    = './www.ariadne.ac.uk/cache'
COLUMNS  = [ 'author', 'title', 'date', 'url', 'file' ]
METADATA = 'metadata.csv'

# require
from pandas  import DataFrame
from pathlib import Path
from shutil  import copyfile
from sys     import exit, stderr
import string

# initialize
cache = Path( CACHE )


# given an item's index and the name of a RIS file, return a list of
# bibliographics: [ author, title, date, url, source, destination ]
def ris2tsv( index, file ) :

    # read the whole file; parse each line into a tag/value dictionary;
    # a repeated tag (e.g. multiple AU lines) keeps only the last value seen
    bibliographics = {}
    with open( file, encoding='utf-8' ) as handle :
        data = handle.read()
    for line in data.splitlines() :

        # skip blank or too-short (malformed) lines
        if len( line ) < 6 : continue

        # the RIS tag is the first two characters; the value follows the
        # "  - " separator; strip() guards against the separator's trailing
        # space (line[5:] otherwise keeps a leading blank, corrupting URLs)
        tag    = line[ 0:2 ]
        value  = line[ 5: ].strip()
        bibliographics[ tag ] = value

    # parse; a missing tag raises KeyError, which flags the broken file
    author = bibliographics[ 'AU' ]
    title  = bibliographics[ 'T1' ]
    date   = bibliographics[ 'PY' ]
    url    = bibliographics[ 'L2' ]

    # create a source; the harvested HTML sits beside its citation.ris
    source = file.replace( '/citation.ris', '.html' )

    # create a normalized destination name of the form
    # lastname-firstword_index-date.html, lowercased, punctuation removed
    name      = author.split()[ -1 ]
    firstWord = title.split()[ 0 ].lower()
    firstWord = firstWord.translate( firstWord.maketrans( '', '', string.punctuation ) )
    destination = str( cache/( '-'.join( [ name, firstWord + '_' + str( index ), date ] ) + '.html' ) ).lower()

    # done
    return [ author, title, date, url, source, destination ]


# open and process each of the given files; create metadata
def main() :

    # read the list of RIS file names
    metadata = []
    with open( FILES, encoding='utf-8' ) as handle :
        files = handle.read().splitlines()

    # process each file
    for index, filename in enumerate( files ) :

        # debug
        stderr.write( f'        item: {index}\n' )
        stderr.write( f'         ris: {filename}\n' )

        # get the bibliographics
        [ author, title, date, url, source, destination ] = ris2tsv( index, filename )

        # the cached file's basename becomes the Reader's file column
        file = Path( destination ).name

        # debug some more
        stderr.write( f'      author: {author}\n' )
        stderr.write( f'       title: {title}\n' )
        stderr.write( f'        date: {date}\n' )
        stderr.write( f'      source: {source}\n' )
        stderr.write( f' destination: {destination}\n' )
        stderr.write( f'        file: {file}\n' )
        stderr.write( f'         url: {url}\n' )
        stderr.write( '\n' )

        # cache; copy the harvested HTML to its new, normalized name
        copyfile( source, destination )

        # update
        metadata.append( [ author, title, str( date ), url, file ] )

    # create dataframe, output CSV, and done
    metadata = DataFrame( metadata, columns=COLUMNS )
    metadata.to_csv( cache/METADATA, index=False )
    exit()


if __name__ == '__main__' : main()