Skip to content

Commit edf8ef1

Browse files
author
Laurens Rietveld
committed
refactored untablinker
1 parent 81135ab commit edf8ef1

File tree

4 files changed

+52
-30
lines changed

4 files changed

+52
-30
lines changed

.gitignore

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
*marked.ttl
21
.project
32
.pydevproject
3+
output/*

config.ini

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ format = n3 ;N3 is supported by serializer -and- parser. Turtle is only supporte
44

55
[paths]
66
;File mask for annotated XLS files that will be converted to RDF (Turtle)
7+
;Paths are relative to the file being executed (in the src dir)
78
srcMask = ../input/*_marked.xls
89
;Target folder for RDF (Turtle) files
910
targetFolder = ../output/

input/simple_marked.xls

8 KB
Binary file not shown.

src/untablinker.py

+50-29
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import logging
1717
import os
1818
import sys
19+
import shutil
1920
try:
2021
import rdfextras#@UnusedImport
2122
except ImportError, e:
@@ -36,11 +37,11 @@ class UnTabLinker(object):
3637
'owl':Namespace('http://www.w3.org/2002/07/owl#')
3738
}
3839

39-
def __init__(self, filename, config, level = logging.DEBUG):
40+
def __init__(self, directory, config, level = logging.DEBUG):
4041
"""TabLinker constructor
4142
4243
Keyword arguments:
43-
filename -- String containing the name of the current Excel file being examined
44+
directory -- String containing the name of the turtle file
4445
config -- Configuration object, loaded from .ini file
4546
level -- A logging level as defined in the logging module
4647
"""
@@ -52,19 +53,48 @@ def __init__(self, filename, config, level = logging.DEBUG):
5253
self.log.debug('Loading and parsing file')
5354
self.graph.parse(filename, format=config.get('general', 'format'))
5455

55-
5656
plugin.register('sparql', rdflib.query.Processor,'rdfextras.sparql.processor', 'Processor')
5757
plugin.register('sparql', rdflib.query.Result,'rdfextras.sparql.query', 'SPARQLQueryResult')
5858

59-
self.wbk = xlwt.Workbook()
60-
#currently, we assume only 1 sheet per file
61-
self.sheet = self.wbk.add_sheet('sheet 1')
59+
6260

6361

64-
def convertToXls(self):
62+
def saveFiles(self, directory):
6563
"""
6664
Convert data in rdf to excel
65+
66+
Keyword arguments:
67+
directory -- Directory to save files in
68+
"""
69+
70+
#Get file names from dataset
71+
72+
73+
74+
self.saveFile(directory, "filename")
75+
76+
77+
78+
def saveFile(self, directory, filename):
79+
"""
80+
Retrieve information from graph, and save file
81+
"""
82+
self.wbk = xlwt.Workbook()
83+
84+
85+
#Retrieve sheets for this file
86+
87+
#Save sheet in file object
88+
self.addSheetToXls("sheetname")
89+
90+
self.wbk.save(directory + basename + '.xls')
91+
92+
def addSheetToXls(self, sheetName):
93+
"""
94+
Get values for this sheet, and store in excel object
6795
"""
96+
self.sheet = self.wbk.add_sheet(sheetName)
97+
6898
#Get the row IDs from the RDF set
6999
queryResult = self.graph.query(
70100
"""SELECT DISTINCT ?cell ?value
@@ -78,32 +108,15 @@ def convertToXls(self):
78108
initNs=self.namespaces
79109
)
80110
if (len(queryResult) == 0):
81-
self.log.error("No rows found in rdf set. Exiting...")
82-
quit()
111+
self.log.error("No rows found for sheet {0}".format(sheetName))
112+
83113
#Loop through cells and add values to excel
84114
for resultRow in queryResult.result:
85115
cell, value = resultRow
86116
col, row = self.cellname2index(cell)
87117
self.sheet.write(row, col, value)
88-
89-
90-
def addRowToXLS(self, rowID):
91-
queryResult = self.graph.query(
92-
"""SELECT DISTINCT ?col ?value
93-
WHERE {
94-
?node <http://www.data2semantics.org/core/row> """ + str(rowID) + """ .
95-
?node
96-
?node <http://www.data2semantics.org/core/col> ?col .
97-
} LIMIT 10""",
98-
#Can't use prefix d2s. This produces a parsing error (even though the namespace is defined).
99-
#A bug in the query parser I guess
100-
#Also, don't use [] in this query processor...
101-
initNs=self.namespaces
102-
)
103-
for resultRow in queryResult.result:
104-
col, value = resultRow
105-
self.sheet.write(rowID - 1, self.excel2num(col), value)
106118

119+
107120
def cellname2index(self, cellname):
108121
matches = re.search('([A-Z]*)([0-9]*)',cellname)
109122
if (len(matches.groups()) != 2 ):
@@ -157,10 +170,18 @@ def checkArg() :
157170
logging.info("Found {0} files to convert.".format(len(files)))
158171

159172
unLinker = UnTabLinker(filename, config, logLevel)
160-
unLinker.convertToXls()
173+
161174
basename = os.path.basename(filename)
162175
basename = re.search('(.*)\.ttl',basename).group(1)
163-
unLinker.wbk.save(config.get('paths', 'targetFolder') + basename + '.xls')
176+
directory = config.get('paths', 'targetFolder') + basename + "/"
177+
178+
if (os.path.isdir(directory)) :
179+
logging.debug('Output dir {0} already exists. Deleting'.format(directory))
180+
shutil.rmtree(directory)
181+
logging.debug('Creating dir {0}'.format(directory))
182+
os.makedirs(directory)
183+
unLinker.saveFiles(directory)
184+
164185

165186
logging.info("Done")
166187

0 commit comments

Comments
 (0)