-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathLedger_parser.py
146 lines (118 loc) · 4.18 KB
/
Ledger_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
"""
Defines the Ledger grammar used to parse a file. A tree visitor is
also defined to extract relevant information. Finally, a simple
black-box function is provided.
See README.md for details.
@author: Etienne Monier <etienne.monier@enseeiht.fr>
@license: CC-BY-NC-SA
@since: 2021-01-27
"""
# flake8: noqa: E501
import parsimonious.grammar
import parsimonious.nodes
# PEG grammar for Ledger files, in parsimonious syntax, built section by
# section into the module-level string ``grammar``.
#
# The grammar begins with the basic parts of the language. ``expr`` is the
# first rule, which parsimonious uses as the default (start) rule, so the
# order of the sections below matters.
#
# NOTE: parsimonious evaluates every quoted literal like a Python string
# literal. All regexes are therefore written as raw literals (~r"...") so that
# backslash sequences such as \s or \/ reach the regex engine untouched instead
# of triggering "invalid escape sequence" warnings.
grammar = r"""
expr = (comment / command / transaction / emptyline)*
"""

# Comments: a line starting with one of the comment characters.
grammar += r"""
comment = ~r"^"m comment_char ~r".*$"m
comment_char = ~r"(;|#|\%|\||\*)"
"""

# Commands begin with a keyword followed by a value.
grammar += r"""
command = (account_def / payee_def / tag_def / currency_def / include_def)
account_def = ~r"^account"m ws+ account
payee_def = ~r"^payee"m ws+ payee
tag_def = ~r"^tag"m ws+ tag
currency_def = ~r"^commodity"m ws+ commodity
include_def = ~r"^include"m ws+ filename
"""

# Transaction
#
# Three kinds of transaction:
# - user (normal),
# - automatic (begins with a "="),
# - periodic (begins with a "~")
grammar += r"""
transaction = user_transaction / autom_transaction / period_transaction
user_transaction = tran_header ("\n" (posting / (indent tran_note)))+
autom_transaction = ~r"^= /"m ap_tran_regex "/" stab* ("\n" posting)+
period_transaction = ~r"^~ /"m ap_tran_regex "/" stab* ("\n" posting)+
tran_header = tran_date aux_date? stab+ state? payee (hard_sep tran_note)? stab*
tran_note = comment_char (tag_text / note_text)
note_text = ~r"[^\n:]*"
tag_text = stab* (metadata_tag / metadata_value) stab*
metadata_tag = (":" tag)+ ":"
metadata_value = tag ":" stab+ ~r"[^\n]+"
posting = indent account (hard_sep amount)? (hard_sep tran_note)? stab*
ap_tran_regex = ~r"[^\/]+"
state = ~r"([*!][ \t]+)?"
"""

# Amounts, currency and numbers
#
# Ledger admits three kinds of currency:
# - €10
# - 10 EUR
# - 10 "a currency with spaces"
grammar += r"""
amount = (currency number) / (number " " currency_name) / (number " " currency_name_long) / number
currency = ~r"[$£¥€¢]"
currency_name = ~r"[A-Za-z]+"
currency_name_long = ~r"\"[^\"]+\""
number = ~r"-?\d+(,\d{3})*(\.\d{1,2})?"
"""

# Dates
#
# Available dates for the moment:
# DD/MM/YYYY, DD-MM-YYYY, YYYY/MM/DD, YYYY-MM-DD
grammar += r"""
tran_date = ~r"^"m date
aux_date = "=" date
date = en_date / (fr_date)
en_date = year date_sep month_day date_sep month_day
fr_date = month_day date_sep month_day date_sep year
month_day = ~r"\d{2}"
year = ~r"\d{4}"
date_sep = ~r"[/-]"
"""

# Command values (account, payee and co.) [^;#\%\|\*\n]+(?=( {2}|\t|$))
grammar += r"""
account = ~r"([^;#\%\|\*\n\t ]|(?<! ) )+[^;#\%\|\*\n\t ]"m
payee = account
tag = ~r"[A-Za-z0-9]+"
commodity = currency_name_long / currency_name / currency
filename = ~r"[\w\d _\\/\.\-\(\):]+"
"""

# Separators, white space and tabs
#
# Ledger defines two types of indentation:
# - indent at the beginning of the line
# - hard separator between account and amount, for example.
#
# NOTE(review): the character class in ``hard_sep`` matches ONE character among
# space, "{", "2", "}" and tab; it does not mean "two spaces or a tab". It is
# left unchanged here because tightening it would change which inputs parse
# (e.g. a single space before a trailing note) -- confirm the intended rule.
grammar += r"""
ws = ~r"\s*"
stab = ~r"[ \t]"
indent = ~r"^ "m stab*
hard_sep = ~r"[ {2}\t]" stab*
emptyline = ws+
"""
def Ledger_parser(filename):
    """Read the Ledger file located at FILENAME and parse it.

    The file content is matched against the module-level ``grammar``.

    Arguments
    ---------
    filename: str
        The location of the file to be read. The file is decoded as UTF-8.

    Returns
    -------
    parsimonious.nodes.Node
        The parse tree returned by ``Grammar.parse`` for the file content.

    Raises
    ------
    OSError
        If the file cannot be opened.
    UnicodeDecodeError
        If the file content is not valid UTF-8.

    Any exception raised by parsimonious while building the grammar or
    parsing the content is propagated unchanged.
    """
    # Decode explicitly as UTF-8: the grammar matches non-ASCII currency
    # symbols, which would be mis-decoded under a non-UTF-8 locale default.
    with open(filename, encoding="utf-8") as ledger_file:
        content = ledger_file.read()

    # Construct grammar object.
    ledger_grammar = parsimonious.grammar.Grammar(grammar)

    # Parse content.
    return ledger_grammar.parse(content)