pricedb/fetch_rates.py

113 lines
2.9 KiB
Python
Raw Permalink Normal View History

2020-12-22 21:53:32 +01:00
#!/usr/bin/env python3
# Copyright 2020 Midgard
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Configure me
#
# FUNDS maps the symbol a fund is logged under to the URL its price is scraped from.
FUNDS = {
    # To get these URLs, open the fund's page, find the frame that holds the actual
    # data and take that frame's URL.
    "PREFERRED_SYMBOL": "https://www.tijd.be/customers/mediafin.be/funds_tijd/123/Fund/456789?t="
}
# Directory in which the per-symbol log files ("<symbol>.log") are written.
DIRECTORY = "/var/local/rate_fetcher"
# The currencies fetched are always all of those published by the ECB, currently
# AUD, BGN, BRL, CAD, CHF, CNY, CZK, DKK, GBP, HKD, HRK, HUF, IDR, ILS, INR, ISK,
# JPY, KRW, MXN, MYR, NOK, NZD, PHP, PLN, RON, RUB, SEK, SGD, THB, TRY, USD, ZAR
import requests
import os.path
import re
from typing import NamedTuple
class LogItem(NamedTuple):
    """A single price observation for one symbol on one date.

    Every field is stored as a string; the fetchers format the values
    before constructing the item.
    """

    # Symbol the observation is logged under (also names the log file).
    symbol: str
    # Date of the observation as text, as produced by the fetchers.
    date: str
    # Price as a decimal string.
    value: str
def get_ecb():
    """Fetch the ECB daily reference rates and yield one LogItem per currency.

    The logged value is the reciprocal of the published rate, formatted with
    five decimals (presumably the price of one unit of the currency in EUR,
    since the ECB feed quotes rates against the euro -- confirm if in doubt).

    This is a generator, so the HTTP request is only made once iteration
    starts.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            response.
        ValueError: if the response does not contain the publication date.
    """
    # Without a timeout, requests may block forever on a stalled connection.
    r = requests.get(
        "https://www.ecb.europa.eu/stats/eurofxref/eurofxref-daily.xml",
        timeout=30,
    )
    r.raise_for_status()

    m = re.search(r"<Cube time='([0-9-]+)'>", r.text)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if m is None:
        raise ValueError("ECB response does not contain a <Cube time='...'> element")
    date = m.group(1)

    for symbol, value in re.findall(r"<Cube currency='([A-Z]+)' rate='([0-9.]+)'/>", r.text):
        yield LogItem(
            symbol=symbol,
            date=date,
            value="{:.5f}".format(1.0 / float(value)),
        )
def get_fund(symbol, url):
    """Scrape the current price of one fund from *url* and return it as a LogItem.

    The page is searched for the text "Actuele NIW op D/M/YYYY" followed by a
    ``<span>`` holding a number with a decimal comma. The date is rewritten as
    YYYY-MM-DD (day and month zero-padded) and the comma becomes a dot.

    Args:
        symbol: Symbol to store in the returned LogItem.
        url: Address of the frame that contains the fund data.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            response.
        ValueError: if the expected date/value pattern is not found.
    """
    # Without a timeout, requests may block forever on a stalled connection.
    r = requests.get(url, timeout=30)
    r.raise_for_status()

    # DOTALL lets `.*?` span the line breaks between the label and the <span>.
    m = re.search(r"Actuele NIW op ([0-9]{1,2})/([0-9]{1,2})/([0-9]{4}).*?<span>([0-9]+),([0-9]+)</span>", r.text, re.MULTILINE|re.DOTALL)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if m is None:
        raise ValueError(f"could not find the fund value for {symbol} at {url}")

    day, month, year, whole, fraction = m.groups()
    return LogItem(
        symbol=symbol,
        date=f"{year}-{month:0>2}-{day:0>2}",
        value=f"{whole}.{fraction}",
    )
def get_funds():
    """Return a list with one LogItem per entry of FUNDS, in FUNDS' iteration order."""
    items = []
    for symbol, url in FUNDS.items():
        items.append(get_fund(symbol, url))
    return items
# `whence` arguments for file.seek(): offset relative to the start of the
# stream, to the current position, or to the end of the stream.
START_OF_STREAM = 0
CURRENT_POSITION = 1
END_OF_STREAM = 2


def last_line(fh, max_line_length=80):
    """Return the last line of *fh*, or None when nothing could be read.

    Only the final *max_line_length* units of the stream are inspected, so a
    longer last line comes back truncated at the front. If seeking backwards
    from the end fails with OSError (for instance because the file is shorter
    than that window), the whole file is read instead.
    """
    try:
        fh.seek(-max_line_length, END_OF_STREAM)
    except OSError:
        fh.seek(0, START_OF_STREAM)

    tail = fh.readlines()
    if not tail:
        return None
    return tail[-1]
def format_logline(entry: LogItem) -> str:
    """Render *entry* as the single line of text stored in its log file.

    The line starts with ``P``, then the entry's date and the fixed time
    ``18:00:00``, then the symbol and the value.
    """
    # NOTE(review): symbol and value are emitted back to back with no
    # separator between them -- confirm this is the intended format.
    line = f"P {entry.date} 18:00:00 {entry.symbol}{entry.value}"
    return line
def log(entry: LogItem):
    """Append *entry* to ``<DIRECTORY>/<symbol>.log`` unless it is already there.

    An entry counts as already logged when its date occurs in the last line
    of the file. A missing file is treated as "nothing logged yet" and is
    created by the append.

    Args:
        entry: The observation to record.
    """
    filename = os.path.join(DIRECTORY, "{symbol}.log".format(symbol=entry.symbol))

    try:
        with open(filename, "rb") as fh:
            # The file is read in binary mode, so the fallback for an empty
            # file must be bytes as well: `bytes in str` raises TypeError.
            if entry.date.encode() in (last_line(fh) or b""):
                return
    except FileNotFoundError:
        # First entry for this symbol; the file is created below.
        pass

    with open(filename, "a") as fh:
        print(format_logline(entry), file=fh)
def main():
    """Fetch all ECB rates and all configured fund prices, then log each one.

    Every fetch completes before the first entry is written, so a failure
    while fetching leaves the log files untouched.
    """
    entries = list(get_ecb())
    entries.extend(get_funds())
    for item in entries:
        log(item)


if __name__ == '__main__':
    main()