pricedb/fetch_rates.py

#!/usr/bin/env python3

# Copyright 2020 Midgard
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Configuration: edit the values below to match your setup.

# Maps the symbol each fund is logged under to the URL its value is scraped from.
FUNDS = {
	# To get these URLs, go to the fund's page, find the frame that holds the actual data, and
	# take that frame's URL.
	"PREFERRED_SYMBOL": "https://www.tijd.be/customers/mediafin.be/funds_tijd/123/Fund/456789?t="
}
# Directory in which one "<symbol>.log" file per symbol is kept.
DIRECTORY = "/var/local/rate_fetcher"
# The currencies fetched are always all of those published by the ECB; at the time of writing:
# AUD, BGN, BRL, CAD, CHF, CNY, CZK, DKK, GBP, HKD, HRK, HUF, IDR, ILS, INR, ISK,
# JPY, KRW, MXN, MYR, NOK, NZD, PHP, PLN, RON, RUB, SEK, SGD, THB, TRY, USD, ZAR


import requests
import os.path
import re
from typing import NamedTuple

class LogItem(NamedTuple):
	"""A single price quote: the value of one symbol on one date."""
	# Name of the currency or fund; also used as the base name of its log file
	symbol: str
	# Date of the quote (get_fund formats it as "YYYY-MM-DD")
	date: str
	# Price as a decimal string; written after a "€" sign in the log line
	value: str


def get_ecb():
	"""Yield one LogItem per currency found in the ECB's daily euro reference rate feed.

	Every item carries the publication date found in the feed. The published rate is inverted
	(1 / rate) and formatted with five decimals.

	As this is a generator, nothing is downloaded until the result is first iterated.

	Raises:
		requests.RequestException: if the download fails, times out or returns an HTTP error status.
		ValueError: if no publication date can be found in the response.
	"""
	# Without a timeout, requests may wait forever on an unresponsive server
	r = requests.get("https://www.ecb.europa.eu/stats/eurofxref/eurofxref-daily.xml", timeout=30)
	# Fail on an HTTP error here rather than on an error page that happens not to match below
	r.raise_for_status()

	m = re.search(r"<Cube time='([0-9-]+)'>", r.text)
	# Explicit raise instead of `assert`, which is stripped when Python runs with -O
	if m is None:
		raise ValueError("Could not find the publication date in the ECB response")
	date = m.group(1)

	for symbol, value in re.findall(r"<Cube currency='([A-Z]+)' rate='([0-9.]+)'/>", r.text):
		yield LogItem(
			symbol=symbol,
			date=date,
			value="{:.5f}".format(1.0 / float(value)),
		)


def get_fund(symbol, url):
	"""Scrape the most recent value of one fund from its data page.

	Looks for the text "Actuele NIW op <day>/<month>/<year>" (Dutch; presumably "current net asset
	value on ...") and takes the first "<span>123,45</span>" number after it as the value.

	Args:
		symbol: Symbol to record the fund under.
		url: URL of the page (frame) that holds the fund's data.

	Returns:
		A LogItem whose date is "YYYY-MM-DD" (day and month zero-padded) and whose value has the
		decimal comma replaced by a decimal point.

	Raises:
		requests.RequestException: if the download fails, times out or returns an HTTP error status.
		ValueError: if the expected date and value cannot be found in the page.
	"""
	# Without a timeout, requests may wait forever on an unresponsive server
	r = requests.get(url, timeout=30)
	# Fail on an HTTP error here rather than on an error page that happens not to match below
	r.raise_for_status()

	m = re.search(r"Actuele NIW op ([0-9]{1,2})/([0-9]{1,2})/([0-9]{4}).*?<span>([0-9]+),([0-9]+)</span>", r.text, re.MULTILINE|re.DOTALL)
	# Explicit raise instead of `assert`, which is stripped when Python runs with -O
	if m is None:
		raise ValueError("Could not find the date and value of fund {!r} in {}".format(symbol, url))
	# groups: day, month, year, integer part, fractional part
	groups = m.groups()

	return LogItem(
		symbol=symbol,
		date="{2}-{1:0>2}-{0:0>2}".format(*groups),
		value="{3}.{4}".format(*groups),
	)


def get_funds():
	"""Fetch the current value of every fund configured in FUNDS.

	Returns a list with one LogItem per fund, in the order in which FUNDS lists them.
	"""
	fetched = []
	for symbol, url in FUNDS.items():
		fetched.append(get_fund(symbol, url))
	return fetched


# Values for the `whence` argument of seek()
START_OF_STREAM = 0
CURRENT_POSITION = 1
END_OF_STREAM = 2

def last_line(fh, max_line_length=80):
	"""Return the last line of the open file `fh`, or None if nothing could be read.

	Only the final `max_line_length` units of the file are looked at, so a longer last line is
	returned without its beginning. If seeking back that far fails with an OSError (which is what
	happens for a file shorter than that), the file is read from its start instead.
	"""
	try:
		fh.seek(-max_line_length, END_OF_STREAM)
	except OSError:
		fh.seek(0, START_OF_STREAM)

	tail = fh.readlines()
	if not tail:
		return None
	return tail[-1]


def format_logline(entry: LogItem):
	"""Render `entry` as one price line: "P <date> 18:00:00 <symbol>  € <value>".

	The time of day is always 18:00:00. No trailing newline is added.
	"""
	template = "P {date} 18:00:00 {symbol}  € {value}"
	return template.format(
		date=entry.date,
		symbol=entry.symbol,
		value=entry.value,
	)


def log(entry: LogItem):
	"""Append `entry` to its symbol's log file, unless the file's last line is already for that date.

	The file is "<symbol>.log" inside DIRECTORY; it is created if it does not exist yet. Only the
	last line of the file is checked for the entry's date.
	"""
	filename = os.path.join(DIRECTORY, "{symbol}.log".format(symbol=entry.symbol))
	try:
		# Binary mode: last_line() seeks relative to the end of the file, which text-mode files do
		# not support
		with open(filename, "rb") as fh:
			# last_line() returns None for an existing but empty file. The fallback has to be
			# bytes: testing `bytes in ""` raises a TypeError.
			if entry.date.encode() in (last_line(fh) or b""):
				return
	except FileNotFoundError:
		# No log for this symbol yet; the append below creates it
		pass
	# The line contains "€", so don't depend on the locale's default encoding when writing it
	with open(filename, "a", encoding="utf-8") as fh:
		print(format_logline(entry), file=fh)


def main():
	"""Fetch all ECB rates and all configured funds, then log every entry.

	Everything is fetched before anything is written, so a failing fetch leaves the logs untouched.
	"""
	entries = list(get_ecb())
	entries.extend(get_funds())
	for entry in entries:
		log(entry)


# Only run when executed as a script, not when imported
if __name__ == '__main__':
	main()