# pricedb/fetch_rates.py

#!/usr/bin/env python3

# Copyright 2020 Midgard
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Configuration -- adjust the values below to your own setup

# Maps the symbol a fund is logged under to the URL its data is downloaded from
FUNDS = {
	# The URL of the fund page itself will not do: open the fund page, locate the frame that
	# holds the actual data and use the URL of that frame
	"PREFERRED_SYMBOL": "https://www.tijd.be/customers/mediafin.be/funds_tijd/123/Fund/456789?t=",
}
# Directory holding one "<symbol>.log" file per symbol
DIRECTORY = "/var/local/rate_fetcher"
# There is no list of currencies to configure: everything the ECB publishes is fetched.
# At the time of writing that was
# AUD, BGN, BRL, CAD, CHF, CNY, CZK, DKK, GBP, HKD, HRK, HUF, IDR, ILS, INR, ISK,
# JPY, KRW, MXN, MYR, NOK, NZD, PHP, PLN, RON, RUB, SEK, SGD, THB, TRY, USD, ZAR


import requests
import os.path
import re
from typing import NamedTuple

class LogItem(NamedTuple):
	"""One price observation: what was priced, on which date, and at what value.

	All three fields are kept as strings because they are only ever interpolated into a log line
	and a file name.
	"""
	# Name the price is logged under; also used as the stem of the "<symbol>.log" file name
	symbol: str
	# Date of the observation; get_fund builds it as YYYY-MM-DD, get_ecb copies it from the feed
	date: str
	# Decimal number with "." as separator, written after the euro sign in the log line
	value: str


def get_ecb():
	"""Yield one LogItem per currency found in the ECB daily reference-rate feed.

	This is a generator: the feed is downloaded when iteration starts, not when the function is
	called. Every yielded item carries the single date found in the feed, and its value is the
	reciprocal of the published rate, formatted with five decimals.

	Raises:
		requests.RequestException: if the download fails, times out or returns an HTTP error status.
		ValueError: if the response contains no dated <Cube> element.
	"""
	r = requests.get(
		"https://www.ecb.europa.eu/stats/eurofxref/eurofxref-daily.xml",
		# Without a timeout an unresponsive server would make the script hang forever
		timeout=30,
	)
	# Fail on the HTTP status itself rather than on an error page that does not parse
	r.raise_for_status()

	m = re.search(r"<Cube time='([0-9-]+)'>", r.text)
	# Explicit check instead of `assert`, which is stripped when Python runs with -O
	if m is None:
		raise ValueError("ECB feed: no <Cube time='...'> element found in the response")
	date = m.group(1)

	for symbol, rate in re.findall(r"<Cube currency='([A-Z]+)' rate='([0-9.]+)'/>", r.text):
		yield LogItem(
			symbol=symbol,
			date=date,
			# NOTE(review): with five fixed decimals, a rate in the tens of thousands inverts to a
			# value with a single significant digit -- confirm that this precision is acceptable
			value="{:.5f}".format(1.0 / float(rate)),
		)


def get_fund(symbol, url):
	"""Download the fund page at `url` and return its latest value as a LogItem.

	The page is searched for the text "Actuele NIW op d/m/yyyy" and the first
	"<span>whole,fraction</span>" that follows it.

	Args:
		symbol: Name to record the value under.
		url: Address of the page (frame) holding the fund data.

	Returns:
		A LogItem with the date rewritten as YYYY-MM-DD and the decimal comma replaced by a point.

	Raises:
		requests.RequestException: if the download fails, times out or returns an HTTP error status.
		ValueError: if the expected date/value pattern is not found in the page.
	"""
	# Without a timeout an unresponsive server would make the script hang forever
	r = requests.get(url, timeout=30)
	# Fail on the HTTP status itself rather than on an error page that does not parse
	r.raise_for_status()

	# DOTALL lets ".*?" span the line breaks between the date and the value; the pattern has no
	# ^ or $ anchors, so MULTILINE would make no difference
	m = re.search(r"Actuele NIW op ([0-9]{1,2})/([0-9]{1,2})/([0-9]{4}).*?<span>([0-9]+),([0-9]+)</span>", r.text, re.DOTALL)
	# Explicit check instead of `assert`, which is stripped when Python runs with -O
	if m is None:
		raise ValueError(f"Fund page for {symbol}: no 'Actuele NIW op <date>' value found at {url}")
	day, month, year, whole, fraction = m.groups()

	return LogItem(
		symbol=symbol,
		# Day and month may be a single digit on the page; pad them to two
		date=f"{year}-{month:0>2}-{day:0>2}",
		value=f"{whole}.{fraction}",
	)


def get_funds():
	"""Fetch every fund configured in FUNDS and return the resulting LogItems as a list.

	The funds are fetched one after another, in the order in which FUNDS lists them.
	"""
	# Each item of FUNDS is a (symbol, url) pair, which is exactly what get_fund takes
	return [get_fund(*fund) for fund in FUNDS.items()]


# Values for the `whence` argument of seek(), named after what the offset is relative to.
# They are taken from the standard library instead of being spelled out as 0, 1 and 2.
START_OF_STREAM = os.SEEK_SET
CURRENT_POSITION = os.SEEK_CUR
END_OF_STREAM = os.SEEK_END

def last_line(fh, max_line_length=80):
	"""Simple and stupid way to read the last line of a file

	Only the final `max_line_length` units of the stream are looked at, so a last line that
	is longer than that is returned without its beginning.

	Args:
		fh: Open, seekable file object; its position is moved to the end by this call.
		max_line_length: Size of the window at the end of the file that is read.

	Returns:
		The last line exactly as readlines() produces it (line terminator included, if any),
		or None when nothing could be read.
	"""
	try:
		fh.seek(-max_line_length, END_OF_STREAM)
	except OSError:
		# The window could not be positioned (for instance because the file is shorter than
		# the window); fall back to reading the file from its start
		fh.seek(0, START_OF_STREAM)
	lines = fh.readlines()
	return lines[-1] if lines else None


def format_logline(entry: LogItem):
	"""Render `entry` as one log line, without a trailing newline.

	The line has the form "P <date> 18:00:00 <symbol>  € <value>"; the time of day is always
	written as 18:00:00.
	"""
	date, symbol, value = entry.date, entry.symbol, entry.value
	return f"P {date} 18:00:00 {symbol}  € {value}"


def log(entry: LogItem):
	"""Append `entry` to the log file of its symbol, unless it is already there.

	The file is DIRECTORY/<symbol>.log. The entry counts as already logged when its date occurs
	in the last line of that file; in that case nothing is written. A file that is missing or
	empty simply receives the entry as its first line.
	"""
	filename = os.path.join(DIRECTORY, "{symbol}.log".format(symbol=entry.symbol))
	try:
		with open(filename, "rb") as fh:
			# The file is read as bytes, so the stand-in for "no last line" (an empty file) has to
			# be bytes as well: testing `bytes in str` raises TypeError
			if entry.date.encode() in (last_line(fh) or b""):
				return
	except FileNotFoundError:
		# First entry for this symbol; the file is created by the append below
		pass
	# The line contains "€": pin the encoding so that writing it does not depend on the locale
	with open(filename, "a", encoding="utf-8") as fh:
		print(format_logline(entry), file=fh)


def main():
	"""Fetch the ECB rates and all configured funds, then log every entry.

	Everything is fetched before the first entry is logged, so a failing download leaves the
	log files untouched.
	"""
	entries = list(get_ecb())
	entries.extend(get_funds())
	for entry in entries:
		log(entry)


if __name__ == "__main__":
	main()
Initial commit 2020-12-22 21:53:32 +01:00			`#!/usr/bin/env python3`

			`# Copyright 2020 Midgard`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`


			`# Configure me`
			`FUNDS = {`
			`# To get these URLs, you'll need to go to the fund page and find the frame that has the actual`
			`# data and take its URL`
			`"PREFERRED_SYMBOL": "https://www.tijd.be/customers/mediafin.be/funds_tijd/123/Fund/456789?t="`
			`}`
			`DIRECTORY = "/var/local/rate_fetcher"`
			`# Currencies fetched are always all those that are published by the ECB, currently`
			`# AUD, BGN, BRL, CAD, CHF, CNY, CZK, DKK, GBP, HKD, HRK, HUF, IDR, ILS, INR, ISK,`
			`# JPY, KRW, MXN, MYR, NOK, NZD, PHP, PLN, RON, RUB, SEK, SGD, THB, TRY, USD, ZAR`


			`import requests`
			`import os.path`
			`import re`
			`from typing import NamedTuple`

			`class LogItem(NamedTuple):`
			`symbol: str`
			`date: str`
			`value: str`


			`def get_ecb():`
			`r = requests.get("https://www.ecb.europa.eu/stats/eurofxref/eurofxref-daily.xml")`
			`m = re.search(r"<Cube time='([0-9-]+)'>", r.text)`
			`assert m`
			`date = m.group(1)`

			`for item in re.findall(r"<Cube currency='([A-Z]+)' rate='([0-9.]+)'/>", r.text):`
			`symbol, value = item`
			`yield LogItem(`
			`symbol=symbol,`
			`date=date,`
			`value="{:.5f}".format(1.0 / float(value)),`
			`)`


			`def get_fund(symbol, url):`
			`r = requests.get(url)`
			`m = re.search(r"Actuele NIW op ([0-9]{1,2})/([0-9]{1,2})/([0-9]{4}).*?<span>([0-9]+),([0-9]+)</span>", r.text, re.MULTILINE\|re.DOTALL)`
			`assert m`
			`groups = m.groups()`

			`return LogItem(`
			`symbol=symbol,`
			`date="{2}-{1:0>2}-{0:0>2}".format(*groups),`
			`value="{3}.{4}".format(*groups),`
			`)`


			`def get_funds():`
			`return [`
			`get_fund(symbol, url)`
			`for symbol, url in FUNDS.items()`
			`]`


			`START_OF_STREAM = 0`
			`CURRENT_POSITION = 1`
			`END_OF_STREAM = 2`

			`def last_line(fh, max_line_length=80):`
			`"""Simple and stupid way to read the last line of a file"""`
			`try:`
			`fh.seek(-max_line_length, END_OF_STREAM)`
			`except OSError:`
			`fh.seek(0, START_OF_STREAM)`
			`lines = fh.readlines()`
			`return lines[-1] if lines else None`


			`def format_logline(entry: LogItem):`
			`return f"P {entry.date} 18:00:00 {entry.symbol} € {entry.value}"`


			`def log(entry: LogItem):`
			`filename = os.path.join(DIRECTORY, "{symbol}.log".format(symbol=entry.symbol))`
			`try:`
			`with open(filename, "rb") as fh:`
			`if entry.date.encode() in (last_line(fh) or ""):`
			`return`
			`except FileNotFoundError:`
			`pass`
			`with open(filename, "a") as fh:`
			`print(format_logline(entry), file=fh)`


			`def main():`
			`for entry in [get_ecb(), get_funds()]:`
			`log(entry)`


			`if __name__ == '__main__':`
			`main()`