summaryrefslogtreecommitdiffstats
path: root/skripti/rš_glasbena_oprema.py
blob: 8d53702c922850cc67b908eb204190e08d0f790b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#!/usr/bin/python3
import feedparser
import yt_dlp
from ollama import Client
from bs4 import BeautifulSoup
# Ollama API client; the server address is hard-coded here.
client = Client(host='http://splet.4a.si:80')
# Model identifier string (not referenced elsewhere in the visible part of
# this file).
model = "llama2:13b-chat-fp16"
# Instruction text asking for a CSV tracklist, followed by two example rows.
# Written as adjacent literals so the pieces are readable; Python joins them
# into a single string at compile time.
prompt = (
	"The document below is text extracted from a Slovene radio station containing a tracklist. "
	"Extract the tracklist from the text below and output a CSV table in format "
	"\"artist,track name,album,duration,label\". Example output:\n\n"
	"The Prodigy,Firestarter,The Fat of the Land,4:42,XL\n"
	"BJÖRK,LION SONG,,6:16,"
)
def opreme(feed="https://radiostudent.si/taxonomy/term/589/*/feed"):
	"""Parse the feed and return one record per entry.

	Args:
		feed: Anything accepted by ``feedparser.parse``; defaults to the
			Radio Študent feed URL this script was written for.

	Returns:
		A list of dicts with the keys:
			"id": int parsed from the first space-separated token of the
				entry id,
			"title", "link": copied from the entry,
			"published": the entry's ``published_parsed`` value,
			"authors": list of author names,
			"body": text of the entry's ``field-name-body`` div with
				carriage returns removed and runs of newlines collapsed
				to a single newline.

	Raises:
		Exception: if an entry's summary has no div carrying the
			``field-name-body`` class.
	"""
	r = []
	for entry in feedparser.parse(feed).entries:
		oprema = {
			"id": int(entry.id.split(" ")[0]),
			"title": entry.title,
			"link": entry.link,
			"published": entry.published_parsed,
			"authors": [author.name for author in entry.authors],
		}
		summary = BeautifulSoup(entry.summary, features="html.parser")
		# class_ matches against each individual CSS class of an element, so
		# this returns the first div that has "field-name-body" among its
		# classes (or None when there is no such div).
		body = summary.find("div", class_="field-name-body")
		if body is None:
			raise Exception("body is None in " + entry.link)
		text = body.text.replace("\r", "")
		# Repeat until no double newline is left; a single pass would leave
		# pairs behind in runs of three or more newlines.
		while "\n\n" in text:
			text = text.replace("\n\n", "\n")
		# Keep the cleaned text on the record so the work above is not lost.
		oprema["body"] = text
		r.append(oprema)
	return r
# Script entry point: runs opreme() once when the file is executed directly.
# The list it returns is not used here.
if __name__ == "__main__":
	opreme()