1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
|
#!/usr/bin/python3
import feedparser
import yt_dlp
from ollama import Client
from bs4 import BeautifulSoup
# Ollama client bound to the remote inference host. It is constructed at
# import time; nothing in the visible part of this script calls it yet.
client = Client(host='http://splet.4a.si:80')

# Model tag intended to be requested from the Ollama host above.
model = "llama2:13b-chat-fp16"

# Instruction text asking the model to turn extracted page text into CSV rows
# of "artist,track name,album,duration,label". It ends with two example rows;
# the second shows that unknown fields are left empty.
prompt = (
    "The document below is text extracted from a Slovene radio station containing a tracklist. "
    "Extract the tracklist from the text below and output a CSV table in format "
    "\"artist,track name,album,duration,label\". "
    "Example output:\n\n"
    "The Prodigy,Firestarter,The Fat of the Land,4:42,XL\n"
    "BJÖRK,LION SONG,,6:16,"
)
def opreme():
    """Download the radiostudent.si feed and return one record per entry.

    Each record is a dict with the keys:

    * ``id`` -- integer parsed from the first space-separated token of the
      entry id,
    * ``title``, ``link`` -- copied from the feed entry,
    * ``published`` -- the entry's ``published_parsed`` value,
    * ``authors`` -- list of author names (empty when the entry has none),
    * ``body`` -- text of the ``field-name-body`` div found in the entry
      summary, with carriage returns removed and runs of blank lines
      collapsed to single newlines.

    Returns:
        list[dict]: the records, in feed order.

    Raises:
        Exception: if an entry's summary has no ``div`` carrying the
            ``field-name-body`` class.
    """
    r = []
    feed = feedparser.parse("https://radiostudent.si/taxonomy/term/589/*/feed")
    for entry in feed.entries:
        oprema = {
            "id": int(entry.id.split(" ")[0]),
            "title": entry.title,
            "link": entry.link,
            "published": entry.published_parsed,
            # Entries without author data yield an empty list instead of
            # raising on the missing attribute.
            "authors": [author.name for author in entry.get("authors", [])],
        }

        summary = BeautifulSoup(entry.summary, features="html.parser")
        # class_ matches a div whose class list contains this token, which is
        # what the previous manual loop over all divs tested for.
        body = summary.find("div", class_="field-name-body")
        if body is None:
            raise Exception("body is None in " + entry.link)

        text = body.text.replace("\r", "")
        # Repeat until no empty lines are left; a single pass would leave
        # some behind when three or more newlines are adjacent.
        while "\n\n" in text:
            text = text.replace("\n\n", "\n")

        # Keep the cleaned text on the record so callers can use it; it was
        # previously computed and then thrown away.
        oprema["body"] = text
        r.append(oprema)
    return r
# Script entry point: fetch and parse the feed once when run directly.
# The returned list is not used here.
if __name__ == "__main__":
    opreme()
|