diff options
Diffstat (limited to 'skripti/rš_glasbena_oprema.py')
-rw-r--r-- | skripti/rš_glasbena_oprema.py | 31 |
1 files changed, 31 insertions, 0 deletions
#!/usr/bin/python3
# Reconstructed from the "new file" diff of skripti/rš_glasbena_oprema.py
# (new file mode 100644, index 0000000..8d53702, hunk @@ -0,0 +1,31 @@).
"""Collect entries of a Radio Študent RSS feed together with their body text.

The module also configures an Ollama client, a model name and a prompt
asking for a CSV tracklist; nothing in this file sends that prompt yet.
"""
import feedparser
import yt_dlp
from ollama import Client
from bs4 import BeautifulSoup

# Ollama endpoint, model name and prompt. They are only defined here; no code
# in this file calls the client.
client = Client(host='http://splet.4a.si:80')
model = "llama2:13b-chat-fp16"
prompt = "The document below is text extracted from a Slovene radio station containing a tracklist. Extract the tracklist form the text below and output a CSV table in format \"artist,track name,album,duration,label\". Example output:\n\nThe Prodigy,Firestarter,The Fat of the Land,4:42,XL\nBJÖRK,LION SONG,,6:16,"

# Feed that opreme() reads when no other URL is given.
FEED_URL = "https://radiostudent.si/taxonomy/term/589/*/feed"


def _collapse_newlines(text):
    """Drop carriage returns and squeeze every run of newlines into one."""
    text = text.replace("\r", "")
    while "\n\n" in text:
        text = text.replace("\n\n", "\n")
    return text


def opreme(url=FEED_URL):
    """Parse the feed at *url* and return one dict per entry.

    Each dict holds the keys ``id``, ``title``, ``link``, ``published``,
    ``authors`` (list of author names) and ``body`` (text of the entry's
    ``field-name-body`` div with carriage returns removed and consecutive
    newlines collapsed).

    Args:
        url: Feed location handed to ``feedparser.parse``. Defaults to
            ``FEED_URL``.

    Returns:
        A list of entry dicts in feed order.

    Raises:
        Exception: If an entry's summary has no ``div`` carrying the
            ``field-name-body`` class.
        ValueError: If the first space-separated token of an entry id is
            not an integer.
    """
    r = []
    for entry in feedparser.parse(url).entries:
        oprema = {
            # Only the token before the first space of the entry id is kept.
            # NOTE(review): presumably the id looks like "<number> at <site>"
            # -- confirm against the live feed.
            "id": int(entry.id.split(" ")[0]),
            "title": entry.title,
            "link": entry.link,
            "published": entry.published_parsed,
            # .get() keeps entries without author data from raising.
            "authors": [author.name for author in entry.get("authors", [])],
        }
        summary = BeautifulSoup(entry.summary, features="html.parser")
        # First div whose class list contains "field-name-body".
        body = summary.find("div", class_="field-name-body")
        if body is None:
            raise Exception("body is None in " + entry.link)
        # Keep the cleaned text on the record instead of discarding it.
        oprema["body"] = _collapse_newlines(body.text)
        r.append(oprema)
    return r


if __name__ == "__main__":
    opreme()
\ No newline at end of file |