summary refs log tree commit diff stats
path: root/skripti/rš_glasbena_oprema.py
diff options
context:
space:
mode:
Diffstat (limited to 'skripti/rš_glasbena_oprema.py')
-rw-r--r-- skripti/rš_glasbena_oprema.py 31
1 files changed, 31 insertions, 0 deletions
diff --git a/skripti/rš_glasbena_oprema.py b/skripti/rš_glasbena_oprema.py
new file mode 100644
index 0000000..8d53702
--- /dev/null
+++ b/skripti/rš_glasbena_oprema.py
@@ -0,0 +1,31 @@
+#!/usr/bin/python3
+import feedparser
+import yt_dlp
+from ollama import Client
+from bs4 import BeautifulSoup
+client = Client(host='http://splet.4a.si:80')  # Ollama client for the remote host; not used yet in this file
+model = "llama2:13b-chat-fp16"  # model name; not used yet in this file
+prompt = "The document below is text extracted from a Slovene radio station containing a tracklist. Extract the tracklist from the text below and output a CSV table in format \"artist,track name,album,duration,label\". Example output:\n\nThe Prodigy,Firestarter,The Fat of the Land,4:42,XL\nBJÖRK,LION SONG,,6:16,"  # instruction text; not used yet in this file
+def opreme():
+    """Fetch the radiostudent.si feed and return a list of per-entry metadata dicts.
+
+    Each dict has the keys "id", "title", "link", "published" and "authors".
+    Raises Exception when an entry summary has no "field-name-body" div.
+    """
+    r = []
+    for entry in feedparser.parse("https://radiostudent.si/taxonomy/term/589/*/feed").entries:
+        # The id is the integer before the first space of the feed entry id.
+        oprema = {"id": int(entry.id.split(" ")[0]), "title": entry.title, "link": entry.link, "published": entry.published_parsed, "authors": [author.name for author in entry.authors]}
+        summary = BeautifulSoup(entry.summary, features="html.parser")
+        # class_ matches the first div that lists this CSS class among its classes.
+        body = summary.find("div", class_="field-name-body")
+        if body is None:
+            raise Exception("body is None in " + entry.link)
+        # NOTE(review): the normalized text below is not stored or returned yet.
+        body = body.text.replace("\r", "")
+        while "\n\n" in body:  # collapse runs of consecutive newlines into one
+            body = body.replace("\n\n", "\n")
+        r.append(oprema)
+    return r
+if __name__ == "__main__":  # run the scraper only when executed as a script
+    opreme()
\ No newline at end of file