root/data-source-rss.scm

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
(define-module (data-source-rss)
  #:use-module (srfi srfi-19)           ;date
  #:use-module (sxml simple)
  #:use-module (file-utils)
  #:export (data-source-rss))


(define (data-source-rss year file)
  (if (string-suffix? ".sxml" file)
      (sxml->data-source-rss year (call-with-input-file file read) #f)
      (let ()
        (define sxml (xml->sxml (get-file-contents file)))
        (define title (get-title sxml))
        (define entries (get-entries sxml))
        (sxml->data-source-rss year entries title))))

(define (get-title sxml)
    (define feed (assoc-ref sxml 'http://www.w3.org/2005/Atom:feed))
    (define title (assoc-ref feed 'http://www.w3.org/2005/Atom:title))
    (car title))

(define (get-entries sxml)
  (define feed (assoc-ref sxml 'http://www.w3.org/2005/Atom:feed))
  (define entries (filter
                   (lambda (e)
                     (and (list? e)
                          (eq? (car e) 'http://www.w3.org/2005/Atom:entry)))
                   feed))
  entries)

(define (sxml->data-source-rss year entries title)
  ;; TODO shift year based on calendar
  (let* ((start-date (make-date 0 0 0 0 1 1 year 0))
         (end-date (make-date 0 0 0 0 31 12 (+ year 1) 0)))
    (define data (filter identity
                         (map (lambda (e)
                                ;; try to guess title from gitlab
                                ;; reverse order if we have a title
                                (if title
                                    (or (web-entry->data title e)
                                        (catch-all (lambda () (gitlab-entry->data e))))
                                    (or (catch-all (lambda () (gitlab-entry->data e)))
                                        (web-entry->data title e))))
                              entries)))
    data))

(define (identity x) x)

(define (gitlab-entry->data e)
  (define link (assoc-ref e 'http://www.w3.org/2005/Atom:link))
  (if link
      (let ()
        (define link-attrs (assoc-ref link '@))
        (define link-href (car (assoc-ref link-attrs 'href)))
        (define updated (car (assoc-ref e 'http://www.w3.org/2005/Atom:updated)))  ;2021-07-06T17:37:59Z
        `((project . ,(gitlab-projectname link-href))
          (link . ,(gitlab-project-href link-href))
          (original-link . ,link-href)
          (date . ,(string->date updated "~Y-~m-~dT~H:~M:~S~z"))))
      #f))

;;; THIS CODE SUCKS WTF
(define (gitlab-projectname str)
  "Convert \"https://gitlab.gnome.org/World/podcasts/-/issues/73#note_1198570\" to \"podcasts\"
# case for two groups
https://gitlab.gnome.org/World/Rust/libhandy-rs/-/issues/16 -> libhandy-rs
"

  (define prefix 9)                     ; https://x
  (define slash1 (string-index str #\/ prefix))
  (define slash2 (string-index str #\/ (+ slash1 1)))
  (define slash3 (string-index str #\/ (+ slash2 1)))
  (define slash4 (string-index str #\/ (+ slash3 1)))
  (define name (substring str (+ slash2 1) (or slash3 (string-length str))))
  (define name2 (substring str (+ slash3 1) (or slash4 (string-length str))))
  (if (string= name2 "-")
      name
      name2)
  )

(define (gitlab-project-href str)
  "Convert \"https://gitlab.gnome.org/World/podcasts/-/issues/73#note_1198570\"
to
 \"https://gitlab.gnome.org/World/podcasts\"
"

  (define prefix 9)                     ; https://x
  (define slash1 (string-index str #\/ prefix))
  (define slash2 (string-index str #\/ (+ slash1 1)))
  (define slash3 (string-index str #\/ (+ slash2 1)))
  (define slash4 (string-index str #\/ (+ slash3 1)))
  (define slash5 (string-index str #\/ (+ slash4 1)))
  (define name  (substring str (+ slash2 1) (or slash3 (string-length str))))
  (define name2 (substring str (+ slash3 1) (or slash4 (string-length str))))
  (define name3 (substring str (+ slash4 1) (or slash5 (string-length str))))
  (cond ((string= name  "-") (substring str 0 (or slash2 (string-length str))))
        ((string= name2 "-") (substring str 0 (or slash3 (string-length str))))
        ((string= name3 "-") (substring str 0 (or slash4 (string-length str))))
        (#t str)))


(define (web-entry->data title e)
  (define link (assoc-ref e 'http://www.w3.org/2005/Atom:link))
  (if link
      (let ()
        (define link-attrs (assoc-ref link '@))
        (define link-href (car (assoc-ref link-attrs 'href)))
        (define updated (car (assoc-ref e 'http://www.w3.org/2005/Atom:updated)))  ;2021-07-06T17:37:59Z
        `((project . ,title)
          (link . ,link-href)
          (date . ,(string->date updated "~Y-~m-~dT~H:~M:~S~z"))))
      #f))

(define (catch-all thunk)
  (with-exception-handler
    (lambda (exn)
      ;; (format (current-error-port)
      ;;         "Uncaught exception: ~s\n" exn)
      #f)
    thunk
    #:unwind? #t))