root/download-gitlab-rss.scm

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
(use-modules
 (srfi srfi-1)           ;dedup lists
 (srfi srfi-19)           ;date
 (sxml simple))

(define (update-gitlab-rss url alias)
  "Download and cache to disk a paginated rss feed until a cached entry is detected."
  (define filename (string-append alias ".new.rss.sxml.tmp"))
  (define acc-filename (string-append alias ".rss.sxml"))
  (define acc-filename-tmp (string-append alias ".rss.sxml.tmp"))
  (define old-entries
    (if (file-exists? acc-filename)
        (let ()
          (define contents (call-with-input-file acc-filename read))
          (get-entries-old contents))
        '()))
  (define new-entries (get-more-entries url filename old-entries 0))
  (define merged-entries (delete-duplicates
                          (append old-entries new-entries)
                          (lambda (a b)
                            (eq? (car (assoc-ref a 'http://www.w3.org/2005/Atom:id))
                                 (car (assoc-ref b 'http://www.w3.org/2005/Atom:id))))))
  (call-with-output-file acc-filename-tmp
    (lambda (port) (write merged-entries port)))
  (rename-file acc-filename-tmp acc-filename)
  (delete-file filename))


(define (get-more-entries url filename old-entries offset)
  (system* "wget" (string-append url "?offset=" (number->string offset)) "-O" filename)
  (let ()
    (define new-entries
      (let ()
        (define contents (call-with-input-file filename xml->sxml))
        (get-entries contents)))
    (define found-old-entry?
      (find (lambda (e)
              (define id (car (assoc-ref e 'http://www.w3.org/2005/Atom:id)))
              (find (lambda (o)
                      (equal? id
                              (car (assoc-ref o 'http://www.w3.org/2005/Atom:id))))
                    old-entries))
            new-entries))
    (define result (append new-entries old-entries))
    (if found-old-entry?
        result
        (if (null? new-entries)
            result
            (begin
              (sleep 2)
              (get-more-entries url filename result (+ offset (length new-entries))))))))

(define (get-entries sxml)
  (define feed (assoc-ref sxml 'http://www.w3.org/2005/Atom:feed))
  (define entries (filter
                    (lambda (e)
                      (and (list? e)
                           (eq? (car e) 'http://www.w3.org/2005/Atom:entry)))
                    feed))
  entries)

(define (get-entries-old feed)
  (define entries (filter
                    (lambda (e)
                      (and (list? e)
                           (eq? (car e) 'http://www.w3.org/2005/Atom:entry)))
                    feed))
  entries)

(define (string-substitute str search substitute start-recursive)
  (define found (string-contains str search))
  (define result
    (if found
        (cons #t (string-replace str
                                 substitute
                                 found (+ found (string-length search))))
        (cons #f str)))
  (if (and (car result) start-recursive)
      (string-substitute (cdr result) search substitute start-recursive)
      (cdr result)))

(define (main args)
  (define url (car (cdr args)))
  (define alias (car (cdr (cdr args))))
  (update-gitlab-rss  url alias))