-
Notifications
You must be signed in to change notification settings - Fork 0
/
tracker-doc
executable file
·186 lines (136 loc) · 5.73 KB
/
tracker-doc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#!/usr/bin/env bb
;; This file is part of Dirk Kutscher's datatracker-publications
;; project and is released under the GPL-v3 License:
;; http://www.gnu.org/licenses/gpl-3.0.html
(require '[babashka.curl :as curl])
(require '[babashka.pods :as pods])
(require '[clojure.string :as string])
(require '[babashka.cli :as cli])
(pods/load-pod 'retrogradeorbit/bootleg "0.1.9") ; provides Hickory
(require '[pod.retrogradeorbit.bootleg.utils :as utils])
(require '[pod.retrogradeorbit.bootleg.enlive :as enlive])
(require '[pod.retrogradeorbit.hickory.select :as s])
(defn assoc-same-value
  "Returns `initial-map` (or an empty map when it is omitted or nil) with
  every key in `keys` mapped to `value`. Existing entries for those keys are
  overwritten; all other entries are kept."
  [keys value & [initial-map]]
  (into (or initial-map {})
        (map (fn [k] [k value]))
        keys))
(defn fetch-html
  "Requests https://datatracker.ietf.org/person/<user-id> with an HTTP GET
  and returns the `:body` of the response."
  [user-id]
  (-> (str "https://datatracker.ietf.org/person/" user-id)
      curl/get
      :body))
;; Converts `html-content` to the :hickory representation using the bootleg
;; pod's `utils/convert-to`. The result is what the `s/select` calls in this
;; file are given as their tree argument.
(defn parse-html [html-content]
(utils/convert-to html-content :hickory))
(defn element-text
  "Joins the `:content` values of a collection of nodes into a single string.

  `element` is a collection of node maps (the caller in this file passes the
  result of a select). Returns nil when `element` is nil. Each `:content`
  value is stringified as-is, so a vector of children appears in its printed
  form rather than as plain text."
  [element]
  ;; Use the `string` alias this file declares for clojure.string. `str/`
  ;; only resolves through Babashka's implicit aliases in the `user`
  ;; namespace and is not declared anywhere in this file.
  (when element
    (string/join "" (map :content element))))
(defn extract-rfcs
  "Returns one string per data row of the table that immediately follows the
  `h2` element with id `rfcs-1`, built by passing the row's `td` elements to
  `element-text`. The first row of the table is skipped (assumed to be the
  header row). Returns an empty sequence when no such table is found."
  [parsed-html]
  ;; Hickory's `select` takes (selector-fn tree), in that order, and expects
  ;; selector functions rather than enlive-style vectors. `follow-adjacent`
  ;; is the equivalent of the CSS `h2#rfcs-1 + table` combinator.
  (let [table-selector (s/follow-adjacent (s/and (s/tag :h2) (s/id :rfcs-1))
                                          (s/tag :table))
        rfcs-section (first (s/select table-selector parsed-html))
        rows (when rfcs-section
               (s/select (s/tag :tr) rfcs-section))]
    (map #(element-text (s/select (s/tag :td) %))
         (drop 1 rows))))
;; Basic tag selectors shared by the RFC extraction code.
(def tr-selector (s/tag :tr))
(def td-selector (s/tag :td))
(def a-selector (s/tag :a))
;; Selects `a` elements via nested descendant selectors: td -> a, wrapped in
;; tr, wrapped in tbody.
(def rfc-selector
(s/descendant (s/tag :tbody)
(s/descendant tr-selector
(s/descendant td-selector a-selector))))
(defn get-rfc-names
  "Collects the RFC names linked from the given `td` nodes.

  Looks only at the direct children (`:content`) of each node in
  `td-elements`. For every child that is an `a` element whose `:href`
  matches `/doc/rfcNNNN/`, the `rfcNNNN` part is captured. Returns a set of
  those names (empty when nothing matches)."
  [td-elements]
  (set
   (for [td-element td-elements
         child (:content td-element)
         ;; Anchors without an :href are skipped; re-find throws on nil.
         :let [href (when (= (:tag child) :a)
                      (get-in child [:attrs :href]))
               [_ rfc-name] (when href
                              (re-find #"/doc/(rfc[0-9]+)/" href))]
         :when rfc-name]
     rfc-name)))
(defn extract-rfc-names
  "Returns the set of RFC names linked from table cells in `parsed-html`.

  Selects every `td` below a `tr` below a `tbody`, keeps the cells that have
  a direct-child `a` element whose `:href` contains `/doc/rfc`, and passes
  them to `get-rfc-names`."
  [parsed-html]
  (let [all-td-elements (s/select
                         (s/descendant (s/tag :tbody)
                                       (s/descendant tr-selector
                                                     td-selector))
                         parsed-html)
        ;; True for an `a` child with an href containing "/doc/rfc". The
        ;; href is checked for nil first because re-find throws on nil.
        rfc-link? (fn [child]
                    (when (= (:tag child) :a)
                      (when-let [href (get-in child [:attrs :href])]
                        (re-find #"/doc/rfc" href))))
        rfc-td-elements (filter (fn [td-element]
                                  (some rfc-link? (:content td-element)))
                                all-td-elements)]
    (get-rfc-names rfc-td-elements)))
;; Maps a draft category keyword to the hickory selector for the `ul` that
;; holds that category. Dispatches on the keyword itself.
(defmulti draft-selector identity)

;; Active drafts: the `ul` directly following the element with id `drafts-1`.
(defmethod draft-selector :id-active [_]
  (s/follow-adjacent (s/id :drafts-1)
                     (s/tag :ul)))

;; Expired drafts: the `ul` reached from `drafts-1` via ul, h2, ul in
;; adjacent order.
(defmethod draft-selector :id-expired [_]
  (s/follow-adjacent (s/id :drafts-1)
                     (s/tag :ul)
                     (s/tag :h2)
                     (s/tag :ul)))
(defn extract-draft-names
  "Returns the trimmed first content item of every `a` element inside the
  first `ul` matched by `(draft-selector tag)` in `parsed-html`."
  [tag parsed-html]
  (let [draft-list (-> (draft-selector tag)
                       (s/select parsed-html)
                       first)
        anchors (s/select (s/descendant (s/tag :a)) draft-list)]
    (for [anchor anchors]
      (string/trim (first (:content anchor))))))
(defn process-and-print
  "When `option` is truthy, applies `extractor-fn` to `parsed-html` and
  prints the resulting items on one line, separated by single spaces.
  Does nothing when `option` is falsey."
  [parsed-html option extractor-fn]
  (when option
    (println (string/join " " (extractor-fn parsed-html)))))
;; Option spec handed to `cli/parse-args`: every flag is coerced to a
;; boolean, and positional arguments are mapped to :user-id.
(def cli-opts {:coerce {:help :boolean
:rfcs :boolean
:active-ids :boolean
:expired-ids :boolean
:all-ids :boolean
:all :boolean}
:args->opts [:user-id]})
;; Flags that each select one document type: every :coerce key except
;; :help, :all and :all-ids. NOTE: derived from the :coerce keys, so any key
;; added to :coerce also lands here unless it is excluded below.
(def doctype-keys
(disj (set (keys (:coerce cli-opts))) :help :all :all-ids))
;; Flags switched on together by --all-ids.
(def id-keys
[:active-ids :expired-ids])
(defn only-key?
  "True when map `col` has no key other than `k`. This also holds for an
  empty or nil `col`."
  [col k]
  (every? #(= k %) (keys col)))
;; Usage text that `main` prints when the :help option is set.
(def help-msg
"Usage: tracker-doc <options> user-id
Options
--rfcs get RFCs
--active-ids get active Internet Drafts
--expired-ids get expired Internet Drafts
--all-ids get all Internet Drafts
--all get all RFCs and Internet Drafts (default)")
(defn main
  "Parses the command line, fetches the datatracker person page for the
  given user id and prints the requested document names.

  `arg-list` is the rest-args sequence; its first element must be the
  sequence of command-line argument strings (callers pass the whole argument
  list as one value).

  Option handling:
  - no flags at all, or --all: every key in `doctype-keys` is enabled
  - --all-ids: every key in `id-keys` is enabled
  - otherwise the parsed options are used as given

  Prints `help-msg` and exits with status 0 when --help is set. Prints an
  error and exits with status 1 when no user id was supplied. Otherwise it
  prints one line per enabled document type: RFCs, active drafts, expired
  drafts."
  [& arg-list]
  (let [args (cli/parse-args (first arg-list) cli-opts)
        opts (:opts args)
        user-id (:user-id opts)
        no-options? (only-key? opts :user-id)
        options (cond
                  (or no-options? (:all opts))
                  (assoc-same-value doctype-keys true opts)

                  (:all-ids opts)
                  (assoc-same-value id-keys true opts)

                  :else opts)]
    (cond
      (:help options)
      (do
        (println help-msg)
        (System/exit 0))

      ;; babashka.cli auto-coerces values that have no explicit coercion,
      ;; so a numeric-looking id arrives as a number and `empty?` would
      ;; throw on it. Going through `str` handles nil, strings and coerced
      ;; values alike.
      (string/blank? (str user-id))
      (do
        (println "User ID is required.")
        (System/exit 1))

      :else
      (let [html-content (fetch-html user-id)
            parsed-html (parse-html html-content)]
        (process-and-print parsed-html (:rfcs options) extract-rfc-names)
        (process-and-print parsed-html (:active-ids options)
                           (partial extract-draft-names :id-active))
        (process-and-print parsed-html (:expired-ids options)
                           (partial extract-draft-names :id-expired))))))
;; Conventional entry point: forwards the collected arguments to `main` as a
;; single sequence, which `main` reads with `(first arg-list)`.
(defn -main [& args]
(main args))
;; Runs `main` with the command-line arguments whenever this file is
;; evaluated.
(main *command-line-args*)