rrg92 committed on
Commit
2ede869
1 Parent(s): 3ae9e56

Add some description

Browse files
Files changed (1) hide show
  1. index.html +230 -219
index.html CHANGED
@@ -1,220 +1,231 @@
1
- <html>
2
- <head>
3
-
4
- <script type="text/javascript">
5
- let CURRENT_CONTENT;
6
- let parser = new DOMParser()
7
-
8
- function FormatDiscordMessage(html){
9
- let dom = parser.parseFromString(html, "text/html");
10
- CURRENT_CONTENT.dom = dom;
11
-
12
- let allChilds = dom.querySelectorAll("body > *");
13
-
14
- let FullContent = [];
15
-
16
- let buffContent = []
17
- let buffLinks = []
18
- let authorName= [];
19
- let edition;
20
-
21
- let flushContent = function(){
22
- console.log("line break found!");
23
-
24
- let AllText = buffContent.join("").trim();
25
- let links = buffLinks.join(",");
26
-
27
- if(!links)
28
- return;
29
-
30
- FullContent.push({
31
- text: AllText
32
- ,links
33
- ,authors: authorName.join(",")
34
- ,edition
35
- });
36
-
37
-
38
-
39
- buffContent = []
40
- buffLinks = [];
41
- authorName = []
42
- }
43
-
44
- for(let c of allChilds){
45
- let text;
46
-
47
- if(c.tagName == "A")
48
- buffLinks.push(c.href);
49
-
50
-
51
- if(c.tagName.at(0) == 'H'){
52
-
53
- let editionMatch = c.textContent.match(/#\d+/g);
54
-
55
- if(editionMatch){
56
- edition = parseInt( editionMatch[0].replace('#',''))
57
- }
58
-
59
- continue;
60
- }
61
-
62
- if(c.classList.contains("mention"))
63
- authorName.push(c.textContent);
64
-
65
- text = c.textContent;
66
-
67
- if(text)
68
- buffContent.push(text);
69
-
70
- if(/\!?\s*\n+/.test(text)){
71
- flushContent();
72
- }
73
- }
74
-
75
-
76
- // last
77
- if(buffContent){
78
- flushContent();
79
- }
80
-
81
-
82
- return { dom, content: FullContent };
83
- }
84
-
85
- function ProcessPastedMessage(){
86
- let res = FormatDiscordMessage(CURRENT_CONTENT.html)
87
-
88
- CURRENT_CONTENT.result = res;
89
-
90
- let out = document.querySelector("#result");
91
-
92
- let xDoc = document.implementation.createDocument(null, "highlights");
93
- let rootDoc = xDoc.querySelector("highlights");
94
-
95
- let Stats = {
96
- total: 0
97
- ,edition: null
98
- };
99
-
100
- Stats.edition = res.content[0].edition;
101
-
102
- for(let [i,high] of res.content.entries()){
103
- let xHigh = xDoc.createElement("highlight");
104
-
105
- let xAutor = xDoc.createElement("author");
106
- let xLinks = xDoc.createElement("links");
107
- let xEdition = xDoc.createElement("edition");
108
- let xText = xDoc.createElement("text");
109
-
110
- xAutor.textContent = high.authors
111
- xLinks.textContent = high.links
112
- xEdition.textContent = high.edition
113
- xText.textContent = high.text
114
-
115
- xHigh.appendChild(xText);
116
- xHigh.appendChild(xLinks);
117
- xHigh.appendChild(xEdition);
118
- xHigh.appendChild(xAutor);
119
-
120
-
121
- rootDoc.appendChild(xHigh)
122
-
123
- Stats.total++
124
-
125
- }
126
-
127
-
128
- document.querySelector("#stats").innerHTML = `Stats: total = ${Stats.total}, edition = ${Stats.edition}`
129
-
130
- let serializer = new XMLSerializer();
131
- out.innerHTML = serializer.serializeToString(xDoc);
132
- }
133
-
134
-
135
- function ProcessPasted(content){
136
-
137
- navigator.clipboard.read(["text/html"])
138
- .then( async (content) => {
139
-
140
- console.log("content", content[0].types)
141
-
142
- let contentTypes = content[0].types;
143
- let plainText = await (await content[0].getType("text/plain")).text();
144
- let html = null;
145
-
146
- if(contentTypes.includes("text/html")){
147
- htmlContent = await content[0].getType("text/html");
148
- console.log("html:", htmlContent);
149
-
150
- html = await htmlContent.text();
151
-
152
- CURRENT_CONTENT = {
153
- html: await htmlContent.text()
154
- ,text: plainText
155
- }
156
-
157
- } else {
158
- console.log("NotContainsHtml");
159
- }
160
-
161
-
162
- CURRENT_CONTENT = {
163
- html
164
- ,text: plainText
165
- }
166
-
167
- setTimeout(ProcessPastedMessage, 100)
168
-
169
- })
170
-
171
- return false;
172
- }
173
-
174
- addEventListener("paste", ProcessPasted);
175
-
176
-
177
- </script>
178
- <style>
179
- textarea {
180
- width: 100%;
181
- }
182
-
183
- .container {
184
- display: flex;
185
- flex-direction: row;
186
- }
187
-
188
- .container > div {
189
- width: 50%;
190
- height: 70vh;
191
- padding: 5px;
192
- }
193
-
194
- .container textarea {
195
- height: 100%;
196
- }
197
- </style>
198
-
199
- </head>
200
- <body>
201
- <div>
202
- <p id="stats"></p>
203
- </div>
204
- <div class="container">
205
- <div>
206
- <p>Content</p>
207
- <textarea></textarea>
208
- </div>
209
-
210
- <div>
211
- <p>XML</p>
212
- <textarea readonly id="result"></textarea>
213
- </div>
214
- </div>
215
-
216
-
217
-
218
-
219
- </body>
 
 
 
 
 
 
 
 
 
 
 
220
  </html>
 
1
+ <html>
2
+ <head>
3
+
4
+ <script type="text/javascript">
5
+ let CURRENT_CONTENT;
6
+ let parser = new DOMParser()
7
+
8
+ function FormatDiscordMessage(html){
9
+ let dom = parser.parseFromString(html, "text/html");
10
+ CURRENT_CONTENT.dom = dom;
11
+
12
+ let allChilds = dom.querySelectorAll("body > *");
13
+
14
+ let FullContent = [];
15
+
16
+ let buffContent = []
17
+ let buffLinks = []
18
+ let authorName= [];
19
+ let edition;
20
+
21
+ let flushContent = function(){
22
+ console.log("line break found!");
23
+
24
+ let AllText = buffContent.join("").trim();
25
+ let links = buffLinks.join(",");
26
+
27
+ if(!links)
28
+ return;
29
+
30
+ FullContent.push({
31
+ text: AllText
32
+ ,links
33
+ ,authors: authorName.join(",")
34
+ ,edition
35
+ });
36
+
37
+
38
+
39
+ buffContent = []
40
+ buffLinks = [];
41
+ authorName = []
42
+ }
43
+
44
+ for(let c of allChilds){
45
+ let text;
46
+
47
+ if(c.tagName == "A")
48
+ buffLinks.push(c.href);
49
+
50
+
51
+ if(c.tagName.at(0) == 'H'){
52
+
53
+ let editionMatch = c.textContent.match(/#\d+/g);
54
+
55
+ if(editionMatch){
56
+ edition = parseInt( editionMatch[0].replace('#',''))
57
+ }
58
+
59
+ continue;
60
+ }
61
+
62
+ if(c.classList.contains("mention"))
63
+ authorName.push(c.textContent);
64
+
65
+ text = c.textContent;
66
+
67
+ if(text)
68
+ buffContent.push(text);
69
+
70
+ if(/\!?\s*\n+/.test(text)){
71
+ flushContent();
72
+ }
73
+ }
74
+
75
+
76
+ // last
77
+ if(buffContent){
78
+ flushContent();
79
+ }
80
+
81
+
82
+ return { dom, content: FullContent };
83
+ }
84
+
85
+ function ProcessPastedMessage(){
86
+ let res = FormatDiscordMessage(CURRENT_CONTENT.html)
87
+
88
+ CURRENT_CONTENT.result = res;
89
+
90
+ let out = document.querySelector("#result");
91
+
92
+ let xDoc = document.implementation.createDocument(null, "highlights");
93
+ let rootDoc = xDoc.querySelector("highlights");
94
+
95
+ let Stats = {
96
+ total: 0
97
+ ,edition: null
98
+ };
99
+
100
+ Stats.edition = res.content[0].edition;
101
+
102
+ for(let [i,high] of res.content.entries()){
103
+ let xHigh = xDoc.createElement("highlight");
104
+
105
+ let xAutor = xDoc.createElement("author");
106
+ let xLinks = xDoc.createElement("links");
107
+ let xEdition = xDoc.createElement("edition");
108
+ let xText = xDoc.createElement("text");
109
+
110
+ xAutor.textContent = high.authors
111
+ xLinks.textContent = high.links
112
+ xEdition.textContent = high.edition
113
+ xText.textContent = high.text
114
+
115
+ xHigh.appendChild(xText);
116
+ xHigh.appendChild(xLinks);
117
+ xHigh.appendChild(xEdition);
118
+ xHigh.appendChild(xAutor);
119
+
120
+
121
+ rootDoc.appendChild(xHigh)
122
+
123
+ Stats.total++
124
+
125
+ }
126
+
127
+
128
+ document.querySelector("#stats").innerHTML = `Stats: total = ${Stats.total}, edition = ${Stats.edition}`
129
+
130
+ let serializer = new XMLSerializer();
131
+ out.innerHTML = serializer.serializeToString(xDoc);
132
+ }
133
+
134
+
135
+ function ProcessPasted(content){
136
+
137
+ navigator.clipboard.read(["text/html"])
138
+ .then( async (content) => {
139
+
140
+ console.log("content", content[0].types)
141
+
142
+ let contentTypes = content[0].types;
143
+ let plainText = await (await content[0].getType("text/plain")).text();
144
+ let html = null;
145
+
146
+ if(contentTypes.includes("text/html")){
147
+ htmlContent = await content[0].getType("text/html");
148
+ console.log("html:", htmlContent);
149
+
150
+ html = await htmlContent.text();
151
+
152
+ CURRENT_CONTENT = {
153
+ html: await htmlContent.text()
154
+ ,text: plainText
155
+ }
156
+
157
+ } else {
158
+ console.log("NotContainsHtml");
159
+ }
160
+
161
+
162
+ CURRENT_CONTENT = {
163
+ html
164
+ ,text: plainText
165
+ }
166
+
167
+ setTimeout(ProcessPastedMessage, 100)
168
+
169
+ })
170
+
171
+ return false;
172
+ }
173
+
174
+ addEventListener("paste", ProcessPasted);
175
+
176
+
177
+ </script>
178
+ <style>
179
+ textarea {
180
+ width: 100%;
181
+ }
182
+
183
+ .container {
184
+ display: flex;
185
+ flex-direction: row;
186
+ }
187
+
188
+ .container > div {
189
+ width: 50%;
190
+ height: 70vh;
191
+ padding: 5px;
192
+ }
193
+
194
+ .container textarea {
195
+ height: 100%;
196
+ }
197
+ </style>
198
+
199
+ </head>
200
+ <body>
201
+ <p>This is a simple parser for Community Highlights, which are posted weekly on the Hugging Face Discord.</p>
202
+ <p>Community Highlights contain valuable information. While Hugging Face doesn't provide an official list (via some API), use this tool to parse them and import them anywhere.</p>
203
+ <p>Use it to transform the content into something easier to parse (for example, to import into a database or a blog).</p>
204
+ <p>Instructions:</p>
205
+ <ol>
206
+ <li>Open Discord in a browser (just opening it in the browser works)</li>
207
+ <li>Go to the desired Community Highlights message. Select the whole message and copy it</li>
208
+ <li>Paste it into the Content field</li>
209
+ <li>The parsed data is then generated in the field beside it, in XML format. That format is easier to import anywhere</li>
210
+ </ol>
211
+ <p>TODO: JSON Support, API import</p>
212
+ <div>
213
+ <p id="stats"></p>
214
+ </div>
215
+ <div class="container">
216
+ <div>
217
+ <p>Content</p>
218
+ <textarea></textarea>
219
+ </div>
220
+
221
+ <div>
222
+ <p>XML</p>
223
+ <textarea readonly id="result"></textarea>
224
+ </div>
225
+ </div>
226
+
227
+
228
+
229
+
230
+ </body>
231
  </html>