Minsky
latexMarkup.cc
Go to the documentation of this file.
1 /*
2  @copyright Steve Keen 2013
3  @author Russell Standish
4  This file is part of Minsky.
5 
6  Minsky is free software: you can redistribute it and/or modify it
7  under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  Minsky is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with Minsky. If not, see <http://www.gnu.org/licenses/>.
18 */
19 
20 #include "latexMarkup.h"
21 #include <map>
22 #include <vector>
23 using namespace std;
24 
25 #include <ctype.h>
26 #include <string.h>
27 
28 namespace
29 {
// One entry of the LaTeX symbol table (a POD stand-in for pair<string, string>).
// first:  LaTeX token as it appears after the leading backslash
// second: UTF-8 replacement text appended to the generated markup
struct Symbol
{
  const char* first;
  const char* second;
};

// Static LaTeX token -> replacement table. Accent entries are keyed by
// the accent character followed by either the bare letter or the letter
// in braces, matching the two token shapes the tokeniser produces.
Symbol symbolData[]={
    // characters that stand for themselves when escaped
    {"#","#"},
    {"$","$"},
    {"%","%"},
    {"&","&amp;"},
    {"_","_"},
    {"{","{"},
    {"}","}"},
    {"euro","€"},
    // lower case Greek
    {"alpha","α"},
    {"beta","β"},
    {"gamma","γ"},
    {"delta","δ"},
    {"epsilon","ϵ"}, // lunate epsilon, distinct from the set membership sign used for "in"
    {"varepsilon","ε"},
    {"zeta","ζ"},
    {"eta","η"},
    {"theta","θ"},
    {"iota","ι"},
    {"kappa","κ"},
    {"lambda","λ"},
    {"mu","μ"},
    {"nu","ν"},
    {"xi","ξ"},
    {"pi","π"},
    {"varpi","ϖ"},
    {"rho","ρ"},
    {"varrho","ϱ"},
    {"sigma","σ"},
    {"varsigma","ς"},
    {"tau","τ"},
    {"upsilon","υ"},
    {"phi","φ"},
    {"varphi","ϕ"},
    {"chi","χ"},
    {"psi","ψ"},
    {"omega","ω"},
    // upper case Greek
    {"Gamma","Γ"},
    {"Delta","Δ"},
    {"Theta","Θ"},
    {"Lambda","Λ"},
    {"Xi","Ξ"},
    {"Pi","Π"},
    {"Sigma","Σ"},
    {"Upsilon","Υ"},
    {"Phi","Φ"},
    {"Psi","Ψ"},
    {"Omega","Ω"},
    // text symbols
    {"dag","†"},
    {"ddag","‡"},
    {"S","§"},
    {"P","¶"},
    {"copyright","©"},
    {"pounds","£"},
    {"ldots","…"},
    {"cdots","⋯"},
    {"vdots","⋮"},
    {"ddots","⋱"},
    // binary operators
    {"pm","±"},
    {"mp","∓"},
    {"times","×"},
    {"div","÷"},
    {"ast","∗"},
    {"star","⋆"},
    {"circ","∘"},
    {"bullet","•"},
    {"cdot","·"},
    {"cap","∩"},
    {"cup","∪"},
    {"uplus","⊎"},
    {"sqcap","⊓"},
    {"sqcup","⊔"},
    {"vee","∨"},
    {"wedge","∧"},
    {"setminus","∖"},
    {"wr","≀"},
    {"diamond","⋄"},
    {"bigtriangleup","△"},
    {"bigtriangledown","▽"},
    {"triangleleft","◃"},
    {"triangleright","▹"},
    {"lhd","◁"},
    {"rhd","▷"},
    {"oplus","⊕"},
    {"ominus","⊖"},
    {"otimes","⊗"},
    {"oslash","⊘"},
    {"odot","⊙"},
    {"bigcirc","◯"},
    {"dagger","†"},
    {"ddagger","‡"},
    {"amalg","∐"},
    // relations
    {"leq","≤"},
    {"prec","≺"},
    {"preceq","≼"},
    {"ll","≪"},
    {"subset","⊂"},
    {"subseteq","⊆"},
    {"sqsubset","⊏"},
    {"sqsubseteq","⊑"},
    {"in","∈"},
    {"vdash","⊢"},
    {"geq","≥"},
    {"succ","≻"},
    {"succeq","≽"},
    {"gg","≫"},
    {"supset","⊃"},
    {"supseteq","⊇"},
    {"sqsupset","⊐"},
    {"sqsupseteq","⊒"},
    {"ni","∋"},
    {"dashv","⊣"},
    {"equiv","≡"},
    {"sim","∼"},
    {"simeq","≃"},
    {"asymp","≍"},
    {"approx","≈"},
    {"cong","≅"},
    {"neq","≠"},
    {"doteq","≐"},
    {"notin","∉"},
    {"models","⊧"},
    {"perp","⊥"},
    {"mid","∣"},
    {"parallel","∥"},
    {"bowtie","⋈"},
    {"Join","⋈"},
    {"smile","⌣"},
    {"frown","⌢"},
    {"propto","∝"},
    // arrows and delimiters
    {"leftarrow","←"},
    {"Leftarrow","⇐"},
    {"rightarrow","→"},
    {"Rightarrow","⇒"},
    {"leftrightarrow","↔"},
    {"Leftrightarrow","⇔"},
    {"mapsto","↦"},
    {"hookleftarrow","↩"},
    {"leftharpoonup","↼"},
    {"leftharpoondown","↽"},
    {"rightleftharpoons","⇌"},
    {"longleftarrow","←"},
    {"Longleftarrow","⇐"},
    {"longrightarrow","→"},
    {"Longrightarrow","⇒"},
    {"longleftrightarrow","↔"},
    {"Longleftrightarrow","⇔"},
    {"lfloor","⌊"},
    {"rfloor","⌋"},
    {"lceil","⌈"},
    {"rceil","⌉"},
    {"langle","⟨"},
    {"rangle","⟩"},
    {"longmapsto","↦"},
    {"hookrightarrow","↪"},
    {"rightharpoonup","⇀"},
    {"rightharpoondown","⇁"},
    {"leadsto","↝"},
    {"uparrow","↑"},
    {"Uparrow","⇑"},
    {"downarrow","↓"},
    {"Downarrow","⇓"},
    {"updownarrow","↕"},
    {"Updownarrow","⇕"},
    {"nearrow","↗"},
    {"searrow","↘"},
    {"swarrow","↙"},
    {"nwarrow","↖"},
    // miscellaneous symbols
    {"aleph","ℵ"},
    {"imath","ı"},
    {"ell","ℓ"},
    {"wp","℘"},
    {"Re","ℜ"},
    {"Im","ℑ"},
    {"mho","℧"},
    {"prime","′"},
    {"emptyset","∅"},
    {"nabla","∇"},
    {"surd","√"},
    {"top","⊤"},
    {"bot","⊥"},
    {"angle","∠"},
    {"forall","∀"},
    {"exists","∃"},
    {"neg","¬"},
    {"flat","♭"},
    {"natural","♮"},
    {"sharp","♯"},
    {"backslash","\\"},
    {"partial","∂"},
    {"infty","∞"},
    {"Box","□"},
    {"Diamond","◇"},
    {"triangle","▵"},
    {"clubsuit","♣"},
    {"diamondsuit","♢"},
    {"heartsuit","♥"},
    {"spadesuit","♠"},
    // large operators
    {"sum","∑"},
    {"prod","∏"},
    {"coprod","∐"},
    {"int","∫"},
    {"oint","∮"},
    {"bigcap","⋂"},
    {"bigcup","⋃"},
    {"bigvee","⋁"},
    {"bigwedge","⋀"},
    // grave accent
    {"`a","à"},
    {"`{a}","à"},
    {"`A","À"},
    {"`{A}","À"},
    {"`e","è"},
    {"`{e}","è"},
    {"`E","È"},
    {"`{E}","È"},
    {"`i","ì"},
    {"`{i}","ì"},
    {"`I","Ì"},
    {"`{I}","Ì"},
    {"`n","ǹ"},
    {"`{n}","ǹ"},
    {"`N","Ǹ"},
    {"`{N}","Ǹ"},
    {"`o","ò"},
    {"`{o}","ò"},
    {"`O","Ò"},
    {"`{O}","Ò"},
    {"`u","ù"},
    {"`{u}","ù"},
    {"`U","Ù"},
    {"`{U}","Ù"},
    {"`w","ẁ"},
    {"`{w}","ẁ"},
    {"`W","Ẁ"},
    {"`{W}","Ẁ"},
    {"`y","ỳ"},
    {"`{y}","ỳ"},
    {"`Y","Ỳ"},
    {"`{Y}","Ỳ"},
    // acute accent
    {"'a","á"},
    {"'{a}","á"},
    {"'A","Á"},
    {"'{A}","Á"},
    {"'c","ć"},
    {"'{c}","ć"},
    {"'C","Ć"},
    {"'{C}","Ć"},
    {"'e","é"},
    {"'{e}","é"},
    {"'E","É"},
    {"'{E}","É"},
    {"'g","ǵ"},
    {"'{g}","ǵ"},
    {"'G","Ǵ"},
    {"'{G}","Ǵ"},
    {"'i","í"},
    {"'{i}","í"},
    {"'I","Í"},
    {"'{I}","Í"},
    {"'k","ḱ"},
    {"'{k}","ḱ"},
    {"'K","Ḱ"},
    {"'{K}","Ḱ"},
    {"'l","ĺ"},
    {"'{l}","ĺ"},
    {"'L","Ĺ"},
    {"'{L}","Ĺ"},
    {"'m","ḿ"},
    {"'{m}","ḿ"},
    {"'M","Ḿ"},
    {"'{M}","Ḿ"},
    {"'n","ń"},
    {"'{n}","ń"},
    {"'N","Ń"},
    {"'{N}","Ń"},
    {"'o","ó"},
    {"'{o}","ó"},
    {"'O","Ó"},
    {"'{O}","Ó"},
    {"'p","ṕ"},
    {"'{p}","ṕ"},
    {"'P","Ṕ"},
    {"'{P}","Ṕ"},
    {"'r","ŕ"},
    {"'{r}","ŕ"},
    {"'R","Ŕ"},
    {"'{R}","Ŕ"},
    {"'s","ś"},
    {"'{s}","ś"},
    {"'S","Ś"},
    {"'{S}","Ś"},
    {"'u","ú"},
    {"'{u}","ú"},
    {"'U","Ú"},
    {"'{U}","Ú"},
    {"'w","ẃ"},
    {"'{w}","ẃ"},
    {"'W","Ẃ"},
    {"'{W}","Ẃ"},
    {"'y","ý"},
    {"'{y}","ý"},
    {"'Y","Ý"},
    {"'{Y}","Ý"},
    {"'z","ź"},
    {"'{z}","ź"},
    {"'Z","Ź"},
    {"'{Z}","Ź"},
    // circumflex
    {"^a","â"},
    {"^{a}","â"},
    {"^A","Â"},
    {"^{A}","Â"},
    {"^c","ĉ"},
    {"^{c}","ĉ"},
    {"^C","Ĉ"},
    {"^{C}","Ĉ"},
    {"^e","ê"},
    {"^{e}","ê"},
    {"^E","Ê"},
    {"^{E}","Ê"},
    {"^g","ĝ"},
    {"^{g}","ĝ"},
    {"^G","Ĝ"},
    {"^{G}","Ĝ"},
    {"^h","ĥ"},
    {"^{h}","ĥ"},
    {"^H","Ĥ"},
    {"^{H}","Ĥ"},
    {"^i","î"},
    {"^{i}","î"},
    {"^I","Î"},
    {"^{I}","Î"},
    {"^j","ĵ"},
    {"^{j}","ĵ"},
    {"^J","Ĵ"},
    {"^{J}","Ĵ"},
    {"^o","ô"},
    {"^{o}","ô"},
    {"^O","Ô"},
    {"^{O}","Ô"},
    {"^s","ŝ"},
    {"^{s}","ŝ"},
    {"^S","Ŝ"},
    {"^{S}","Ŝ"},
    {"^u","û"},
    {"^{u}","û"},
    {"^U","Û"},
    {"^{U}","Û"},
    {"^w","ŵ"},
    {"^{w}","ŵ"},
    {"^W","Ŵ"},
    {"^{W}","Ŵ"},
    {"^y","ŷ"},
    {"^{y}","ŷ"},
    {"^Y","Ŷ"},
    {"^{Y}","Ŷ"},
    {"^z","ẑ"},
    {"^{z}","ẑ"},
    {"^Z","Ẑ"},
    {"^{Z}","Ẑ"},
    // diaeresis
    {"\"a","ä"},
    {"\"{a}","ä"},
    {"\"A","Ä"},
    {"\"{A}","Ä"},
    {"\"e","ë"},
    {"\"{e}","ë"},
    {"\"E","Ë"},
    {"\"{E}","Ë"},
    {"\"h","ḧ"},
    {"\"{h}","ḧ"},
    {"\"H","Ḧ"},
    {"\"{H}","Ḧ"},
    {"\"i","ï"},
    {"\"{i}","ï"},
    {"\"I","Ï"},
    {"\"{I}","Ï"},
    {"\"o","ö"},
    {"\"{o}","ö"},
    {"\"O","Ö"},
    {"\"{O}","Ö"},
    {"\"t","ẗ"},
    {"\"{t}","ẗ"},
    {"\"u","ü"},
    {"\"{u}","ü"},
    {"\"U","Ü"},
    {"\"{U}","Ü"},
    {"\"w","ẅ"},
    {"\"{w}","ẅ"},
    {"\"W","Ẅ"},
    {"\"{W}","Ẅ"},
    {"\"x","ẍ"},
    {"\"{x}","ẍ"},
    {"\"X","Ẍ"},
    {"\"{X}","Ẍ"},
    {"\"y","ÿ"},
    {"\"{y}","ÿ"},
    {"\"Y","Ÿ"},
    {"\"{Y}","Ÿ"},
    // tilde
    {"~a","ã"},
    {"~{a}","ã"},
    {"~A","Ã"},
    {"~{A}","Ã"},
    {"~e","ẽ"},
    {"~{e}","ẽ"},
    {"~E","Ẽ"},
    {"~{E}","Ẽ"},
    {"~i","ĩ"},
    {"~{i}","ĩ"},
    {"~I","Ĩ"},
    {"~{I}","Ĩ"},
    {"~n","ñ"},
    {"~{n}","ñ"},
    {"~N","Ñ"},
    {"~{N}","Ñ"},
    {"~o","õ"},
    {"~{o}","õ"},
    {"~O","Õ"},
    {"~{O}","Õ"},
    {"~u","ũ"},
    {"~{u}","ũ"},
    {"~U","Ũ"},
    {"~{U}","Ũ"},
    {"~v","ṽ"},
    {"~{v}","ṽ"},
    {"~V","Ṽ"},
    {"~{V}","Ṽ"},
    {"~y","ỹ"},
    {"~{y}","ỹ"},
    {"~Y","Ỹ"},
    {"~{Y}","Ỹ"},
    // macron
    {"=a","ā"},
    {"={a}","ā"},
    {"=A","Ā"},
    {"={A}","Ā"},
    {"=e","ē"},
    {"={e}","ē"},
    {"=E","Ē"},
    {"={E}","Ē"},
    {"=g","ḡ"},
    {"={g}","ḡ"},
    {"=G","Ḡ"},
    {"={G}","Ḡ"},
    {"=i","ī"},
    {"={i}","ī"},
    {"=I","Ī"},
    {"={I}","Ī"},
    {"=o","ō"},
    {"={o}","ō"},
    {"=O","Ō"},
    {"={O}","Ō"},
    {"=u","ū"},
    {"={u}","ū"},
    {"=U","Ū"},
    {"={U}","Ū"},
    {"=y","ȳ"},
    {"={y}","ȳ"},
    {"=Y","Ȳ"},
    {"={Y}","Ȳ"},
    // dot above
    {".a","ȧ"},
    {".{a}","ȧ"},
    {".A","Ȧ"},
    {".{A}","Ȧ"},
    {".b","ḃ"},
    {".{b}","ḃ"},
    {".B","Ḃ"},
    {".{B}","Ḃ"},
    {".c","ċ"},
    {".{c}","ċ"},
    {".C","Ċ"},
    {".{C}","Ċ"},
    {".d","ḋ"},
    {".{d}","ḋ"},
    {".D","Ḋ"},
    {".{D}","Ḋ"},
    {".e","ė"},
    {".{e}","ė"},
    {".E","Ė"},
    {".{E}","Ė"},
    {".f","ḟ"},
    {".{f}","ḟ"},
    {".F","Ḟ"},
    {".{F}","Ḟ"},
    {".g","ġ"},
    {".{g}","ġ"},
    {".G","Ġ"},
    {".{G}","Ġ"},
    {".h","ḣ"},
    {".{h}","ḣ"},
    {".H","Ḣ"},
    {".{H}","Ḣ"},
    {".i","i"},
    {".{i}","i"},
    {".I","İ"},
    {".{I}","İ"},
    {".m","ṁ"},
    {".{m}","ṁ"},
    {".M","Ṁ"},
    {".{M}","Ṁ"},
    {".n","ṅ"},
    {".{n}","ṅ"},
    {".N","Ṅ"},
    {".{N}","Ṅ"},
    {".o","ȯ"},
    {".{o}","ȯ"},
    {".O","Ȯ"},
    {".{O}","Ȯ"},
    {".p","ṗ"},
    {".{p}","ṗ"},
    {".P","Ṗ"},
    {".{P}","Ṗ"},
    {".r","ṙ"},
    {".{r}","ṙ"},
    {".R","Ṙ"},
    {".{R}","Ṙ"},
    {".s","ṡ"},
    {".{s}","ṡ"},
    {".S","Ṡ"},
    {".{S}","Ṡ"},
    {".t","ṫ"},
    {".{t}","ṫ"},
    {".T","Ṫ"},
    {".{T}","Ṫ"},
    {".w","ẇ"},
    {".{w}","ẇ"},
    {".W","Ẇ"},
    {".{W}","Ẇ"},
    {".x","ẋ"},
    {".{x}","ẋ"},
    {".X","Ẋ"},
    {".{X}","Ẋ"},
    {".y","ẏ"},
    {".{y}","ẏ"},
    {".Y","Ẏ"},
    {".{Y}","Ẏ"},
    {".z","ż"},
    {".{z}","ż"},
    {".Z","Ż"},
    {".{Z}","Ż"},
    // breve
    {"u{a}","ă"},
    {"u{A}","Ă"},
    {"u{e}","ĕ"},
    {"u{E}","Ĕ"},
    {"u{g}","ğ"},
    {"u{G}","Ğ"},
    {"u{i}","ĭ"},
    {"u{I}","Ĭ"},
    {"u{o}","ŏ"},
    {"u{O}","Ŏ"},
    {"u{u}","ŭ"},
    {"u{U}","Ŭ"},
    // caron
    {"v{a}","ǎ"},
    {"v{A}","Ǎ"},
    {"v{c}","č"},
    {"v{C}","Č"},
    {"v{d}","ď"},
    {"v{D}","Ď"},
    {"v{e}","ě"},
    {"v{E}","Ě"},
    {"v{g}","ǧ"},
    {"v{G}","Ǧ"},
    {"v{h}","ȟ"},
    {"v{H}","Ȟ"},
    {"v{i}","ǐ"},
    {"v{I}","Ǐ"},
    {"v{j}","ǰ"},
    // no precomposed capital exists: 'J' followed by U+030C COMBINING CARON
    {"v{J}","J\xCC\x8C"},
    {"v{k}","ǩ"},
    {"v{K}","Ǩ"},
    {"v{n}","ň"},
    {"v{N}","Ň"},
    {"v{o}","ǒ"},
    {"v{O}","Ǒ"},
    {"v{r}","ř"},
    {"v{R}","Ř"},
    {"v{s}","š"},
    {"v{S}","Š"},
    {"v{t}","ť"},
    {"v{T}","Ť"},
    {"v{u}","ǔ"},
    {"v{U}","Ǔ"},
    {"v{z}","ž"},
    {"v{Z}","Ž"},
    // double acute
    {"H{u}","ű"},
    {"H{U}","Ű"},
    {"H{o}","ő"},
    {"H{O}","Ő"},
    // cedilla
    {"c{c}","ç"},
    {"c{C}","Ç"},
    {"c{e}","ȩ"},
    {"c{E}","Ȩ"},
    {"c{g}","ģ"},
    {"c{G}","Ģ"},
    {"c{k}","ķ"},
    {"c{K}","Ķ"},
    {"c{l}","ļ"},
    {"c{L}","Ļ"},
    {"c{n}","ņ"},
    {"c{N}","Ņ"},
    {"c{r}","ŗ"},
    {"c{R}","Ŗ"},
    {"c{s}","ş"},
    {"c{S}","Ş"},
    {"c{t}","ţ"},
    {"c{T}","Ţ"},
    // ligatures and special letters
    {"oe","œ"},
    {"OE","Œ"},
    {"ae","æ"},
    {"AE","Æ"},
    {"aa","å"},
    {"AA","Å"},
    {"o","ø"},
    {"O","Ø"},
    {"l","ł"},
    {"L","Ł"},
    {"ss","ß"},
    {"th","þ"},
    {"TH","Þ"},
    {"dh","ð"},
    {"DH","Ð"}
};
653 
654  map<string,string> populateSymbols(Symbol symbs[], size_t nSyms)
655  {
656  map<string,string> r;
657  for (Symbol* s=symbs; s<symbs+nSyms; ++s)
658  r[s->first]=s->second;
659  return r;
660  }
661 
662  map<string,string> latexSymbols=populateSymbols
663  (symbolData, sizeof(symbolData)/sizeof(symbolData[0]));
664 
// Extracts a LaTeX token from input, which points to the character
// after the '\' lead-in character. On return, input refers to the
// first character following the token.
//
// Token shapes recognised, in order:
//  - an accent character followed by one unbraced character ("'e")
//  - an accent character or accent letter with a braced argument ("v{c}");
//    if the closing brace is missing, nothing is consumed and an empty
//    token is returned
//  - one of the characters #$%&_{} standing for itself
//  - a run of alphabetic characters, after which a single whitespace
//    character (if present) is swallowed
// An exhausted input yields an empty token and is never advanced past
// its terminator.
string parseLaTeXSym(const char*& input)
{
  string r;
  // strchr() treats the terminating NUL as part of the set being
  // searched, so an empty input would otherwise match every test below
  if (*input=='\0') return r;

  const bool braced = input[1]=='{';

  // treat some accented characters as a single symbol
  if (!braced && strchr("`'^\"~=.",*input))
    {
      // a trailing accent has no character to combine with; take it alone
      const size_t len = input[1]=='\0'? 1: 2;
      r.assign(input,len);
      input+=len;
      return r;
    }

  // accents with braced arguments
  if (braced && strchr("`'^\"~=.uvHtcdb",*input))
    {
      const char* end=strchr(input, '}');
      if (end!=NULL)
        {
          r.assign(input,end-input+1);
          input=end+1;
        }
      return r;
    }

  // symbols that stand for themselves
  if (strchr("#$%&_{}",*input))
    return string(input++,1);

  // normal LaTeX token processing. The casts keep bytes >= 0x80 (UTF-8
  // sequences) within the domain of the <ctype.h> classifiers.
  while (isalpha(static_cast<unsigned char>(*input)))
    r+=*input++;
  // if the next character is a whitespace, swallow the character
  if (isspace(static_cast<unsigned char>(*input))) ++input;
  return r;
}
702 
703  // return multibyte string corresponding to a single UTF8 character,
704  // and advance input buffer
705  string utf8char(const char*& input)
706  {
707  string r;
708  const char lead=*input;
709  if ((lead&0xC0)==0xC0) r+=*input++; // multibyte byte sequence
710  if ((lead&0xE0)==0xE0) r+=*input++; // 3 or more
711  if ((lead&0xF8)==0xF0) r+=*input++; // 4 bytes
712  if ((lead&0xC0)!=0xC0) // defang any single byte chars
713  r+=minsky::defang(*input++);
714  else
715  r+=*input++;
716  return r;
717  }
718 
719  // stucture to represent the returned string as it is being built
720  struct Result: public string
721  {
722  vector<string> stack;
723  vector<bool> popMore; // if true, then pop will pop the next item of stack
724 
725  Result(const string& x=""): string(x) {}
726 
727  void push_back(const string& x,bool popAgain=false)
728  {
729  stack.push_back(x);
730  popMore.push_back(popAgain);
731  }
732 
733  // pushes x onto the stack iff the next character is a brace,
734  // otherwise constructs the markup pair
735  void push(const string& x, const char*& input)
736  {
737  if (x=="{")
738  {
739  push_back(x); // preserve bare braced pairs
740  return;
741  }
742  if (*input=='{')
743  if (x=="rm") // fake an rm tag by deitalicising
744  {
745  input++;
746  }
747  else
748  {
749  if (!x.empty())
750  *this+="<"+x+">";
751  push_back(x);
752  input++;
753  }
754  else if (*input=='\\')
755  {
756  push_back(x,x!="{");
757  if (!x.empty())
758  *this+="<"+x+">";
759  processLaTeX(input);
760  return;
761  }
762  else if (x=="rm")
763  *this+=utf8char(input);
764  else if (!x.empty())
765  *this+=string("<")+x+">"+utf8char(input)+"</"+x+">";
766  }
767 
768  void pop()
769  {
770  if (stack.empty()) return;
771  do
772  {
773  if (!stack.back().empty() && stack.back()!="{")
774  *this+="</"+stack.back()+">";
775  stack.pop_back();
776  popMore.pop_back();
777  } while (!stack.empty() && !popMore.empty() && popMore.back());
778  }
779 
780  void process1arg(const string& tag, const char*& input)
781  {
782  if (*input=='{' || !tag.empty())
783  push(tag, input);
784  else
785  *this+=*input++;
786  }
787 
788  // extract the contents of an optional argument (contained with [])
789  static string parseOpt(const char*& input)
790  {
791  string r;
792  if (*input=='[') ++input;
793  for (; *input!=']' && *input!='\0'; ++input)
794  r+=*input;
795  if (*input==']') ++input;
796  return r;
797  }
798 
799  void processLaTeX(const char*& input)
800  {
801  const string token=parseLaTeXSym(++input);
802  const map<string,string>::const_iterator repl=latexSymbols.find(token);
803  if (repl!=latexSymbols.end())
804  *this+=repl->second;
805  else if (token=="mathit" || token=="mathcal")
806  process1arg("i", input);
807  else if (token=="mathrm" || token=="mathsf")
808  process1arg("rm", input);
809  else if (token=="mathbf")
810  process1arg("b", input);
811  else if (token=="mathtt")
812  process1arg("tt", input);
813  else if (token=="sqrt")
814  {
815  if (*input=='[')
816  {
817  const string index=parseOpt(input);
818  *this+="<small><sup>"+index+"</sup></small>";
819  }
820  *this+=latexSymbols["surd"];
821  process1arg("", input);
822  }
823  else if (token=="verb")
824  {
825  // next character is the delimiter
826  *this+="<tt>";
827  const char delim=*input++;
828  for (; *input!='\0' && *input!=delim; ++input)
829  *this+=minsky::defang(*input);
830  *this+="</tt>";
831  ++input;
832  }
833  else
834  *this+="\\"+token; //unknown token, leave it as is
835  }
836 
837  };
838 }
839 
840 namespace minsky
841 {
// Returns c as a string, with the characters < > & ' " replaced by
// their entity references; every other character is returned as a
// one-character string.
string defang(char c)
{
  if (c=='<') return "&lt;";
  if (c=='>') return "&gt;";
  if (c=='&') return "&amp;";
  if (c=='\'') return "&apos;";
  if (c=='\"') return "&quot;";
  return string(1,c);
}
854 
855  string latexToPangoNonItalicised(const char* input)
856  {
857  if (input[0]=='\0')
858  return ""; // do not wrap with italic environment
859  Result r;
860  while (*input!='\0')
861  switch (*input)
862  {
863  case '\\':
864  r.processLaTeX(input);
865  break;
866  case '_':
867  r.push("sub", ++input);
868  break;
869  case '^':
870  r.push("sup", ++input);
871  break;
872  case '{':
873  r.push("{", ++input);
874  break;
875  case '}':
876  input++;
877  r.pop();
878  break;
879  default:
880  r+=utf8char(input);
881  break;
882  }
883 
884  // take care of mismatched braces
885  while (!r.stack.empty()) r.pop();
886  return r;
887  }
888 
889 }
string defang(char c)
Definition: latexMarkup.cc:842
void push_back(const string &x, bool popAgain=false)
Definition: latexMarkup.cc:727
Definition: input.py:1
string parseLaTeXSym(const char *&input)
Definition: latexMarkup.cc:668
STL namespace.
struct anonymous_namespace{latexMarkup.cc}::Symbol symbolData[]
static string parseOpt(const char *&input)
Definition: latexMarkup.cc:789
Creation and access to the minskyTCL_obj object, which has code to record whenever Minsky's state cha...
Definition: constMap.h:22
map< string, string > populateSymbols(Symbol symbs[], size_t nSyms)
Definition: latexMarkup.cc:654
string utf8char(const char *&input)
Definition: latexMarkup.cc:705
string latexToPangoNonItalicised(const char *input)
Definition: latexMarkup.cc:855
map< string, string > latexSymbols
Definition: latexMarkup.cc:662
void push(const string &x, const char *&input)
Definition: latexMarkup.cc:735
void process1arg(const string &tag, const char *&input)
Definition: latexMarkup.cc:780