% Copyright (c) 2005 Jonathan Fine
% License: GPL version 2 or (at your option) any later version.
% $Source: /cvsroot/pytex/pytex/tex2tok/_tex2tok.tex,v $
%
% usage: '\LEX{story}
%
% This file sets up a format (it ends with \dump).  The format provides
% \LEX, which \inputs a document, reads it one token at a time and
% writes one line per token to the file \jobname.tok :
%   space token                   a line holding a single space
%   begin-group character {       +{
%   end-group character }         -}
%   undefined control sequence    its name, as given by \string
%   any other unexpandable token  the token itself, passed to \write
% An expandable token that is defined is simply executed.

\catcode`|=0 % for document control sequences
\catcode`{=1
\catcode`}=2
\catcode`$=3
\catcode`&=4
\catcode`#=6
\catcode`^=7
\catcode`_=8
\catcode`@=11

% useful constants
\let\bgroup {
\let\egroup }
\chardef\zero 0
\chardef\one 1
% NB the space after the \? call below is the token that \space
% receives; do not remove it
\def\?{\let\space = } \? % define \space
\let\? \undefined % clean up afterwards

\chardef\tokfile 16 % change if you wish

\def\loop
% loop through sequence of tokens
% assign to \value the meaning of the next token,
% without removing that token from the input, then call \loop@A
{%
  \futurelet\value\loop@A
}

\def\loop@continue
% pick up the next token, and continue the loop
% required, for dealing with space and brace characters,
% which cannot be picked up as an undelimited macro argument
% NB the space after the = sign is significant: it is the one optional
% space of \let, so that the token which follows (even a space) is
% the one that gets assigned to \@temp
{%
  \afterassignment\loop
  \let\@temp = }

\def\loop@A
% is the next token unexpandable?
% always, unexpandable tokens are characters
% always, control sequences are expandable (or undefined)
% sometimes, a character may be expandable
% the test: \noexpand\value and \value agree under \ifx
% only when \value is unexpandable
{%
  \expandafter\ifx\noexpand\value\value
    \expandafter\loop@char
  \else
    \expandafter\loop@macro
  \fi
}

\def\loop@macro
% suppress implicit \outer token at end of file
% the \noexpand is applied to the next token in the input, which
% \loop@macro@A then picks up as its argument
{\expandafter\loop@macro@A\noexpand}

\long\def\loop@macro@A #1%
% #1 - expandable token, picked up from input stream
% if the token is undefined, call \process@undefined on it
% and resume the loop
% otherwise execute the token; this macro does not itself resume the
% loop in that case (this is how \endloop ends the loop)
{
  \ifx #1\undefined
    \process@undefined #1%
    \expandafter\loop
  \else
    \expandafter #1%
  \fi
}

\long\def\process@undefined #1%
% #1 - undefined control sequence
% default value - can be redefined
% writes the name of #1 as one line of the token file
{%
  \immediate\write\tokfile{\string #1}%
}

\def\endloop
% when read by \loop, this ends its execution
{}

\def\loop@char
% process next token, which is a character
% might be letter, other, brace or whatever
% afterwards, continue with the loop
%
% some tricky code to deal with common cases at high speed:
% the number read by \ifcase is \one when one of the \ifcat tests
% succeeds, and \zero otherwise
{%
  \ifcase
    % first deal with the common cases, at high speed
    \ifcat a\value \one\fi % it's a letter
    \ifcat ?\value \one\fi % it's punctuation, digit, etc.
    \zero
    \expandafter\loop@char@A % it's something else
  \or
    % case of letter or digit
    \expandafter\loop@char@default
  \fi
}

\def\loop@char@default #1%
% #1 - a character we can pick up in this way
% write it as one line of the token file, and resume the loop
{%
  \immediate\write\tokfile{#1}%
  \loop
}

\def\empty{}

\def\loop@char@A
% next token is a special character (neither letter nor other)
% build in \temp the text to be written for it
%   space token        - a single space (the one after \if11)
%   begin-group brace  - a plus sign followed by the brace
%   end-group brace    - a minus sign followed by the brace
%   anything else      - nothing; \loop@char@default deals with it
% space and braces are removed from the input by \loop@continue
% NB the two braces after \string balance each other when this
% definition is read; at most one of them is used when \temp is built
{%
  \edef\temp
  {%
    \ifx\value\space \if11 \fi
    \else\ifx\value\bgroup +\string{%
    \else\ifx\value\egroup -\string}%
    \fi\fi\fi
  }
  \ifx\temp\empty
    \expandafter\loop@char@default
  \else
    \immediate\write\tokfile{\temp}%
    \expandafter\loop@continue
  \fi
}

\def |LEX #1%
% #1 - name of the document file to be read
% read the file #1 and write its tokens to \jobname.tok, then \end
% the chain of \expandafter makes \input act first, before \next
% and \loop; \next then opens the output file and \loop starts on
% the contents of #1
{%
  \begingroup
    \let|par\undefined % blank lines in input document file
    \let\LEX\undefined % in case it appears in document file
    \catcode`!=12 % restore to normal value
    % the vertical bar is still an escape character while this
    % definition is read, so its character code has to be written
    % with a backslash in front of it; without the backslash the
    % bar and the = sign after it are read as one control symbol
    \catcode`\|=12 % restore to normal value
    \catcode`@=12 % restore to normal value
    \chardef\tokfile 15
    \def\next{\immediate\openout\tokfile \jobname.tok }%
    \expandafter\next
    \expandafter\loop
    \input #1 % space to terminate file name
    \endloop
    \immediate\closeout\tokfile
  \endgroup
  \end
}

\dump