"====================================================================== | | Smalltalk Tokenizer. | | $Revision: 1.7.5$ | $Date: 2000/05/28 16:56:52$ | $Author: pb$ | ======================================================================" "====================================================================== | | Copyright 1988-92, 1994-95, 1999, 2000 Free Software Foundation, Inc. | Written by Steve Byrne. | | This file is part of GNU Smalltalk. | | GNU Smalltalk is free software; you can redistribute it and/or modify it | under the terms of the GNU General Public License as published by the Free | Software Foundation; either version 2, or (at your option) any later version. | | GNU Smalltalk is distributed in the hope that it will be useful, but WITHOUT | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | details. | | You should have received a copy of the GNU General Public License along with | GNU Smalltalk; see the file COPYING. If not, write to the Free Software | Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ======================================================================" Tokenizer subclass: #STTok instanceVariableNames: '' classVariableNames: 'LexMethods LexExtra' poolDictionaries: '' category: 'System-Compiler' ! !STTok class methodsFor: 'initialization'! initialize LexMethods := Array new: 128. LexExtra := Array new: 128. LexMethods atAllPut: #ignoreTok:. LexMethods at: (Character tab asciiValue) put: #whiteTok:. LexMethods at: (Character nl asciiValue) put: #whiteTok:. LexMethods at: (Character newPage asciiValue) put: #whiteTok:. LexMethods at: (Character cr asciiValue) put: #whiteTok:. #( ($ $ whiteTok:) ($! $! binOpTok:) ($" $" commentTok:) ($# $# oneCharTok: STSharpTok) ($$ $$ charLitTok:) ($% $& binOpTok:) ($' $' stringTok:) ($( $( oneCharTok: STOpenParenTok) ($) $) oneCharTok: STCloseParenTok) ($* $- binOpTok:) ($. $. oneCharTok: STDotTok) ($/ $/ binOpTok:) ($0 $9 numberTok:) ($: $: colonTok:) ($; $; oneCharTok: STSemiTok) ($< $@ binOpTok:) ($A $Z idTok:) ($[ $[ oneCharTok: STOpenBracketTok) ($\ $\ binOpTok:) ($] $] oneCharTok: STCloseBracketTok) ($^ $^ oneCharTok: STUpArrowTok) ($_ $_ oneCharTok: STAssignTok) ($a $z idTok:) ($| $| binOpTok:) ($~ $~ binOpTok:) ) do: [ :range | self initRange: range ]. ! initRange: aRange | method | method := aRange at: 3. (aRange at: 1) asciiValue to: (aRange at: 2) asciiValue do: [ :ch | LexMethods at: ch put: method. aRange size = 4 ifTrue: [ LexExtra at: ch put: (aRange at: 4) ] ] ! ! !STTok methodsFor: 'token parsing'! atEndToken ^STBangTok new: self line ! tokenize: ch ^self perform: (LexMethods at: ch asciiValue) with: ch ! whiteTok: aChar "Gobble the char and return nothing" ^nil ! colonTok: aChar | ch | ch := self peekChar. ch == $= ifTrue: [ self nextChar. ^STAssignTok new: self line ] ifFalse: [ ^STColonTok new: self line ] ! charLitTok: aChar "Called with aChar == $$" ^STLiteralTok new: self line value: self nextChar ! idTok: aChar self putBack: aChar. ^self parseIdent ! commentTok: aChar | bs ch | bs := WriteStream on: (String new: 50). [ ch := self nextChar. ch == $" ifTrue: [ self peekChar == $" ifTrue: [ self nextChar. bs nextPut: $" ] ifFalse: [ ^STCommentTok new: self line value: bs contents ] ]. bs nextPut: ch. ] repeat ! stringTok: aChar | bs ch | bs := WriteStream on: (String new: 10). [ ch := self nextChar. ch == $' ifTrue: [ self peekChar == $' ifTrue: [ self nextChar ] ifFalse: [ ^STStringTok new: self line value: bs contents ] ]. bs nextPut: ch. ] repeat ! oneCharTok: aChar ^(Smalltalk at: (LexExtra at: aChar value) asSymbol) new: line ! binOpTok: aChar | bs ch cont | bs := WriteStream on: (String new: 2). ch := self peekChar. aChar == $< ifTrue: [ ch == $p ifTrue: [ (self primitiveTok: ch) ifTrue: [ ^STPrimitiveStartTok new: self line ] ] ]. bs nextPut: aChar. (self isSpecial: ch) ifTrue: [ bs nextPut: self nextChar ] ifFalse: [ ((aChar == $-) and: [ ch isDigit ]) ifTrue: [ ^self numberTok: aChar ] ]. cont := bs contents. cont = '!' ifTrue: [ ^STBangTok new: self line ]. cont = '|' ifTrue: [ ^STVerticalBarTok new: self line ]. ^STBinopTok new: self line value: cont ! numberTok: aChar | mantissaParsed isNegative dotSeen base exponent scale ch num | mantissaParsed := isNegative := dotSeen := false. base := 10. exponent := 0. scale := 0. ch := aChar. self putBack: ch. "written with peeking in mind" ch == $- ifFalse: [ "could be radix" num := self parseDigits: ch base: 10. ch := self peekChar. ch == $r ifTrue: [ base := num truncated. self nextChar. "skip over 'r'" ch := self peekChar ] ifFalse: [ mantissaParsed := true ] ]. " here we've either a) parsed base, an 'r' and are sitting on the following character b) parsed the integer part of the mantissa, and are sitting on the char following it, or c) parsed nothing and are sitting on a - sign. " ch == $- ifTrue: [ mantissaParsed ifTrue: [ ^self error: 'malformed number' ]. isNegative := true. self nextChar. "skip '-'" ch := self peekChar ]. (self isDigit: ch base: base) ifTrue: [ num := self parseDigits: ch base: base. mantissaParsed := true ]. ch := self peekChar. ch == $. ifTrue: [ mantissaParsed ifFalse: [ ^self error: 'malformed number' ]. self nextChar. ch := self peekChar. ch isDigit ifTrue: [ dotSeen := true. self parseFraction: ch num: num asFloat "for speed" base: base asFloat return: [ :n :s | num := n. scale := s ] ] ifFalse: [ self putBack: $.. ch := $. ] ]. (ch == $e or: [ ch == $d or: [ ch == $q ]]) ifTrue: [ self nextChar. "skip over 'e'" ch := self peekChar. ch == $- ifTrue: [ self nextChar. exponent := self parseDigits: self peekChar base: 10. exponent := exponent negated ] ifFalse: [ exponent := self parseDigits: ch base: 10 ] ]. dotSeen ifTrue: [ num := num / scale ]. exponent ~= 0 ifTrue: [ num := num * (base raisedToInteger: exponent) ]. isNegative ifTrue: [ num := num negated ]. ^STLiteralTok new: self line value: num ! ! !STTok methodsFor: 'utility methods'! isSpecial: ch "SBB -- removed bang 25-Apr-93 02:09:55 ; was causing bin op lexer to get greedy with !!" ^'%&*+,-/<=>?@\|~' includes: ch ! parseDigits: ch base: base | c num | c := ch. num := 0. [ c notNil and: [ self isDigit: c base: base ] ] whileTrue: [ num := num * base + c digitValue. self nextChar. c := self peekChar ]. ^num ! parseFraction: ch num: num base: base return: aBlock | c scale result | c := ch. scale := 1.0. result := num. [ c notNil and: [ self isDigit: c base: base ] ] whileTrue: [ result := result * base + c digitValue asFloat. self nextChar. c := self peekChar. scale := scale * base. ]. aBlock value: result value: scale ! isIdentChar: aChar last: lastChar aChar isNil ifTrue: [ ^false ]. aChar isLetter ifTrue: [ ^true ]. aChar isDigit ifTrue: [ ^true ]. aChar == $: ifTrue: [ ^lastChar ~~ aChar ]. ^false ! isDigit: aChar base: base aChar isNil ifTrue: [ ^false ]. base <= 10 ifTrue: [ aChar isDigit ifFalse: [ ^false ]. ^(aChar value - $0 value) < base ]. ^aChar isUppercase ifTrue: [ (aChar value - $A value) < (base - 10) ] ifFalse: [ aChar isDigit ] ! ! !STTok methodsFor: 'private'! parseIdent | s colonsSeen ch lastChar id | s := WriteStream on: (String new: 1). colonsSeen := 0. lastChar := nil. [ ch := self peekChar. self isIdentChar: ch last: lastChar ] whileTrue: [ s nextPut: ch. lastChar := ch. (ch == $:) ifTrue: [ colonsSeen := colonsSeen + 1 ]. self nextChar ]. id := s contents. colonsSeen > 0 ifTrue: [ lastChar == $: ifFalse: [ ^self error: 'Bad keyword identifier' ]. colonsSeen == 1 ifTrue: [ ^STKeywordTok new: self line value: id ] ifFalse: [ ^STSymbolKeywordTok new: self line value: id ] ] ifFalse: [ ^STIdentifierTok new: self line value: id ]. ! primitiveTok: aChar | id | id := self idTok: self nextChar. ^id value = 'primitive:' ! ! STTok initialize! " | f tok | f := STTok on: 'builtins.st'. [ tok := f next. tok notNil ] whileTrue"": [ tok inspect ]"" ! | fileNameStream fileNames f tok | fileNameStream := FileStream popen: 'ls examples/[A-Z]*.st' dir: 'r'. fileNames := TokenStream onStream: fileNameStream. fileNames do: [ :name | Transcript nextPutAll: 'Processing '; nextPutAll: name; nl. f := STTok on: name. [ tok := f next. tok notNil ] whileTrue. f close ] ! "